├── BayesDiscri
│   ├── BayesDiscri.py
│   ├── example.py
│   └── readme.md
├── CorreCoef
│   ├── CorreCoef.py
│   ├── example.py
│   └── readme.md
├── DecisionTree
│   ├── DecisionTree.py
│   ├── RandomForest.py
│   └── readme.md
├── Distance
│   ├── Distance.py
│   ├── example.py
│   └── readme.md
├── DistanceDiscri
│   ├── DistanceDiscri.py
│   ├── example
│   │   ├── DistanceDiscri.py
│   │   ├── example.py
│   │   ├── readme.md
│   │   ├── readme.pdf
│   │   ├── test.csv
│   │   └── train.csv
│   └── readme.md
├── FacAnaly
│   ├── FacAnaly.py
│   ├── data.csv
│   ├── example.py
│   ├── pics
│   │   ├── QQ截图20190417005321.png
│   │   ├── QQ截图20190417005816.png
│   │   ├── QQ截图20190417095029.png
│   │   ├── QQ截图20190417095125.png
│   │   ├── QQ截图20190417101717.png
│   │   ├── QQ截图20190417101728.png
│   │   └── readme.md
│   └── readme.md
├── FisherDiscri
│   ├── FisherDiscri.py
│   └── readme.md
├── GeneralStats
│   ├── GeneralStats.py
│   ├── example.py
│   └── readme.md
├── Kmeans
│   ├── Kmeans.py
│   ├── example.py
│   └── readme.md
├── LICENSE
├── LinearRegre
│   └── LinearRegre.py
├── PCA
│   ├── PCA.py
│   ├── example.py
│   └── readme.md
├── README.md
├── SCM
│   ├── SCM.py
│   ├── example.py
│   ├── example2.py
│   ├── readme.md
│   └── 图片1.png
└── VarAnaly
    ├── VarAnaly.py
    ├── example.py
    └── readme.md

--------------------------------------------------------------------------------
/BayesDiscri/BayesDiscri.py:
--------------------------------------------------------------------------------
import numpy as np

class BayesDiscri:

    def __init__(self):
        '''
        :__init__: initialize the BayesDiscri class
        '''
        self.varipro=[]    # conditional probabilities of each feature value xk given each class yi
        self.priorpro={}   # prior probability of each class yi
        self.respro=[]     # probability of each test-set sample vector belonging to each class

    def train(self, data, rowvar=False):
        '''
        :train: train on the training set
        :param data: training-set matrix; its elements may be numbers or strings representing feature values, and its last row or last column holds the class labels; the matrix should contain at least two samples and two features
        :type data: np.array
        :param rowvar: specifies whether each row or each column represents a variable; rowvar=True means each row is a variable and each column a sample vector, rowvar=False means each column is a variable and each row a sample vector; the default is rowvar=False
        :type rowvar: bool
        '''
        # 1. Normalize the training matrix to the rowvar=False layout, i.e. each row is a sample vector
        if rowvar==True:
            data=data.T

        # 2. Compute the prior probability of each class yi; the last column holds the labels
        size=np.shape(data)[0]    # number of samples
        count=np.shape(data)[1]   # number of columns: the features plus the label column

        dic={}
        for i in range(size):
            key=str(data[i][count-1])   # use the string form consistently as the dictionary key
            if key in dic.keys():
                dic[key]+=1
            else:
                dic[key]=1

        for i in dic.keys():
            dic[i]/=size

        self.priorpro=dic

        # 3. Compute the conditional probability of each feature value xk given each class yi
        for i in range(count-1):
            dic={}
            for k in range(size):
                temp=str(data[k][i])+'|'+str(data[k][count-1])   # key format: feature value+'|'+class label, standing for the conditional probability p(feature value|class label)
                if temp in dic.keys():
                    dic[temp]+=1
                else:
                    dic[temp]=1
            for k in dic.keys():
                kind=k.split('|')[1]                            # extract the class label
                dic[k]/=data[:,count-1].tolist().count(kind)    # count the samples carrying this class label
            self.varipro.append(dic)

        # print(self.priorpro)
        # print(self.varipro)

        return

    def discriminate(self, data, rowvar=False):
        '''
        :discriminate: classify the test set
        :param data: test-set matrix; its elements may be numbers or strings representing feature values
        :type data: np.array
        :param rowvar: specifies whether each row or each column represents a variable; rowvar=True means each row is a variable and each column a sample vector, rowvar=False means each column is a variable and each row a sample vector; the default is rowvar=False
        :type rowvar: bool
        :return: tuple (res, respro)
        :        res: list of classification results, where res[i] is the class label of the sample vector with row or column index i (counting from 0)
        :        respro: list of per-class probabilities, where respro[i] maps each class to the probability that the sample vector with row or column index i (counting from 0) belongs to it
        :        example: with two test samples, a possible return value is (res,respro) with res=['class A','class A'] and respro=[{'class A':0.22,'class B':0.78}, {'class A':0.99,'class B':0.01}]
        :rtype: tuple
        '''
        # 1. Normalize the test matrix to the rowvar=False layout, i.e. each row is a sample vector
        if rowvar==True:
            data=data.T
        if data.ndim==1:
            data=np.array([data])

        # 2. For each test sample vector and each class value yi, compute p(x|yi)p(yi)=p(x1|yi)*p(x2|yi)*...*p(xn|yi)*p(yi); the class with the largest value is the classification result
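        # Worked illustration with hypothetical numbers for two classes A and B:
        # if p(x|A)p(A)=0.03 and p(x|B)p(B)=0.01, class A is predicted, and the
        # normalized posteriors are 0.03/0.04=0.75 for A and 0.01/0.04=0.25 for B.
        # Note that no Laplace smoothing is applied, so a feature value that never
        # co-occurs with a class in the training set drives that class's score to 0.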
        size=np.shape(data)[0]
        count=np.shape(data)[1]

        res=[]   # classification results

        for i in range(size):
            p=[]
            kind=[]
            for k in self.priorpro.keys():
                prior=self.priorpro[k]
                for m in range(count):
                    name=str(data[i][m])+'|'+str(k)
                    if name in self.varipro[m].keys():
                        prior*=self.varipro[m][name]
                    else:
                        prior*=0
                        break
                p.append(prior)   # numerator of the posterior of class yi, i.e. p(x|yi)p(yi)
                kind.append(k)    # label of class yi
            res.append(kind[p.index(max(p))])
            add=sum(p)
            p=[x/add for x in p]   # normalize to posteriors: the denominator is identical for every class, so it is simply the sum of the numerators and need not be computed separately
            self.respro.append(dict(zip(kind,p)))

        return (res,self.respro)

--------------------------------------------------------------------------------
/BayesDiscri/example.py:
--------------------------------------------------------------------------------
import BayesDiscri as bayes
import numpy as np

if __name__ == "__main__":

    train_data=np.array([['handsome','bad','tall','not ambitious','not marry'],
                         ['not handsome','good','tall','ambitious','not marry'],
                         ['handsome','good','short','ambitious','marry'],
                         ['not handsome','good','tall','ambitious','marry'],
                         ['handsome','bad','short','ambitious','not marry'],
                         ['handsome','bad','short','ambitious','not marry'],
                         ['handsome','good','tall','not ambitious','marry'],
                         ['not handsome','good','medium','ambitious','marry'],
                         ['handsome','good','medium','ambitious','marry'],
                         ['not handsome','bad','tall','ambitious','marry'],
                         ['handsome','good','short','not ambitious','not marry'],
                         ['handsome','good','short','not ambitious','not marry']])

    by=bayes.BayesDiscri()
    by.train(train_data,rowvar=False)

    test_data=np.array([['not handsome','bad','short','not ambitious'],
                        ['not handsome','good','tall','not ambitious']])

    res=by.discriminate(test_data,rowvar=False)
    print('classification results: res = ', res[0])
    print('per-class probabilities of the test samples: res = ', res[1])

--------------------------------------------------------------------------------
/BayesDiscri/readme.md:
--------------------------------------------------------------------------------
# BayesDiscri: Naive Bayes Discriminant Module

The naive Bayes discriminant is one of the most classic and fundamental discriminant algorithms in statistical analysis. For an introduction to the algorithm, see:
https://zhuanlan.zhihu.com/p/26262151
This document only describes how to use the naive Bayes discriminant module implemented here.

The BayesDiscri module mainly implements the naive Bayes discriminant algorithm.

## 1. Import the module "BayesDiscri.py"

    import BayesDiscri as bayes

## 2. Create a BayesDiscri object

    by=bayes.BayesDiscri()

## 3. Train on the training samples
> 0. Function prototype

    def train(self, data, rowvar=False)

> 1. Use the train member method to train on the training samples.
> 2. The first parameter data is the training-set matrix, of type np.array. Its elements may be numbers or strings representing feature values, and its last row or last column holds the class labels. To ensure training quality, the training matrix should contain at least two samples and two features.
> 3. The second parameter rowvar specifies whether each row or each column represents a variable, of type bool. rowvar=True means each row is a variable and each column a sample vector; rowvar=False means each column is a variable and each row a sample vector. The default is rowvar=False.

    # Training set: a small matchmaking questionnaire in which each row is a sample and each column a feature; the first four columns describe a man (handsome? good personality? tall? ambitious?) and the last column is his class (whether a woman is inclined to marry him).
    train_data=np.array([['handsome','bad','tall','not ambitious','not marry'],
                         ['not handsome','good','tall','ambitious','not marry'],
                         ['handsome','good','short','ambitious','marry'],
                         ['not handsome','good','tall','ambitious','marry'],
                         ['handsome','bad','short','ambitious','not marry'],
                         ['handsome','bad','short','ambitious','not marry'],
                         ['handsome','good','tall','not ambitious','marry'],
                         ['not handsome','good','medium','ambitious','marry'],
                         ['handsome','good','medium','ambitious','marry'],
                         ['not handsome','bad','tall','ambitious','marry'],
                         ['handsome','good','short','not ambitious','not marry'],
                         ['handsome','good','short','not ambitious','not marry']])

    by=bayes.BayesDiscri()
    by.train(train_data,rowvar=False)   # train on the training set with the train member method
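> 4. As a quick sanity check after training, the learned class priors can be read off the priorpro attribute defined in BayesDiscri.py above; the matchmaking data set contains six samples of each class, so both priors come out as 0.5:

    print(by.priorpro)

    >>> Output
    {'not marry': 0.5, 'marry': 0.5}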
## 4. Discriminate and classify the test samples
> 0. Function prototype

    def discriminate(self, data, rowvar=False)

> 1. Use the discriminate member method to discriminate and classify the test samples.
> 2. The first parameter data is the test-set matrix, of type np.array. Its elements may be numbers or strings representing feature values.
> 3. The second parameter rowvar specifies whether each row or each column represents a variable, of type bool. rowvar=True means each row is a variable and each column a sample vector; rowvar=False means each column is a variable and each row a sample vector. The default is rowvar=False.
> 4. The return value is the tuple (res, respro), where:
>> res: list of classification results, where res[i] is the class label of the sample vector with row or column index i (counting from 0)

>> respro: list of per-class probabilities, where respro[i] gives the probability of the sample vector with row or column index i (counting from 0) belonging to each class

>> Example: with two test samples, a possible return value is (res,respro) with res=\['class A','class A'], respro=\[{'class A':0.22,'class B':0.78}, {'class A':0.99,'class B':0.01}]

    test_data=np.array([['not handsome','bad','short','not ambitious'],
                        ['not handsome','good','tall','not ambitious']])

    res=by.discriminate(test_data,rowvar=False)
    print('classification results: res = ', res[0])
    print('per-class probabilities of the test samples: res = ', res[1])

    >>> Output
    classification results: res = ['not marry', 'marry']
    per-class probabilities of the test samples: res = [{'not marry': 0.9230769230769231, 'marry': 0.07692307692307693}, {'not marry': 0.2857142857142857, 'marry': 0.7142857142857143}]

## Notes:
> 1. example.py provides example code for naive Bayes discrimination on the small matchmaking questionnaire data set.
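> 2. The first posterior printed above can be reproduced by hand from the training counts: for the test sample ('not handsome','bad','short','not ambitious'), p(x|marry)p(marry) = (3/6)*(1/6)*(1/6)*(1/6)*(6/12) ≈ 0.00116 and p(x|not marry)p(not marry) = (1/6)*(3/6)*(4/6)*(3/6)*(6/12) ≈ 0.01389, so the normalized posterior of 'not marry' is 0.01389/(0.01389+0.00116) ≈ 0.923, matching the output of discriminate.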
--------------------------------------------------------------------------------
/CorreCoef/CorreCoef.py:
--------------------------------------------------------------------------------
import numpy as np

class CorreCoef:

    def pearson_coef(self, data, rowvar=True):
        '''
        :pearson_coef: compute the Pearson correlation coefficient matrix
        :param data: sample vector matrix
        :type data: np.array
        :param rowvar: specifies whether each row or each column is a sample vector; rowvar=True means each column is a sample vector, rowvar=False means each row is a sample vector
        :type rowvar: bool
        :return: the Pearson correlation coefficient matrix of the sample matrix data
        :rtype: np.array
        '''

        # 1. Compute the covariance between every pair of rows, i.e. every pair of variables
        cov=np.cov(data,rowvar=rowvar)

        # 2. Build the Pearson correlation coefficient matrix
        size=np.shape(data)[0] if rowvar==True else np.shape(data)[1]
        res=np.zeros((size,size))

        for i in range(size):
            for k in range(size):
                res[i][k]=cov[i][k]/np.sqrt((cov[i][i]*cov[k][k]))

        return res

    def spearman_coef(self, data, rowvar=True):
        '''
        :spearman_coef: compute the Spearman correlation coefficient matrix
        :param data: sample vector matrix
        :type data: np.array
        :param rowvar: specifies whether each row or each column is a sample vector; rowvar=True means each column is a sample vector, rowvar=False means each row is a sample vector
        :type rowvar: bool
        :return: the Spearman correlation coefficient matrix of the sample matrix data
        :rtype: np.array
        '''
        # 1. If each column holds a variable's values, transpose so that each row holds a variable's values
        if rowvar==False:
            data=data.T

        # 2. Replace each variable's values by their ranks; the argsort of an argsort yields the rank of each value (a single argsort would yield the inverse permutation instead), and assigning the result to a new array leaves the caller's data untouched; ties are broken arbitrarily rather than averaged
        data=np.argsort(np.argsort(data,axis=1),axis=1)

        # 3. Compute the Spearman coefficients from the rank differences according to the formula
        size=np.shape(data)[0]
        res=np.zeros((size,size))

        for i in range(np.shape(data)[0]):
            for k in range(np.shape(data)[0]):
                ranksum=0.0
                n=np.shape(data)[1]
                for r in range(n):
                    ranksum+=np.square(data[i][r]-data[k][r])
                res[i][k]=1-6*ranksum/(n*(n**2-1))

        return res

    def kendall_coef(self, data, rowvar=True):
        '''
        :kendall_coef: compute the Kendall correlation coefficient matrix
        :param data: sample vector matrix
        :type data: np.array
        :param rowvar: specifies whether each row or each column is a sample vector; rowvar=True means each column is a sample vector, rowvar=False means each row is a sample vector
        :type rowvar: bool
        :return: the Kendall correlation coefficient matrix of the sample matrix data
        :rtype: np.array
        '''
        # 1. If each column holds a variable's values, transpose so that each row holds a variable's values
        if rowvar==False:
            data=data.T

        # Replace each variable's values by their ranks (argsort of argsort), as in spearman_coef above
        data=np.argsort(np.argsort(data,axis=1),axis=1)

        # 2. Compute the Kendall coefficient between every pair of variables and put it into the corresponding entry of the correlation matrix
        size=np.shape(data)[0]    # number of variables
        count=np.shape(data)[1]   # number of sample vectors
        res=np.zeros((size,size))

        for i in range(size):
            for k in range(size):
                P=Q=T=U=0
                # Classify every pair of tuples as a concordant pair, a discordant pair, a tied pair in x or a tied pair in y
                # concordant pair: {(x1,y1),(x2,y2)} and sgn(x2-x1)==sgn(y2-y1)
                # discordant pair: {(x1,y1),(x2,y2)} and sgn(x2-x1)==-sgn(y2-y1)
                # tied pair: {(x1,y1),(x2,y2)} and sgn(x2-x1)==0 or sgn(y2-y1)==0
                # tied pair in x: {(x1,y1),(x2,y2)} and sgn(x2-x1)==0 and sgn(y2-y1)!=0
                # tied pair in y: {(x1,y1),(x2,y2)} and sgn(y2-y1)==0 and sgn(x2-x1)!=0
                for m in range(count):
                    for n in range(m+1,count):
                        value1=np.sign(data[i][m]-data[i][n])
                        value2=np.sign(data[k][m]-data[k][n])
                        if value1==0 and value2==0:
                            pass
                        elif value1==0 and value2!=0:
                            T+=1
                        elif value2==0 and value1!=0:
                            U+=1
                        elif value1==value2:
                            P+=1
                        elif value1==-value2:
                            Q+=1
                res[i][k]=(P-Q)/np.sqrt((P+Q+T)*(P+Q+U))

        return res

--------------------------------------------------------------------------------
/CorreCoef/example.py:
--------------------------------------------------------------------------------
import CorreCoef as co
import numpy as np
import scipy.stats as sc

if __name__ == "__main__":

    # 1. Pearson correlation coefficient
    # For the Pearson formula, see https://docs.scipy.org/doc/numpy/reference/generated/numpy.corrcoef.html?highlight=corrcoef#numpy.corrcoef

    data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 2, 2, 3]])
    coe=co.CorreCoef()

    res=coe.pearson_coef(data, rowvar=False)
    print("Pearson correlation matrix computed by this library = ")
    print(res)

    res1=np.corrcoef(data, rowvar=False)
    print("Result of the standard numpy.corrcoef routine = ")
    print(res1)

    # 2. Spearman correlation coefficient
    res=coe.spearman_coef(data, rowvar=False)
    print("Spearman correlation matrix computed by this library = ")
    print(res)

    res1=sc.spearmanr(data, axis=0)
    print("Result of the standard scipy.stats.spearmanr routine = ")
    print(res1[0])

    # 3. Kendall correlation coefficient
    res=coe.kendall_coef(data, rowvar=False)
    print("Kendall correlation matrix computed by this library = ")
    print(res)

    data1=data.T
    size=np.shape(data)[1]
    res1=np.zeros((size,size))
    for i in range(size):
        for k in range(size):
            res1[i][k]=sc.kendalltau(data1[i],data1[k])[0]
    print("Result of the standard scipy.stats.kendalltau routine = ")
    print(res1)

--------------------------------------------------------------------------------
/CorreCoef/readme.md:
--------------------------------------------------------------------------------
# CorreCoef: Correlation Coefficient and Correlation Matrix Module

The CorreCoef module covers the three correlation coefficients most commonly used in statistical analysis, namely the Pearson, Spearman and Kendall coefficients, together with the computation of the corresponding correlation coefficient matrices.

## 1. Import the module "CorreCoef.py"
    import CorreCoef as co

## 2. Create a CorreCoef object
> 1. Creating a CorreCoef object requires no parameters.

    coe=co.CorreCoef()

## 3. Compute the Pearson correlation coefficient matrix
> 0. Function prototype

    def pearson_coef(self, data, rowvar=True)

> 1. Use the pearson_coef member method to compute the Pearson correlation coefficient matrix.
> 2. The first parameter data is the matrix of variable values, or equivalently of sample vectors, of type np.array. By the definition of a correlation coefficient there must be at least two variables and two samples, i.e. at least a 2×2 matrix.
> 3. The second parameter rowvar specifies whether each row or each column is a sample vector, of type bool: rowvar=True means each column is a sample vector, i.e. each row represents a variable; rowvar=False means each row is a sample vector, i.e. each column represents a variable.
> 4. The return value is the Pearson correlation coefficient matrix, of type np.array.

    data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 2, 2, 3]])
    res=coe.pearson_coef(data, rowvar=False)
    print("Pearson correlation matrix computed by this library = ")
    print(res)

    >>> Output
    Pearson correlation matrix computed by this library = 
    [[ 1.         -0.18898224  1.          1.          1.        ]
     [-0.18898224  1.         -0.18898224 -0.18898224 -0.18898224]
     [ 1.         -0.18898224  1.          1.          1.        ]
     [ 1.         -0.18898224  1.          1.          1.        ]
     [ 1.         -0.18898224  1.          1.          1.        ]]
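> 5. Entry (i,k) of the returned matrix is computed from the covariance matrix exactly as in pearson_coef above:

    r_ik = cov(x_i,x_k) / sqrt(cov(x_i,x_i) * cov(x_k,x_k))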
## 4. Compute the Spearman correlation coefficient matrix
> 0. Function prototype

    def spearman_coef(self, data, rowvar=True)

> 1. Use the spearman_coef member method to compute the Spearman correlation coefficient matrix.
> 2. The first parameter data is the matrix of variable values, or equivalently of sample vectors, of type np.array. By the definition of a correlation coefficient there must be at least two variables and two samples, i.e. at least a 2×2 matrix.
> 3. The second parameter rowvar specifies whether each row or each column is a sample vector, of type bool: rowvar=True means each column is a sample vector, i.e. each row represents a variable; rowvar=False means each row is a sample vector, i.e. each column represents a variable. The number of variables must be greater than or equal to 2.
> 4. The return value is the Spearman correlation coefficient matrix, of type np.array.

    data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 2, 2, 3]])
    res=coe.spearman_coef(data, rowvar=False)
    print("Spearman correlation matrix computed by this library = ")
    print(res)

    >>> Output
    Spearman correlation matrix computed by this library = 
    [[1.  0.5 1.  1.  1. ]
     [0.5 1.  0.5 0.5 0.5]
     [1.  0.5 1.  1.  1. ]
     [1.  0.5 1.  1.  1. ]
     [1.  0.5 1.  1.  1. ]]

## 5. Compute the Kendall correlation coefficient matrix
> 0. Function prototype

    def kendall_coef(self, data, rowvar=True)

> 1. Use the kendall_coef member method to compute the Kendall correlation coefficient matrix.
> 2. The first parameter data is the matrix of variable values, or equivalently of sample vectors, of type np.array. By the definition of a correlation coefficient there must be at least two variables and two samples, i.e. at least a 2×2 matrix.
> 3. The second parameter rowvar specifies whether each row or each column is a sample vector, of type bool: rowvar=True means each column is a sample vector, i.e. each row represents a variable; rowvar=False means each row is a sample vector, i.e. each column represents a variable. The number of variables must be greater than or equal to 2.
> 4. The return value is the Kendall correlation coefficient matrix, of type np.array.

    data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 2, 2, 3]])
    res=coe.kendall_coef(data, rowvar=False)
    print("Kendall correlation matrix computed by this library = ")
    print(res)

    >>> Output
    Kendall correlation matrix computed by this library = 
    [[1.         0.33333333 1.         1.         1.        ]
     [0.33333333 1.         0.33333333 0.33333333 0.33333333]
     [1.         0.33333333 1.         1.         1.        ]
     [1.         0.33333333 1.         1.         1.        ]
     [1.         0.33333333 1.         1.         1.        ]]

## Notes:
> 1. example.py provides example code using the CorreCoef correlation coefficient and correlation matrix module.
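> 2. For reference, the two rank-based coefficients are computed in CorreCoef.py as follows. Spearman is obtained from the rank differences d_r between a pair of variables over the n samples,

    rho = 1 - 6 * sum(d_r^2) / (n * (n^2 - 1))

   and Kendall is the tau-b statistic built from the counts of concordant pairs P, discordant pairs Q and pairs tied only in x (T) or only in y (U):

    tau = (P - Q) / sqrt((P + Q + T) * (P + Q + U))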
--------------------------------------------------------------------------------
/DecisionTree/DecisionTree.py:
--------------------------------------------------------------------------------
import time
import pandas as pd
import numpy as np

class TreeNode:
    def __init__(self, data_index, left=None, right=None, feature=None, split=None, out = None):
        self.data_index = data_index   # row index of the sample set held by the current node
        self.left = left               # index of the current node's left child
        self.right = right             # index of the current node's right child
        self.feature = feature         # feature used for splitting
        self.split = split             # split value of the node
        self.out = out                 # output value of a leaf node


class DecisionTree:
    def __init__(self, S, min_sample_leaf):
        '''
        : __init__: build a CART decision tree
        : param S: training set used to train and build the decision tree
        : type S: pd.DataFrame
        : param min_sample_leaf: minimum number of samples per leaf node
        : type min_sample_leaf: int
        '''
        self.root = TreeNode(S.index)   # root node of the decision tree
        self.tree = []                  # node list of the decision tree
        self.tree.append(self.root)
        i = 0   # index of the leaf node currently being processed
        j = 0   # index of the last leaf node
        # 1. Build every node of the decision tree in a loop
        while True:
            res = self.divide(S, self.tree[i], min_sample_leaf)
            if res:   # 1.1 If the current node can be split, split it
                self.tree.extend(res)
                self.tree[i].left = j+1
                self.tree[i].right = j+2
                j += 2
                i += 1
            elif i == j:
                break
            else:     # 1.2 If the current node cannot be split, move on to the next node in order
                i += 1


    def divide(self, S, leaf, min_sample_leaf):
        '''
        : divide: decide whether a leaf node can be split, and split it if so
        : param S: training set of the current node
        : type S: pd.DataFrame
        : param leaf: current leaf node
        : type leaf: TreeNode
        : param min_sample_leaf: minimum number of samples per leaf node
        : type min_sample_leaf: int
        '''
        # 1. Fetch the data set of the current node
        data = S.loc[leaf.data_index]
        res = self.gini_divide(data,min_sample_leaf)
        if not res:
            leaf.out = data.iloc[:,0].mode()[0]   # the most frequent value becomes the output of the current node
            return None
        feature, split = res
        leaf.feature = feature
        leaf.split = split
        left = TreeNode(data[data[feature] <= split].index)
        right = TreeNode(data[data[feature] > split].index)
        return left, right


    def gini_divide(self, data, min_sample_leaf):
        '''
        : gini_divide: compute the split value of the features
        : return: (best feature to split on, split value)
        : rtype: (float, float)
        '''
        # 1. Find the best split of the data set according to the Gini index
        res = []
        S = data.shape[0]
        for feature in np.arange(1,data.shape[1]):
            if self.is_one_hot(data,feature):
                index_bool_value = data.iloc[:,feature] == 0
                s1 = data.loc[index_bool_value,data.columns[0]]
                S1 = s1.shape[0]
                S2 = S-S1
                if S1

--------------------------------------------------------------------------------
/Distance/Distance.py:
--------------------------------------------------------------------------------
import numpy as np

class Distance:

    def euc_distance(self,a,b):
        '''
        :euc_distance: compute the Euclidean distance
        :param a: vector a
        :type a: np.array
        :param b: vector b
        :type b: np.array
        :return -> the Euclidean distance between vectors a and b
        :rtype: float
        '''
        res=np.sqrt(np.sum(np.square(a-b)))
        return res

    def mah_distance(self,a,b,cov_vec):
        '''
        :mah_distance: compute the Mahalanobis distance
        :comment: this method has two main calling patterns:
        :1. computing the Mahalanobis distance from an unclassified vector to a class: a is the unclassified vector, b is the mean vector of the class, cov_vec is the covariance matrix of that class; returns the Mahalanobis distance from a to the class
        :2. computing the Mahalanobis distance between two sample vectors of the same class: a and b are two sample vectors belonging to the same class, cov_vec is the covariance matrix of that class; returns the Mahalanobis distance between a and b within the class

        :type a: np.array
        :type b: np.array
        :type cov_vec: np.array
        :rtype: float
        '''
        rev_vec=np.linalg.pinv(cov_vec)   # pseudo-inverse of the covariance matrix
        tmp=a-b                           # row vector; tmp.T is the column vector
        res=np.sqrt(np.dot(np.dot(tmp,rev_vec),tmp.T))

        return res

    def man_distance(self,a,b):
        '''
        :man_distance: compute the Manhattan distance
        :param a: vector a
        :type a: np.array
        :param b: vector b
        :type b: np.array
        :return -> the Manhattan distance between vectors a and b
        :rtype: float
        '''
        res=np.sum(np.abs(a-b))
        return res

    def min_distance(self,a,b,p):
        '''
        :min_distance: compute the Minkowski distance
        :param a: vector a
        :type a: np.array
        :param b: vector b
        :type b: np.array
        :param p: order p of the Minkowski distance
        :type p: int
        :return -> the Minkowski distance between vectors a and b
        :rtype: float
        '''
        res=np.power(np.sum(np.power(np.abs(a-b),p)),1/p)
        return res

    def standard_euc_distance(self,a,b,s):
        '''
        :standard_euc_distance: compute the standardized Euclidean distance
        :param a: vector a
        :type a: np.array
        :param b: vector b
        :type b: np.array
        :param s: variance vector s of the class that vectors a and b belong to
        :type s: np.array
        :return -> the standardized Euclidean distance between vectors a and b
        :rtype: float
        '''
        res=np.power(np.sum(np.divide(np.power(a-b,2),s)),1/2)
        return res

    def cos_distance(self,a,b):
        '''
        :cos_distance: compute the cosine similarity; despite the name, this returns cos(theta) itself (1.0 for parallel vectors), whereas scipy's 'cosine' metric is 1-cos(theta)
        :param a: vector a
        :type a: np.array
        :param b: vector b
        :type b: np.array
        :return -> the cosine similarity of vectors a and b
        :rtype: float
        '''
        res1=np.sum(np.multiply(a,b))
        res2=np.sqrt(np.sum(np.square(a)))*np.sqrt(np.sum(np.square(b)))
        res=res1/res2
        return res
--------------------------------------------------------------------------------
/Distance/example.py:
--------------------------------------------------------------------------------
import Distance as di
import numpy as np
from scipy.spatial.distance import pdist

if __name__ == "__main__":

    dis=di.Distance()
    a=np.array([1.5 for i in range(128)])
    b=np.array([2 for i in range(128)])
    s=np.array([0.27 for i in range(128)])

    print("Results computed by this library: ")
    print("Euclidean distance = ", dis.euc_distance(a,b))                          # Euclidean distance
    print("Manhattan distance = ", dis.man_distance(a,b))                          # Manhattan distance
    print("Minkowski distance = ", dis.min_distance(a,b,3))                        # Minkowski distance (p=2 gives the Euclidean distance)
    print("Standardized Euclidean distance = ", dis.standard_euc_distance(a,b,s))  # standardized Euclidean distance
    print("Cosine similarity = ", dis.cos_distance(a,b))                           # cosine similarity

    print("Results of the standard python routines: ")
    print("Euclidean distance = ", pdist(np.vstack([a,b]),'euclidean'))                      # Euclidean distance
    print("Manhattan distance = ", pdist(np.vstack([a,b]),'cityblock'))                      # Manhattan distance
    print("Minkowski distance = ", pdist(np.vstack([a,b]),'minkowski', p=3))                 # Minkowski distance (p=2 gives the Euclidean distance)
    print("Standardized Euclidean distance = ", pdist(np.vstack([a,b]),'seuclidean', V=s))   # standardized Euclidean distance
    print("Cosine similarity = ", 1-pdist(np.vstack([a,b]),'cosine'))                        # cosine similarity (pdist's 'cosine' is 1 minus the cosine similarity)

--------------------------------------------------------------------------------
/Distance/readme.md:
--------------------------------------------------------------------------------
# Distance: Distance Metrics Module

The Distance module provides six distance metrics commonly used in statistical analysis: the Euclidean, Mahalanobis, Manhattan, Minkowski and standardized Euclidean distances, and the cosine similarity.

## 1. Import the module "Distance.py"
    import Distance as di

## 2. Create a Distance object
> 1. Initializing a Distance object requires no parameters.

    dis=di.Distance()

## 3. Computing the Euclidean distance
> 0. Function prototype

    def euc_distance(self,a,b)

> 1. Use the euc_distance member method to compute the Euclidean distance.
> 2. The first parameter is the first vector a, of type np.array.
> 3. The second parameter is the second vector b, of type np.array.

    a=np.array([1.5 for i in range(128)])
    b=np.array([2 for i in range(128)])
    res=dis.euc_distance(a,b)   # compute the Euclidean distance
    print("Euclidean distance = ", res)

    >>> Output
    Euclidean distance = 5.656854249492381

## 4. Computing the Mahalanobis distance
> 0. Function prototype

    def mah_distance(self,a,b,cov_vec)

> 1. Use the mah_distance member method to compute the Mahalanobis distance. It has the following two common usage scenarios, corresponding to different parameter conventions. Let the first parameter be the vector a, the second the vector b and the third the covariance matrix cov_vec.
> 2. Scenario 1: computing the Mahalanobis distance from an unclassified vector to a class: a is the unclassified vector, b is the mean vector of the class and cov_vec is the covariance matrix of that class; the return value is the Mahalanobis distance from a to the class.
> 3. Scenario 2: computing the Mahalanobis distance between two sample vectors of the same class: a and b are two sample vectors belonging to the same class and cov_vec is the covariance matrix of that class; the return value is the Mahalanobis distance between a and b within the class.

    a=np.array([1.5 for i in range(128)])
    b=np.array([2 for i in range(128)])
    cov=np.cov(np.vstack((a,b)),rowvar=False)
    res=dis.mah_distance(a,b,cov)   # compute the Mahalanobis distance
    print("Mahalanobis distance = ", res)

    >>> Output
    Mahalanobis distance = 1.4142135623730951
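> 4. Relation to the other metrics in this module: the Mahalanobis distance is d(a,b) = sqrt((a-b) * inv(S) * (a-b)^T) for the class covariance matrix S, computed with the pseudo-inverse in Distance.py. When S is a diagonal matrix diag(s) this reduces to the standardized Euclidean distance of section 7, and when S is the identity matrix it reduces to the ordinary Euclidean distance of section 3.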
## 5. Computing the Manhattan distance
> 0. Function prototype

    def man_distance(self,a,b)

> 1. Use the man_distance member method to compute the Manhattan distance.
> 2. The first parameter is the first vector a, of type np.array.
> 3. The second parameter is the second vector b, of type np.array.

    a=np.array([1.5 for i in range(128)])
    b=np.array([2 for i in range(128)])
    res=dis.man_distance(a,b)   # compute the Manhattan distance
    print("Manhattan distance = ", res)

    >>> Output
    Manhattan distance = 64.0

## 6. Computing the Minkowski distance
> 0. Function prototype

    def min_distance(self,a,b,p)

> 1. Use the min_distance member method to compute the Minkowski distance.
> 2. The first parameter is the first vector a, of type np.array.
> 3. The second parameter is the second vector b, of type np.array.
> 4. The third parameter is the order p of the Minkowski distance, of type int.

    a=np.array([1.5 for i in range(128)])
    b=np.array([2 for i in range(128)])
    res=dis.min_distance(a,b,3)   # compute the Minkowski distance
    print("Minkowski distance = ", res)

    >>> Output
    Minkowski distance = 2.5198420997897464

## 7. Computing the standardized Euclidean distance
> 0. Function prototype

    def standard_euc_distance(self,a,b,s)

> 1. Use the standard_euc_distance member method to compute the standardized Euclidean distance.
> 2. The first parameter is the first vector a, of type np.array.
> 3. The second parameter is the second vector b, of type np.array.
> 4. The third parameter is the variance vector s of the sample set of the class that a and b belong to, of type np.array.

    a=np.array([1.5 for i in range(128)])
    b=np.array([2 for i in range(128)])
    s=np.array([0.27 for i in range(128)])
    res=dis.standard_euc_distance(a,b,s)   # compute the standardized Euclidean distance
    print("Standardized Euclidean distance = ", res)

    >>> Output
    Standardized Euclidean distance = 10.886621079036345

## 8. Computing the cosine similarity
> 0. Function prototype

    def cos_distance(self,a,b)

> 1. Use the cos_distance member method to compute the cosine similarity. Note that despite its name it returns cos(theta) itself (1.0 for parallel vectors), whereas the pdist 'cosine' metric in scipy returns 1-cos(theta).
> 2. The first parameter is the first vector a, of type np.array.
> 3. The second parameter is the second vector b, of type np.array.

    a=np.array([1.5 for i in range(128)])
    b=np.array([2 for i in range(128)])
    res=dis.cos_distance(a,b)   # compute the cosine similarity
    print("Cosine similarity = ", res)

    >>> Output
    Cosine similarity = 1.0

## Notes:
> 1. example.py gives example code using the Distance metrics module.
--------------------------------------------------------------------------------
/DistanceDiscri/DistanceDiscri.py:
--------------------------------------------------------------------------------
import numpy as np

class DistanceDiscri:

    def __init__(self):
        '''
        :__init__: initialize the DistanceDiscri class
        '''
        self.cov=[]     # list of covariance matrices of each class
        self.avg=[]     # list of mean vectors (centroids) of each class
        self.label=[]   # label names of each class

        return

    def train(self, *data, rowvar=True, label=[]):
        '''
        :train: train on the per-class training sets; this method computes each class's covariance matrix and mean vector (centroid) from its training set
        :param *data: a variable number of per-class training sets; the classes are numbered 0,1,...
        :type *data: np.array
        :param rowvar: specifies whether each row or each column represents a variable; rowvar=True means each row is a variable and each column a sample vector; rowvar=False means each column is a variable and each row a sample vector
        :type rowvar: bool
        :param label: class names corresponding to the training sets, in the same order as the training sets appear in the parameters
        :type label: list
        '''
        # 1. Convert the tuple to a list
        data=list(data)

        # 2. Normalize every class's training set to the rowvar=False layout, i.e. each column is a variable and each row a sample vector
        if rowvar==True:
            data=[x.T for x in data]

        # 3. Compute the mean vector (centroid) and covariance matrix of each class's training set, store them in the object, and store the class labels
        for i in range(len(data)):
            self.cov.append(np.cov(data[i],rowvar=False))
            self.avg.append(np.average(data[i],axis=0))
        self.label=label

        return

    def discriminate(self, data, rowvar=True):
        '''
        :discriminate: classify the test samples by distance discrimination
        :param data: test-set sample matrix
        :type data: np.array
        :param rowvar: specifies whether each row or each column represents a variable; rowvar=True means each row is a variable and each column a sample vector; rowvar=False means each column is a variable and each row a sample vector
        :type rowvar: bool
        :return: classification results res, where res[i] is the class label of sample i
        :rtype: list
        '''
        # 1. Normalize the test set to the rowvar=False layout, i.e. each column is a variable and each row a sample vector
        if rowvar==True:
            data=data.T

        if data.ndim==1:
            data=np.array([data])

        # 2. For each sample vector, compute the Mahalanobis distance to every class's mean vector
        res=[]                   # classification results
        size=np.shape(data)[0]   # number of test sample vectors
        count=len(self.label)    # number of classes

        for i in range(size):
            dist=[0 for i in range(count)]   # distances from the current sample vector to each class
            for k in range(count):
                dist[k]=self.__mah_distance(data[i],self.avg[k],self.cov[k])
            res.append(self.label[dist.index(min(dist))])

        return res

    def __mah_distance(self, a, b, cov_vec):
        '''
        :__mah_distance: compute the Mahalanobis distance
        :comment: this method has two main calling patterns:
        :1. computing the Mahalanobis distance from an unclassified vector to a class: a is the unclassified vector, b is the mean vector of the class, cov_vec is the covariance matrix of that class; returns the Mahalanobis distance from a to the class
        :2. computing the Mahalanobis distance between two sample vectors of the same class: a and b are two sample vectors belonging to the same class, cov_vec is the covariance matrix of that class; returns the Mahalanobis distance between a and b within the class

        :type a: np.array
        :type b: np.array
        :type cov_vec: np.array
        :rtype: float
        '''
        if cov_vec.ndim<2:   # if the covariance matrix is not at least 2-dimensional it cannot be inverted, so the Mahalanobis distance cannot be computed and the Euclidean distance is returned instead
            return self.__euc_distance(a,b)
        rev_vec=np.linalg.pinv(cov_vec)   # pseudo-inverse of the covariance matrix
        tmp=a-b                           # row vector; tmp.T is the column vector
        res=np.sqrt(np.dot(np.dot(tmp,rev_vec),tmp.T))

        return res

    def __euc_distance(self, a, b):
        '''
        :__euc_distance: compute the Euclidean distance; fallback used by __mah_distance above
        :rtype: float
        '''
        return np.sqrt(np.sum(np.square(a-b)))
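
# Usage sketch (hypothetical arrays; see the example/ directory for a complete
# run on the car evaluation data set):
#   dcr = DistanceDiscri()
#   dcr.train(class_a_train, class_b_train, rowvar=False, label=['a','b'])
#   res = dcr.discriminate(test_samples, rowvar=False)   # e.g. ['a','b','a']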
--------------------------------------------------------------------------------
/DistanceDiscri/example/DistanceDiscri.py:
--------------------------------------------------------------------------------
import numpy as np

class DistanceDiscri:

    def __init__(self):
        '''
        :__init__: initialize the DistanceDiscri class
        '''
        self.cov=[]     # list of covariance matrices of each class
        self.avg=[]     # list of mean vectors (centroids) of each class
        self.label=[]   # label names of each class

        return

    def train(self, *data, rowvar=True, label=[]):
        '''
        :train: train on the per-class training sets; this method computes each class's covariance matrix and mean vector (centroid) from its training set
        :param *data: a variable number of per-class training sets; the classes are numbered 0,1,...
        :type *data: np.array
        :param rowvar: specifies whether each row or each column represents a variable; rowvar=True means each row is a variable and each column a sample vector; rowvar=False means each column is a variable and each row a sample vector
        :type rowvar: bool
        :param label: class names corresponding to the training sets, in the same order as the training sets appear in the parameters
        :type label: list
        '''
        # 1. Convert the tuple to a list
        data=list(data)

        # 2. Normalize every class's training set to the rowvar=False layout, i.e. each column is a variable and each row a sample vector
        if rowvar==True:
            data=[x.T for x in data]

        # 3. Compute the mean vector (centroid) and covariance matrix of each class's training set, store them in the object, and store the class labels
        for i in range(len(data)):
            self.cov.append(np.cov(data[i],rowvar=False))
            self.avg.append(np.average(data[i],axis=0))
        self.label=label

        return

    def discriminate(self, data, rowvar=True):
        '''
        :discriminate: classify the test samples by distance discrimination
        :param data: test-set sample matrix
        :type data: np.array
        :param rowvar: specifies whether each row or each column represents a variable; rowvar=True means each row is a variable and each column a sample vector; rowvar=False means each column is a variable and each row a sample vector
        :type rowvar: bool
        :return: classification results res, where res[i] is the class label of sample i
        :rtype: list
        '''
        # 1. Normalize the test set to the rowvar=False layout, i.e. each column is a variable and each row a sample vector
        if rowvar==True:
            data=data.T

        if data.ndim==1:
            data=np.array([data])

        # 2. For each sample vector, compute the Mahalanobis distance to every class's mean vector
        res=[]                   # classification results
        size=np.shape(data)[0]   # number of test sample vectors
        count=len(self.label)    # number of classes

        for i in range(size):
            dist=[0 for i in range(count)]   # distances from the current sample vector to each class
            for k in range(count):
                dist[k]=self.__mah_distance(data[i],self.avg[k],self.cov[k])
            res.append(self.label[dist.index(min(dist))])

        return res

    def __mah_distance(self, a, b, cov_vec):
        '''
        :__mah_distance: compute the Mahalanobis distance
        :comment: this method has two main calling patterns:
        :1. computing the Mahalanobis distance from an unclassified vector to a class: a is the unclassified vector, b is the mean vector of the class, cov_vec is the covariance matrix of that class; returns the Mahalanobis distance from a to the class
        :2. computing the Mahalanobis distance between two sample vectors of the same class: a and b are two sample vectors belonging to the same class, cov_vec is the covariance matrix of that class; returns the Mahalanobis distance between a and b within the class

        :type a: np.array
        :type b: np.array
        :type cov_vec: np.array
        :rtype: float
        '''
        if cov_vec.ndim<2:   # if the covariance matrix is not at least 2-dimensional it cannot be inverted, so the Mahalanobis distance cannot be computed and the Euclidean distance is returned instead
            return self.__euc_distance(a,b)
        rev_vec=np.linalg.pinv(cov_vec)   # pseudo-inverse of the covariance matrix
        tmp=a-b                           # row vector; tmp.T is the column vector
        res=np.sqrt(np.dot(np.dot(tmp,rev_vec),tmp.T))

        return res

    def __euc_distance(self, a, b):
        '''
        :__euc_distance: compute the Euclidean distance; fallback used by __mah_distance above
        :rtype: float
        '''
        return np.sqrt(np.sum(np.square(a-b)))
--------------------------------------------------------------------------------
/DistanceDiscri/example/example.py:
--------------------------------------------------------------------------------
import DistanceDiscri
import numpy as np
import csv    # python csv library
import os
import sys
import math
import pandas as pd

if __name__ == "__main__":
    # 1. Read the full training set from the csv file
    data=pd.read_csv(sys.path[0]+'/train.csv')
    del data['safety']   # drop the safety column, which is not used in this experiment
    #print(data)         # note that every element of data is read in as a string


    # 2. Search and replace in the training set: in every column except the overall remark (the last column), replace the grades by the numbers 0,1,2,...
    buying_replace_dic={'low':0,'med':1,'high':2,'vhigh':3}
    maint_replace_dic={'low':0,'med':1,'high':2,'vhigh':3}
    doors_replace_dic={'2':0,'3':1,'4':2,'5more':3}
    persons_replace_dic={'2':0,'4':1,'more':2}
    lug_boot_replace_dic={'small':0,'med':1,'big':2}

    data['buying'].replace(buying_replace_dic,inplace=True)
    data['maint'].replace(maint_replace_dic,inplace=True)
    data['doors'].replace(doors_replace_dic,inplace=True)
    data['persons'].replace(persons_replace_dic,inplace=True)
    data['lug_boot'].replace(lug_boot_replace_dic,inplace=True)


    # 3. Split the training data into the four sample sets unacc, acc, good, vgood according to the overall remark (the last column), and convert the results to np.array
    train_unacc=data[data['remark']=='unacc']
    del train_unacc['remark']
    train_unacc=train_unacc.values   # training set of the unacc class

    train_acc=data[data['remark']=='acc']
    del train_acc['remark']
    train_acc=train_acc.values       # training set of the acc class

    train_good=data[data['remark']=='good']
    del train_good['remark']
    train_good=train_good.values     # training set of the good class

    train_vgood=data[data['remark']=='vgood']
    del train_vgood['remark']
    train_vgood=train_vgood.values   # training set of the vgood class

    # 4. Read and preprocess the test set
    test_data=pd.read_csv(sys.path[0]+'/test.csv')

    test_data['buying'].replace(buying_replace_dic,inplace=True)
    test_data['maint'].replace(maint_replace_dic,inplace=True)
    test_data['doors'].replace(doors_replace_dic,inplace=True)
    test_data['persons'].replace(persons_replace_dic,inplace=True)
    test_data['lug_boot'].replace(lug_boot_replace_dic,inplace=True)

    del test_data['safety']
    del test_data['remark']

    test_data=test_data.values

    # print(train_unacc)
    # print(train_acc)
    # print(train_good)
    # print(train_vgood)
    # print(test_data)
    # print(type(train_acc))

    # 5. The steps above produce the training sets of the four classes 'unacc','acc','good','vgood', namely train_unacc, train_acc, train_good and train_vgood,
    #    while the test set to be discriminated is test_data, in which each row is a sample and each column represents a variable

    dcr=DistanceDiscri.DistanceDiscri()

    dcr.train(train_unacc, train_acc, train_good, train_vgood, rowvar=False, label=['unacc','acc','good','vgood'])

    res=dcr.discriminate(test_data,rowvar=False)

    print(res)
--------------------------------------------------------------------------------
/DistanceDiscri/example/readme.md:
--------------------------------------------------------------------------------
# Example: simple discriminant analysis on the car evaluation data set

# 1. Problem statement
For the problem statement, see the readme.pdf file in this folder.

# 2. Experiment code
For the experiment code, see the example.py file in this folder.

# 3. Training and test sets
For the training set, see the train.csv file in this folder; for the test set, see the test.csv file.

--------------------------------------------------------------------------------
/DistanceDiscri/example/readme.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/DistanceDiscri/example/readme.pdf

--------------------------------------------------------------------------------
/DistanceDiscri/example/test.csv:
--------------------------------------------------------------------------------
buying,maint,doors,persons,lug_boot,safety,remark
low,low,5more,more,big,high,
low,low,5more,more,big,med,
low,low,5more,more,med,low,
low,low,5more,more,med,med,
low,low,5more,more,med,high,
low,low,2,more,big,med,
low,low,2,more,big,high,
low,low,3,2,med,med,
low,vhigh,5more,4,med,med,
low,vhigh,5more,4,med,high,
low,vhigh,3,4,big,low,
low,vhigh,3,4,big,med,
low,vhigh,3,4,big,high,
low,vhigh,3,more,small,high,
low,med,4,more,small,high,
low,med,4,more,med,low,
low,med,4,more,med,med,
low,med,4,more,med,high,
low,med,4,more,big,high,
low,med,5more,2,small,low,

--------------------------------------------------------------------------------
/DistanceDiscri/example/train.csv:
--------------------------------------------------------------------------------
1 | buying,maint,doors,persons,lug_boot,safety,remark 2 | vhigh,vhigh,2,2,small,low,unacc 3 | vhigh,vhigh,2,2,small,med,unacc 4 | vhigh,vhigh,2,2,small,high,unacc 5 | vhigh,vhigh,2,2,med,low,unacc 6 | vhigh,vhigh,2,2,med,med,unacc 7 | vhigh,vhigh,2,2,med,high,unacc 8 | vhigh,vhigh,2,2,big,low,unacc 9 | vhigh,vhigh,2,2,big,med,unacc 10 | vhigh,vhigh,2,2,big,high,unacc 11 | vhigh,vhigh,2,4,small,low,unacc 12 | vhigh,vhigh,2,4,small,med,unacc 13 | vhigh,vhigh,2,4,small,high,unacc 14 | vhigh,vhigh,2,4,med,low,unacc 15 | vhigh,vhigh,2,4,med,med,unacc 16 | vhigh,vhigh,2,4,med,high,unacc 17 | vhigh,vhigh,2,4,big,low,unacc 18 | vhigh,vhigh,2,4,big,med,unacc 19 | vhigh,vhigh,2,4,big,high,unacc 20 | vhigh,vhigh,2,more,small,low,unacc 21 | vhigh,vhigh,2,more,small,med,unacc 22 | vhigh,vhigh,2,more,small,high,unacc 23 | vhigh,vhigh,2,more,med,low,unacc 24 | vhigh,vhigh,2,more,med,med,unacc 25 | vhigh,vhigh,2,more,med,high,unacc 26 | vhigh,vhigh,2,more,big,low,unacc 27 | vhigh,vhigh,2,more,big,med,unacc 28 | vhigh,vhigh,2,more,big,high,unacc 29 | vhigh,vhigh,3,2,small,low,unacc 30 | vhigh,vhigh,3,2,small,med,unacc 31 | vhigh,vhigh,3,2,small,high,unacc 32 | vhigh,vhigh,3,2,med,low,unacc 33 | vhigh,vhigh,3,2,med,med,unacc 34 | vhigh,vhigh,3,2,med,high,unacc 35 | vhigh,vhigh,3,2,big,low,unacc 36 | vhigh,vhigh,3,2,big,med,unacc 37 | vhigh,vhigh,3,2,big,high,unacc 38 | vhigh,vhigh,3,4,small,low,unacc 39 | vhigh,vhigh,3,4,small,med,unacc 40 | vhigh,vhigh,3,4,small,high,unacc 41 | vhigh,vhigh,3,4,med,low,unacc 42 | vhigh,vhigh,3,4,med,med,unacc 43 | vhigh,vhigh,3,4,med,high,unacc 44 | vhigh,vhigh,3,4,big,low,unacc 45 | vhigh,vhigh,3,4,big,med,unacc 46 | vhigh,vhigh,3,4,big,high,unacc 47 | vhigh,vhigh,3,more,small,low,unacc 48 | vhigh,vhigh,3,more,small,med,unacc 49 | vhigh,vhigh,3,more,small,high,unacc 50 | vhigh,vhigh,3,more,med,low,unacc 51 | vhigh,vhigh,3,more,med,med,unacc 52 | vhigh,vhigh,3,more,med,high,unacc 53 | vhigh,vhigh,3,more,big,low,unacc 54 | vhigh,vhigh,3,more,big,med,unacc 55 | vhigh,vhigh,3,more,big,high,unacc 56 |
vhigh,vhigh,4,2,small,low,unacc 57 | vhigh,vhigh,4,2,small,med,unacc 58 | vhigh,vhigh,4,2,small,high,unacc 59 | vhigh,vhigh,4,2,med,low,unacc 60 | vhigh,vhigh,4,2,med,med,unacc 61 | vhigh,vhigh,4,2,med,high,unacc 62 | vhigh,vhigh,4,2,big,low,unacc 63 | vhigh,vhigh,4,2,big,med,unacc 64 | vhigh,vhigh,4,2,big,high,unacc 65 | vhigh,vhigh,4,4,small,low,unacc 66 | vhigh,vhigh,4,4,small,med,unacc 67 | vhigh,vhigh,4,4,small,high,unacc 68 | vhigh,vhigh,4,4,med,low,unacc 69 | vhigh,vhigh,4,4,med,med,unacc 70 | vhigh,vhigh,4,4,med,high,unacc 71 | vhigh,vhigh,4,4,big,low,unacc 72 | vhigh,vhigh,4,4,big,med,unacc 73 | vhigh,vhigh,4,4,big,high,unacc 74 | vhigh,vhigh,4,more,small,low,unacc 75 | vhigh,vhigh,4,more,small,med,unacc 76 | vhigh,vhigh,4,more,small,high,unacc 77 | vhigh,vhigh,4,more,med,low,unacc 78 | vhigh,vhigh,4,more,med,med,unacc 79 | vhigh,vhigh,4,more,med,high,unacc 80 | vhigh,vhigh,4,more,big,low,unacc 81 | vhigh,vhigh,4,more,big,med,unacc 82 | vhigh,vhigh,4,more,big,high,unacc 83 | vhigh,vhigh,5more,2,small,low,unacc 84 | vhigh,vhigh,5more,2,small,med,unacc 85 | vhigh,vhigh,5more,2,small,high,unacc 86 | vhigh,vhigh,5more,2,med,low,unacc 87 | vhigh,vhigh,5more,2,med,med,unacc 88 | vhigh,vhigh,5more,2,med,high,unacc 89 | vhigh,vhigh,5more,2,big,low,unacc 90 | vhigh,vhigh,5more,2,big,med,unacc 91 | vhigh,vhigh,5more,2,big,high,unacc 92 | vhigh,vhigh,5more,4,small,low,unacc 93 | vhigh,vhigh,5more,4,small,med,unacc 94 | vhigh,vhigh,5more,4,small,high,unacc 95 | vhigh,vhigh,5more,4,med,low,unacc 96 | vhigh,vhigh,5more,4,med,med,unacc 97 | vhigh,vhigh,5more,4,med,high,unacc 98 | vhigh,vhigh,5more,4,big,low,unacc 99 | vhigh,vhigh,5more,4,big,med,unacc 100 | vhigh,vhigh,5more,4,big,high,unacc 101 | vhigh,vhigh,5more,more,small,low,unacc 102 | vhigh,vhigh,5more,more,small,med,unacc 103 | vhigh,vhigh,5more,more,small,high,unacc 104 | vhigh,vhigh,5more,more,med,low,unacc 105 | vhigh,vhigh,5more,more,med,med,unacc 106 | vhigh,vhigh,5more,more,med,high,unacc 107 | vhigh,vhigh,5more,more,big,low,unacc 108 | vhigh,vhigh,5more,more,big,med,unacc 109 | vhigh,vhigh,5more,more,big,high,unacc 110 | vhigh,high,2,2,small,low,unacc 111 | vhigh,high,2,2,small,med,unacc 112 | vhigh,high,2,2,small,high,unacc 113 | vhigh,high,2,2,med,low,unacc 114 | vhigh,high,2,2,med,med,unacc 115 | vhigh,high,2,2,med,high,unacc 116 | vhigh,high,2,2,big,low,unacc 117 | vhigh,high,2,2,big,med,unacc 118 | vhigh,high,2,2,big,high,unacc 119 | vhigh,high,2,4,small,low,unacc 120 | vhigh,high,2,4,small,med,unacc 121 | vhigh,high,2,4,small,high,unacc 122 | vhigh,high,2,4,med,low,unacc 123 | vhigh,high,2,4,med,med,unacc 124 | vhigh,high,2,4,med,high,unacc 125 | vhigh,high,2,4,big,low,unacc 126 | vhigh,high,2,4,big,med,unacc 127 | vhigh,high,2,4,big,high,unacc 128 | vhigh,high,2,more,small,low,unacc 129 | vhigh,high,2,more,small,med,unacc 130 | vhigh,high,2,more,small,high,unacc 131 | vhigh,high,2,more,med,low,unacc 132 | vhigh,high,2,more,med,med,unacc 133 | vhigh,high,2,more,med,high,unacc 134 | vhigh,high,2,more,big,low,unacc 135 | vhigh,high,2,more,big,med,unacc 136 | vhigh,high,2,more,big,high,unacc 137 | vhigh,high,3,2,small,low,unacc 138 | vhigh,high,3,2,small,med,unacc 139 | vhigh,high,3,2,small,high,unacc 140 | vhigh,high,3,2,med,low,unacc 141 | vhigh,high,3,2,med,med,unacc 142 | vhigh,high,3,2,med,high,unacc 143 | vhigh,high,3,2,big,low,unacc 144 | vhigh,high,3,2,big,med,unacc 145 | vhigh,high,3,2,big,high,unacc 146 | vhigh,high,3,4,small,low,unacc 147 | vhigh,high,3,4,small,med,unacc 148 | vhigh,high,3,4,small,high,unacc 149 | 
vhigh,high,3,4,med,low,unacc 150 | vhigh,high,3,4,med,med,unacc 151 | vhigh,high,3,4,med,high,unacc 152 | vhigh,high,3,4,big,low,unacc 153 | vhigh,high,3,4,big,med,unacc 154 | vhigh,high,3,4,big,high,unacc 155 | vhigh,high,3,more,small,low,unacc 156 | vhigh,high,3,more,small,med,unacc 157 | vhigh,high,3,more,small,high,unacc 158 | vhigh,high,3,more,med,low,unacc 159 | vhigh,high,3,more,med,med,unacc 160 | vhigh,high,3,more,med,high,unacc 161 | vhigh,high,3,more,big,low,unacc 162 | vhigh,high,3,more,big,med,unacc 163 | vhigh,high,3,more,big,high,unacc 164 | vhigh,high,4,2,small,low,unacc 165 | vhigh,high,4,2,small,med,unacc 166 | vhigh,high,4,2,small,high,unacc 167 | vhigh,high,4,2,med,low,unacc 168 | vhigh,high,4,2,med,med,unacc 169 | vhigh,high,4,2,med,high,unacc 170 | vhigh,high,4,2,big,low,unacc 171 | vhigh,high,4,2,big,med,unacc 172 | vhigh,high,4,2,big,high,unacc 173 | vhigh,high,4,4,small,low,unacc 174 | vhigh,high,4,4,small,med,unacc 175 | vhigh,high,4,4,small,high,unacc 176 | vhigh,high,4,4,med,low,unacc 177 | vhigh,high,4,4,med,med,unacc 178 | vhigh,high,4,4,med,high,unacc 179 | vhigh,high,4,4,big,low,unacc 180 | vhigh,high,4,4,big,med,unacc 181 | vhigh,high,4,4,big,high,unacc 182 | vhigh,high,4,more,small,low,unacc 183 | vhigh,high,4,more,small,med,unacc 184 | vhigh,high,4,more,small,high,unacc 185 | vhigh,high,4,more,med,low,unacc 186 | vhigh,high,4,more,med,med,unacc 187 | vhigh,high,4,more,med,high,unacc 188 | vhigh,high,4,more,big,low,unacc 189 | vhigh,high,4,more,big,med,unacc 190 | vhigh,high,4,more,big,high,unacc 191 | vhigh,high,5more,2,small,low,unacc 192 | vhigh,high,5more,2,small,med,unacc 193 | vhigh,high,5more,2,small,high,unacc 194 | vhigh,high,5more,2,med,low,unacc 195 | vhigh,high,5more,2,med,med,unacc 196 | vhigh,high,5more,2,med,high,unacc 197 | vhigh,high,5more,2,big,low,unacc 198 | vhigh,high,5more,2,big,med,unacc 199 | vhigh,high,5more,2,big,high,unacc 200 | vhigh,high,5more,4,small,low,unacc 201 | vhigh,high,5more,4,small,med,unacc 202 | vhigh,high,5more,4,small,high,unacc 203 | vhigh,high,5more,4,med,low,unacc 204 | vhigh,high,5more,4,med,med,unacc 205 | vhigh,high,5more,4,med,high,unacc 206 | vhigh,high,5more,4,big,low,unacc 207 | vhigh,high,5more,4,big,med,unacc 208 | vhigh,high,5more,4,big,high,unacc 209 | vhigh,high,5more,more,small,low,unacc 210 | vhigh,high,5more,more,small,med,unacc 211 | vhigh,high,5more,more,small,high,unacc 212 | vhigh,high,5more,more,med,low,unacc 213 | vhigh,high,5more,more,med,med,unacc 214 | vhigh,high,5more,more,med,high,unacc 215 | vhigh,high,5more,more,big,low,unacc 216 | vhigh,high,5more,more,big,med,unacc 217 | vhigh,high,5more,more,big,high,unacc 218 | vhigh,med,2,2,small,low,unacc 219 | vhigh,med,2,2,small,med,unacc 220 | vhigh,med,2,2,small,high,unacc 221 | vhigh,med,2,2,med,low,unacc 222 | vhigh,med,2,2,med,med,unacc 223 | vhigh,med,2,2,med,high,unacc 224 | vhigh,med,2,2,big,low,unacc 225 | vhigh,med,2,2,big,med,unacc 226 | vhigh,med,2,2,big,high,unacc 227 | vhigh,med,2,4,small,low,unacc 228 | vhigh,med,2,4,small,med,unacc 229 | vhigh,med,2,4,small,high,acc 230 | vhigh,med,2,4,med,low,unacc 231 | vhigh,med,2,4,med,med,unacc 232 | vhigh,med,2,4,med,high,acc 233 | vhigh,med,2,4,big,low,unacc 234 | vhigh,med,2,4,big,med,acc 235 | vhigh,med,2,4,big,high,acc 236 | vhigh,med,2,more,small,low,unacc 237 | vhigh,med,2,more,small,med,unacc 238 | vhigh,med,2,more,small,high,unacc 239 | vhigh,med,2,more,med,low,unacc 240 | vhigh,med,2,more,med,med,unacc 241 | vhigh,med,2,more,med,high,acc 242 | vhigh,med,2,more,big,low,unacc 243 
| vhigh,med,2,more,big,med,acc 244 | vhigh,med,2,more,big,high,acc 245 | vhigh,med,3,2,small,low,unacc 246 | vhigh,med,3,2,small,med,unacc 247 | vhigh,med,3,2,small,high,unacc 248 | vhigh,med,3,2,med,low,unacc 249 | vhigh,med,3,2,med,med,unacc 250 | vhigh,med,3,2,med,high,unacc 251 | vhigh,med,3,2,big,low,unacc 252 | vhigh,med,3,2,big,med,unacc 253 | vhigh,med,3,2,big,high,unacc 254 | vhigh,med,3,4,small,low,unacc 255 | vhigh,med,3,4,small,med,unacc 256 | vhigh,med,3,4,small,high,acc 257 | vhigh,med,3,4,med,low,unacc 258 | vhigh,med,3,4,med,med,unacc 259 | vhigh,med,3,4,med,high,acc 260 | vhigh,med,3,4,big,low,unacc 261 | vhigh,med,3,4,big,med,acc 262 | vhigh,med,3,4,big,high,acc 263 | vhigh,med,3,more,small,low,unacc 264 | vhigh,med,3,more,small,med,unacc 265 | vhigh,med,3,more,small,high,acc 266 | vhigh,med,3,more,med,low,unacc 267 | vhigh,med,3,more,med,med,acc 268 | vhigh,med,3,more,med,high,acc 269 | vhigh,med,3,more,big,low,unacc 270 | vhigh,med,3,more,big,med,acc 271 | vhigh,med,3,more,big,high,acc 272 | vhigh,med,4,2,small,low,unacc 273 | vhigh,med,4,2,small,med,unacc 274 | vhigh,med,4,2,small,high,unacc 275 | vhigh,med,4,2,med,low,unacc 276 | vhigh,med,4,2,med,med,unacc 277 | vhigh,med,4,2,med,high,unacc 278 | vhigh,med,4,2,big,low,unacc 279 | vhigh,med,4,2,big,med,unacc 280 | vhigh,med,4,2,big,high,unacc 281 | vhigh,med,4,4,small,low,unacc 282 | vhigh,med,4,4,small,med,unacc 283 | vhigh,med,4,4,small,high,acc 284 | vhigh,med,4,4,med,low,unacc 285 | vhigh,med,4,4,med,med,acc 286 | vhigh,med,4,4,med,high,acc 287 | vhigh,med,4,4,big,low,unacc 288 | vhigh,med,4,4,big,med,acc 289 | vhigh,med,4,4,big,high,acc 290 | vhigh,med,4,more,small,low,unacc 291 | vhigh,med,4,more,small,med,unacc 292 | vhigh,med,4,more,small,high,acc 293 | vhigh,med,4,more,med,low,unacc 294 | vhigh,med,4,more,med,med,acc 295 | vhigh,med,4,more,med,high,acc 296 | vhigh,med,4,more,big,low,unacc 297 | vhigh,med,4,more,big,med,acc 298 | vhigh,med,4,more,big,high,acc 299 | vhigh,med,5more,2,small,low,unacc 300 | vhigh,med,5more,2,small,med,unacc 301 | vhigh,med,5more,2,small,high,unacc 302 | vhigh,med,5more,2,med,low,unacc 303 | vhigh,med,5more,2,med,med,unacc 304 | vhigh,med,5more,2,med,high,unacc 305 | vhigh,med,5more,2,big,low,unacc 306 | vhigh,med,5more,2,big,med,unacc 307 | vhigh,med,5more,2,big,high,unacc 308 | vhigh,med,5more,4,small,low,unacc 309 | vhigh,med,5more,4,small,med,unacc 310 | vhigh,med,5more,4,small,high,acc 311 | vhigh,med,5more,4,med,low,unacc 312 | vhigh,med,5more,4,med,med,acc 313 | vhigh,med,5more,4,med,high,acc 314 | vhigh,med,5more,4,big,low,unacc 315 | vhigh,med,5more,4,big,med,acc 316 | vhigh,med,5more,4,big,high,acc 317 | vhigh,med,5more,more,small,low,unacc 318 | vhigh,med,5more,more,small,med,unacc 319 | vhigh,med,5more,more,small,high,acc 320 | vhigh,med,5more,more,med,low,unacc 321 | vhigh,med,5more,more,med,med,acc 322 | vhigh,med,5more,more,med,high,acc 323 | vhigh,med,5more,more,big,low,unacc 324 | vhigh,med,5more,more,big,med,acc 325 | vhigh,med,5more,more,big,high,acc 326 | vhigh,low,2,2,small,low,unacc 327 | vhigh,low,2,2,small,med,unacc 328 | vhigh,low,2,2,small,high,unacc 329 | vhigh,low,2,2,med,low,unacc 330 | vhigh,low,2,2,med,med,unacc 331 | vhigh,low,2,2,med,high,unacc 332 | vhigh,low,2,2,big,low,unacc 333 | vhigh,low,2,2,big,med,unacc 334 | vhigh,low,2,2,big,high,unacc 335 | vhigh,low,2,4,small,low,unacc 336 | vhigh,low,2,4,small,med,unacc 337 | vhigh,low,2,4,small,high,acc 338 | vhigh,low,2,4,med,low,unacc 339 | vhigh,low,2,4,med,med,unacc 340 | 
vhigh,low,2,4,med,high,acc 341 | vhigh,low,2,4,big,low,unacc 342 | vhigh,low,2,4,big,med,acc 343 | vhigh,low,2,4,big,high,acc 344 | vhigh,low,2,more,small,low,unacc 345 | vhigh,low,2,more,small,med,unacc 346 | vhigh,low,2,more,small,high,unacc 347 | vhigh,low,2,more,med,low,unacc 348 | vhigh,low,2,more,med,med,unacc 349 | vhigh,low,2,more,med,high,acc 350 | vhigh,low,2,more,big,low,unacc 351 | vhigh,low,2,more,big,med,acc 352 | vhigh,low,2,more,big,high,acc 353 | vhigh,low,3,2,small,low,unacc 354 | vhigh,low,3,2,small,med,unacc 355 | vhigh,low,3,2,small,high,unacc 356 | vhigh,low,3,2,med,low,unacc 357 | vhigh,low,3,2,med,med,unacc 358 | vhigh,low,3,2,med,high,unacc 359 | vhigh,low,3,2,big,low,unacc 360 | vhigh,low,3,2,big,med,unacc 361 | vhigh,low,3,2,big,high,unacc 362 | vhigh,low,3,4,small,low,unacc 363 | vhigh,low,3,4,small,med,unacc 364 | vhigh,low,3,4,small,high,acc 365 | vhigh,low,3,4,med,low,unacc 366 | vhigh,low,3,4,med,med,unacc 367 | vhigh,low,3,4,med,high,acc 368 | vhigh,low,3,4,big,low,unacc 369 | vhigh,low,3,4,big,med,acc 370 | vhigh,low,3,4,big,high,acc 371 | vhigh,low,3,more,small,low,unacc 372 | vhigh,low,3,more,small,med,unacc 373 | vhigh,low,3,more,small,high,acc 374 | vhigh,low,3,more,med,low,unacc 375 | vhigh,low,3,more,med,med,acc 376 | vhigh,low,3,more,med,high,acc 377 | vhigh,low,3,more,big,low,unacc 378 | vhigh,low,3,more,big,med,acc 379 | vhigh,low,3,more,big,high,acc 380 | vhigh,low,4,2,small,low,unacc 381 | vhigh,low,4,2,small,med,unacc 382 | vhigh,low,4,2,small,high,unacc 383 | vhigh,low,4,2,med,low,unacc 384 | vhigh,low,4,2,med,med,unacc 385 | vhigh,low,4,2,med,high,unacc 386 | vhigh,low,4,2,big,low,unacc 387 | vhigh,low,4,2,big,med,unacc 388 | vhigh,low,4,2,big,high,unacc 389 | vhigh,low,4,4,small,low,unacc 390 | vhigh,low,4,4,small,med,unacc 391 | vhigh,low,4,4,small,high,acc 392 | vhigh,low,4,4,med,low,unacc 393 | vhigh,low,4,4,med,med,acc 394 | vhigh,low,4,4,med,high,acc 395 | vhigh,low,4,4,big,low,unacc 396 | vhigh,low,4,4,big,med,acc 397 | vhigh,low,4,4,big,high,acc 398 | vhigh,low,4,more,small,low,unacc 399 | vhigh,low,4,more,small,med,unacc 400 | vhigh,low,4,more,small,high,acc 401 | vhigh,low,4,more,med,low,unacc 402 | vhigh,low,4,more,med,med,acc 403 | vhigh,low,4,more,med,high,acc 404 | vhigh,low,4,more,big,low,unacc 405 | vhigh,low,4,more,big,med,acc 406 | vhigh,low,4,more,big,high,acc 407 | vhigh,low,5more,2,small,low,unacc 408 | vhigh,low,5more,2,small,med,unacc 409 | vhigh,low,5more,2,small,high,unacc 410 | vhigh,low,5more,2,med,low,unacc 411 | vhigh,low,5more,2,med,med,unacc 412 | vhigh,low,5more,2,med,high,unacc 413 | vhigh,low,5more,2,big,low,unacc 414 | vhigh,low,5more,2,big,med,unacc 415 | vhigh,low,5more,2,big,high,unacc 416 | vhigh,low,5more,4,small,low,unacc 417 | vhigh,low,5more,4,small,med,unacc 418 | vhigh,low,5more,4,small,high,acc 419 | vhigh,low,5more,4,med,low,unacc 420 | vhigh,low,5more,4,med,med,acc 421 | vhigh,low,5more,4,med,high,acc 422 | vhigh,low,5more,4,big,low,unacc 423 | vhigh,low,5more,4,big,med,acc 424 | vhigh,low,5more,4,big,high,acc 425 | vhigh,low,5more,more,small,low,unacc 426 | vhigh,low,5more,more,small,med,unacc 427 | vhigh,low,5more,more,small,high,acc 428 | vhigh,low,5more,more,med,low,unacc 429 | vhigh,low,5more,more,med,med,acc 430 | vhigh,low,5more,more,med,high,acc 431 | vhigh,low,5more,more,big,low,unacc 432 | vhigh,low,5more,more,big,med,acc 433 | vhigh,low,5more,more,big,high,acc 434 | high,vhigh,2,2,small,low,unacc 435 | high,vhigh,2,2,small,med,unacc 436 | high,vhigh,2,2,small,high,unacc 437 | 
high,vhigh,2,2,med,low,unacc 438 | high,vhigh,2,2,med,med,unacc 439 | high,vhigh,2,2,med,high,unacc 440 | high,vhigh,2,2,big,low,unacc 441 | high,vhigh,2,2,big,med,unacc 442 | high,vhigh,2,2,big,high,unacc 443 | high,vhigh,2,4,small,low,unacc 444 | high,vhigh,2,4,small,med,unacc 445 | high,vhigh,2,4,small,high,unacc 446 | high,vhigh,2,4,med,low,unacc 447 | high,vhigh,2,4,med,med,unacc 448 | high,vhigh,2,4,med,high,unacc 449 | high,vhigh,2,4,big,low,unacc 450 | high,vhigh,2,4,big,med,unacc 451 | high,vhigh,2,4,big,high,unacc 452 | high,vhigh,2,more,small,low,unacc 453 | high,vhigh,2,more,small,med,unacc 454 | high,vhigh,2,more,small,high,unacc 455 | high,vhigh,2,more,med,low,unacc 456 | high,vhigh,2,more,med,med,unacc 457 | high,vhigh,2,more,med,high,unacc 458 | high,vhigh,2,more,big,low,unacc 459 | high,vhigh,2,more,big,med,unacc 460 | high,vhigh,2,more,big,high,unacc 461 | high,vhigh,3,2,small,low,unacc 462 | high,vhigh,3,2,small,med,unacc 463 | high,vhigh,3,2,small,high,unacc 464 | high,vhigh,3,2,med,low,unacc 465 | high,vhigh,3,2,med,med,unacc 466 | high,vhigh,3,2,med,high,unacc 467 | high,vhigh,3,2,big,low,unacc 468 | high,vhigh,3,2,big,med,unacc 469 | high,vhigh,3,2,big,high,unacc 470 | high,vhigh,3,4,small,low,unacc 471 | high,vhigh,3,4,small,med,unacc 472 | high,vhigh,3,4,small,high,unacc 473 | high,vhigh,3,4,med,low,unacc 474 | high,vhigh,3,4,med,med,unacc 475 | high,vhigh,3,4,med,high,unacc 476 | high,vhigh,3,4,big,low,unacc 477 | high,vhigh,3,4,big,med,unacc 478 | high,vhigh,3,4,big,high,unacc 479 | high,vhigh,3,more,small,low,unacc 480 | high,vhigh,3,more,small,med,unacc 481 | high,vhigh,3,more,small,high,unacc 482 | high,vhigh,3,more,med,low,unacc 483 | high,vhigh,3,more,med,med,unacc 484 | high,vhigh,3,more,med,high,unacc 485 | high,vhigh,3,more,big,low,unacc 486 | high,vhigh,3,more,big,med,unacc 487 | high,vhigh,3,more,big,high,unacc 488 | high,vhigh,4,2,small,low,unacc 489 | high,vhigh,4,2,small,med,unacc 490 | high,vhigh,4,2,small,high,unacc 491 | high,vhigh,4,2,med,low,unacc 492 | high,vhigh,4,2,med,med,unacc 493 | high,vhigh,4,2,med,high,unacc 494 | high,vhigh,4,2,big,low,unacc 495 | high,vhigh,4,2,big,med,unacc 496 | high,vhigh,4,2,big,high,unacc 497 | high,vhigh,4,4,small,low,unacc 498 | high,vhigh,4,4,small,med,unacc 499 | high,vhigh,4,4,small,high,unacc 500 | high,vhigh,4,4,med,low,unacc 501 | high,vhigh,4,4,med,med,unacc 502 | high,vhigh,4,4,med,high,unacc 503 | high,vhigh,4,4,big,low,unacc 504 | high,vhigh,4,4,big,med,unacc 505 | high,vhigh,4,4,big,high,unacc 506 | high,vhigh,4,more,small,low,unacc 507 | high,vhigh,4,more,small,med,unacc 508 | high,vhigh,4,more,small,high,unacc 509 | high,vhigh,4,more,med,low,unacc 510 | high,vhigh,4,more,med,med,unacc 511 | high,vhigh,4,more,med,high,unacc 512 | high,vhigh,4,more,big,low,unacc 513 | high,vhigh,4,more,big,med,unacc 514 | high,vhigh,4,more,big,high,unacc 515 | high,vhigh,5more,2,small,low,unacc 516 | high,vhigh,5more,2,small,med,unacc 517 | high,vhigh,5more,2,small,high,unacc 518 | high,vhigh,5more,2,med,low,unacc 519 | high,vhigh,5more,2,med,med,unacc 520 | high,vhigh,5more,2,med,high,unacc 521 | high,vhigh,5more,2,big,low,unacc 522 | high,vhigh,5more,2,big,med,unacc 523 | high,vhigh,5more,2,big,high,unacc 524 | high,vhigh,5more,4,small,low,unacc 525 | high,vhigh,5more,4,small,med,unacc 526 | high,vhigh,5more,4,small,high,unacc 527 | high,vhigh,5more,4,med,low,unacc 528 | high,vhigh,5more,4,med,med,unacc 529 | high,vhigh,5more,4,med,high,unacc 530 | high,vhigh,5more,4,big,low,unacc 531 | 
high,vhigh,5more,4,big,med,unacc 532 | high,vhigh,5more,4,big,high,unacc 533 | high,vhigh,5more,more,small,low,unacc 534 | high,vhigh,5more,more,small,med,unacc 535 | high,vhigh,5more,more,small,high,unacc 536 | high,vhigh,5more,more,med,low,unacc 537 | high,vhigh,5more,more,med,med,unacc 538 | high,vhigh,5more,more,med,high,unacc 539 | high,vhigh,5more,more,big,low,unacc 540 | high,vhigh,5more,more,big,med,unacc 541 | high,vhigh,5more,more,big,high,unacc 542 | high,high,2,2,small,low,unacc 543 | high,high,2,2,small,med,unacc 544 | high,high,2,2,small,high,unacc 545 | high,high,2,2,med,low,unacc 546 | high,high,2,2,med,med,unacc 547 | high,high,2,2,med,high,unacc 548 | high,high,2,2,big,low,unacc 549 | high,high,2,2,big,med,unacc 550 | high,high,2,2,big,high,unacc 551 | high,high,2,4,small,low,unacc 552 | high,high,2,4,small,med,unacc 553 | high,high,2,4,small,high,acc 554 | high,high,2,4,med,low,unacc 555 | high,high,2,4,med,med,unacc 556 | high,high,2,4,med,high,acc 557 | high,high,2,4,big,low,unacc 558 | high,high,2,4,big,med,acc 559 | high,high,2,4,big,high,acc 560 | high,high,2,more,small,low,unacc 561 | high,high,2,more,small,med,unacc 562 | high,high,2,more,small,high,unacc 563 | high,high,2,more,med,low,unacc 564 | high,high,2,more,med,med,unacc 565 | high,high,2,more,med,high,acc 566 | high,high,2,more,big,low,unacc 567 | high,high,2,more,big,med,acc 568 | high,high,2,more,big,high,acc 569 | high,high,3,2,small,low,unacc 570 | high,high,3,2,small,med,unacc 571 | high,high,3,2,small,high,unacc 572 | high,high,3,2,med,low,unacc 573 | high,high,3,2,med,med,unacc 574 | high,high,3,2,med,high,unacc 575 | high,high,3,2,big,low,unacc 576 | high,high,3,2,big,med,unacc 577 | high,high,3,2,big,high,unacc 578 | high,high,3,4,small,low,unacc 579 | high,high,3,4,small,med,unacc 580 | high,high,3,4,small,high,acc 581 | high,high,3,4,med,low,unacc 582 | high,high,3,4,med,med,unacc 583 | high,high,3,4,med,high,acc 584 | high,high,3,4,big,low,unacc 585 | high,high,3,4,big,med,acc 586 | high,high,3,4,big,high,acc 587 | high,high,3,more,small,low,unacc 588 | high,high,3,more,small,med,unacc 589 | high,high,3,more,small,high,acc 590 | high,high,3,more,med,low,unacc 591 | high,high,3,more,med,med,acc 592 | high,high,3,more,med,high,acc 593 | high,high,3,more,big,low,unacc 594 | high,high,3,more,big,med,acc 595 | high,high,3,more,big,high,acc 596 | high,high,4,2,small,low,unacc 597 | high,high,4,2,small,med,unacc 598 | high,high,4,2,small,high,unacc 599 | high,high,4,2,med,low,unacc 600 | high,high,4,2,med,med,unacc 601 | high,high,4,2,med,high,unacc 602 | high,high,4,2,big,low,unacc 603 | high,high,4,2,big,med,unacc 604 | high,high,4,2,big,high,unacc 605 | high,high,4,4,small,low,unacc 606 | high,high,4,4,small,med,unacc 607 | high,high,4,4,small,high,acc 608 | high,high,4,4,med,low,unacc 609 | high,high,4,4,med,med,acc 610 | high,high,4,4,med,high,acc 611 | high,high,4,4,big,low,unacc 612 | high,high,4,4,big,med,acc 613 | high,high,4,4,big,high,acc 614 | high,high,4,more,small,low,unacc 615 | high,high,4,more,small,med,unacc 616 | high,high,4,more,small,high,acc 617 | high,high,4,more,med,low,unacc 618 | high,high,4,more,med,med,acc 619 | high,high,4,more,med,high,acc 620 | high,high,4,more,big,low,unacc 621 | high,high,4,more,big,med,acc 622 | high,high,4,more,big,high,acc 623 | high,high,5more,2,small,low,unacc 624 | high,high,5more,2,small,med,unacc 625 | high,high,5more,2,small,high,unacc 626 | high,high,5more,2,med,low,unacc 627 | high,high,5more,2,med,med,unacc 628 | 
high,high,5more,2,med,high,unacc 629 | high,high,5more,2,big,low,unacc 630 | high,high,5more,2,big,med,unacc 631 | high,high,5more,2,big,high,unacc 632 | high,high,5more,4,small,low,unacc 633 | high,high,5more,4,small,med,unacc 634 | high,high,5more,4,small,high,acc 635 | high,high,5more,4,med,low,unacc 636 | high,high,5more,4,med,med,acc 637 | high,high,5more,4,med,high,acc 638 | high,high,5more,4,big,low,unacc 639 | high,high,5more,4,big,med,acc 640 | high,high,5more,4,big,high,acc 641 | high,high,5more,more,small,low,unacc 642 | high,high,5more,more,small,med,unacc 643 | high,high,5more,more,small,high,acc 644 | high,high,5more,more,med,low,unacc 645 | high,high,5more,more,med,med,acc 646 | high,high,5more,more,med,high,acc 647 | high,high,5more,more,big,low,unacc 648 | high,high,5more,more,big,med,acc 649 | high,high,5more,more,big,high,acc 650 | high,med,2,2,small,low,unacc 651 | high,med,2,2,small,med,unacc 652 | high,med,2,2,small,high,unacc 653 | high,med,2,2,med,low,unacc 654 | high,med,2,2,med,med,unacc 655 | high,med,2,2,med,high,unacc 656 | high,med,2,2,big,low,unacc 657 | high,med,2,2,big,med,unacc 658 | high,med,2,2,big,high,unacc 659 | high,med,2,4,small,low,unacc 660 | high,med,2,4,small,med,unacc 661 | high,med,2,4,small,high,acc 662 | high,med,2,4,med,low,unacc 663 | high,med,2,4,med,med,unacc 664 | high,med,2,4,med,high,acc 665 | high,med,2,4,big,low,unacc 666 | high,med,2,4,big,med,acc 667 | high,med,2,4,big,high,acc 668 | high,med,2,more,small,low,unacc 669 | high,med,2,more,small,med,unacc 670 | high,med,2,more,small,high,unacc 671 | high,med,2,more,med,low,unacc 672 | high,med,2,more,med,med,unacc 673 | high,med,2,more,med,high,acc 674 | high,med,2,more,big,low,unacc 675 | high,med,2,more,big,med,acc 676 | high,med,2,more,big,high,acc 677 | high,med,3,2,small,low,unacc 678 | high,med,3,2,small,med,unacc 679 | high,med,3,2,small,high,unacc 680 | high,med,3,2,med,low,unacc 681 | high,med,3,2,med,med,unacc 682 | high,med,3,2,med,high,unacc 683 | high,med,3,2,big,low,unacc 684 | high,med,3,2,big,med,unacc 685 | high,med,3,2,big,high,unacc 686 | high,med,3,4,small,low,unacc 687 | high,med,3,4,small,med,unacc 688 | high,med,3,4,small,high,acc 689 | high,med,3,4,med,low,unacc 690 | high,med,3,4,med,med,unacc 691 | high,med,3,4,med,high,acc 692 | high,med,3,4,big,low,unacc 693 | high,med,3,4,big,med,acc 694 | high,med,3,4,big,high,acc 695 | high,med,3,more,small,low,unacc 696 | high,med,3,more,small,med,unacc 697 | high,med,3,more,small,high,acc 698 | high,med,3,more,med,low,unacc 699 | high,med,3,more,med,med,acc 700 | high,med,3,more,med,high,acc 701 | high,med,3,more,big,low,unacc 702 | high,med,3,more,big,med,acc 703 | high,med,3,more,big,high,acc 704 | high,med,4,2,small,low,unacc 705 | high,med,4,2,small,med,unacc 706 | high,med,4,2,small,high,unacc 707 | high,med,4,2,med,low,unacc 708 | high,med,4,2,med,med,unacc 709 | high,med,4,2,med,high,unacc 710 | high,med,4,2,big,low,unacc 711 | high,med,4,2,big,med,unacc 712 | high,med,4,2,big,high,unacc 713 | high,med,4,4,small,low,unacc 714 | high,med,4,4,small,med,unacc 715 | high,med,4,4,small,high,acc 716 | high,med,4,4,med,low,unacc 717 | high,med,4,4,med,med,acc 718 | high,med,4,4,med,high,acc 719 | high,med,4,4,big,low,unacc 720 | high,med,4,4,big,med,acc 721 | high,med,4,4,big,high,acc 722 | high,med,4,more,small,low,unacc 723 | high,med,4,more,small,med,unacc 724 | high,med,4,more,small,high,acc 725 | high,med,4,more,med,low,unacc 726 | high,med,4,more,med,med,acc 727 | high,med,4,more,med,high,acc 728 | 
high,med,4,more,big,low,unacc 729 | high,med,4,more,big,med,acc 730 | high,med,4,more,big,high,acc 731 | high,med,5more,2,small,low,unacc 732 | high,med,5more,2,small,med,unacc 733 | high,med,5more,2,small,high,unacc 734 | high,med,5more,2,med,low,unacc 735 | high,med,5more,2,med,med,unacc 736 | high,med,5more,2,med,high,unacc 737 | high,med,5more,2,big,low,unacc 738 | high,med,5more,2,big,med,unacc 739 | high,med,5more,2,big,high,unacc 740 | high,med,5more,4,small,low,unacc 741 | high,med,5more,4,small,med,unacc 742 | high,med,5more,4,small,high,acc 743 | high,med,5more,4,med,low,unacc 744 | high,med,5more,4,med,med,acc 745 | high,med,5more,4,med,high,acc 746 | high,med,5more,4,big,low,unacc 747 | high,med,5more,4,big,med,acc 748 | high,med,5more,4,big,high,acc 749 | high,med,5more,more,small,low,unacc 750 | high,med,5more,more,small,med,unacc 751 | high,med,5more,more,small,high,acc 752 | high,med,5more,more,med,low,unacc 753 | high,med,5more,more,med,med,acc 754 | high,med,5more,more,med,high,acc 755 | high,med,5more,more,big,low,unacc 756 | high,med,5more,more,big,med,acc 757 | high,med,5more,more,big,high,acc 758 | high,low,2,2,small,low,unacc 759 | high,low,2,2,small,med,unacc 760 | high,low,2,2,small,high,unacc 761 | high,low,2,2,med,low,unacc 762 | high,low,2,2,med,med,unacc 763 | high,low,2,2,med,high,unacc 764 | high,low,2,2,big,low,unacc 765 | high,low,2,2,big,med,unacc 766 | high,low,2,2,big,high,unacc 767 | high,low,2,4,small,low,unacc 768 | high,low,2,4,small,med,unacc 769 | high,low,2,4,small,high,acc 770 | high,low,2,4,med,low,unacc 771 | high,low,2,4,med,med,unacc 772 | high,low,2,4,med,high,acc 773 | high,low,2,4,big,low,unacc 774 | high,low,2,4,big,med,acc 775 | high,low,2,4,big,high,acc 776 | high,low,2,more,small,low,unacc 777 | high,low,2,more,small,med,unacc 778 | high,low,2,more,small,high,unacc 779 | high,low,2,more,med,low,unacc 780 | high,low,2,more,med,med,unacc 781 | high,low,2,more,med,high,acc 782 | high,low,2,more,big,low,unacc 783 | high,low,2,more,big,med,acc 784 | high,low,2,more,big,high,acc 785 | high,low,3,2,small,low,unacc 786 | high,low,3,2,small,med,unacc 787 | high,low,3,2,small,high,unacc 788 | high,low,3,2,med,low,unacc 789 | high,low,3,2,med,med,unacc 790 | high,low,3,2,med,high,unacc 791 | high,low,3,2,big,low,unacc 792 | high,low,3,2,big,med,unacc 793 | high,low,3,2,big,high,unacc 794 | high,low,3,4,small,low,unacc 795 | high,low,3,4,small,med,unacc 796 | high,low,3,4,small,high,acc 797 | high,low,3,4,med,low,unacc 798 | high,low,3,4,med,med,unacc 799 | high,low,3,4,med,high,acc 800 | high,low,3,4,big,low,unacc 801 | high,low,3,4,big,med,acc 802 | high,low,3,4,big,high,acc 803 | high,low,3,more,small,low,unacc 804 | high,low,3,more,small,med,unacc 805 | high,low,3,more,small,high,acc 806 | high,low,3,more,med,low,unacc 807 | high,low,3,more,med,med,acc 808 | high,low,3,more,med,high,acc 809 | high,low,3,more,big,low,unacc 810 | high,low,3,more,big,med,acc 811 | high,low,3,more,big,high,acc 812 | high,low,4,2,small,low,unacc 813 | high,low,4,2,small,med,unacc 814 | high,low,4,2,small,high,unacc 815 | high,low,4,2,med,low,unacc 816 | high,low,4,2,med,med,unacc 817 | high,low,4,2,med,high,unacc 818 | high,low,4,2,big,low,unacc 819 | high,low,4,2,big,med,unacc 820 | high,low,4,2,big,high,unacc 821 | high,low,4,4,small,low,unacc 822 | high,low,4,4,small,med,unacc 823 | high,low,4,4,small,high,acc 824 | high,low,4,4,med,low,unacc 825 | high,low,4,4,med,med,acc 826 | high,low,4,4,med,high,acc 827 | high,low,4,4,big,low,unacc 828 | 
high,low,4,4,big,med,acc 829 | high,low,4,4,big,high,acc 830 | high,low,4,more,small,low,unacc 831 | high,low,4,more,small,med,unacc 832 | high,low,4,more,small,high,acc 833 | high,low,4,more,med,low,unacc 834 | high,low,4,more,med,med,acc 835 | high,low,4,more,med,high,acc 836 | high,low,4,more,big,low,unacc 837 | high,low,4,more,big,med,acc 838 | high,low,4,more,big,high,acc 839 | high,low,5more,2,small,low,unacc 840 | high,low,5more,2,small,med,unacc 841 | high,low,5more,2,small,high,unacc 842 | high,low,5more,2,med,low,unacc 843 | high,low,5more,2,med,med,unacc 844 | high,low,5more,2,med,high,unacc 845 | high,low,5more,2,big,low,unacc 846 | high,low,5more,2,big,med,unacc 847 | high,low,5more,2,big,high,unacc 848 | high,low,5more,4,small,low,unacc 849 | high,low,5more,4,small,med,unacc 850 | high,low,5more,4,small,high,acc 851 | high,low,5more,4,med,low,unacc 852 | high,low,5more,4,med,med,acc 853 | high,low,5more,4,med,high,acc 854 | high,low,5more,4,big,low,unacc 855 | high,low,5more,4,big,med,acc 856 | high,low,5more,4,big,high,acc 857 | high,low,5more,more,small,low,unacc 858 | high,low,5more,more,small,med,unacc 859 | high,low,5more,more,small,high,acc 860 | high,low,5more,more,med,low,unacc 861 | high,low,5more,more,med,med,acc 862 | high,low,5more,more,med,high,acc 863 | high,low,5more,more,big,low,unacc 864 | high,low,5more,more,big,med,acc 865 | high,low,5more,more,big,high,acc 866 | med,vhigh,2,2,small,low,unacc 867 | med,vhigh,2,2,small,med,unacc 868 | med,vhigh,2,2,small,high,unacc 869 | med,vhigh,2,2,med,low,unacc 870 | med,vhigh,2,2,med,med,unacc 871 | med,vhigh,2,2,med,high,unacc 872 | med,vhigh,2,2,big,low,unacc 873 | med,vhigh,2,2,big,med,unacc 874 | med,vhigh,2,2,big,high,unacc 875 | med,vhigh,2,4,small,low,unacc 876 | med,vhigh,2,4,small,med,unacc 877 | med,vhigh,2,4,small,high,acc 878 | med,vhigh,2,4,med,low,unacc 879 | med,vhigh,2,4,med,med,unacc 880 | med,vhigh,2,4,med,high,acc 881 | med,vhigh,2,4,big,low,unacc 882 | med,vhigh,2,4,big,med,acc 883 | med,vhigh,2,4,big,high,acc 884 | med,vhigh,2,more,small,low,unacc 885 | med,vhigh,2,more,small,med,unacc 886 | med,vhigh,2,more,small,high,unacc 887 | med,vhigh,2,more,med,low,unacc 888 | med,vhigh,2,more,med,med,unacc 889 | med,vhigh,2,more,med,high,acc 890 | med,vhigh,2,more,big,low,unacc 891 | med,vhigh,2,more,big,med,acc 892 | med,vhigh,2,more,big,high,acc 893 | med,vhigh,3,2,small,low,unacc 894 | med,vhigh,3,2,small,med,unacc 895 | med,vhigh,3,2,small,high,unacc 896 | med,vhigh,3,2,med,low,unacc 897 | med,vhigh,3,2,med,med,unacc 898 | med,vhigh,3,2,med,high,unacc 899 | med,vhigh,3,2,big,low,unacc 900 | med,vhigh,3,2,big,med,unacc 901 | med,vhigh,3,2,big,high,unacc 902 | med,vhigh,3,4,small,low,unacc 903 | med,vhigh,3,4,small,med,unacc 904 | med,vhigh,3,4,small,high,acc 905 | med,vhigh,3,4,med,low,unacc 906 | med,vhigh,3,4,med,med,unacc 907 | med,vhigh,3,4,med,high,acc 908 | med,vhigh,3,4,big,low,unacc 909 | med,vhigh,3,4,big,med,acc 910 | med,vhigh,3,4,big,high,acc 911 | med,vhigh,3,more,small,low,unacc 912 | med,vhigh,3,more,small,med,unacc 913 | med,vhigh,3,more,small,high,acc 914 | med,vhigh,3,more,med,low,unacc 915 | med,vhigh,3,more,med,med,acc 916 | med,vhigh,3,more,med,high,acc 917 | med,vhigh,3,more,big,low,unacc 918 | med,vhigh,3,more,big,med,acc 919 | med,vhigh,3,more,big,high,acc 920 | med,vhigh,4,2,small,low,unacc 921 | med,vhigh,4,2,small,med,unacc 922 | med,vhigh,4,2,small,high,unacc 923 | med,vhigh,4,2,med,low,unacc 924 | med,vhigh,4,2,med,med,unacc 925 | med,vhigh,4,2,med,high,unacc 926 | 
med,vhigh,4,2,big,low,unacc 927 | med,vhigh,4,2,big,med,unacc 928 | med,vhigh,4,2,big,high,unacc 929 | med,vhigh,4,4,small,low,unacc 930 | med,vhigh,4,4,small,med,unacc 931 | med,vhigh,4,4,small,high,acc 932 | med,vhigh,4,4,med,low,unacc 933 | med,vhigh,4,4,med,med,acc 934 | med,vhigh,4,4,med,high,acc 935 | med,vhigh,4,4,big,low,unacc 936 | med,vhigh,4,4,big,med,acc 937 | med,vhigh,4,4,big,high,acc 938 | med,vhigh,4,more,small,low,unacc 939 | med,vhigh,4,more,small,med,unacc 940 | med,vhigh,4,more,small,high,acc 941 | med,vhigh,4,more,med,low,unacc 942 | med,vhigh,4,more,med,med,acc 943 | med,vhigh,4,more,med,high,acc 944 | med,vhigh,4,more,big,low,unacc 945 | med,vhigh,4,more,big,med,acc 946 | med,vhigh,4,more,big,high,acc 947 | med,vhigh,5more,2,small,low,unacc 948 | med,vhigh,5more,2,small,med,unacc 949 | med,vhigh,5more,2,small,high,unacc 950 | med,vhigh,5more,2,med,low,unacc 951 | med,vhigh,5more,2,med,med,unacc 952 | med,vhigh,5more,2,med,high,unacc 953 | med,vhigh,5more,2,big,low,unacc 954 | med,vhigh,5more,2,big,med,unacc 955 | med,vhigh,5more,2,big,high,unacc 956 | med,vhigh,5more,4,small,low,unacc 957 | med,vhigh,5more,4,small,med,unacc 958 | med,vhigh,5more,4,small,high,acc 959 | med,vhigh,5more,4,med,low,unacc 960 | med,vhigh,5more,4,med,med,acc 961 | med,vhigh,5more,4,med,high,acc 962 | med,vhigh,5more,4,big,low,unacc 963 | med,vhigh,5more,4,big,med,acc 964 | med,vhigh,5more,4,big,high,acc 965 | med,vhigh,5more,more,small,low,unacc 966 | med,vhigh,5more,more,small,med,unacc 967 | med,vhigh,5more,more,small,high,acc 968 | med,vhigh,5more,more,med,low,unacc 969 | med,vhigh,5more,more,med,med,acc 970 | med,vhigh,5more,more,med,high,acc 971 | med,vhigh,5more,more,big,low,unacc 972 | med,vhigh,5more,more,big,med,acc 973 | med,vhigh,5more,more,big,high,acc 974 | med,high,2,2,small,low,unacc 975 | med,high,2,2,small,med,unacc 976 | med,high,2,2,small,high,unacc 977 | med,high,2,2,med,low,unacc 978 | med,high,2,2,med,med,unacc 979 | med,high,2,2,med,high,unacc 980 | med,high,2,2,big,low,unacc 981 | med,high,2,2,big,med,unacc 982 | med,high,2,2,big,high,unacc 983 | med,high,2,4,small,low,unacc 984 | med,high,2,4,small,med,unacc 985 | med,high,2,4,small,high,acc 986 | med,high,2,4,med,low,unacc 987 | med,high,2,4,med,med,unacc 988 | med,high,2,4,med,high,acc 989 | med,high,2,4,big,low,unacc 990 | med,high,2,4,big,med,acc 991 | med,high,2,4,big,high,acc 992 | med,high,2,more,small,low,unacc 993 | med,high,2,more,small,med,unacc 994 | med,high,2,more,small,high,unacc 995 | med,high,2,more,med,low,unacc 996 | med,high,2,more,med,med,unacc 997 | med,high,2,more,med,high,acc 998 | med,high,2,more,big,low,unacc 999 | med,high,2,more,big,med,acc 1000 | med,high,2,more,big,high,acc 1001 | med,high,3,2,small,low,unacc 1002 | med,high,3,2,small,med,unacc 1003 | med,high,3,2,small,high,unacc 1004 | med,high,3,2,med,low,unacc 1005 | med,high,3,2,med,med,unacc 1006 | med,high,3,2,med,high,unacc 1007 | med,high,3,2,big,low,unacc 1008 | med,high,3,2,big,med,unacc 1009 | med,high,3,2,big,high,unacc 1010 | med,high,3,4,small,low,unacc 1011 | med,high,3,4,small,med,unacc 1012 | med,high,3,4,small,high,acc 1013 | med,high,3,4,med,low,unacc 1014 | med,high,3,4,med,med,unacc 1015 | med,high,3,4,med,high,acc 1016 | med,high,3,4,big,low,unacc 1017 | med,high,3,4,big,med,acc 1018 | med,high,3,4,big,high,acc 1019 | med,high,3,more,small,low,unacc 1020 | med,high,3,more,small,med,unacc 1021 | med,high,3,more,small,high,acc 1022 | med,high,3,more,med,low,unacc 1023 | med,high,3,more,med,med,acc 1024 | 
med,high,3,more,med,high,acc 1025 | med,high,3,more,big,low,unacc 1026 | med,high,3,more,big,med,acc 1027 | med,high,3,more,big,high,acc 1028 | med,high,4,2,small,low,unacc 1029 | med,high,4,2,small,med,unacc 1030 | med,high,4,2,small,high,unacc 1031 | med,high,4,2,med,low,unacc 1032 | med,high,4,2,med,med,unacc 1033 | med,high,4,2,med,high,unacc 1034 | med,high,4,2,big,low,unacc 1035 | med,high,4,2,big,med,unacc 1036 | med,high,4,2,big,high,unacc 1037 | med,high,4,4,small,low,unacc 1038 | med,high,4,4,small,med,unacc 1039 | med,high,4,4,small,high,acc 1040 | med,high,4,4,med,low,unacc 1041 | med,high,4,4,med,med,acc 1042 | med,high,4,4,med,high,acc 1043 | med,high,4,4,big,low,unacc 1044 | med,high,4,4,big,med,acc 1045 | med,high,4,4,big,high,acc 1046 | med,high,4,more,small,low,unacc 1047 | med,high,4,more,small,med,unacc 1048 | med,high,4,more,small,high,acc 1049 | med,high,4,more,med,low,unacc 1050 | med,high,4,more,med,med,acc 1051 | med,high,4,more,med,high,acc 1052 | med,high,4,more,big,low,unacc 1053 | med,high,4,more,big,med,acc 1054 | med,high,4,more,big,high,acc 1055 | med,high,5more,2,small,low,unacc 1056 | med,high,5more,2,small,med,unacc 1057 | med,high,5more,2,small,high,unacc 1058 | med,high,5more,2,med,low,unacc 1059 | med,high,5more,2,med,med,unacc 1060 | med,high,5more,2,med,high,unacc 1061 | med,high,5more,2,big,low,unacc 1062 | med,high,5more,2,big,med,unacc 1063 | med,high,5more,2,big,high,unacc 1064 | med,high,5more,4,small,low,unacc 1065 | med,high,5more,4,small,med,unacc 1066 | med,high,5more,4,small,high,acc 1067 | med,high,5more,4,med,low,unacc 1068 | med,high,5more,4,med,med,acc 1069 | med,high,5more,4,med,high,acc 1070 | med,high,5more,4,big,low,unacc 1071 | med,high,5more,4,big,med,acc 1072 | med,high,5more,4,big,high,acc 1073 | med,high,5more,more,small,low,unacc 1074 | med,high,5more,more,small,med,unacc 1075 | med,high,5more,more,small,high,acc 1076 | med,high,5more,more,med,low,unacc 1077 | med,high,5more,more,med,med,acc 1078 | med,high,5more,more,med,high,acc 1079 | med,high,5more,more,big,low,unacc 1080 | med,high,5more,more,big,med,acc 1081 | med,high,5more,more,big,high,acc 1082 | med,med,2,2,small,low,unacc 1083 | med,med,2,2,small,med,unacc 1084 | med,med,2,2,small,high,unacc 1085 | med,med,2,2,med,low,unacc 1086 | med,med,2,2,med,med,unacc 1087 | med,med,2,2,med,high,unacc 1088 | med,med,2,2,big,low,unacc 1089 | med,med,2,2,big,med,unacc 1090 | med,med,2,2,big,high,unacc 1091 | med,med,2,4,small,low,unacc 1092 | med,med,2,4,small,med,acc 1093 | med,med,2,4,small,high,acc 1094 | med,med,2,4,med,low,unacc 1095 | med,med,2,4,med,med,acc 1096 | med,med,2,4,med,high,acc 1097 | med,med,2,4,big,low,unacc 1098 | med,med,2,4,big,med,acc 1099 | med,med,2,4,big,high,vgood 1100 | med,med,2,more,small,low,unacc 1101 | med,med,2,more,small,med,unacc 1102 | med,med,2,more,small,high,unacc 1103 | med,med,2,more,med,low,unacc 1104 | med,med,2,more,med,med,acc 1105 | med,med,2,more,med,high,acc 1106 | med,med,2,more,big,low,unacc 1107 | med,med,2,more,big,med,acc 1108 | med,med,2,more,big,high,vgood 1109 | med,med,3,2,small,low,unacc 1110 | med,med,3,2,small,med,unacc 1111 | med,med,3,2,small,high,unacc 1112 | med,med,3,2,med,low,unacc 1113 | med,med,3,2,med,med,unacc 1114 | med,med,3,2,med,high,unacc 1115 | med,med,3,2,big,low,unacc 1116 | med,med,3,2,big,med,unacc 1117 | med,med,3,2,big,high,unacc 1118 | med,med,3,4,small,low,unacc 1119 | med,med,3,4,small,med,acc 1120 | med,med,3,4,small,high,acc 1121 | med,med,3,4,med,low,unacc 1122 | med,med,3,4,med,med,acc 1123 
| med,med,3,4,med,high,acc 1124 | med,med,3,4,big,low,unacc 1125 | med,med,3,4,big,med,acc 1126 | med,med,3,4,big,high,vgood 1127 | med,med,3,more,small,low,unacc 1128 | med,med,3,more,small,med,acc 1129 | med,med,3,more,small,high,acc 1130 | med,med,3,more,med,low,unacc 1131 | med,med,3,more,med,med,acc 1132 | med,med,3,more,med,high,vgood 1133 | med,med,3,more,big,low,unacc 1134 | med,med,3,more,big,med,acc 1135 | med,med,3,more,big,high,vgood 1136 | med,med,4,2,small,low,unacc 1137 | med,med,4,2,small,med,unacc 1138 | med,med,4,2,small,high,unacc 1139 | med,med,4,2,med,low,unacc 1140 | med,med,4,2,med,med,unacc 1141 | med,med,4,2,med,high,unacc 1142 | med,med,4,2,big,low,unacc 1143 | med,med,4,2,big,med,unacc 1144 | med,med,4,2,big,high,unacc 1145 | med,med,4,4,small,low,unacc 1146 | med,med,4,4,small,med,acc 1147 | med,med,4,4,small,high,acc 1148 | med,med,4,4,med,low,unacc 1149 | med,med,4,4,med,med,acc 1150 | med,med,4,4,med,high,vgood 1151 | med,med,4,4,big,low,unacc 1152 | med,med,4,4,big,med,acc 1153 | med,med,4,4,big,high,vgood 1154 | med,med,4,more,small,low,unacc 1155 | med,med,4,more,small,med,acc 1156 | med,med,4,more,small,high,acc 1157 | med,med,4,more,med,low,unacc 1158 | med,med,4,more,med,med,acc 1159 | med,med,4,more,med,high,vgood 1160 | med,med,4,more,big,low,unacc 1161 | med,med,4,more,big,med,acc 1162 | med,med,4,more,big,high,vgood 1163 | med,med,5more,2,small,low,unacc 1164 | med,med,5more,2,small,med,unacc 1165 | med,med,5more,2,small,high,unacc 1166 | med,med,5more,2,med,low,unacc 1167 | med,med,5more,2,med,med,unacc 1168 | med,med,5more,2,med,high,unacc 1169 | med,med,5more,2,big,low,unacc 1170 | med,med,5more,2,big,med,unacc 1171 | med,med,5more,2,big,high,unacc 1172 | med,med,5more,4,small,low,unacc 1173 | med,med,5more,4,small,med,acc 1174 | med,med,5more,4,small,high,acc 1175 | med,med,5more,4,med,low,unacc 1176 | med,med,5more,4,med,med,acc 1177 | med,med,5more,4,med,high,vgood 1178 | med,med,5more,4,big,low,unacc 1179 | med,med,5more,4,big,med,acc 1180 | med,med,5more,4,big,high,vgood 1181 | med,med,5more,more,small,low,unacc 1182 | med,med,5more,more,small,med,acc 1183 | med,med,5more,more,small,high,acc 1184 | med,med,5more,more,med,low,unacc 1185 | med,med,5more,more,med,med,acc 1186 | med,med,5more,more,med,high,vgood 1187 | med,med,5more,more,big,low,unacc 1188 | med,med,5more,more,big,med,acc 1189 | med,med,5more,more,big,high,vgood 1190 | med,low,2,2,small,low,unacc 1191 | med,low,2,2,small,med,unacc 1192 | med,low,2,2,small,high,unacc 1193 | med,low,2,2,med,low,unacc 1194 | med,low,2,2,med,med,unacc 1195 | med,low,2,2,med,high,unacc 1196 | med,low,2,2,big,low,unacc 1197 | med,low,2,2,big,med,unacc 1198 | med,low,2,2,big,high,unacc 1199 | med,low,2,4,small,low,unacc 1200 | med,low,2,4,small,med,acc 1201 | med,low,2,4,small,high,good 1202 | med,low,2,4,med,low,unacc 1203 | med,low,2,4,med,med,acc 1204 | med,low,2,4,med,high,good 1205 | med,low,2,4,big,low,unacc 1206 | med,low,2,4,big,med,good 1207 | med,low,2,4,big,high,vgood 1208 | med,low,2,more,small,low,unacc 1209 | med,low,2,more,small,med,unacc 1210 | med,low,2,more,small,high,unacc 1211 | med,low,2,more,med,low,unacc 1212 | med,low,2,more,med,med,acc 1213 | med,low,2,more,med,high,good 1214 | med,low,2,more,big,low,unacc 1215 | med,low,2,more,big,med,good 1216 | med,low,2,more,big,high,vgood 1217 | med,low,3,2,small,low,unacc 1218 | med,low,3,2,small,med,unacc 1219 | med,low,3,2,small,high,unacc 1220 | med,low,3,2,med,low,unacc 1221 | med,low,3,2,med,med,unacc 1222 | 
med,low,3,2,med,high,unacc 1223 | med,low,3,2,big,low,unacc 1224 | med,low,3,2,big,med,unacc 1225 | med,low,3,2,big,high,unacc 1226 | med,low,3,4,small,low,unacc 1227 | med,low,3,4,small,med,acc 1228 | med,low,3,4,small,high,good 1229 | med,low,3,4,med,low,unacc 1230 | med,low,3,4,med,med,acc 1231 | med,low,3,4,med,high,good 1232 | med,low,3,4,big,low,unacc 1233 | med,low,3,4,big,med,good 1234 | med,low,3,4,big,high,vgood 1235 | med,low,3,more,small,low,unacc 1236 | med,low,3,more,small,med,acc 1237 | med,low,3,more,small,high,good 1238 | med,low,3,more,med,low,unacc 1239 | med,low,3,more,med,med,good 1240 | med,low,3,more,med,high,vgood 1241 | med,low,3,more,big,low,unacc 1242 | med,low,3,more,big,med,good 1243 | med,low,3,more,big,high,vgood 1244 | med,low,4,2,small,low,unacc 1245 | med,low,4,2,small,med,unacc 1246 | med,low,4,2,small,high,unacc 1247 | med,low,4,2,med,low,unacc 1248 | med,low,4,2,med,med,unacc 1249 | med,low,4,2,med,high,unacc 1250 | med,low,4,2,big,low,unacc 1251 | med,low,4,2,big,med,unacc 1252 | med,low,4,2,big,high,unacc 1253 | med,low,4,4,small,low,unacc 1254 | med,low,4,4,small,med,acc 1255 | med,low,4,4,small,high,good 1256 | med,low,4,4,med,low,unacc 1257 | med,low,4,4,med,med,good 1258 | med,low,4,4,med,high,vgood 1259 | med,low,4,4,big,low,unacc 1260 | med,low,4,4,big,med,good 1261 | med,low,4,4,big,high,vgood 1262 | med,low,4,more,small,low,unacc 1263 | med,low,4,more,small,med,acc 1264 | med,low,4,more,small,high,good 1265 | med,low,4,more,med,low,unacc 1266 | med,low,4,more,med,med,good 1267 | med,low,4,more,med,high,vgood 1268 | med,low,4,more,big,low,unacc 1269 | med,low,4,more,big,med,good 1270 | med,low,4,more,big,high,vgood 1271 | med,low,5more,2,small,low,unacc 1272 | med,low,5more,2,small,med,unacc 1273 | med,low,5more,2,small,high,unacc 1274 | med,low,5more,2,med,low,unacc 1275 | med,low,5more,2,med,med,unacc 1276 | med,low,5more,2,med,high,unacc 1277 | med,low,5more,2,big,low,unacc 1278 | med,low,5more,2,big,med,unacc 1279 | med,low,5more,2,big,high,unacc 1280 | med,low,5more,4,small,low,unacc 1281 | med,low,5more,4,small,med,acc 1282 | med,low,5more,4,small,high,good 1283 | med,low,5more,4,med,low,unacc 1284 | med,low,5more,4,med,med,good 1285 | med,low,5more,4,med,high,vgood 1286 | med,low,5more,4,big,low,unacc 1287 | med,low,5more,4,big,med,good 1288 | med,low,5more,4,big,high,vgood 1289 | med,low,5more,more,small,low,unacc 1290 | med,low,5more,more,small,med,acc 1291 | med,low,5more,more,small,high,good 1292 | med,low,5more,more,med,low,unacc 1293 | med,low,5more,more,med,med,good 1294 | med,low,5more,more,med,high,vgood 1295 | med,low,5more,more,big,low,unacc 1296 | med,low,5more,more,big,med,good 1297 | med,low,5more,more,big,high,vgood 1298 | low,vhigh,2,2,small,low,unacc 1299 | low,vhigh,2,2,small,med,unacc 1300 | low,vhigh,2,2,small,high,unacc 1301 | low,vhigh,2,2,med,low,unacc 1302 | low,vhigh,2,2,med,med,unacc 1303 | low,vhigh,2,2,med,high,unacc 1304 | low,vhigh,2,2,big,low,unacc 1305 | low,vhigh,2,2,big,med,unacc 1306 | low,vhigh,2,2,big,high,unacc 1307 | low,vhigh,2,4,small,low,unacc 1308 | low,vhigh,2,4,small,med,unacc 1309 | low,vhigh,2,4,small,high,acc 1310 | low,vhigh,2,4,med,low,unacc 1311 | low,vhigh,2,4,med,med,unacc 1312 | low,vhigh,2,4,med,high,acc 1313 | low,vhigh,2,4,big,low,unacc 1314 | low,vhigh,2,4,big,med,acc 1315 | low,vhigh,2,4,big,high,acc 1316 | low,vhigh,2,more,small,low,unacc 1317 | low,vhigh,2,more,small,med,unacc 1318 | low,vhigh,2,more,small,high,unacc 1319 | low,vhigh,2,more,med,low,unacc 1320 | 
low,vhigh,2,more,med,med,unacc 1321 | low,vhigh,2,more,med,high,acc 1322 | low,vhigh,2,more,big,low,unacc 1323 | low,vhigh,2,more,big,med,acc 1324 | low,vhigh,2,more,big,high,acc 1325 | low,vhigh,3,2,small,low,unacc 1326 | low,vhigh,3,2,small,med,unacc 1327 | low,vhigh,3,2,small,high,unacc 1328 | low,vhigh,3,2,med,low,unacc 1329 | low,vhigh,3,2,med,med,unacc 1330 | low,vhigh,3,2,med,high,unacc 1331 | low,vhigh,3,2,big,low,unacc 1332 | low,vhigh,3,2,big,med,unacc 1333 | low,vhigh,3,2,big,high,unacc 1334 | low,vhigh,3,4,small,low,unacc 1335 | low,vhigh,3,4,small,med,unacc 1336 | low,vhigh,3,4,small,high,acc 1337 | low,vhigh,3,4,med,low,unacc 1338 | low,vhigh,3,4,med,med,unacc 1339 | low,vhigh,3,4,med,high,acc 1340 | low,vhigh,3,more,med,low,unacc 1341 | low,vhigh,3,more,med,med,acc 1342 | low,vhigh,3,more,med,high,acc 1343 | low,vhigh,3,more,big,low,unacc 1344 | low,vhigh,3,more,big,med,acc 1345 | low,vhigh,3,more,big,high,acc 1346 | low,vhigh,4,2,small,low,unacc 1347 | low,vhigh,4,2,small,med,unacc 1348 | low,vhigh,4,2,small,high,unacc 1349 | low,vhigh,4,2,med,low,unacc 1350 | low,vhigh,4,2,med,med,unacc 1351 | low,vhigh,4,2,med,high,unacc 1352 | low,vhigh,4,2,big,low,unacc 1353 | low,vhigh,4,2,big,med,unacc 1354 | low,vhigh,4,2,big,high,unacc 1355 | low,vhigh,4,4,small,low,unacc 1356 | low,vhigh,4,4,small,med,unacc 1357 | low,vhigh,4,4,small,high,acc 1358 | low,vhigh,4,4,med,low,unacc 1359 | low,vhigh,4,4,med,med,acc 1360 | low,vhigh,4,4,med,high,acc 1361 | low,vhigh,4,4,big,low,unacc 1362 | low,vhigh,4,4,big,med,acc 1363 | low,vhigh,4,4,big,high,acc 1364 | low,vhigh,4,more,small,low,unacc 1365 | low,vhigh,4,more,small,med,unacc 1366 | low,vhigh,4,more,small,high,acc 1367 | low,vhigh,4,more,med,low,unacc 1368 | low,vhigh,4,more,med,med,acc 1369 | low,vhigh,4,more,med,high,acc 1370 | low,vhigh,4,more,big,low,unacc 1371 | low,vhigh,4,more,big,med,acc 1372 | low,vhigh,4,more,big,high,acc 1373 | low,vhigh,5more,2,small,low,unacc 1374 | low,vhigh,5more,2,small,med,unacc 1375 | low,vhigh,5more,2,small,high,unacc 1376 | low,vhigh,5more,2,med,low,unacc 1377 | low,vhigh,5more,2,med,med,unacc 1378 | low,vhigh,5more,2,med,high,unacc 1379 | low,vhigh,5more,2,big,low,unacc 1380 | low,vhigh,5more,2,big,med,unacc 1381 | low,vhigh,5more,4,big,low,unacc 1382 | low,vhigh,5more,4,big,med,acc 1383 | low,vhigh,5more,4,big,high,acc 1384 | low,vhigh,5more,more,small,low,unacc 1385 | low,vhigh,5more,more,small,med,unacc 1386 | low,vhigh,5more,more,small,high,acc 1387 | low,vhigh,5more,more,med,low,unacc 1388 | low,vhigh,5more,more,med,med,acc 1389 | low,vhigh,5more,more,med,high,acc 1390 | low,vhigh,5more,more,big,low,unacc 1391 | low,vhigh,5more,more,big,med,acc 1392 | low,vhigh,5more,more,big,high,acc 1393 | low,high,2,2,small,low,unacc 1394 | low,high,2,2,small,med,unacc 1395 | low,high,2,2,small,high,unacc 1396 | low,high,2,2,med,low,unacc 1397 | low,high,2,2,med,med,unacc 1398 | low,high,2,2,med,high,unacc 1399 | low,high,2,2,big,low,unacc 1400 | low,high,2,2,big,med,unacc 1401 | low,high,2,2,big,high,unacc 1402 | low,high,2,4,small,low,unacc 1403 | low,high,2,4,small,med,acc 1404 | low,high,2,4,small,high,acc 1405 | low,high,2,4,med,low,unacc 1406 | low,high,2,4,med,med,acc 1407 | low,high,2,4,med,high,acc 1408 | low,high,2,4,big,low,unacc 1409 | low,high,2,4,big,med,acc 1410 | low,high,2,4,big,high,vgood 1411 | low,high,2,more,small,low,unacc 1412 | low,high,2,more,small,med,unacc 1413 | low,high,2,more,small,high,unacc 1414 | low,high,2,more,med,low,unacc 1415 | low,high,2,more,med,med,acc 1416 | 
low,high,2,more,med,high,acc 1417 | low,high,2,more,big,low,unacc 1418 | low,high,2,more,big,med,acc 1419 | low,high,2,more,big,high,vgood 1420 | low,high,3,2,small,low,unacc 1421 | low,high,3,2,small,med,unacc 1422 | low,high,3,2,small,high,unacc 1423 | low,high,3,2,med,low,unacc 1424 | low,high,3,2,med,med,unacc 1425 | low,high,3,2,med,high,unacc 1426 | low,high,3,2,big,low,unacc 1427 | low,high,3,2,big,med,unacc 1428 | low,high,3,2,big,high,unacc 1429 | low,high,3,4,small,low,unacc 1430 | low,high,3,4,small,med,acc 1431 | low,high,3,4,small,high,acc 1432 | low,high,3,4,med,low,unacc 1433 | low,high,3,4,med,med,acc 1434 | low,high,3,4,med,high,acc 1435 | low,high,3,4,big,low,unacc 1436 | low,high,3,4,big,med,acc 1437 | low,high,3,4,big,high,vgood 1438 | low,high,3,more,small,low,unacc 1439 | low,high,3,more,small,med,acc 1440 | low,high,3,more,small,high,acc 1441 | low,high,3,more,med,low,unacc 1442 | low,high,3,more,med,med,acc 1443 | low,high,3,more,med,high,vgood 1444 | low,high,3,more,big,low,unacc 1445 | low,high,3,more,big,med,acc 1446 | low,high,3,more,big,high,vgood 1447 | low,high,4,2,small,low,unacc 1448 | low,high,4,2,small,med,unacc 1449 | low,high,4,2,small,high,unacc 1450 | low,high,4,2,med,low,unacc 1451 | low,high,4,2,med,med,unacc 1452 | low,high,4,2,med,high,unacc 1453 | low,high,4,2,big,low,unacc 1454 | low,high,4,2,big,med,unacc 1455 | low,high,4,2,big,high,unacc 1456 | low,high,4,4,small,low,unacc 1457 | low,high,4,4,small,med,acc 1458 | low,high,4,4,small,high,acc 1459 | low,high,4,4,med,low,unacc 1460 | low,high,4,4,med,med,acc 1461 | low,high,4,4,med,high,vgood 1462 | low,high,4,4,big,low,unacc 1463 | low,high,4,4,big,med,acc 1464 | low,high,4,4,big,high,vgood 1465 | low,high,4,more,small,low,unacc 1466 | low,high,4,more,small,med,acc 1467 | low,high,4,more,small,high,acc 1468 | low,high,4,more,med,low,unacc 1469 | low,high,4,more,med,med,acc 1470 | low,high,4,more,med,high,vgood 1471 | low,high,4,more,big,low,unacc 1472 | low,high,4,more,big,med,acc 1473 | low,high,4,more,big,high,vgood 1474 | low,high,5more,2,small,low,unacc 1475 | low,high,5more,2,small,med,unacc 1476 | low,high,5more,2,small,high,unacc 1477 | low,high,5more,2,med,low,unacc 1478 | low,high,5more,2,med,med,unacc 1479 | low,high,5more,2,med,high,unacc 1480 | low,high,5more,2,big,low,unacc 1481 | low,high,5more,2,big,med,unacc 1482 | low,high,5more,2,big,high,unacc 1483 | low,high,5more,4,small,low,unacc 1484 | low,high,5more,4,small,med,acc 1485 | low,high,5more,4,small,high,acc 1486 | low,high,5more,4,med,low,unacc 1487 | low,high,5more,4,med,med,acc 1488 | low,high,5more,4,med,high,vgood 1489 | low,high,5more,4,big,low,unacc 1490 | low,high,5more,4,big,med,acc 1491 | low,high,5more,4,big,high,vgood 1492 | low,high,5more,more,small,low,unacc 1493 | low,high,5more,more,small,med,acc 1494 | low,high,5more,more,small,high,acc 1495 | low,high,5more,more,med,low,unacc 1496 | low,high,5more,more,med,med,acc 1497 | low,high,5more,more,med,high,vgood 1498 | low,high,5more,more,big,low,unacc 1499 | low,high,5more,more,big,med,acc 1500 | low,high,5more,more,big,high,vgood 1501 | low,med,2,2,small,low,unacc 1502 | low,med,2,2,small,med,unacc 1503 | low,med,2,2,small,high,unacc 1504 | low,med,2,2,med,low,unacc 1505 | low,med,2,2,med,med,unacc 1506 | low,med,2,2,med,high,unacc 1507 | low,med,2,2,big,low,unacc 1508 | low,med,2,2,big,med,unacc 1509 | low,med,2,2,big,high,unacc 1510 | low,med,2,4,small,low,unacc 1511 | low,med,2,4,small,med,acc 1512 | low,med,2,4,small,high,good 1513 | low,med,2,4,med,low,unacc 
1514 | low,med,2,4,med,med,acc 1515 | low,med,2,4,med,high,good 1516 | low,med,2,4,big,low,unacc 1517 | low,med,2,4,big,med,good 1518 | low,med,2,4,big,high,vgood 1519 | low,med,2,more,small,low,unacc 1520 | low,med,2,more,small,med,unacc 1521 | low,med,2,more,small,high,unacc 1522 | low,med,2,more,med,low,unacc 1523 | low,med,2,more,med,med,acc 1524 | low,med,2,more,med,high,good 1525 | low,med,2,more,big,low,unacc 1526 | low,med,2,more,big,med,good 1527 | low,med,2,more,big,high,vgood 1528 | low,med,3,2,small,low,unacc 1529 | low,med,3,2,small,med,unacc 1530 | low,med,3,2,small,high,unacc 1531 | low,med,3,2,med,low,unacc 1532 | low,med,3,2,med,med,unacc 1533 | low,med,3,2,med,high,unacc 1534 | low,med,3,2,big,low,unacc 1535 | low,med,3,2,big,med,unacc 1536 | low,med,3,2,big,high,unacc 1537 | low,med,3,4,small,low,unacc 1538 | low,med,3,4,small,med,acc 1539 | low,med,3,4,small,high,good 1540 | low,med,3,4,med,low,unacc 1541 | low,med,3,4,med,med,acc 1542 | low,med,3,4,med,high,good 1543 | low,med,3,4,big,low,unacc 1544 | low,med,3,4,big,med,good 1545 | low,med,3,4,big,high,vgood 1546 | low,med,3,more,small,low,unacc 1547 | low,med,3,more,small,med,acc 1548 | low,med,3,more,small,high,good 1549 | low,med,3,more,med,low,unacc 1550 | low,med,3,more,med,med,good 1551 | low,med,3,more,med,high,vgood 1552 | low,med,3,more,big,low,unacc 1553 | low,med,3,more,big,med,good 1554 | low,med,3,more,big,high,vgood 1555 | low,med,4,2,small,low,unacc 1556 | low,med,4,2,small,med,unacc 1557 | low,med,4,2,small,high,unacc 1558 | low,med,4,2,med,low,unacc 1559 | low,med,4,2,med,med,unacc 1560 | low,med,4,2,med,high,unacc 1561 | low,med,4,2,big,low,unacc 1562 | low,med,4,2,big,med,unacc 1563 | low,med,4,2,big,high,unacc 1564 | low,med,4,4,small,low,unacc 1565 | low,med,4,4,small,med,acc 1566 | low,med,4,4,small,high,good 1567 | low,med,4,4,med,low,unacc 1568 | low,med,4,4,med,med,good 1569 | low,med,4,4,med,high,vgood 1570 | low,med,4,4,big,low,unacc 1571 | low,med,4,4,big,med,good 1572 | low,med,4,4,big,high,vgood 1573 | low,med,4,more,small,low,unacc 1574 | low,med,4,more,small,med,acc 1575 | low,med,5more,2,small,med,unacc 1576 | low,med,5more,2,small,high,unacc 1577 | low,med,5more,2,med,low,unacc 1578 | low,med,5more,2,med,med,unacc 1579 | low,med,5more,2,med,high,unacc 1580 | low,med,5more,2,big,low,unacc 1581 | low,med,5more,2,big,med,unacc 1582 | low,med,5more,2,big,high,unacc 1583 | low,med,5more,4,small,low,unacc 1584 | low,med,5more,4,small,med,acc 1585 | low,med,5more,4,small,high,good 1586 | low,med,5more,4,med,low,unacc 1587 | low,med,5more,4,med,med,good 1588 | low,med,5more,4,med,high,vgood 1589 | low,med,5more,4,big,low,unacc 1590 | low,med,5more,4,big,med,good 1591 | low,med,5more,4,big,high,vgood 1592 | low,med,5more,more,small,low,unacc 1593 | low,med,5more,more,small,med,acc 1594 | low,med,5more,more,small,high,good 1595 | low,med,5more,more,med,low,unacc 1596 | low,med,5more,more,med,med,good 1597 | low,med,5more,more,med,high,vgood 1598 | low,med,5more,more,big,low,unacc 1599 | low,med,5more,more,big,med,good 1600 | low,med,5more,more,big,high,vgood 1601 | low,low,2,2,small,low,unacc 1602 | low,low,2,2,small,med,unacc 1603 | low,low,2,2,small,high,unacc 1604 | low,low,2,2,med,low,unacc 1605 | low,low,2,2,med,med,unacc 1606 | low,low,2,2,med,high,unacc 1607 | low,low,2,2,big,low,unacc 1608 | low,low,2,2,big,med,unacc 1609 | low,low,2,2,big,high,unacc 1610 | low,low,2,4,small,low,unacc 1611 | low,low,2,more,small,low,unacc 1612 | low,low,2,more,small,med,unacc 1613 | 
low,low,2,more,small,high,unacc 1614 | low,low,2,more,med,low,unacc 1615 | low,low,2,more,med,med,acc 1616 | low,low,3,2,big,low,unacc 1617 | low,low,3,2,big,med,unacc 1618 | low,low,3,2,big,high,unacc 1619 | low,low,3,4,small,low,unacc 1620 | low,low,3,4,small,high,good 1621 | low,low,3,4,med,low,unacc 1622 | low,low,3,4,med,high,good 1623 | low,low,3,4,big,low,unacc 1624 | low,low,3,4,big,med,good 1625 | low,low,3,4,big,high,vgood 1626 | low,low,3,more,small,low,unacc 1627 | low,low,3,more,small,high,good 1628 | low,low,3,more,med,low,unacc 1629 | low,low,3,more,med,med,good 1630 | low,low,3,more,med,high,vgood 1631 | low,low,3,more,big,low,unacc 1632 | low,low,3,more,big,med,good 1633 | low,low,3,more,big,high,vgood 1634 | low,low,4,2,small,low,unacc 1635 | low,low,4,2,small,med,unacc 1636 | low,low,4,2,small,high,unacc 1637 | low,low,4,2,med,low,unacc 1638 | low,low,4,2,med,med,unacc 1639 | low,low,4,2,med,high,unacc 1640 | low,low,4,2,big,low,unacc 1641 | low,low,4,2,big,med,unacc 1642 | low,low,4,2,big,high,unacc 1643 | low,low,4,4,small,low,unacc 1644 | low,low,4,4,small,med,acc 1645 | low,low,4,4,small,high,good 1646 | low,low,4,4,med,low,unacc 1647 | low,low,4,4,med,med,good 1648 | low,low,4,4,med,high,vgood 1649 | low,low,4,4,big,low,unacc 1650 | low,low,4,4,big,med,good 1651 | low,low,4,4,big,high,vgood 1652 | low,low,4,more,small,low,unacc 1653 | low,low,4,more,small,med,acc 1654 | low,low,4,more,small,high,good 1655 | low,low,4,more,med,low,unacc 1656 | low,low,4,more,med,med,good 1657 | low,low,4,more,med,high,vgood 1658 | low,low,4,more,big,low,unacc 1659 | low,low,4,more,big,med,good 1660 | low,low,4,more,big,high,vgood 1661 | low,low,5more,2,small,low,unacc 1662 | low,low,5more,2,med,low,unacc 1663 | low,low,5more,2,med,med,unacc 1664 | low,low,5more,2,med,high,unacc 1665 | low,low,5more,2,big,low,unacc 1666 | low,low,5more,2,big,med,unacc 1667 | low,low,5more,2,big,high,unacc 1668 | low,low,5more,4,small,low,unacc 1669 | low,low,5more,4,small,med,acc 1670 | low,low,5more,4,small,high,good 1671 | low,low,5more,4,med,low,unacc 1672 | low,low,5more,4,med,med,good 1673 | low,low,5more,4,med,high,vgood 1674 | low,low,5more,4,big,low,unacc 1675 | low,low,5more,4,big,med,good 1676 | low,low,5more,4,big,high,vgood 1677 | low,low,5more,more,small,low,unacc 1678 | low,low,5more,more,small,med,acc 1679 | low,low,5more,more,small,high,good 1680 | low,vhigh,5more,2,big,high,unacc 1681 | low,vhigh,5more,4,small,low,unacc 1682 | low,vhigh,5more,4,small,med,unacc 1683 | low,low,3,2,med,high,unacc 1684 | low,low,3,4,med,med,acc 1685 | low,vhigh,5more,4,small,high,acc 1686 | low,vhigh,5more,4,med,low,unacc 1687 | low,low,5more,2,small,high,unacc 1688 | low,low,3,more,small,med,acc 1689 | low,low,3,4,small,med,acc 1690 | low,low,2,more,med,high,good 1691 | low,low,3,2,small,low,unacc 1692 | low,low,3,2,small,med,unacc 1693 | low,low,3,2,small,high,unacc 1694 | low,low,3,2,med,low,unacc 1695 | low,low,5more,more,big,low,unacc 1696 | low,low,5more,2,small,med,unacc 1697 | low,low,2,more,big,low,unacc 1698 | low,vhigh,3,more,small,low,unacc 1699 | low,vhigh,3,more,small,med,unacc 1700 | low,med,4,more,big,low,unacc 1701 | low,med,4,more,big,med,good 1702 | -------------------------------------------------------------------------------- /DistanceDiscri/readme.md: -------------------------------------------------------------------------------- 1 | # DistanceDiscri距离判别模块 2 | 3 | DistanceDiscri距离判别模块包含距离判别算法的实现。 4 | 5 | ## 1. 引用头文件"DistanceDiscri.py" 6 | import DistanceDiscri 7 | 8 | ## 2. 
创建一个DistanceDiscri对象 9 | > 0. 函数原型 10 | 11 | def __init__(self) 12 | 13 | > 1. 创建DistanceDiscri对象无需任何参数。 14 | 15 | dcr=DistanceDiscri.DistanceDiscri() 16 | 17 | ## 3. 使用训练集进行训练 18 | > 0. 函数原型 19 | 20 | def train(self, *data, rowvar=True, label=[]) 21 | 22 | > 1. 使用train成员方法根据训练集进行训练,该方法从训练集中计算出各个类别的协方差矩阵和均值向量(重心)。 23 | > 2. 第一个参数\*data为不定参数,即数目不定的各个类别的训练集,类型为np.array。这些类别分别被编号为0,1,... 24 | > 3. 第二个参数rowvar指定每行或者每列代表一个变量,类型为bool。rowvar=True指定参数data的每行作为一个变量,每列作为一个样本向量;rowvar=False指定参数data的每列作为一个变量,每行作为一个样本向量。默认值为True。 25 | > 4. 第三个参数label指定各个类别所对应的类别名称,类型为list,list中的元素类型为str。该名称必须与各个类别的训练集在不定参数\*data中的顺序一一对应。 26 | 27 | # 训练集 28 | train_0=np.array([[0.1, 0.2, 0.3],[0.4, 0.5, 0.6],[0.7, 0.8, 0.9]]) 29 | train_1=np.array([[1, 2, 3],[4, 5, 6],[7, 8, 9]]) 30 | train_2=np.array([[10, 20, 30],[40, 50, 60],[70, 80, 90]]) 31 | train_3=np.array([[100, 200, 300],[400, 500, 600],[700, 800, 900]]) 32 | 33 | dcr.train(train_0, train_1, train_2, train_3, rowvar=False, label=['类别A','类别B','类别C','类别D']) 34 | 35 | ## 4. 对测试集进行距离判别得到测试集样本所属的类别 36 | > 0. 函数原型 37 | 38 | def discriminate(self, data, rowvar=True) 39 | 40 | > 1. 使用discriminate成员方法判别测试集中的每个样本向量所属的类别,该方法必须在训练(即调用train成员方法)后使用。 41 | > 2. 第一个参数data为测试集样本矩阵,类型为np.array。 42 | > 3. 第二个参数rowvar指定每行或者每列代表一个变量,类型为bool。rowvar=True指定参数data的每行作为一个变量,每列作为一个样本向量;rowvar=False指定参数data的每列作为一个变量,每行作为一个样本向量。默认值为True。 43 | > 4. 返回值为各个样本的分类结果res,类型为list。其中res\[i]为第i个样本所属的类别标签。 44 | 45 | test_data=np.array([[7, 5.8, 9.2],[0.02, 0.14, 0.86],[87, 16, 5]]) 46 | res=dcr.discriminate(test_data,rowvar=False) 47 | 48 | ## 附注: 49 | > 1. example文件夹中展示了使用该模块解决的一个基于汽车评价数据集的微型距离判别分析案例,具体请参见example文件夹下的readme.md说明文档。 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | -------------------------------------------------------------------------------- /FacAnaly/FacAnaly.py: -------------------------------------------------------------------------------- 1 | # coding = utf-8 2 | 3 | import numpy as np 4 | from sklearn import preprocessing 5 | 6 | class FacAnaly: 7 | 8 | def __init__(self, n_components=2): 9 | ''' 10 | : __init__: 初始化方法;n_components参数在此处未被使用,实际保留的因子数由analy方法的同名参数指定 11 | ''' 12 | pass 13 | 14 | def analy(self, data, rowvar=True, n_components=2): 15 | ''' 16 | : analy: 进行因子分析,计算得到因子载荷矩阵 17 | : param data: 样本集矩阵 18 | : type data: np.array 19 | : param rowvar: 指定样本集矩阵的每行或每列代表一个变量;rowvar=True指定每一列代表一个变量(每一行为一个样本),rowvar=False指定每一行代表一个变量(每一列为一个样本) 20 | : type rowvar: bool 21 | : param n_components: 保留的因子数 22 | : type n_components: int 23 | : return: 因子分析结果,即因子载荷矩阵;因子载荷矩阵的每一行代表一个原始变量(特征),每一列代表一个因子,矩阵元素(x,y)表示变量x在因子y上的载荷 24 | : rtype: np.array 25 | ''' 26 | # 1. 首先将样本集矩阵统一变换为rowvar=False的情况,即每一行代表一个变量,每一列代表一个样本 27 | if rowvar==True: 28 | data=data.T 29 | 30 | # 2. 然后对样本集矩阵按变量进行标准化,标准化后每个变量的均值为0,方差为1 31 | data = preprocessing.scale(data, axis = 1) # axis=1指定按行标准化,即对每个变量分别标准化 32 | 33 | # 3. 计算标准化后各变量间的相关系数矩阵 34 | corr = np.corrcoef(data) 35 | #print(corr.shape) 36 | #print(corr) 37 | 38 | # 4. 计算相关矩阵R的特征根和特征向量 39 | root, vec = np.linalg.eig(corr) 40 | dic={} #将特征值和对应的特征向量相对应 41 | for i in range(len(root)): 42 | dic[root[i]]=vec[:,i] #附注:此处容易处理错误,使用库函数时必须严格查阅在线手册,每一个特征值和vec中的一个列对应,而不是与一个行对应,不要想当然地处理参数 43 | 44 | # 5. 
生成载荷矩阵并作为结果返回 45 | root = sorted(root, reverse = True) 46 | #print(root) 47 | res=[] 48 | for i in range(n_components): 49 | res.append((-np.sqrt(root[i]))*dic[root[i]]) #附注: 按照因子分析的课件中的公式应该是math.sqrt(root[i])*dic[root[i]],之所以加负号取相反数,是因为特征向量取反仍然是特征向量,而不取反,结果会和课本的结果相差一个负号 50 | res = np.array(res).T 51 | 52 | return res 53 | 54 | 55 | 56 | 57 | -------------------------------------------------------------------------------- /FacAnaly/data.csv: -------------------------------------------------------------------------------- 1 | 城市,x1,x2,x3,x4,x5,x6,x7,x8,x9,x10,x11,x12 2 | 北 京 ,830.8,38103630,30671.14,127.4,5925388,64413910,434.15,10989365,15,17.3,8.56,44.94 3 | 天 津 ,549.74,40496103,34679,15.38,2045295,18253200,174.5,3254148,18,7.99,7.23,17.45 4 | 石 家庄 ,331.33,11981505,10008.48,8.07,493429,10444919,86.74,1067432,18,7.23,8.28,21.56 5 | 太 原 ,222.63,5183200,15248.11,2.43,333473,6601300,74.55,945212,16,5.06,7.88,20.58 6 | 呼和浩特 ,97.81,2407794,4155.1,2,205779,2554496,28.9,407963,18,3.81,8.92,26.58 7 | 沈 阳 ,440.6,10643612,14635.74,7.3,810889,14229575,101.7,1521548,15,9.32,6.7,28.36 8 | 长 春 ,313.05,15115270,10891.98,6.94,459709,8313564,89.7,1244167,15,11.87,7.03,18.75 9 | 哈 尔 滨 ,454.52,7215089,9517.8,24.99,763600,11536951,168.83,2102165,14,12.75,6.34,18.51 10 | 上 海 ,1041.39,1.03E+08,63861,35.22,8992850,60546000,281.51,7686511,19,14.57,12.92,19.11 11 | 南 京 ,391.67,25093816,14804.68,7.62,1364788,11336202,87.91,1950742,16,9.06,12.13,136.72 12 | 杭 州 ,263.67,32025226,16815.2,8.36,1503888,14664200,75.72,1867776,17,8.93,6.5,23.19 13 | 合 肥 ,160.18,5348605,4640.84,3.39,358694,3592488,37.88,526577,17,14.11,15.72,28.74 14 | 福 州 ,205.43,12889573,8250.39,4.69,674522,8762245,71.3,1073262,18,9.65,7.9,31.6 15 | 南 昌 ,195.46,4149169,4454.45,3.62,314094,4828029,49.79,692717,17,7.37,7.67,23.98 16 | 济 南 ,297.21,13185425,14354.4,6.6,761054,7583525,78.38,1256160,19,7.77,10.62,19.54 17 | 郑 州 ,249.72,9270494,7846.91,8.77,658737,10484859,83.99,1137056,19,10.11,7.63,17.77 18 | 武 汉 ,474.98,13344938,16610.34,13.58,804368,12855341,136.08,1868350,17,6.87,4.16,8.34 19 | 长 沙 ,205.83,5339304,10630.5,6.31,598930,7048500,60.04,1019924,18,10.09,9.1,29.1 20 | 广 州 ,493.32,40178324,28859.45,21.47,2747707,37273276,182.16,5247087,17,11.16,12.76,178.76 21 | 南 宁 ,167.99,2083763,5893.09,4.95,362435,4514961,50.79,668976,18,9.91,9.32,35.12 22 | 海 口 ,76.05,2025643,3304.4,2.72,122541,2843664,22.97,340392,20,5.09,7.07,15.79 23 | 成 都 ,386.23,9700976,28798.2,8.06,895752,14944197,124.03,1894496,17,8.95,10.17,25.59 24 | 贵 阳 ,165.27,3569419,5317.55,5.75,403855,3449487,54.53,664234,16,9.37,3.11,105.35 25 | 昆 明 ,205.34,5809573,12337.86,7.07,601101,7085278,73.34,1045469,15,15.33,4.49,23.33 26 | 西 安 ,312.88,6386627,9392,12.21,648037,12105607,113.73,1535896,15,7.32,4.48,8.82 27 | 兰 州 ,175.54,5215490,5580.8,3.7,205660,4683830,54.91,740661,15,10.33,6.3,11.22 28 | 西 宁 ,105.13,1148959,2037.15,1.24,84397,1749293,20.6,301364,17,11.47,4.92,14.2 29 | 银 川 ,79.2,1464867,2127.17,1.65,122605,1930771,29.12,393035,15,9.26,10.43,40.21 30 | 乌鲁木齐 ,142.94,3110943,12754.02,3.94,409119,4203000,47.42,782873,19,22.89,6.49,20.53 31 | 大 连 ,297.48,15468641,21081.47,6.6,1105405,13101986,82.13,1442215,14,13.79,6.24,40.21 32 | 宁 波 ,168.81,26302862,13797.38,4.8,1394162,10596339,59.88,1418635,17,9.88,6.81,17.65 33 | 厦 门 ,83.74,13201500,3054.82,2.83,701456,3971559,54.78,1042111,20,15.5,8.15,26.44 34 | 青 岛 ,329.96,25588695,30552.6,6.72,1201398,9084693,104.55,1603305,15,14.78,11.41,35.78 35 | 深 圳 ,122.39,52451037,6792.66,10.84,2908370,21994500,104.98,3259900,21,114.91,47.29,177.62 36 | 重 庆 
,753.92,15889928,32450.2,12.83,1615618,18965569,203.79,2535070,21,4.94,4.24,10.8 37 | -------------------------------------------------------------------------------- /FacAnaly/example.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import FacAnaly as fa 3 | import pandas as pd 4 | import math 5 | import sys 6 | 7 | # 1. 按列读取原始数据 8 | np.set_printoptions(suppress = True) 9 | data = pd.read_csv(sys.path[0]+'/data.csv') # 使用'/'拼接路径,在Windows和Linux下均可用;原先的'\\'写法仅适用于Windows 10 | del data['城市'] # 删除无效的城市名一列 11 | col_data = np.array([data['x'+str(i)].values for i in range(1,13)]) # 读取各个特征所在的列 12 | 13 | data=col_data 14 | 15 | fa=fa.FacAnaly() 16 | 17 | print("m = 12的载荷矩阵") 18 | print(fa.analy(data,rowvar=False,n_components=12)) 19 | 20 | print("m = 3的载荷矩阵") 21 | print(fa.analy(data,rowvar=False,n_components=3)) 22 | 23 | print("m = 4的载荷矩阵") 24 | print(fa.analy(data,rowvar=False,n_components=4)) 25 | 26 | print("m = 5的载荷矩阵") 27 | print(fa.analy(data,rowvar=False,n_components=5)) 28 | 
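下面是一段补充的示意代码(笔者所加,并非原仓库文件;其中直接沿用上面example.py中的col_data变量):因子载荷矩阵由相关系数矩阵的特征值构造,因此各因子的方差贡献率可以直接由特征值估算,累计贡献率可以用来辅助选择保留的因子数n_components。

    import numpy as np
    from sklearn import preprocessing

    std_data = preprocessing.scale(col_data, axis=1)   # 与FacAnaly.analy相同的按变量标准化
    corr = np.corrcoef(std_data)                       # 变量间的相关系数矩阵
    root = np.linalg.eigvalsh(corr)[::-1]              # 特征值,从大到小排列(eigvalsh适用于对称矩阵)
    contrib = root / root.sum()                        # 各因子的方差贡献率
    print(contrib)
    print(np.cumsum(contrib))                          # 累计贡献率,可辅助确定n_components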
-------------------------------------------------------------------------------- /FacAnaly/pics/QQ截图20190417005321.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/FacAnaly/pics/QQ截图20190417005321.png -------------------------------------------------------------------------------- /FacAnaly/pics/QQ截图20190417005816.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/FacAnaly/pics/QQ截图20190417005816.png -------------------------------------------------------------------------------- /FacAnaly/pics/QQ截图20190417095029.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/FacAnaly/pics/QQ截图20190417095029.png -------------------------------------------------------------------------------- /FacAnaly/pics/QQ截图20190417095125.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/FacAnaly/pics/QQ截图20190417095125.png -------------------------------------------------------------------------------- /FacAnaly/pics/QQ截图20190417101717.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/FacAnaly/pics/QQ截图20190417101717.png -------------------------------------------------------------------------------- /FacAnaly/pics/QQ截图20190417101728.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/FacAnaly/pics/QQ截图20190417101728.png -------------------------------------------------------------------------------- /FacAnaly/pics/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /FacAnaly/readme.md: -------------------------------------------------------------------------------- 1 | # FacAnaly因子分析模块 2 | 3 | FacAnaly因子分析模块包含统计分析算法中的因子分析算法。 4 | 5 | ## 1. 引用头文件"FacAnaly.py" 6 | import FacAnaly as fa 7 | 8 | ## 2. 创建FacAnaly对象 9 | > 1. 创建FacAnaly对象不需要提供任何参数。 10 | 11 | fa=fa.FacAnaly() 12 | 13 | ## 3. 进行因子分析 14 | > 0. 函数原型 15 | 16 | def analy(self, data, rowvar=True, n_components=2): 17 | 18 | > 1. 使用analy成员方法进行因子分析。 19 | > 2. 该成员方法的第一个参数data为样本集矩阵。 20 | > 3. 该成员方法的第二个参数rowvar指定样本集矩阵data的每行或者每列作为特征,rowvar=True指定样本集矩阵data的每一列作为一个特征,每一行作为一个样本;rowvar=False指定样本集矩阵的每一行作为一个特征,每一列作为一个样本。 21 | > 4. 该成员方法的第三个参数n_components指定保留的主要因子个数,默认值为2。 22 | > 5. 返回值为因子分析的结果,即因子载荷矩阵;因子载荷矩阵的每一行代表一个原始特征,每一列代表一个因子,例如矩阵中坐标为(x,y)的元素表示特征x在因子y上的载荷。 23 | 24 | data=np.array([[1,2,3],[4,5,6]]) 25 | fa=fa.FacAnaly() 26 | print(fa.analy(data,rowvar=False,n_components=2)) 27 | 28 | 29 | ## 示例代码:城市经济数据的因子分析 30 | 考虑如下的城市经济数据。 31 | ![avatar](https://github.com/Happyxianyueveryday/statslibrary/blob/master/FacAnaly/pics/QQ%E6%88%AA%E5%9B%BE20190417095029.png) 32 | ![avatar](https://github.com/Happyxianyueveryday/statslibrary/blob/master/FacAnaly/pics/QQ%E6%88%AA%E5%9B%BE20190417095125.png) 33 | 34 | 我们对上述的城市经济数据进行因子分析,计算出因子载荷矩阵,从而判断每个特征的影响的大小。 35 | 36 | ``` 37 | import numpy as np 38 | import FacAnaly as fa 39 | import pandas as pd 40 | import math 41 | import sys 42 | 43 | # 1. 按列读取原始数据 44 | np.set_printoptions(suppress = True) 45 | data = pd.read_csv(sys.path[0]+'/data.csv') 46 | del data['城市'] # 删除无效的城市名一列 47 | col_data = np.array([data['x'+str(i)].values for i in range(1,13)]) # 读取各个特征所在的列 48 | 49 | data=col_data 50 | 51 | fa=fa.FacAnaly() 52 | 53 | print("m = 12的载荷矩阵") 54 | print(fa.analy(data,rowvar=False,n_components=12)) 55 | 56 | print("m = 3的载荷矩阵") 57 | print(fa.analy(data,rowvar=False,n_components=3)) 58 | 59 | print("m = 4的载荷矩阵") 60 | print(fa.analy(data,rowvar=False,n_components=4)) 61 | 62 | print("m = 5的载荷矩阵") 63 | print(fa.analy(data,rowvar=False,n_components=5)) 64 | ``` 65 | 66 | 分析结果如下所示。 67 | 68 | + m = 12时的载荷矩阵 69 | ![avatar](https://github.com/Happyxianyueveryday/statslibrary/blob/master/FacAnaly/pics/QQ%E6%88%AA%E5%9B%BE20190417005816.png) 70 | 71 | + m = 3,4,5时的载荷矩阵 72 | ![avatar](https://github.com/Happyxianyueveryday/statslibrary/blob/master/FacAnaly/pics/QQ%E6%88%AA%E5%9B%BE20190417005321.png) 73 | ![avatar](https://github.com/Happyxianyueveryday/statslibrary/blob/master/FacAnaly/pics/QQ%E6%88%AA%E5%9B%BE20190417005816.png) 74 | 75 | 76 | 77 | 78 | -------------------------------------------------------------------------------- /FisherDiscri/FisherDiscri.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | def LoadFile(filename): #load input file containing training data 4 | lines = open(filename, "r") # 以文本模式读取;Python 3下若以"rb"读取会得到bytes,后续的split(',')会出错 5 | dataset =[] 6 | for line in lines: 7 | line = line.strip().split(',') 8 | dataset.append(line) 9 | dataset = np.array(dataset).astype(np.float64) 10 | return dataset 11 | 12 | def ByClass(dataset): #separate data by class 13 | classes = np.unique(dataset[:,-1]) 14 | div_class = {} 15 | for i in classes: 16 | div_class[i] = dataset[dataset[:,-1] == i] 17 | return div_class 18 | 19 | def Mean(data): 20 | mean = data.mean(axis = 0) 21 | return mean 22 | 23 | def Tresh(vector, data1, data2): 24 | mu1 = Mean(np.dot(vector, data1.T)) 25 | mu2 = Mean(np.dot(vector, data2.T)) 26 | return (mu1+mu2)/2, mu1, mu2 27 | 28 | 29 | def main(dataset): #assuming given two class problem 30 | div_data = ByClass(dataset) 31 | class1, class2 = div_data # 取出两个类别标签(假定数据恰好为两类) 32 | class1_data, class2_data = div_data[class1], div_data[class2] 33 | class1_data = class1_data[:,:-1] 34 | class2_data = class2_data[:,:-1] 35 | mean1 = Mean(class1_data) 36 | mean2 = Mean(class2_data) 37 | mean = Mean(dataset[:,:-1]) 38 | mean1, mean2, mean = mean1.T, mean2.T, mean.T 39 | 40 | 41 | m,n = class1_data.shape 42 | diff1 = class1_data - np.array(list(mean1)*m).reshape(m,n) 43 | m,n = class2_data.shape 44 | diff2 = class2_data - np.array(list(mean2)*m).reshape(m,n) 45 | diff = np.concatenate([diff1, diff2]) 46 | m, n = diff.shape 47 | withinClass = np.zeros((n,n)) 48 | diff = np.matrix(diff) 49 | for i in range(m): # Python 3中使用range代替Python 2的xrange 50 | withinClass += np.dot(diff[i,:].T, diff[i,:]) 51 | opt_dir_vector = np.dot(np.linalg.inv(withinClass), (mean1 - mean2)) 52 | print('Vector = ', np.matrix(opt_dir_vector).T) # Python 3中print为函数 53 | 54 | threshold, mu1, mu2 = Tresh(opt_dir_vector, class1_data, class2_data) 55 | print('Threshold = ', threshold, 'm1 = ', mu1, 'm2 = ', mu2) 56 | 57 | 58 | if __name__ == '__main__': 59 | filename = 'data.txt' 60 | dataset = LoadFile(filename) 61 | main(dataset) 62 | 
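下面补充一个最小的运行示意(笔者所加,示例数据与文件名均为假设,并非原仓库自带文件):构造一个两类的小训练集写入data.txt,再调用上述脚本求出Fisher判别的投影向量和分类阈值。

    import numpy as np

    # 两类、两个特征的玩具数据,最后一列为类别标签
    demo = np.array([[1.0, 2.0, 0.0],
                     [1.2, 1.9, 0.0],
                     [3.0, 4.1, 1.0],
                     [3.2, 3.9, 1.0]])
    np.savetxt('data.txt', demo, delimiter=',', fmt='%.3f')

    dataset = LoadFile('data.txt')
    main(dataset)   # 输出投影向量Vector与阈值Threshold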
-------------------------------------------------------------------------------- /FisherDiscri/readme.md: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /GeneralStats/GeneralStats.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math as mt 3 | 4 | class GeneralStats: 5 | 6 | def average(self, data, rowvar=True): 7 | ''' 8 | :average: 求解样本的平均数 9 | :param data: 样本集 10 | :type data: np.array 11 | :param rowvar: 指定每一行或者每一列作为样本向量;rowvar=True指定每一列作为一个样本向量,也即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,也即每一列代表一个变量 12 | :type rowvar: bool 13 | :return: 各个变量的平均数组成的向量 14 | :rtype: np.array 15 | ''' 16 | # 1. 统一变换为rowvar==False的情况,即每一列代表一个变量,每一行代表一个样本向量 17 | if rowvar==True: 18 | data=data.T 19 | 20 | # 2. 特别处理一维数组的情况 21 | if data.ndim==1: 22 | return np.array([np.sum(data)/np.shape(data)[0]]) 23 | 24 | # 3. 各个样本向量进行求和 25 | size=np.shape(data)[1] 26 | count=np.shape(data)[0] 27 | add=np.zeros((1,size)) 28 | for i in range(count): 29 | add=np.add(add,data[i]) 30 | 31 | # 4. 求解平均向量 32 | res=np.divide(add,count) 33 | return res 34 | 35 | def median(self, data, rowvar=True): 36 | ''' 37 | :median: 求解样本的中位数 38 | :param data: 样本集 39 | :type data: np.array 40 | :param rowvar: 指定每一行或者每一列作为样本向量;rowvar=True指定每一列作为一个样本向量,也即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,也即每一列代表一个变量 41 | :type rowvar: bool 42 | :return: 各个变量的中位数组成的向量 43 | :rtype: np.array 44 | ''' 45 | # 1. 统一变换为rowvar==True的情况,即每一行代表一个变量,每一列代表一个样本向量 46 | if rowvar==False: 47 | data=data.T 48 | 49 | # 2. 特别处理一维数组的情况 50 | if data.ndim==1: 51 | count=np.shape(data)[0] 52 | data=np.sort(data) 53 | if count%2: 54 | return np.array([data[mt.floor(count/2)]]) 55 | else: 56 | return np.array([(data[mt.floor(count/2)]+data[mt.floor(count/2)-1])/2.0]) 57 | 58 | # 3. 通过排序生成中位数 59 | size=np.shape(data)[0] 60 | count=np.shape(data)[1] 61 | data=np.sort(data, axis=1) # 按行排序;np.sort返回排序后的新数组,避免修改调用者传入的原数组 62 | 63 | 64 | res=np.zeros((1,size)) 65 | 66 | if count%2: 67 | for i in range(size): 68 | res[:,i]=data[i][mt.floor(count/2)] 69 | else: 70 | for i in range(size): 71 | res[:,i]=(data[i][mt.floor(count/2)]+data[i][mt.floor(count/2)-1])/2.0 72 | 73 | return res 74 | 
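    # 补充说明(笔者所加):上述average与median在rowvar=True(每行代表一个变量)时,
    # 结果数值上与numpy的按行聚合一致,可用于快速核对,例如:
    #     gen = GeneralStats()
    #     data = np.array([[1, 1, 2, 2, 3], [2, 2, 3, 3, 5]])
    #     gen.average(data, rowvar=True)   # 数值等于 np.mean(data, axis=1)
    #     gen.median(data, rowvar=True)    # 数值等于 np.median(data, axis=1)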
75 | def mode(self, data, rowvar=True): 76 | ''' 77 | :mode: 求解样本的众数 78 | :param data: 样本集 79 | :type data: np.array 80 | :param rowvar: 指定每一行或者每一列作为样本向量;rowvar=True指定每一列作为一个样本向量,也即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,也即每一列代表一个变量 81 | :type rowvar: bool 82 | :return: 各个变量的众数组成的向量 83 | :rtype: np.array 84 | ''' 85 | # 1. 统一变换为rowvar==True的情况,即每一行代表一个变量,每一列代表一个样本向量 86 | if rowvar==False: 87 | data=data.T 88 | 89 | # 2. 特别处理一维数组的情况 90 | if data.ndim==1: 91 | dic={} 92 | for i in range(np.shape(data)[0]): 93 | if data[i] in dic: 94 | dic[data[i]]+=1 95 | else: 96 | dic[data[i]]=1 97 | res=np.array([max(dic,key=dic.get)]) 98 | return res 99 | 100 | # 3. 生成众数结果 101 | size=np.shape(data)[0] 102 | count=np.shape(data)[1] 103 | res=[] 104 | for i in range(size): 105 | dic={} 106 | for k in range(count): 107 | if data[i][k] in dic: 108 | dic[data[i][k]]+=1 109 | else: 110 | dic[data[i][k]]=1 111 | res.append(max(dic,key=dic.get)) 112 | return np.array([res]) 113 | 114 | def quantile(self, data, fraction, rowvar=True, interpolation='linear'): 115 | ''' 116 | :quantile: 求解样本的分位数 117 | :param data: 样本集 118 | :type data: np.array 119 | :param fraction: 分位值,满足fraction>=0且fraction<=1 120 | :type fraction: float 121 | :param rowvar: 指定每一行或者每一列作为样本向量;rowvar=True指定每一列作为一个样本向量,也即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,也即每一列代表一个变量 122 | :type rowvar: bool 123 | :param interpolation: 此可选参数指定当所需分位数位于两个数据点i<fraction<j之间时所使用的插值方法 -------------------------------------------------------------------------------- /GeneralStats/readme.md: -------------------------------------------------------------------------------- ## 1. 引用头文件"GeneralStats.py" 6 | import GeneralStats as gs 7 | 8 | ## 2. 创建GeneralStats对象 9 | > 1. 创建GeneralStats对象不需要提供任何参数。 10 | 11 | gen=gs.GeneralStats() 12 | 13 | ## 3. 计算样本的平均值 14 | > 0. 函数原型 15 | 16 | def average(self, data, rowvar=True) 17 | 18 | > 1. 使用average成员方法计算样本的平均值。 19 | > 2. 第一个参数data为由各个变量取值,或者由各个样本向量组成的矩阵。类型为np.array。 20 | > 3. 第二个参数rowvar指定每一行或者每一列作为样本向量,类型为bool:rowvar=True指定每一列作为一个样本向量,即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,即每一列代表一个变量。 21 | 22 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 23 | data1=np.array([1,2,3,4,5]) 24 | res=gen.average(data,rowvar=True) 25 | res1=gen.average(data1,rowvar=True) 26 | print("data平均值 = ",res) 27 | print("data1平均值 = ",res1) 28 | 29 | >>> 输出 30 | data平均值 = [[1.8 3. 2.8 3.8]] 31 | data1平均值 = [3.] 32 | 33 | ## 4. 计算样本的中位值 34 | > 0. 函数原型 35 | 36 | def median(self, data, rowvar=True) 37 | 38 | > 1. 使用median成员方法计算样本的中位值。 39 | > 2. 第一个参数data为由各个变量取值,或者由各个样本向量组成的矩阵。类型为np.array。 40 | > 3. 第二个参数rowvar指定每一行或者每一列作为样本向量,类型为bool:rowvar=True指定每一列作为一个样本向量,即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,即每一列代表一个变量。 41 | 42 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 43 | data1=np.array([1,2,3,4,5]) 44 | res=gen.median(data,rowvar=True) 45 | res1=gen.median(data1,rowvar=True) 46 | print("data中位值 = ",res) 47 | print("data1中位值 = ",res1) 48 | 49 | >>> 输出 50 | data中位值 = [[2. 3. 3. 4.]] 51 | data1中位值 = [3] 52 | 53 | ## 5. 计算样本的众数 54 | > 0. 函数原型 55 | 56 | def mode(self, data, rowvar=True) 57 | 58 | > 1. 使用mode成员方法计算样本的众数。 59 | > 2. 第一个参数data为由各个变量取值,或者由各个样本向量组成的矩阵。类型为np.array。 60 | > 3. 第二个参数rowvar指定每一行或者每一列作为样本向量,类型为bool:rowvar=True指定每一列作为一个样本向量,即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,即每一列代表一个变量。 61 | 62 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 63 | data1=np.array([1,2,3,4,5]) 64 | res=gen.mode(data,rowvar=True) 65 | res1=gen.mode(data1,rowvar=True) 66 | print("data众数值 = ",res) 67 | print("data1众数值 = ",res1) 68 | 69 | >>> 输出 70 | data众数值 = [[1 2 3 5]] 71 | data1众数值 = [1] 72 | 73 | 
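> 注(笔者补充): 当同一变量存在多个出现次数相同的取值时,mode内部的max(dic,key=dic.get)只返回最先遇到的那个取值。例如data第一行[1,1,2,2,3]中1和2各出现两次,返回的是先出现的1。可以用标准库的collections.Counter核对这一行为:

    from collections import Counter
    print(Counter([1, 1, 2, 2, 3]).most_common(1))   # [(1, 2)]:与mode返回的1一致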
第四个参数interpolation指定当所需分位数位于两个数据点i和j之间时所使用的插值方法,类型为str,可用的取值如下所示: 83 | > + 'linear': i+fraction*(j-i) 84 | > + 'lower': i 85 | > + 'higher': j 86 | > + 'midpoint': (i+j)/2 87 | > + 若使用该参数取值范围之外的其他值,均将默认使用'midpoint'模式进行分位数的求解 88 | 89 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 90 | data1=np.array([1,2,3,4,5]) 91 | res=gen.quantile(data,0.5,rowvar=True,interpolation='lower') #若元素个数为偶数,则需使用'midpoint'模式使0.5分位数值等价于中位数 92 | res1=gen.quantile(data1,0.5,rowvar=True,interpolation='lower') #若元素个数为奇数,则模式为'lower'的0.5分位数值等价于中位数 93 | print("data 0.5分位数值 = ",res) 94 | print("data1 0.5分位数值 = ",res1) 95 | res=gen.quantile(data,0.25,rowvar=True,interpolation='lower') 96 | res1=gen.quantile(data1,0.25,rowvar=True,interpolation='lower') 97 | print("data 0.25分位数值 = ",res) 98 | print("data1 0.25分位数值 = ",res1) 99 | res=gen.quantile(data,0.75,rowvar=True,interpolation='lower') 100 | res1=gen.quantile(data1,0.75,rowvar=True,interpolation='lower') 101 | print("data 0.75分位数值 = ",res) 102 | print("data1 0.75分位数值 = ",res1) 103 | res=gen.quantile(data,1.0,rowvar=True,interpolation='lower') 104 | res1=gen.quantile(data1,1.0,rowvar=True,interpolation='lower') 105 | print("data 1.0分位数值 = ",res) 106 | print("data1 1.0分位数值 = ",res1) 107 | 108 | >>> 输出 109 | data 0.5分位数值 = [[2. 3. 3. 4.]] 110 | data1 0.5分位数值 = [3] 111 | data 0.25分位数值 = [[1. 2. 3. 3.]] 112 | data1 0.25分位数值 = [2] 113 | data 0.75分位数值 = [[2. 3. 3. 5.]] 114 | data1 0.75分位数值 = [4] 115 | data 1.0分位数值 = [[3. 5. 4. 5.]] 116 | data1 1.0分位数值 = [5] 117 | 118 | ## 7. 计算样本的极差 119 | > 0. 函数原型 120 | 121 | def range(self, data, rowvar=True) 122 | 123 | > 1. 使用range成员方法计算样本的极差。 124 | > 2. 第一个参数data为由各个变量取值,或者由各个样本向量组成的矩阵。类型为np.array。 125 | > 3. 第二个参数rowvar指定每一行或者每一列作为样本向量,类型为bool:rowvar=True指定每一列作为一个样本向量,即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,即每一列代表一个变量。 126 | 127 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 128 | data1=np.array([1,2,3,4,5]) 129 | res=gen.range(data,rowvar=True) 130 | res1=gen.range(data1,rowvar=True) 131 | print("data极差 = ",res) 132 | print("data1极差 = ",res1) 133 | 134 | >>> 输出 135 | data极差 = [[2. 3. 3. 3.]] 136 | data1极差 = [4] 137 | 138 | ## 8. 计算样本的方差 139 | > 0. 函数原型 140 | 141 | def variance(self, data, rowvar=True) 142 | 143 | > 1. 使用variance成员方法计算样本的方差。 144 | > 2. 第一个参数data为由各个变量取值,或者由各个样本向量组成的矩阵。类型为np.array。 145 | > 3. 第二个参数rowvar指定每一行或者每一列作为样本向量,类型为bool:rowvar=True指定每一列作为一个样本向量,即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,即每一列代表一个变量。 146 | 147 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 148 | data1=np.array([1,2,3,4,5]) 149 | res=gen.variance(data,rowvar=True) 150 | res1=gen.variance(data1,rowvar=True) 151 | print("data方差 = ",res) 152 | print("data1方差 = ",res1) 153 | 154 | >>> 输出 155 | data方差 = [[0.56 1.2 0.96 1.36]] 156 | data1方差 = [2.] 157 | 158 | ## 9. 计算样本的标准差 159 | > 0. 函数原型 160 | 161 | def standard_dev(self, data, rowvar=True) 162 | 163 | > 1. 使用standard_dev成员方法计算样本的标准差。 164 | > 2. 第一个参数data为由各个变量取值,或者由各个样本向量组成的矩阵。类型为np.array。 165 | > 3. 第二个参数rowvar指定每一行或者每一列作为样本向量,类型为bool:rowvar=True指定每一列作为一个样本向量,即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,即每一列代表一个变量。 166 | 167 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 168 | data1=np.array([1,2,3,4,5]) 169 | res=gen.standard_dev(data,rowvar=True) 170 | res1=gen.standard_dev(data1,rowvar=True) 171 | print("data标准差 = ",res) 172 | print("data1标准差 = ",res1) 173 | 174 | >>> 输出 175 | data标准差 = [[0.74833148 1.09544512 0.9797959 1.16619038]] 176 | data1标准差 = [1.41421356] 177 | 178 |
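> 附注:从第8节与第9节的输出可以推断,variance与standard_dev计算的是总体方差/总体标准差(平方偏差和除以n),而非无偏样本方差(除以n-1):例如data1=[1,2,3,4,5]的方差输出为2.0而非2.5。如需自行对照,可以借助numpy的ddof参数验证,以下为示意代码(numpy本身即为本库已使用的依赖):

    import numpy as np
    data1=np.array([1,2,3,4,5])
    print(np.var(data1))          # 输出2.0,即总体方差(除以n),与本模块variance的输出一致
    print(np.var(data1,ddof=1))   # 输出2.5,即无偏样本方差(除以n-1)
    print(np.std(data1))          # 输出1.4142...,与本模块standard_dev的输出一致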
## 10. 计算样本的偏度 179 | > 0. 函数原型 180 | 181 | def skewness(self, data, rowvar=True) 182 | 183 | > 1. 使用skewness成员方法计算样本的偏度。 184 | > 2. 第一个参数data为由各个变量取值,或者由各个样本向量组成的矩阵。类型为np.array。 185 | > 3. 第二个参数rowvar指定每一行或者每一列作为样本向量,类型为bool:rowvar=True指定每一列作为一个样本向量,即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,即每一列代表一个变量。 186 | 187 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 188 | data1=np.array([1,2,3,4,5]) 189 | res=gen.skewness(data,rowvar=True) 190 | res1=gen.skewness(data1,rowvar=True) 191 | print("data偏度 = ",res) 192 | print("data1偏度 = ",res1) 193 | res=np.array([skew(data[0]),skew(data[1]),skew(data[2]),skew(data[3])]) 194 | print("使用scipy skew方法验证的data偏度 = ",res) 195 | res1=np.array(skew(data1)) 196 | print("使用scipy skew方法验证的data1偏度 = ",res1) 197 | 198 | >>> 输出 199 | data偏度 = [[ 0.3436216 0.91287093 -0.86752762 -0.36317347]] 200 | data1偏度 = [0.] 201 | 使用scipy skew方法验证的data偏度 = [ 0.3436216 0.91287093 -0.86752762 -0.36317347] 202 | 使用scipy skew方法验证的data1偏度 = 0.0 203 | 204 | 205 | ## 11. 计算样本的峰度 206 | > 0. 函数原型 207 | 208 | def kurtosis(self, data, rowvar=True) 209 | 210 | > 1. 使用kurtosis成员方法计算样本的峰度。 211 | > 2. 第一个参数data为由各个变量取值,或者由各个样本向量组成的矩阵。类型为np.array。 212 | > 3. 第二个参数rowvar指定每一行或者每一列作为样本向量,类型为bool:rowvar=True指定每一列作为一个样本向量,即每一行代表一个变量;rowvar=False指定每一行作为一个样本向量,即每一列代表一个变量。 213 | 214 | data=np.array([[1, 1, 2, 2, 3],[2, 2, 3, 3, 5],[1, 4, 3, 3, 3],[2, 4, 5, 5, 3]]) 215 | data1=np.array([53, 61, 49, 66, 78, 47]) 216 | res=gen.kurtosis(data,rowvar=True) 217 | res1=gen.kurtosis(data1,rowvar=True) 218 | print("data峰度 = ",res) 219 | print("data1峰度 = ",res1) 220 | data_0=pd.Series(data[0]) 221 | data_1=pd.Series(data[1]) 222 | data_2=pd.Series(data[2]) 223 | data_3=pd.Series(data[3]) 224 | print("使用pandas kurt方法验证的data峰度 = ",[data_0.kurt(),data_1.kurt(),data_2.kurt(),data_3.kurt()]) 225 | data1=pd.Series(data1) 226 | print("使用pandas kurt方法验证的data1峰度 = ",data1.kurt()) 227 | 228 | >>> 输出 229 | data峰度 = [[-0.6122449 2. 2.91666667 -1.48788927]] 230 | data1峰度 = [-0.26316554] 231 | 使用pandas kurt方法验证的data峰度 = [-0.6122448979591839, 2.0, 2.9166666666666625, -1.4878892733564015] 232 | 使用pandas kurt方法验证的data1峰度 = -0.2631655441038463 233 | 234 | 235 | 236 | 237 | -------------------------------------------------------------------------------- /Kmeans/Kmeans.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import random as rd 3 | 4 | class Kmeans: 5 | 6 | def __init__(self, data, kind=2, rowsam=True): 7 | ''' 8 | :__init__: Kmeans类初始化 9 | :param data: 样本矩阵,样本矩阵中的每个样本向量(每行或者每列)将被依次编号为0,1,2,... 10 | :type data: np.array 11 | :param kind: 聚类的类别数量,默认值为2,即聚类为2类,这些类别将被依次编号为0,1,2,... 12 | :type kind: int 13 | :param rowsam: 指定每一行或者每一列作为样本向量,rowsam=True指定每一行作为一个样本向量,rowsam=False指定每一列作为一个样本向量,默认值为rowsam=True 14 | :type rowsam: bool 15 | ''' 16 | # 1. 将样本矩阵统一转换为rowsam=True的情况,即每一行作为一个样本向量 17 | if rowsam==False: 18 | data=data.T 19 | 20 | # 2. 使用原始样本矩阵初始化类的变量 21 | self.__data=data 22 | self.__kind=kind 23 | self.__res=[] 24 | 25 | return 26 | 27 | def reload(self, data, kind=2, rowsam=True): 28 | ''' 29 | :reload: 重新载入Kmeans类对象的数据 30 | :param data: 新的样本矩阵,样本矩阵中的每个样本向量(每行或者每列)将被依次编号为0,1,2,... 31 | :type data: np.array 32 | :param kind: 新的聚类的类别数量,默认值为2,即聚类为2类,这些类别将被依次编号为0,1,2,... 33 | :type kind: int 34 | :param rowsam: 指定每一行或者每一列作为样本向量,rowsam=True指定每一行作为一个样本向量,rowsam=False指定每一列作为一个样本向量,默认值为rowsam=True 35 | :type rowsam: bool 36 | ''' 37 | # 1. 将样本矩阵统一转换为rowsam=True的情况,即每一行作为一个样本向量 38 | if rowsam==False: 39 | data=data.T 40 | 41 | # 2. 
重设样本矩阵和类别数量 42 | self.__data=data 43 | self.__kind=kind 44 | 45 | return 46 | 47 | def getdata(self, rowsam=True): 48 | ''' 49 | :getdata: 返回准备聚类的原始样本向量矩阵 50 | :param rowsam: 指定每一行或者每一列作为样本向量,rowsam=True指定每一行作为一个样本向量,rowsam=False指定每一列作为一个样本向量,默认值为rowsam=True 51 | :type rowsam: bool 52 | :return: 原始样本向量矩阵 53 | :rtype: np.array 54 | ''' 55 | if rowsam==False: 56 | return self.__data.T 57 | else: 58 | return self.__data 59 | 60 | def getkind(self): 61 | ''' 62 | :getkind: 返回聚类的类别数量 63 | :return: 聚类的类别数量 64 | :rtype: int 65 | ''' 66 | return self.__kind 67 | 68 | def cluster(self): 69 | ''' 70 | :cluster: 执行Kmeans聚类,经典Kmeans聚类使用欧氏距离 71 | :return: 聚类结果,聚类结果res的格式为:二维list数组形式,其中res[i]即为编号为i的类别所含有的样本编号的列表 72 | :rtype: list 73 | ''' 74 | # 1. 首先从原始数据中随机选择self.__kind个样本作为各个类别的质心 75 | size=np.shape(self.__data)[0] #样本数量 76 | count=np.shape(self.__data)[1] #单个样本维度 77 | 78 | focus=[] #各个类别的质心集合 79 | for i in range(self.__kind): 80 | focus.append(rd.choice(self.__data)) #随机选择self.__kind个样本作为质心(注意: rd.choice可能重复选中同一样本,此时可能出现空类别,空类别的质心在迭代中保持不变) 81 | focus=np.array(focus) 82 | 83 | # 2. 将所有的样本分类到距离类别质心最近的类别中,然后更新类别的质心为类别中所有样本的均值向量,接着重复上述步骤直到所有类别的质心都不再变化 84 | res=[] #各类别的样本集合 85 | 86 | while 1: 87 | # 2.1 对每个样本进行分类 88 | res=[[] for i in range(self.__kind)] #每轮迭代的各类别的样本集合,这里需要特别注意每轮迭代过程中需要清空上一次迭代的结果 89 | 90 | for i in range(size): 91 | dist=[0 for i in range(self.__kind)] #样本到各个质心的距离 92 | for k in range(self.__kind): 93 | dist[k]=np.linalg.norm(self.__data[i]-focus[k]) 94 | res[dist.index(min(dist))].append(i) 95 | 96 | # 2.2 重新计算类别质心 97 | newfocus=np.array([[0.0 for i in range(count)] for k in range(self.__kind)]) 98 | for i in range(self.__kind): 99 | if len(res[i])!=0: #类别中有样本才重新计算质心,否则直接取原质心 100 | for k in range(len(res[i])): 101 | newfocus[i]+=self.__data[res[i][k]] 102 | newfocus[i]/=len(res[i]) 103 | else: 104 | newfocus[i]=focus[i] 105 | 106 | # 2.3 判断新的质心是否和原质心相等 107 | if (focus==newfocus).all(): 108 | break 109 | else: 110 | focus=newfocus 111 | 112 | self.__res=res 113 | return res 114 | 115 | 116 | def accuracy(self, real): 117 | ''' 118 | :accuracy: 根据聚类结果计算准确度,本方法需要在调用cluster方法得到聚类结果后使用才可以得到正确结果 119 | :param real: 样本真实分类标签结果,为一维np.array数组形式,其中real[i]即为编号为i的样本的真实类别标签 120 | :type real: np.array 121 | :return: Kmeans聚类准确率。这里准确率的定义是,将每个类别中出现次数最多的真实标签作为整个类别的标签,样本的标签与所在类别标签相等即认为成功预测,最终准确率即为成功预测的样本数量除以样本总数 122 | :rtype: float 123 | ''' 124 | # 1. 首先计算每个类别中出现次数最多的样本标签,将该标签作为类别的标签 125 | res=self.__res 126 | tags=[0 for i in range(self.__kind)] 127 | for i in range(len(res)): 128 | counts=[0 for i in range(self.__kind)] 129 | for k in range(len(res[i])): 130 | counts[real[res[i][k]]]+=1 131 | tags[i]=counts.index(max(counts)) 132 | 133 | # 2. 
然后计算准确率 134 | total=0.0 135 | corr=0.0 136 | for i in range(len(res)): 137 | for k in range(len(res[i])): 138 | if real[res[i][k]]==tags[i]: 139 | corr+=1 140 | total+=1 141 | 142 | return corr/total 143 | -------------------------------------------------------------------------------- /Kmeans/example.py: -------------------------------------------------------------------------------- 1 | import Kmeans as km 2 | import numpy as np 3 | 4 | if __name__ == "__main__": 5 | 6 | data=np.array([[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15],[16,17,18,19,20],[21,22,23,24,25],[26,27,28,29,30]]) 7 | label=np.array([0,0,0,1,1,1]) 8 | 9 | kmeans=km.Kmeans(data,kind=2,rowsam=True) 10 | 11 | res=kmeans.cluster() 12 | print("聚类结果为 res = ", res) 13 | 14 | acc=kmeans.accuracy(label) 15 | print("聚类准确度为 acc = ",acc) -------------------------------------------------------------------------------- /Kmeans/readme.md: -------------------------------------------------------------------------------- 1 | # Kmeans聚类模块 2 | 3 | Kmeans聚类模块包含Kmeans聚类算法(K-means),以及准确率分析等算法的具体实现。 4 | 5 | ## 1. 引用头文件"Kmeans.py" 6 | 7 | import Kmeans as km 8 | 9 | ## 2. 创建一个Kmeans对象 10 | > 0. 函数原型 11 | 12 | def __init__(self, data, kind=2, rowsam=True) 13 | 14 | > 1. Kmeans聚类对象的默认构造函数需要三个参数。 15 | > 2. 第一个参数data为需要聚类的样本集矩阵,类型为二维np.array。 16 | > 3. 第二个参数kind指定聚类中的类别数目,类型为int,默认值为kind=2。 17 | > 4. 第三个参数rowsam指定样本集矩阵(也即第一个参数data)的行或者列作为样本向量,rowsam=True指定data的每一行代表一个样本向量,rowsam=False指定data的每一列代表一个样本向量。 18 | 19 | data=np.array([[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15],[16,17,18,19,20],[21,22,23,24,25],[26,27,28,29,30]]) #样本集矩阵 20 | kmeans=km.Kmeans(data,kind=2,rowsam=True) #6个样本向量进行二分类 21 | 22 | ## 3. 进行Kmeans聚类 23 | > 0. 函数原型 24 | 25 | def cluster(self) 26 | 27 | > 1. 使用cluster成员方法进行Kmeans聚类,该方法无需任何参数。 28 | > 2. 聚类过程中,样本集矩阵中的每个样本向量(每行或者每列)将被依次编号为0,1,2,...,聚类中的各个类别将被依次编号为0,1,2,...,聚类过程存在一定的随机性。 29 | > 3. 返回值为聚类结果res,类型为list,聚类结果res的格式为:二维list数组形式,其中res[i]即为编号为i的类别所含有的样本编号的列表。 30 | 31 | ## 4. 计算准确度 32 | > 0. 函数原型 33 | 34 | def accuracy(self, real) 35 | 36 | > 1. 使用accuracy成员方法计算准确度,该方法必须在聚类完成后使用才能得到正确结果,该成员方法接受一个参数。 37 | > 2. 本方法的准确度定义是:将每个类别中出现次数最多的真实标签作为整个类别的标签,样本的标签与所在类别标签相等即认为成功预测,最终准确率即为成功预测的样本数量除以样本总数。 38 | > 3. 第一个参数real为样本真实分类标签结果,类型为一维np.array,其中real[i]即为编号为i的样本的真实类别标签 39 | > 4. 返回值为本次Kmeans聚类的准确度,类型为float,该准确度在0和1之间。 40 | 41 | label=np.array([0,0,0,1,1,1]) #样本集标签:即data的前三个向量属于类别0,后三个属于类别1 42 | 43 | res=kmeans.cluster() 44 | print("聚类结果为 res = ", res) 45 | 46 | acc=kmeans.accuracy(label) 47 | print("聚类准确度为 acc = ",acc) 48 | 49 | >>> 输出结果1 50 | 聚类结果为 res = [[0, 1, 2], [3, 4, 5]] 51 | 聚类准确度为 acc = 1.0 52 | 53 | >>> 输出结果2 54 | 聚类结果为 res = [[2, 3, 4, 5], [0, 1]] 55 | 聚类准确度为 acc = 0.8333333333333334 56 | 57 | ## 5. 获得聚类的样本集矩阵和类别数 58 | > 0. 函数原型 59 | 60 | def getdata(self, rowsam=True) 61 | def getkind(self) 62 | 63 | > 1. 使用getdata和getkind成员方法分别获取样本集矩阵和类别数。 64 | > 2. getkind方法调用时无需任何参数;getdata方法有一个可选参数rowsam,其含义与构造函数中的rowsam参数相同,用于指定返回的样本集矩阵以每行还是每列作为一个样本向量。 65 | > 3. getdata的返回值为样本集矩阵,类型为二维np.array;getkind的返回值为类别数,类型为int。 66 | 67 | ## 6. 重新载入样本集矩阵和重新修改类别数 68 | > 0. 函数原型 69 | 70 | def reload(self, data, kind=2, rowsam=True) 71 | 72 | > 1. 使用reload成员方法重新设定样本集矩阵和类别数,该方法接受三个参数。 73 | > 2. 第一个参数data为需要聚类的样本集矩阵,类型为二维np.array。 74 | > 3. 第二个参数kind指定聚类中的类别数目,类型为int,默认值为kind=2。 75 | > 4. 第三个参数rowsam指定样本集矩阵(也即第一个参数data)的行或者列作为样本向量,rowsam=True指定data的每一行代表一个样本向量,rowsam=False指定data的每一列代表一个样本向量。 76 | 77 |
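## 7. 附注:与sklearn聚类结果的对照
> 1. 如需对本模块的Kmeans聚类结果进行交叉验证,可以与scikit-learn库的KMeans类的结果进行对照。以下为示意代码(假设已安装scikit-learn,其中n_clusters、n_init等参数为sklearn自身的接口参数):

    import numpy as np
    from sklearn.cluster import KMeans

    data=np.array([[1,2,3,4,5],[6,7,8,9,10],[11,12,13,14,15],[16,17,18,19,20],[21,22,23,24,25],[26,27,28,29,30]])
    km=KMeans(n_clusters=2,n_init=10).fit(data)   # n_init显式给出,以兼容不同版本sklearn的默认值差异
    print(km.labels_)   # 每个样本所属的簇编号;簇编号本身与本模块的类别编号不必一一对应,对照时只需检查分组是否一致

> 2. 由于Kmeans聚类对初始质心敏感且存在随机性,两者的分组结果在个别运行中可能不同,属于正常现象。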
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2019 pzh 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /LinearRegre/LinearRegre.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | from metrics import r2_score # 依赖同目录下的metrics模块提供的r2_score函数 3 | 4 | class LinearRegression: 5 | def __init__(self): 6 | self.coef_ = None # 系数 7 | self.intercept_ = None # 截距 8 | self._theta = None 9 | 10 | def fit_normal(self, X_train, y_train): 11 | '''根据训练数据集X_train, y_train训练,直接用偏导求极值公式计算theta''' 12 | assert X_train.shape[0] == y_train.shape[0], 'the size of X_train and y_train must be the same' 13 | X_b = np.hstack([np.ones([len(X_train), 1]), X_train]) # hstack 横向添加一列 14 | self._theta = np.linalg.inv(X_b.T.dot(X_b)).dot(X_b.T).dot(y_train) # inv 取逆矩阵 15 | self.intercept_ = self._theta[0] 16 | self.coef_ = self._theta[1:] 17 | 18 | return self 19 | 20 | def fit_gd(self, X_train, y_train, eta=0.01, n_iters=1e4): 21 | '''根据训练数据X_train,y_train,使用批量梯度下降法训练''' 22 | assert X_train.shape[0] == y_train.shape[0], 'the size of X_train must be equal to the size of y_train' 23 | 24 | def J(theta, X_b, y): 25 | try: 26 | return np.sum((y - X_b.dot(theta)) ** 2) / len(y) 27 | except: 28 | return float('inf') 29 | 30 | def dJ(theta, X_b, y): 31 | return X_b.T.dot(X_b.dot(theta) - y) * 2. 
/ len(y) 32 | 33 | def gradient_descent(X_b, y, initial_theta, eta, n_iters=1e4, epsilon=1e-8): 34 | theta = initial_theta 35 | cur_iter = 0 36 | 37 | while cur_iter < n_iters: 38 | gradient = dJ(theta, X_b, y) 39 | last_theta = theta 40 | theta = theta - eta * gradient 41 | if (abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon): 42 | break 43 | cur_iter += 1 44 | return theta 45 | 46 | X_b = np.hstack([np.ones((len(X_train), 1)), X_train]) 47 | initial_theta = np.zeros(X_b.shape[1]) 48 | self._theta = gradient_descent(X_b, y_train, initial_theta, eta, n_iters) 49 | self.intercept_ = self._theta[0] 50 | self.coef_ = self._theta[1:] 51 | return self 52 | 53 | def fit_sgd(self, X_train, y_train, n_iters=5, t0=5, t1=50): 54 | # 随机梯度下降法 55 | assert X_train.shape[0] == y_train.shape[0], \ 56 | 'the size of X_train must be equal to the size of y_train' 57 | assert n_iters >= 1 58 | 59 | def dJ_sgd(theta, X_b_i, y_i): 60 | return X_b_i * (X_b_i.dot(theta) - y_i) * 2. 61 | # return X_b_i.T.dot(X_b_i.dot(theta)-y_i)*2 62 | 63 | def sgd(X_b, y, initial_theta, n_iters, t0=5, t1=50): 64 | def learning_rate(t): 65 | return t0 / (t + t1) 66 | 67 | theta = initial_theta 68 | m = len(X_b) 69 | # 对所有的样本看n_iters遍 70 | for cur_iter in range(n_iters): 71 | indexes = np.random.permutation(m) 72 | X_b_new = X_b[indexes] 73 | y_new = y[indexes] 74 | for i in range(m): 75 | gradient = dJ_sgd(theta, X_b_new[i], y_new[i]) 76 | theta = theta - learning_rate(cur_iter * m + i) * gradient 77 | 78 | return theta 79 | 80 | X_b = np.hstack([np.ones((len(X_train), 1)), X_train]) 81 | initial_theta = np.random.randn(X_b.shape[1]) 82 | self._theta = sgd(X_b, y_train, initial_theta, n_iters, t0, t1) 83 | self.intercept_ = self._theta[0] 84 | self.coef_ = self._theta[1:] 85 | 86 | return self 87 | 88 | def fit_msgd(self, X_train, y_train, n_iters=1e4, t0=5, t1=50, batch_size=10): 89 | # 小批量随机梯度下降 90 | assert X_train.shape[0] == y_train.shape[0], \ 91 | 'the size of X_train must be equal to the size of y_train' 92 | assert n_iters >= 1 93 | 94 | def J(theta, X_b, y): 95 | try: 96 | return np.sum((y - X_b.dot(theta)) ** 2) / len(y) 97 | except: 98 | return float('inf') 99 | 100 | def dJ(theta, X_b, y): 101 | return X_b.T.dot(X_b.dot(theta) - y) * 2. 
/ len(y) 102 | 103 | def msgd(X_b, y, initial_theta, t0, t1, batch_size, n_iters, epsilon=1e-8): 104 | 105 | def learning_rate(t): 106 | return t0 / (t + t1) 107 | 108 | theta = initial_theta 109 | cur_iter = 0 110 | 111 | while cur_iter < n_iters: 112 | indexes = np.random.randint(0, len(X_b), batch_size) 113 | X_b_new = X_b[indexes] 114 | y_b_new = y[indexes] 115 | gradient = dJ(theta, X_b_new, y_b_new) 116 | last_theta = theta 117 | theta = theta - learning_rate(cur_iter) * gradient 118 | if (abs(J(theta, X_b, y) - J(last_theta, X_b, y)) < epsilon): 119 | break 120 | cur_iter += 1 121 | 122 | return theta 123 | 124 | X_b = np.hstack([np.ones((len(X_train), 1)), X_train]) 125 | initial_theta = np.random.randn(X_b.shape[1]) 126 | self._theta = msgd(X_b, y_train, initial_theta, t0, t1, batch_size, n_iters) 127 | self.intercept_ = self._theta[0] 128 | self.coef_ = self._theta[1:] 129 | 130 | return self 131 | 132 | def predict(self, X_predict): 133 | assert self.intercept_ is not None and self.coef_ is not None, 'must fit before predict' 134 | assert X_predict.shape[1] == len(self.coef_), 'the feature number of X_predict must be equal to X_train' 135 | X_b = np.hstack([np.ones((len(X_predict), 1)), X_predict]) 136 | return X_b.dot(self._theta) 137 | 138 | def score(self, X_test, y_test): 139 | y_predict = self.predict(X_test) 140 | return r2_score(y_test, y_predict) 141 | -------------------------------------------------------------------------------- /PCA/PCA.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class PCA: 4 | 5 | def __init__(self, n_components=2): 6 | ''' 7 | :__init__: 初始化PCA类 8 | :param n_components: PCA降维后所保留的维数,默认值为2,该值不得超过原始数据的总维数 9 | :type n_components: int 10 | ''' 11 | self.__components=n_components # 降维后保留的维数 12 | self.__eigvalue=[] # 样本矩阵的特征值 13 | self.__eigvec=[] # 每个特征值所对应的特征向量 14 | self.__tarvalue=[] # 降维后所保留的特征对应的特征值 15 | self.__tarvec=[] # 降维后所保留的特征对应的特征向量 16 | self.__transmat=np.array([]) # 降维变换矩阵 17 | 18 | return 19 | 20 | def fit(self, data, rowvar=True): 21 | ''' 22 | :fit: 对原始数据进行PCA主成分分析,得出需要保留的特征,并将PCA主成分分析的结果信息保存在当前对象中 23 | :param data: 训练集矩阵,即进行PCA分析的数据集,在该方法中首先会对训练集矩阵进行标准化 24 | :type data: np.array 25 | :param rowvar: 指定每一行或者每一列代表一个特征:rowvar=True指定每一行代表一个特征,即每一列代表一个样本向量;rowvar=False指定每一列代表一个特征,即每一行代表一个样本向量,默认值为rowvar=True 26 | :type rowvar: bool 27 | ''' 28 | # 1. 首先将训练集矩阵变换为rowvar=False的情况,即每一列代表一个特征,每一行代表一个样本 29 | if rowvar==True: 30 | data=data.T 31 | 32 | # 2. 然后对训练集矩阵进行标准化 33 | size=np.shape(data)[1] # 特征的数量 34 | count=np.shape(data)[0] # 每个特征的样本数 35 | mean=np.array([np.mean(data[:,i]) for i in range(size)]) # 各个特征的均值向量 36 | data=data-mean # 原始矩阵每行(也即每个样本)减去均值向量得到标准化后的矩阵 37 | 38 | # 3. 求解标准化后的训练集矩阵的协方差矩阵 39 | cov=np.cov(data,rowvar=False) 40 | 41 | # 4. 对得到的协方差矩阵进行特征值分解,分解得到特征值和对应的特征向量 42 | self.__eigvalue, self.__eigvec=np.linalg.eig(cov) # 附注: self.__eigvec中的每一列为一个特征向量,此处负号可选,因为特征向量符号取反仍为特征向量 43 | self.__eigvec=-self.__eigvec 44 | 45 | # 5. 将特征值和对应的特征向量从大到小排列,选出前self.__components个特征值和特征向量 46 | conn=[(self.__eigvalue[i], self.__eigvec[:,i]) for i in range(len(self.__eigvalue))] # 注意此处需要遍历全部特征值,而非仅前self.__components个 47 | conn.sort(key=lambda pair: pair[0], reverse=True) # 按特征值大小降序排序,避免对特征向量本身进行比较 48 | self.__tarvalue=np.array([conn[i][0] for i in range(self.__components)]) 49 | self.__tarvec=np.array([conn[i][1] for i in range(self.__components)]) 50 | 51 | # 6. 
使用上述选出的self.__components个特征矩阵生成PCA降维的变换矩阵 52 | self.__transmat=np.array([self.__tarvec[i] for i in range(self.__components)]).T 53 | 54 | return 55 | 56 | def transform(self, data, rowvar=True): 57 | ''' 58 | :transform: 根据fit成员方法的主成分分析结果信息,对数据进行PCA降维变换 59 | :param data: 测试集矩阵,即进行降维变换的数据,测试集矩阵无需归一化 60 | :type data: np.array 61 | :param rowvar: 指定每一行或者每一列代表一个特征:rowvar=True指定每一行代表一个特征,即每一列代表一个样本向量;rowvar=False指定每一列代表一个特征,即每一行代表一个样本向量,默认值为rowvar=True 62 | :type rowvar: bool 63 | ''' 64 | # 1. 首先将训练集矩阵变换为rowvar=False的情况,即每一列代表一个特征,每一行代表一个样本 65 | if rowvar==True: 66 | data=data.T 67 | 68 | # 2. 然后对样本集矩阵进行标准化 69 | size=np.shape(data)[1] # 特征的数量 70 | count=np.shape(data)[0] # 每个特征的样本数 71 | mean=np.array([np.mean(data[:,i]) for i in range(size)]) # 各个特征的均值向量 72 | data=data-mean 73 | 74 | # 3. 标准化后的测试集矩阵和变换矩阵相乘得到降维后的结果矩阵 75 | res=np.dot(data,self.__transmat) 76 | 77 | return res 78 | 79 | def eigenvalue(self, only=False): 80 | ''' 81 | :eigenvalue: 返回根据训练集得到的各个特征的特征根 82 | :param only: 指定是否仅保留降维后的特征的特征根,only=True指定仅保留降维后的特征的特征根,only=False则保留全部特征的特征根,默认值为only=False 83 | :type only: bool 84 | :return: 训练集矩阵在PCA主成分分析中的各个特征的对应特征根 85 | :rtype: np.array 86 | ''' 87 | if only==False: 88 | return self.__eigvalue 89 | else: 90 | return self.__tarvalue 91 | 92 | def eigenvector(self, only=False): 93 | ''' 94 | :eigenvector: 返回根据训练集得到的各个特征的特征向量 95 | :param only: 指定是否仅保留降维后的特征的特征向量,only=True指定仅保留降维后的特征的特征向量,only=False则保留全部特征的特征向量,默认值为only=False 96 | :type only: bool 97 | :return: 训练集矩阵在PCA主成分分析中的各个特征的对应特征向量的矩阵,返回矩阵的每一行为一个特征向量 98 | :rtype: np.array 99 | ''' 100 | if only==False: 101 | return self.__eigvec 102 | else: 103 | return self.__tarvec 104 | 105 | def set_components(self, n_components=2): 106 | ''' 107 | :set_components: 改变保留的特征数 108 | :param n_components: 保留的特征数目 109 | :type n_components: int 110 | ''' 111 | self.__components=n_components 112 | return 113 | 114 | def variance_ratio(self, only=False): 115 | ''' 116 | :variance_ratio: 计算各个特征的权重 117 | :param only: 指定是否仅保留降维后的特征的权重,only=True指定仅保留降维后的特征的权重,only=False则保留全部特征的权重,默认值为only=False 118 | :type only: bool 119 | :return: 各个特征的权重列表 120 | :rtype: np.array 121 | ''' 122 | if only==False: 123 | return np.divide(self.__eigvalue,np.sum(self.__eigvalue)) 124 | else: 125 | return np.divide(self.__tarvalue,np.sum(self.__eigvalue)) 126 | -------------------------------------------------------------------------------- /PCA/example.py: -------------------------------------------------------------------------------- 1 | import PCA as pc 2 | import numpy as np 3 | from sklearn.decomposition import PCA 4 | 5 | if __name__ == "__main__": 6 | data = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 7 | 8 | pca = pc.PCA(n_components=1) 9 | pca.fit(data,rowvar=False) 10 | res = pca.transform(data,rowvar=False) 11 | ratio = pca.variance_ratio(only=True) 12 | print("各特征的权重为: ratio = ",ratio) 13 | print("使用本库进行计算得到的PCA降维结果为: res = ", res) 14 | 15 | pca1 = PCA(n_components=1) 16 | res = pca1.fit_transform(data) 17 | ratio = pca1.explained_variance_ratio_ 18 | print("各特征的权重为: ratio = ",ratio) 19 | print("使用sklearn.decomposition.PCA 验证的结果为: res = ", res) 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /PCA/readme.md: -------------------------------------------------------------------------------- 1 | # PCA主成分分析模块 2 | 3 | PCA主成分分析模块包括主成分分析算法。 4 | 5 | ## 1. 引用头文件"PCA.py" 6 | import PCA as pc 7 | 8 | ## 2. 创建一个PCA对象 9 | > 0. 函数原型 10 | 11 | def __init__(self, n_components=2) 12 | 13 | > 1. 
PCA类的初始化需要一个参数。 14 | > 2. 该唯一参数n_components为PCA降维后所保留的特征数或者维数,类型为int,默认值为2。该值不得超过原始样本集数据的总特征数。 15 | 16 | pca = pc.PCA(n_components=1) 17 | 18 | ## 3. 使用训练集进行PCA主成分分析 19 | > 0. 函数原型 20 | 21 | def fit(self, data, rowvar=True) 22 | 23 | > 1. 使用fit成员方法用训练集进行PCA主成分分析的训练过程。 24 | > 2. 第一个参数data为训练集矩阵,即进行PCA分析的数据集,类型为np.array。在该方法中首先会对训练集矩阵进行标准化。 25 | > 3. 第二个参数rowvar指定参数data的每一行或者每一列代表一个特征,类型为bool。rowvar=True指定每一行代表一个特征,即每一列代表一个样本向量;rowvar=False指定每一列代表一个特征,即每一行代表一个样本向量,默认值为rowvar=True。 26 | 27 | data = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 28 | pca.fit(data,rowvar=False) 29 | 30 | ## 4. 使用测试集进行PCA降维 31 | > 0. 函数原型 32 | 33 | def transform(self, data, rowvar=True) 34 | 35 | > 1. 使用transform成员方法对测试集进行PCA降维。特别地,在使用该方法进行PCA降维之前,首先应当调用fit进行PCA主成分分析。 36 | > 2. 第一个参数data为测试集矩阵,即进行PCA降维的数据集,类型为np.array。在该方法中首先会对测试集矩阵进行标准化。 37 | > 3. 第二个参数rowvar指定参数data的每一行或者每一列代表一个特征,类型为bool。rowvar=True指定每一行代表一个特征,即每一列代表一个样本向量;rowvar=False指定每一列代表一个特征,即每一行代表一个样本向量,默认值为rowvar=True。 38 | > 4. 返回值为降维后的测试集矩阵,其中每一列代表一个特征,类型为np.array。 39 | 40 | data = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 41 | pca.fit(data,rowvar=False) 42 | res = pca.transform(data,rowvar=False) 43 | print("使用本库进行计算得到的PCA降维结果为: res = ", res) 44 | 45 | >>> 输出 46 | 使用本库进行计算得到的PCA降维结果为: res = [[ 1.38340578] 47 | [ 2.22189802] 48 | [ 3.6053038 ] 49 | [-1.38340578] 50 | [-2.22189802] 51 | [-3.6053038 ]] 52 | 53 | > 可以将上述结果与sklearn库的sklearn.decomposition.PCA类的结果进行对比以验证其正确性,如下所示。 54 | 55 | from sklearn.decomposition import PCA 56 | pca1 = PCA(n_components=1) 57 | data = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 58 | res = pca1.fit_transform(data) 59 | print("使用sklearn.decomposition.PCA 验证的结果为: res = ", res) 60 | 61 | >>> 输出 62 | 使用sklearn.decomposition.PCA 验证的结果为: res = [[ 1.38340578] 63 | [ 2.22189802] 64 | [ 3.6053038 ] 65 | [-1.38340578] 66 | [-2.22189802] 67 | [-3.6053038 ]] 68 | 69 | 70 | ## 5. 输出特征的权重 71 | > 0. 函数原型 72 | 73 | def variance_ratio(self, only=False) 74 | 75 | > 1. 使用variance_ratio方法输出特征的权重。特别地,在使用该方法获得特征权重之前,首先应当调用fit进行PCA主成分分析。 76 | > 2. 唯一参数only指定是否仅保留降维后的特征的权重,类型为bool。only=True指定仅保留降维后的n_components个特征的权重,only=False则保留全部特征的权重,默认值为only=False。 77 | > 3. 返回值为各个特征权重的向量,类型为np.array。 78 | 79 | pca1 = PCA(n_components=1) 80 | data = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 81 | pca.fit(data,rowvar=False) 82 | ratio = pca.variance_ratio(only=True) 83 | print("各特征的权重为: ratio = ",ratio) 84 | 85 | >>> 输出 86 | 各特征的权重为: ratio = [0.99244289] 87 | 88 | > 可以将上述结果与sklearn库的sklearn.decomposition.PCA类的结果进行对比以验证其正确性,如下所示。 89 | 90 | ratio = pca1.explained_variance_ratio_ 91 | print("各特征的权重为: ratio = ",ratio) 92 | 93 | >>> 输出 94 | 各特征的权重为: ratio = [0.99244289] 95 | 96 | ## 6. 修改保留的特征数 97 | > 0. 函数原型 98 | 99 | def set_components(self, n_components=2) 100 | 101 | > 1. 使用set_components方法修改需要保留的特征数。特别地,修改需要保留的特征数后,需要重新使用fit成员方法进行训练。 102 | > 2. 唯一参数n_components指定新的需要保留的特征数,类型为int。 103 | 104 | pca = pc.PCA(n_components=1) 105 | pca.set_components(n_components=2) 106 | 107 | ## 7. 输出各个特征对应的特征值和特征向量 108 | > 0. 函数原型 109 | 110 | def eigenvalue(self, only=False) 111 | def eigenvector(self, only=False) 112 | 113 | > 1. 分别使用eigenvalue, eigenvector成员方法输出各个特征对应的特征值和特征向量。特别地,在使用该方法获得特征值或特征向量之前,首先应当调用fit进行PCA主成分分析。 114 | > 2. 这两个成员方法的唯一参数only指定是否仅保留降维后的特征的特征值或特征向量,类型为bool。only=True指定仅保留降维后的n_components个特征的特征值或特征向量,only=False则保留全部特征的特征值或特征向量,默认值为only=False。 115 | > 3. 
eigenvalue, eigenvector成员方法的返回值分别为特征值组成的向量和特征向量组成的矩阵,类型均为np.array。 116 | 117 | data = np.array([[-1, -1], [-2, -1], [-3, -2], [1, 1], [2, 1], [3, 2]]) 118 | pca = pc.PCA(n_components=1) 119 | pca.fit(data,rowvar=False) 120 | value=pca.eigenvalue(only=True) 121 | vector=pca.eigenvector(only=True) 122 | print("各特征的特征值为: ",pca.eigenvalue(only=False)) 123 | print("各特征的特征向量为: ",pca.eigenvector(only=False)) 124 | 125 | >>> 输出 126 | 各特征的特征值为: [7.93954312 0.06045688] 127 | 各特征的特征向量为: [[-0.83849224 0.54491354] 128 | [-0.54491354 -0.83849224]] 129 | 130 | ## 附注: 131 | > 1. example.py中提供了一份使用PCA模块的示例代码。 132 | 133 | 134 | 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # statslibrary 2 | 统计分析与机器学习课程实验作业 3 | 4 | statslibrary包含主成分分析,聚类分析等统计分析算法的实现。 5 | 6 | 需要使用某个算法模块,只需要包含对应模块文件夹下的对应头文件即可使用,各个模块的使用说明文档也请参见对应的文件夹下的readme文档。statslibrary中所实现的各个分析算法请参见如下的目录。 7 | 8 | 9 | ## 目录 10 | #### 1. Distance:距离计算模块,包含欧氏距离,马氏距离等距离计算方法 11 | #### 2. CorreCoef:相关系数与相关系数矩阵计算模块,包含pearson相关系数,spearman相关系数等相关系数计算方法 12 | #### 3. GeneralStats:一般分析统计量计算模块,包含平均数,中位数,众数,分位数,方差,标准差,极差,偏度,峰度等统计量计算方法 13 | #### 4. VarAnaly:方差分析模块,包含单因素方差分析和双因素方差分析 14 | #### 5. \*LinearRegre:回归分析模块,包含一元和多元线性回归分析 15 | #### 6. PCA:主成分分析模块 16 | #### 7. Kmeans:K-Means, Kmeans聚类法模块 17 | #### 8. SCM (Ward-Hierarchical-Clustering):系统聚类法(又称层次聚类法)模块 18 | #### 9. DistanceDiscri:距离判别分析模块 19 | #### 10. BayesDiscri:朴素贝叶斯判别分析模块 20 | #### 11. \*FisherDiscri:费歇判别分析模块 21 | #### 12. FacAnaly:因子分析模块 22 | 23 | -------------------------------------------------------------------------------- /SCM/SCM.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class SCM: 4 | 5 | # public functions 6 | 7 | def __init__(self, classdist='nearest', sampledist='euc', p=1): 8 | ''' 9 | :__init__: 初始化系统聚类法类SCM 10 | :param classdist: 指定系统聚类法中所使用的类间距离的种类,取值范围为classdist={'nearest','farthest','average','centroid','square'},默认值为classdist='nearest' 11 | : 'nearest' : 类间最近距离,类间最近距离为两个类别之间样本距离的最小值 12 | : 'farthest': 类间最远距离,类间最远距离为两个类别之间样本距离的最大值 13 | : 'average' : 类间平均距离,类间平均距离为两个类别之间样本距离的平均值 14 | : 'centroid': 类间重心距离,类间重心距离为两个类别的重心/均值向量之间的距离 15 | : 'square' : 类间离差平方和距离,类间离差平方和距离=类别a,b的合并大类直径-类别a的直径-类别b的直径 16 | :type classdist: str 17 | :param sampledist: 指定系统聚类法中所使用的样本间距离的种类,取值范围为sampledist={'euc','mah','man','min','cos'},默认值为sampledist='euc'。若参数classdist=='square',则此参数不使用 18 | : 'euc': 样本间欧氏距离 19 | : 'mah': 样本间马氏距离 20 | : 'man': 样本间曼哈顿距离 21 | : 'min': 样本间闵可夫斯基距离 22 | : 'cos': 样本间余弦距离 23 | :type sampledist: str 24 | :param p: 闵可夫斯基距离的维数p,默认值为p=1。若参数classdist=='square'或者参数sampledist!='min',则此参数不使用 25 | :type p: int 26 | ''' 27 | self.__classdist=classdist # 类间距离的种类 28 | self.__sampledist=sampledist # 样本间距离的种类 29 | self.__p=p # 闵可夫斯基距离维数p 30 | 31 | return 32 | 33 | def fit(self, data, kind=2, rowvar=False): 34 | ''' 35 | :fit: 对样本数据集进行系统分类 36 | :param data: 原始样本矩阵或者数据集 37 | :type data: np.array 38 | :param rowvar: 指定每行代表一个变量或者每列代表一个变量;rowvar=True指定每行代表一个变量,即每列代表一个样本;rowvar=False指定每列代表一个变量,即每行代表一个样本。默认值为rowvar=False 39 | :type rowvar: bool 40 | :param kind: 指定分类中的类别数目,默认值为2 41 | :type kind: int 42 | :return: 分类结果,分类结果的形式为一个列表,该列表中包含若干个子列表,每个子列表代表一个类别,该子列表中含有该类别中的样本序号(样本序号为该样本向量在样本矩阵data中所在行下标或者列下标,从下标0开始) 43 | :rtype: list 44 | ''' 45 | # 1. 将原始样本矩阵统一转换为每行代表一个样本向量,每列代表一个变量 46 | if rowvar==True: 47 | data=data.T 48 | 49 | # 2. 
初始情况下将每个样本向量作为一个类别 50 | size=np.shape(data)[0] # 样本向量数量 51 | count=np.shape(data)[1] # 样本向量维数 52 | classlist=[[i] for i in range(size)] # 初始情况下每个样本自身作为一个类别,classlist中仅存储样本的下标 53 | 54 | # 3. 正式进行聚类过程,每轮迭代的过程为: 首先计算各个类别之间的距离,然后取出两个距离最近的类别,将其合并为一个类别,一轮迭代结束,然后进行下一轮迭代,直到剩下的类别数量等于用户指定的类别数目 55 | while len(classlist)>kind: 56 | # 3.1 计算各个类别之间的距离 57 | # 附注: 提交实验时没有注意,假期中recheck时发现本处代码存在较大优化空间,后续更新中将进行优化,每次迭代后,由于仅仅合并了两个类别,因此其他的类别之间的距离无需计算,仅仅只需要计算合并后得到的新类别与其他类别之间的距离即可 58 | kind_count=len(classlist) 59 | dist=np.full((kind_count,kind_count),float('inf')) # 附注:不能初始化为全0矩阵,否则在后续矩阵求解min时会出现问题,因为不是所有的矩阵元素均用于储存距离 60 | for i in range(kind_count): 61 | for k in range(i+1,kind_count): # 附注: 注意下标处理,类别m和类别n之间的距离只需要计算1次 62 | if self.__classdist=='nearest': 63 | a=np.vstack([data[classlist[i][m]] for m in range(len(classlist[i]))]) 64 | b=np.vstack([data[classlist[k][m]] for m in range(len(classlist[k]))]) 65 | dist[i][k]=self.__nearest_distance(a,b,mode=self.__sampledist,p=self.__p) 66 | elif self.__classdist=='farthest': 67 | a=np.vstack([data[classlist[i][m]] for m in range(len(classlist[i]))]) 68 | b=np.vstack([data[classlist[k][m]] for m in range(len(classlist[k]))]) 69 | dist[i][k]=self.__farthest_distance(a,b,mode=self.__sampledist,p=self.__p) 70 | elif self.__classdist=='average': 71 | a=np.vstack([data[classlist[i][m]] for m in range(len(classlist[i]))]) 72 | b=np.vstack([data[classlist[k][m]] for m in range(len(classlist[k]))]) 73 | dist[i][k]=self.__average_distance(a,b,mode=self.__sampledist,p=self.__p) 74 | elif self.__classdist=='centroid': 75 | a=np.vstack([data[classlist[i][m]] for m in range(len(classlist[i]))]) 76 | b=np.vstack([data[classlist[k][m]] for m in range(len(classlist[k]))]) 77 | dist[i][k]=self.__centroid_distance(a,b,mode=self.__sampledist,p=self.__p) 78 | elif self.__classdist=='square': 79 | a=np.vstack([data[classlist[i][m]] for m in range(len(classlist[i]))]) 80 | b=np.vstack([data[classlist[k][m]] for m in range(len(classlist[k]))]) 81 | dist[i][k]=self.__square_distance(a,b) 82 | # 3.2 选择距离最小的两个类别进行合并,若有多对类别的距离相同,则随机选择一对类别进行合并 83 | tar=np.where(dist==np.min(dist)) #注意取min而非max 84 | pair=np.random.randint(len(tar[0])) # 距离最小的类别对可能不止一对,随机选择其中一对进行合并(注意需要成对选取下标,否则选出的两个类别可能并非距离最小的一对) 85 | a,b=min(tar[0][pair],tar[1][pair]),max(tar[0][pair],tar[1][pair]) # 待合并的类别较小下标a与较大下标b 86 | temp=classlist[b] 87 | classlist.pop(b) # 注意列表顺序,先删除较大下标的类别b 88 | classlist[a]+=temp # 将类别b并入类别a 89 | 90 | return classlist 91 | 92 | def reset(self, classdist='nearest', sampledist='euc', p=1): 93 | ''' 94 | :reset: 重新设置系统聚类法类SCM的聚类距离种类 95 | :param classdist: 指定系统聚类法中所使用的类间距离的种类,取值范围为classdist={'nearest','farthest','average','centroid','square'},默认值为classdist='nearest' 96 | : 'nearest' : 类间最近距离,类间最近距离为两个类别之间样本距离的最小值 97 | : 'farthest': 类间最远距离,类间最远距离为两个类别之间样本距离的最大值 98 | : 'average' : 类间平均距离,类间平均距离为两个类别之间样本距离的平均值 99 | : 'centroid': 类间重心距离,类间重心距离为两个类别的重心/均值向量之间的距离 100 | : 'square' : 类间离差平方和距离,类间离差平方和距离=类别a,b的合并大类直径-类别a的直径-类别b的直径 101 | :type classdist: str 102 | :param sampledist: 指定系统聚类法中所使用的样本间距离的种类,取值范围为sampledist={'euc','mah','man','min','cos'},默认值为sampledist='euc'。若参数classdist=='square',则此参数不使用 103 | : 'euc': 样本间欧氏距离 104 | : 'mah': 样本间马氏距离 105 | : 'man': 样本间曼哈顿距离 106 | : 'min': 样本间闵可夫斯基距离 107 | : 'cos': 样本间余弦距离 108 | :type sampledist: str 109 | :param p: 闵可夫斯基距离的维数p,默认值为p=1。若参数classdist=='square'或者参数sampledist!='min',则此参数不使用 110 | :type p: int 111 | ''' 112 | self.__classdist=classdist # 类间距离的种类 113 | self.__sampledist=sampledist # 样本间距离的种类 114 | self.__p=p # 闵可夫斯基距离维数p 115 | 116 | return 117 | 118 | # private functions 119 | 120 | # 5种样本距离求解方法 121 |
def __euc_distance(self, a, b): 122 | ''' 123 | :euc_distance: 求解欧氏距离 124 | :param a: 向量a 125 | :type a: np.array 126 | :param b: 向量b 127 | :type b: np.array 128 | :return -> 向量a和向量b的欧氏距离 129 | :rtype: float 130 | ''' 131 | res=np.sqrt(np.sum(np.square(a-b))) 132 | return res 133 | 134 | def __mah_distance(self, a, b, cov_vec): 135 | ''' 136 | :mah_distance: 求解马氏距离 137 | :comment: 该方法存在如下两种主要的调用方法: 138 | :1. 计算一个未分类向量到一个类别的马氏距离: 这时a为未分类向量,b为一个类别的均值向量,cov_vec为该类别的协方差矩阵,返回未分类变量a到某个类别的马氏距离 139 | :2. 计算同一个类别中两个样本向量的马氏距离: 这时a, b分别为属于同一类别的两个样本向量,cov_vec为该类别的协方差矩阵,返回同类别下的向量a, b间的马氏距离 140 | 141 | :type a: np.array 142 | :type b: np.array 143 | :type cov_vec: np.array 144 | :rtype: float 145 | ''' 146 | if cov_vec.ndim<2: #判断协方差矩阵是否为2维以上,若非2维以上,则无法求逆,因此无法求解马氏距离,这时直接返回欧氏距离 147 | return self.__euc_distance(a,b) 148 | rev_vec=np.linalg.pinv(cov_vec) # 求协方差矩阵的(伪)逆矩阵 149 | tmp=a-b # 行向量, tmp.T为列向量 150 | res=np.sqrt(np.dot(np.dot(tmp,rev_vec),tmp.T)) 151 | 152 | return res 153 | 154 | def __man_distance(self, a, b): 155 | ''' 156 | :man_distance: 求解曼哈顿距离 157 | :param a: 向量a 158 | :type a: np.array 159 | :param b: 向量b 160 | :type b: np.array 161 | :return -> 向量a和向量b的曼哈顿距离 162 | :rtype: float 163 | ''' 164 | res=np.sum(np.abs(a-b)) 165 | return res 166 | 167 | def __min_distance(self, a, b, p): 168 | ''' 169 | :min_distance: 求解闵可夫斯基距离 170 | :param a: 向量a 171 | :type a: np.array 172 | :param b: 向量b 173 | :type b: np.array 174 | :param p: 闵科夫斯基距离的维数p 175 | :type p: int 176 | :return -> 向量a和向量b的闵可夫斯基距离 177 | :rtype: float 178 | ''' 179 | res=np.power(np.sum(np.power(np.abs(a-b),p)),1/p) 180 | return res 181 | 182 | def __cos_distance(self, a, b): 183 | ''' 184 | :cos_distance: 求解余弦距离 185 | :param a: 向量a 186 | :type a: np.array 187 | :param b: 向量b 188 | :type b: np.array 189 | :return -> 向量a和向量b的余弦距离 190 | :rtype: float 191 | ''' 192 | res1=np.sum(np.multiply(a,b)) 193 | res2=np.sqrt(np.sum(np.square(a)))*np.sqrt(np.sum(np.square(b))) 194 | res=res1/res2 195 | return res 196 | 197 | # 5种类间距离计算方法 198 | def __nearest_distance(self, a, b, mode='euc', p=1): 199 | ''' 200 | :__nearest_distance: 求解类间的最短距离。类间的最短距离定义为两个类的样本之间的距离的最小值 201 | :param a: 类别a的样本矩阵,矩阵a的每一行代表类别中的一个样本向量 202 | :type a: np.array 203 | :param b: 类别b的样本矩阵,矩阵b的每一行代表类别中的一个样本向量 204 | :type b: np.array 205 | :param mode: 指定样本之间的距离定义,取值范围为mode={'euc','mah','man','min','cos'} 206 | :type mode: str 207 | :param p: 闵可夫斯基距离的维数p,若不使用闵可夫斯基距离,即mode!='min',则该参数可省略,默认值为p=1 208 | :type p: int 209 | :return: 类别a和类别b之间的最短距离 210 | :rtype: float 211 | ''' 212 | flag=0 213 | if a.ndim==1: # 处理向量的一维情况的方法均为向量矩阵化 214 | a=np.array([a]) 215 | if b.ndim==1: 216 | b=np.array([b]) 217 | flag=1 218 | 219 | size1=np.shape(a)[0] # 类别a的样本数量 220 | size2=np.shape(b)[0] # 类别b的样本数量 221 | 222 | res=float('inf') # 初始值为正无穷 223 | 224 | if mode=='euc': 225 | for i in range(size1): 226 | for k in range(size2): 227 | tempres=self.__euc_distance(a[i],b[k]) 228 | res=tempres if tempres<res else res 229 | elif mode=='mah': 230 | for i in range(size1): 231 | for k in range(size2): 232 | # 计算类别b的协方差矩阵: 若只有一个样本,则协方差矩阵为全0阵 233 | cov_vec=np.cov(b,rowvar=False) 234 | avg=np.average(b,axis=0) # 计算类别b的均值向量 235 | tempres=self.__mah_distance(a[i],avg,cov_vec) 236 | res=tempres if tempres<res else res 237 | elif mode=='man': 238 | for i in range(size1): 239 | for k in range(size2): 240 | tempres=self.__man_distance(a[i],b[k]) 241 | res=tempres if tempres<res else res 242 | elif mode=='min': 243 | for i in range(size1): 244 | for k in range(size2): 245 | tempres=self.__min_distance(a[i],b[k],p) 246 | res=tempres if tempres<res else res 247 | else: 248 | for i in range(size1): 249 | for k in range(size2): 250 | # 余弦距离范围为[-1,1],余弦距离为1表示两个类别相近,余弦距离为-1表示两个类别完全相远 251 | # 因此使用余弦差=1-余弦距离表征类别之间的距离,范围为[0,2],余弦差越小表示类别之间越相近,否则类别之间越相远 252 | tempres=1-self.__cos_distance(a[i],b[k]) 253 | res=tempres if tempres<res else res 254 | return res 255 | 256 | def __farthest_distance(self, a, b, mode='euc', p=1): 257 | ''' 258 | :__farthest_distance: 求解类间的最长距离。类间的最长距离定义为两个类的样本之间的距离的最大值 259 | :param a: 类别a的样本矩阵,矩阵a的每一行代表类别中的一个样本向量 260 | :type a: np.array 261 | :param b: 类别b的样本矩阵,矩阵b的每一行代表类别中的一个样本向量 262 | :type b: np.array 263 | :param mode: 指定样本之间的距离定义,取值范围为mode={'euc','mah','man','min','cos'} 264 | :type mode: str 265 | :param p: 闵可夫斯基距离的维数p,若不使用闵可夫斯基距离,即mode!='min',则该参数可省略,默认值为p=1 266 | :type p: int 267 | :return: 类别a和类别b之间的最长距离 268 | :rtype: float 269 | ''' 270 | flag=0 271 | if a.ndim==1: # 处理向量的一维情况的方法均为向量矩阵化 272 | a=np.array([a]) 273 | if b.ndim==1: 274 | b=np.array([b]) 275 | flag=1 276 | 277 | size1=np.shape(a)[0] # 类别a的样本数量 278 | size2=np.shape(b)[0] # 类别b的样本数量 279 | 280 | res=0.0 #初始值为0.0 281 | 282 | if mode=='euc': 283 | for i in range(size1): 284 | for k in range(size2): 285 | tempres=self.__euc_distance(a[i],b[k]) 286 | res=tempres if tempres>res else res 287 | elif mode=='mah': 288 | for i in range(size1): 289 | for k in range(size2): 290 | # 计算类别b的协方差矩阵: 若只有一个样本,则协方差矩阵为全0阵 291 | cov_vec=np.cov(b,rowvar=False) 292 | avg=np.average(b,axis=0) # 计算类别b的均值向量 293 | tempres=self.__mah_distance(a[i],avg,cov_vec) 294 | res=tempres if tempres>res else res 295 | elif mode=='man': 296 | for i in range(size1): 297 | for k in range(size2): 298 | tempres=self.__man_distance(a[i],b[k]) 299 | res=tempres if tempres>res else res 300 | elif mode=='min': 301 | for i in range(size1): 302 | for k in range(size2): 303 | tempres=self.__min_distance(a[i],b[k],p) 304 | 
res=tempres if tempres>res else res 305 | else: 306 | for i in range(size1): 307 | for k in range(size2): 308 | # 余弦距离范围为[-1,1],余弦距离为1表示两个类别相近,余弦距离为-1表示两个类别完全相远 309 | # 因此使用余弦差=1-余弦距离表征类别之间的距离,范围为[0,2],余弦差越小表示类别之间越相近,否则类别之间越相远 310 | tempres=1-self.__cos_distance(a[i],b[k]) 311 | res=tempres if tempres>res else res 312 | return res 313 | 314 | def __average_distance(self, a, b, mode='euc', p=1): 315 | ''' 316 | :__average_distance: 求解类间的平均距离。类间的平均距离定义为所有两个类的样本之间的距离的平均值 317 | :param a: 类别a的样本矩阵,矩阵a的每一行代表类别中的一个样本向量 318 | :type a: np.array 319 | :param b: 类别b的样本矩阵,矩阵b的每一行代表类别中的一个样本向量 320 | :type b: np.array 321 | :param mode: 指定样本之间的距离定义,取值范围为mode={'euc','mah','man','min','cos'} 322 | :type mode: str 323 | :param p: 闵可夫斯基距离的维数p,若不使用闵可夫斯基距离,即mode!='min',则该参数可省略,默认值为p=1 324 | :type p: int 325 | :return: 类别a和类别b之间的平均距离 326 | :rtype: float 327 | ''' 328 | flag=0 329 | if a.ndim==1: # 处理向量的一维情况的方法均为向量矩阵化 330 | a=np.array([a]) 331 | if b.ndim==1: 332 | b=np.array([b]) 333 | flag=1 334 | 335 | size1=np.shape(a)[0] # 类别a的样本数量 336 | size2=np.shape(b)[0] # 类别b的样本数量 337 | 338 | res=0.0 #初始值为0.0 339 | 340 | if mode=='euc': 341 | for i in range(size1): 342 | for k in range(size2): 343 | tempres=self.__euc_distance(a[i],b[k]) 344 | res+=tempres 345 | elif mode=='mah': 346 | for i in range(size1): 347 | for k in range(size2): 348 | # 计算类别b的协方差矩阵: 若只有一个样本,则协方差矩阵为全0阵 349 | cov_vec=np.cov(b,rowvar=False) 350 | avg=np.average(b,axis=0) #计算类别b的均值向量 351 | tempres=self.__mah_distance(a[i],avg,cov_vec) 352 | res+=tempres 353 | elif mode=='man': 354 | for i in range(size1): 355 | for k in range(size2): 356 | tempres=self.__man_distance(a[i],b[k]) 357 | res+=tempres 358 | elif mode=='min': 359 | for i in range(size1): 360 | for k in range(size2): 361 | tempres=self.__min_distance(a[i],b[k],p) 362 | res+=tempres 363 | else: 364 | for i in range(size1): 365 | for k in range(size2): 366 | # 余弦距离范围为[-1,1],余弦距离为1表示两个类别相近,余弦距离为-1表示两个类别完全相远 367 | # 因此使用余弦差=1-余弦距离表征类别之间的距离,范围为[0,2],余弦差越小表示类别之间越相近,否则类别之间越相远 368 | tempres=1-self.__cos_distance(a[i],b[k]) 369 | res+=tempres 370 | 371 | res=res/(size1*size2) 372 | return res 373 | 374 | def __centroid_distance(self, a, b, mode='euc', p=1): 375 | ''' 376 | :__centroid_distance: 求解类间的重心距离。类间的平均距离定义为两个类的样本的均值向量之间的距离 377 | :param a: 类别a的样本矩阵,矩阵a的每一行代表类别中的一个样本向量 378 | :type a: np.array 379 | :param b: 类别b的样本矩阵,矩阵b的每一行代表类别中的一个样本向量 380 | :type b: np.array 381 | :param mode: 指定样本之间的距离定义,取值范围为mode={'euc','mah','man','min','cos'} 382 | :type mode: str 383 | :param p: 闵可夫斯基距离的维数p,若不使用闵可夫斯基距离,即mode!='min',则该参数可省略,默认值为p=1 384 | :type p: int 385 | :return: 类别a和类别b之间的重心距离 386 | :rtype: float 387 | ''' 388 | if a.ndim==1: # 处理向量的一维情况的方法均为向量矩阵化 389 | a=np.array([a]) 390 | if b.ndim==1: 391 | b=np.array([b]) 392 | 393 | res=0.0 #初始值为0.0 394 | avg_a=np.average(a,axis=0) #类别a的重心/均值向量 395 | avg_b=np.average(b,axis=0) #类别b的重心/均值向量 396 | 397 | if mode=='euc': 398 | res=self.__euc_distance(avg_a,avg_b) 399 | elif mode=='mah': 400 | cov_vec=np.cov(b,rowvar=False) #计算类别b的协方差矩阵 401 | res=self.__mah_distance(avg_a,avg_b,cov_vec) 402 | elif mode=='man': 403 | res=self.__man_distance(avg_a,avg_b) 404 | elif mode=='min': 405 | res=self.__min_distance(avg_a,avg_b,p) 406 | else: 407 | # 余弦距离范围为[-1,1],余弦距离为1表示两个类别相近,余弦距离为-1表示两个类别完全相远 408 | # 因此使用余弦差=1-余弦距离表征类别之间的距离,范围为[0,2],余弦差越小表示类别之间越相近,否则类别之间越相远 409 | res=1-self.__cos_distance(avg_a,avg_b) 410 | 411 | return res 412 | 413 | def __square_distance(self, a, b): 414 | ''' 415 | :__square_distance: 求解类间的离差平方和距离。类间的离差平方和距离的定义请参见wiki 416 | :param a: 
类别a的样本矩阵,矩阵a的每一行代表类别中的一个样本向量 417 | :type a: np.array 418 | :param b: 类别b的样本矩阵,矩阵b的每一行代表类别中的一个样本向量 419 | :type b: np.array 420 | :return: 类别a和类别b之间的离差平方和距离 421 | :rtype: float 422 | ''' 423 | if a.ndim==1: # 处理向量的一维情况的方法均为向量矩阵化 424 | a=np.array([a]) 425 | if b.ndim==1: 426 | b=np.array([b]) 427 | 428 | size1=np.shape(a)[0] # 类别a的样本数量 429 | size2=np.shape(b)[0] # 类别b的样本数量 430 | 431 | avg_a=np.average(a,axis=0) #类别a的重心/均值向量 432 | avg_b=np.average(b,axis=0) #类别b的重心/均值向量 433 | avg_all=np.average(np.vstack([a,b]),axis=0) #类别a和类别b的大类重心/均值向量 434 | 435 | res=0 #最终离差平方和距离结果 436 | 437 | # 1. 分别计算类别a和类别b的直径 438 | da=0.0 439 | for i in range(size1): 440 | da+=np.dot(a[i]-avg_a,(a[i]-avg_a).T) 441 | 442 | db=0.0 443 | for i in range(size2): 444 | db+=np.dot(b[i]-avg_b,(b[i]-avg_b).T) 445 | 446 | # 2. 计算类别a和类别b的大类直径 447 | dab=0.0 448 | for i in range(size1): 449 | dab+=np.dot(a[i]-avg_all,(a[i]-avg_all).T) 450 | for i in range(size2): 451 | dab+=np.dot(b[i]-avg_all,(b[i]-avg_all).T) 452 | 453 | res=dab-da-db 454 | return res 455 | 456 | 457 | 458 | 459 | 460 | 461 | 462 | -------------------------------------------------------------------------------- /SCM/example.py: -------------------------------------------------------------------------------- 1 | import SCM 2 | import numpy as np 3 | 4 | if __name__ == "__main__": 5 | 6 | data=np.array([[1,3,5,7,9],[2,4,6,8,10],[1,4,5,8,9], 7 | [1100,1300,1500,1700,1900],[1200,1400,1600,1800,2000],[1100,1400,1500,1800,1900], 8 | [11000,13000,15000,17000,19000],[12000,14000,16000,18000,20000],[11000,14000,15000,18000,19000] 9 | ]) #9个样本,明显属于3类 10 | 11 | # 1. 欧氏距离 12 | 13 | print("欧氏距离测试: ") 14 | 15 | scm=SCM.SCM(classdist='nearest', sampledist='euc') 16 | res=scm.fit(data,kind=3) 17 | print("样本间欧氏距离+类间最近距离: res = ", res) 18 | 19 | scm=SCM.SCM(classdist='farthest', sampledist='euc') 20 | res=scm.fit(data,kind=3) 21 | print("样本间欧氏距离+类间最远距离: res = ", res) 22 | 23 | scm=SCM.SCM(classdist='average', sampledist='euc') 24 | res=scm.fit(data,kind=3) 25 | print("样本间欧氏距离+类间平均距离: res = ", res) 26 | 27 | scm=SCM.SCM(classdist='centroid', sampledist='euc') 28 | res=scm.fit(data,kind=3) 29 | print("样本间欧氏距离+类间重心距离: res = ", res) 30 | 31 | scm=SCM.SCM(classdist='square', sampledist='euc') 32 | res=scm.fit(data,kind=3) 33 | print("样本间欧氏距离+离差平方距离: res = ", res) 34 | 35 | print("") 36 | print("") 37 | 38 | # 2. 马氏距离 39 | 40 | print("马氏距离测试: ") 41 | 42 | scm=SCM.SCM(classdist='nearest', sampledist='mah') 43 | res=scm.fit(data,kind=3) 44 | print("样本间马氏距离+类间最近距离: res = ", res) 45 | 46 | scm=SCM.SCM(classdist='farthest', sampledist='mah') 47 | res=scm.fit(data,kind=3) 48 | print("样本间马氏距离+类间最远距离: res = ", res) 49 | 50 | scm=SCM.SCM(classdist='average', sampledist='mah') 51 | res=scm.fit(data,kind=3) 52 | print("样本间马氏距离+类间平均距离: res = ", res) 53 | 54 | scm=SCM.SCM(classdist='centroid', sampledist='mah') 55 | res=scm.fit(data,kind=3) 56 | print("样本间马氏距离+类间重心距离: res = ", res) 57 | 58 | scm=SCM.SCM(classdist='square', sampledist='mah') 59 | res=scm.fit(data,kind=3) 60 | print("样本间马氏距离+离差平方距离: res = ", res) 61 | 62 | print("") 63 | print("") 64 | 65 | # 3. 
曼哈顿距离 66 | 67 | print("曼哈顿距离测试: ") 68 | 69 | scm=SCM.SCM(classdist='nearest', sampledist='man') 70 | res=scm.fit(data,kind=3) 71 | print("样本间曼哈顿距离+类间最近距离: res = ", res) 72 | 73 | scm=SCM.SCM(classdist='farthest', sampledist='man') 74 | res=scm.fit(data,kind=3) 75 | print("样本间曼哈顿距离+类间最远距离: res = ", res) 76 | 77 | scm=SCM.SCM(classdist='average', sampledist='man') 78 | res=scm.fit(data,kind=3) 79 | print("样本间曼哈顿距离+类间平均距离: res = ", res) 80 | 81 | scm=SCM.SCM(classdist='centroid', sampledist='man') 82 | res=scm.fit(data,kind=3) 83 | print("样本间曼哈顿距离+类间重心距离: res = ", res) 84 | 85 | scm=SCM.SCM(classdist='square', sampledist='man') 86 | res=scm.fit(data,kind=3) 87 | print("样本间曼哈顿距离+离差平方距离: res = ", res) 88 | 89 | print("") 90 | print("") 91 | 92 | # 4. 闵可夫斯基距离 93 | 94 | print("闵科夫斯基距离测试,维数 = 2: ") 95 | 96 | scm=SCM.SCM(classdist='nearest', sampledist='min', p=2) 97 | res=scm.fit(data,kind=3) 98 | print("样本间闵可夫斯基距离+类间最近距离: res = ", res) 99 | 100 | scm=SCM.SCM(classdist='farthest', sampledist='min', p=2) 101 | res=scm.fit(data,kind=3) 102 | print("样本间闵可夫斯基距离+类间最远距离: res = ", res) 103 | 104 | scm=SCM.SCM(classdist='average', sampledist='min', p=2) 105 | res=scm.fit(data,kind=3) 106 | print("样本间闵可夫斯基距离+类间平均距离: res = ", res) 107 | 108 | scm=SCM.SCM(classdist='centroid', sampledist='min', p=2) 109 | res=scm.fit(data,kind=3) 110 | print("样本间闵可夫斯基距离+类间重心距离: res = ", res) 111 | 112 | scm=SCM.SCM(classdist='square', sampledist='min', p=2) 113 | res=scm.fit(data,kind=3) 114 | print("样本间闵可夫斯基距离+离差平方距离: res = ", res) 115 | 116 | print("") 117 | print("") 118 | 119 | # 5. 余弦距离 120 | 121 | print("余弦距离测试,维数 = 3: ") 122 | 123 | scm=SCM.SCM(classdist='nearest', sampledist='cos') 124 | res=scm.fit(data,kind=3) 125 | print("样本间余弦距离+类间最近距离: res = ", res) 126 | 127 | scm=SCM.SCM(classdist='farthest', sampledist='cos') 128 | res=scm.fit(data,kind=3) 129 | print("样本间余弦距离+类间最远距离: res = ", res) 130 | 131 | scm=SCM.SCM(classdist='average', sampledist='cos') 132 | res=scm.fit(data,kind=3) 133 | print("样本间余弦距离+类间平均距离: res = ", res) 134 | 135 | scm=SCM.SCM(classdist='centroid', sampledist='cos') 136 | res=scm.fit(data,kind=3) 137 | print("样本间余弦距离+类间重心距离: res = ", res) 138 | 139 | scm=SCM.SCM(classdist='square', sampledist='cos') 140 | res=scm.fit(data,kind=3) 141 | print("样本间余弦距离+离差平方距离: res = ", res) 142 | 143 | print("") 144 | print("") -------------------------------------------------------------------------------- /SCM/example2.py: -------------------------------------------------------------------------------- 1 | import SCM 2 | import numpy as np 3 | 4 | def replace(lis,dic): 5 | for i in range(len(lis)): 6 | for k in range(len(lis[i])): 7 | lis[i][k]=dic[lis[i][k]] 8 | return lis 9 | 10 | if __name__ == "__main__": 11 | 12 | data=np.array([[1772.14,568.25,298.66,352.20,307.21,490.83,364.28,202.50], # 辽宁 # 原始样本矩阵 13 | [2752.25,569.95,662.31,541.06,623.05,917.23,599.98,354.39], # 浙江 14 | [1386.76,460.99,312.97,280.78,246.24,407.26,547.19,188.52], # 河南 15 | [1552.77,517.16,402.03,272.44,265.29,563.10,302.27,251.41], # 甘肃 16 | [1711.03,458.57,334.91,307.24,297.72,495.34,274.48,306.45]]) # 青海 # 总计5个样本 17 | 18 | dic={0:'辽宁',1:'浙江',2:'河南',3:'甘肃',4:'青海'} 19 | 20 | scm=SCM.SCM(classdist='nearest', sampledist='euc') 21 | 22 | res=scm.fit(data,kind=5) # 获得分类后的样本下标 23 | res=replace(res,dic) # 替换样本下标为样本标签名 24 | print("分为5类结果 res = :",res) 25 | 26 | res=scm.fit(data,kind=4) # 获得分类后的样本下标 27 | res=replace(res,dic) # 替换样本下标为样本标签名 28 | print("分为4类结果 res = :",res) 29 | 30 | res=scm.fit(data,kind=3) # 获得分类后的样本下标 31 | res=replace(res,dic) 
# 替换样本下标为样本标签名 32 | print("分为3类结果 res = :",res) 33 | 34 | res=scm.fit(data,kind=2) # 获得分类后的样本下标 35 | res=replace(res,dic) # 替换样本下标为样本标签名 36 | print("分为2类结果 res = :",res) 37 | 38 | res=scm.fit(data,kind=1) # 获得分类后的样本下标 39 | res=replace(res,dic) # 替换样本下标为样本标签名 40 | print("分为1类结果 res = :",res) 41 | 42 | -------------------------------------------------------------------------------- /SCM/readme.md: -------------------------------------------------------------------------------- 1 | # SCM系统聚类法模块 2 | 3 | SCM系统聚类法模块包含系统聚类算法的实现。 4 | 5 | 6 | ## 1. 引用头文件"SCM.py" 7 | import SCM 8 | 9 | ## 2. 创建一个SCM对象 10 | > 0. 函数原型 11 | 12 | def __init__(self, classdist='nearest', sampledist='euc', p=1) 13 | 14 | > 1. 创建一个SCM对象最多需要三个参数。 15 | > 2. 第一个参数classdist指定系统聚类法中所使用的类间距离的种类,类型为str。取值范围为classdist={'nearest','farthest','average','centroid','square'},默认值为classdist='nearest',参数的取值的具体含义如下所示: 16 | >> + 'nearest' : 类间最近距离,类间最近距离为两个类别之间样本距离的最小值 17 | >> + 'farthest': 类间最远距离,类间最远距离为两个类别之间样本距离的最大值 18 | >> + 'average' : 类间平均距离,类间平均距离为两个类别之间样本距离的平均值 19 | >> + 'centroid': 类间重心距离,类间重心距离为两个类别的重心/均值向量之间的距离 20 | >> + 'square' : 类间离差平方和距离,类间离差平方和距离=类别a,b的合并大类直径-类别a的直径-类别b的直径 21 | > 3. 第二个参数sampledist指定系统聚类法中所使用的样本间距离的种类,类型为str,取值范围为sampledist={'euc','mah','man','min','cos'},默认值为sampledist='euc'。若参数classdist=='square',则此参数不使用 22 | >> + 'euc': 样本间欧氏距离 23 | >> + 'mah': 样本间马氏距离 24 | >> + 'man': 样本间曼哈顿距离 25 | >> + 'min': 样本间闵可夫斯基距离 26 | >> + 'cos': 样本间余弦距离 27 | > 4. 第三个参数p为闵可夫斯基距离的维数p,类型为int。默认值为p=1。若参数classdist=='square'或者参数sampledist!='min',则此参数不被使用,可以保持缺省。 28 | 29 | scm=SCM.SCM(classdist='nearest', sampledist='euc') 30 | 31 | ## 3. 更改系统聚类的距离参数 32 | > 0. 函数原型 33 | 34 | def reset(self, classdist='nearest', sampledist='euc', p=1) 35 | 36 | > 1. 即使在已经创建了一个SCM对象的情况下,仍然可以通过reset成员方法重新设置类间距离以及样本间距离的种类,该方法同样接受三个参数,且三个参数与初始化时相同,请参见上述创建SCM对象部分的参数介绍。 37 | 38 | scm.reset(classdist='nearest', sampledist='mah') 39 | 40 | ## 4. 进行系统聚类 41 | > 0. 函数原型 42 | 43 | def fit(self, data, kind=2, rowvar=False) 44 | 45 | > 1. 使用fit成员方法进行系统聚类。 46 | > 2. 第一个参数data为原始样本矩阵或者数据集,类型为np.array。 47 | > 3. 第二个参数kind指定类别数目,类型为int。即指定样本向量需要被分为多少类,该值必须小于或者等于样本的数量。 48 | > 4. 第三个参数rowvar指定每行代表一个变量或者每列代表一个变量,类型为bool。rowvar=True指定每行代表一个变量,即每列代表一个样本;rowvar=False指定每列代表一个变量,即每行代表一个样本。默认值为rowvar=False。 49 | > 5. 返回值为分类结果,类型为list。分类结果的形式为一个列表,该列表中包含若干个子列表,每个子列表代表一个类别,该子列表中含有该类别中的样本序号(样本序号为该样本向量在样本矩阵data中所在行下标或者列下标,从下标0开始)。 50 | 51 | data=np.array([[1,3,5,7,9],[2,4,6,8,10],[1,4,5,8,9], 52 | [1100,1300,1500,1700,1900],[1200,1400,1600,1800,2000],[1100,1400,1500,1800,1900], 53 | [11000,13000,15000,17000,19000],[12000,14000,16000,18000,20000],[11000,14000,15000,18000,19000] 54 | ]) # 9个样本,明显属于3类 55 | scm=SCM.SCM(classdist='nearest', sampledist='euc') # 指定样本间距离使用欧氏距离,类别间距离使用最近距离 56 | res=scm.fit(data,kind=3) # 指定将样本分为3类 57 | print("样本间欧氏距离+类间最近距离: res = ", res) 58 | 59 | >>> 输出 60 | 样本间欧氏距离+类间最近距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] #表明下标为0,2,1的样本向量为一类;3,5,4的为一类;6,8,7的为一类。 61 | 62 | ## 下面给出该系统聚类法模块的两个典型使用示例。 63 | ## 5.
使用案例1 64 | 65 | 题目:在系统聚类法中使用不同的类别间距离和样本间距离组合,对下面的数据集矩阵进行分类。已知该数据集矩阵的每一行代表一个样本,且这些样本属于3个类别。 66 | 67 | data=np.array([[1,3,5,7,9],[2,4,6,8,10],[1,4,5,8,9], 68 | [1100,1300,1500,1700,1900],[1200,1400,1600,1800,2000],[1100,1400,1500,1800,1900], 69 | [11000,13000,15000,17000,19000],[12000,14000,16000,18000,20000],[11000,14000,15000,18000,19000] 70 | ]) #9个样本,明显属于3类 71 | 该题目使用本模块求解的代码文件请参见本文件夹下的example.py。 72 | 73 | 执行example.py,输出结果如下所示: 74 | 75 | >>> 输出 76 | 欧氏距离测试: 77 | 样本间欧氏距离+类间最近距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 78 | 样本间欧氏距离+类间最远距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 79 | 样本间欧氏距离+类间平均距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 80 | 样本间欧氏距离+类间重心距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 81 | 样本间欧氏距离+离差平方距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 82 | 83 | 84 | 马氏距离测试: 85 | 样本间马氏距离+类间最近距离: res = [[0, 2, 1, 3, 5, 4], [6, 8], [7]] 86 | 样本间马氏距离+类间最远距离: res = [[0, 2, 1, 3, 5], [4, 6, 8], [7]] 87 | 样本间马氏距离+类间平均距离: res = [[0, 2, 1, 3, 5], [4, 6, 8], [7]] 88 | 样本间马氏距离+类间重心距离: res = [[0, 2, 1, 3, 5], [4, 6, 8], [7]] 89 | 样本间马氏距离+离差平方距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 90 | 91 | 92 | 曼哈顿距离测试: 93 | 样本间曼哈顿距离+类间最近距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 94 | 样本间曼哈顿距离+类间最远距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 95 | 样本间曼哈顿距离+类间平均距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 96 | 样本间曼哈顿距离+类间重心距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 97 | 样本间曼哈顿距离+离差平方距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 98 | 99 | 100 | 闵科夫斯基距离测试,维数 = 2: 101 | 样本间闵可夫斯基距离+类间最近距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 102 | 样本间闵可夫斯基距离+类间最远距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 103 | 样本间闵可夫斯基距离+类间平均距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 104 | 样本间闵可夫斯基距离+类间重心距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 105 | 样本间闵可夫斯基距离+离差平方距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 106 | 107 | 108 | 余弦距离测试: 109 | 样本间余弦距离+类间最近距离: res = [[0, 1], [2], [3, 6, 4, 7, 5, 8]] 110 | 样本间余弦距离+类间最远距离: res = [[0, 1], [2], [3, 4, 7, 5, 6, 8]] 111 | 样本间余弦距离+类间平均距离: res = [[0, 1], [2], [3, 8, 5, 4, 7, 6]] 112 | 样本间余弦距离+类间重心距离: res = [[0, 1], [2], [3, 5, 6, 4, 7, 8]] 113 | 样本间余弦距离+离差平方距离: res = [[0, 2, 1], [3, 5, 4], [6, 8, 7]] 114 | 115 | 可见,原矩阵data的第0,1,2行的样本向量为一类,第3,4,5行的样本向量为一类,第6,7,8行的样本向量为一类。在多种距离度量组合下,样本间欧氏距离、曼哈顿距离与闵可夫斯基距离均能得到正确分类;而类间离差平方和距离不依赖样本间距离的选择,在各组测试中均表现较好。 116 | 117 |
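> 附注:案例1中的结果也可以与scipy库的层次聚类函数对照验证。以下为示意代码(假设已安装scipy;其中method='single'与metric='euclidean'的组合对应本模块中"样本间欧氏距离+类间最近距离"的设定):

    import numpy as np
    from scipy.cluster.hierarchy import linkage, fcluster

    data=np.array([[1,3,5,7,9],[2,4,6,8,10],[1,4,5,8,9],
                   [1100,1300,1500,1700,1900],[1200,1400,1600,1800,2000],[1100,1400,1500,1800,1900],
                   [11000,13000,15000,17000,19000],[12000,14000,16000,18000,20000],[11000,14000,15000,18000,19000]])
    Z=linkage(data,method='single',metric='euclidean')   # 'single'即类间最近距离
    labels=fcluster(Z,t=3,criterion='maxclust')          # 将聚类树截断为3个类别
    print(labels)   # 输出每个样本的类别编号;类别编号本身与本模块输出的编号不必一致,只需分组一致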
## 6. 使用案例2 118 | 119 | 题目: 120 | (原始的省份消费数据表以图片形式给出,参见本文件夹下的图片1.png) 121 | 122 | 样本间距离使用欧氏距离,类间距离使用最近距离,对上述省份数据应用系统聚类法,分别将5个省份的数据样本分为5类,4类,3类,2类,1类。 123 | 124 | 该题目使用本模块求解的代码文件请参见本文件夹下的example2.py。 125 | 126 | 执行example2.py,输出结果如下所示: 127 | 128 | >>> 输出 129 | 分为5类结果 res = : [['辽宁'], ['浙江'], ['河南'], ['甘肃'], ['青海']] 130 | 分为4类结果 res = : [['辽宁', '青海'], ['浙江'], ['河南'], ['甘肃']] 131 | 分为3类结果 res = : [['辽宁', '青海', '甘肃'], ['浙江'], ['河南']] 132 | 分为2类结果 res = : [['辽宁', '青海', '甘肃', '河南'], ['浙江']] 133 | 分为1类结果 res = : [['辽宁', '青海', '甘肃', '河南', '浙江']] 134 | -------------------------------------------------------------------------------- /SCM/图片1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/SCM/图片1.png -------------------------------------------------------------------------------- /VarAnaly/VarAnaly.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | 3 | class VarAnaly: 4 | 5 | def single_anova(self, data, rowvar=False): 6 | ''' 7 | :single_anova: 单因素方差分析 8 | :param data: 样本集矩阵,因素的不同水平的样本数必须相同 9 | :type data: np.array 10 | :param rowvar: 指定每一行或者每一列作为因素的不同水平;rowvar=True指定每一行代表因素的一个水平,即每一列作为一个观察结果;rowvar=False指定每一列代表因素的一个水平,即每一行作为一个观察结果,默认值为rowvar=False 11 | :type rowvar: bool 12 | :return: 元组形式的方差分析试验表:(sa,se,st,fa,fe,ft,_sa,_se,f) 13 | : sa=该单因素平方和,se=误差平方和,st=平方和总和=sa+se 14 | : fa=因素自由度,fe=误差自由度,ft=自由度总和=fa+fe 15 | : _sa=因素均方,_se=误差均方 16 | : f=F比=_sa/_se 17 | :rtype: tuple 18 | ''' 19 | # 1. 将原始数据统一变换为每一行代表因素的一个水平,即每一列作为一个观察结果的情况 20 | if rowvar==False: 21 | data=data.T 22 | 23 | # 2. 计算各个样本的均值 24 | size=np.shape(data)[0] 25 | count=np.shape(data)[1] 26 | 27 | avg=[] #同水平下的样本均值 28 | for i in range(size): 29 | avg.append(np.average(data[i])) 30 | avg=np.array(avg) 31 | avg_all=np.average(avg) #数据总平均数 32 | avg_all=np.array([avg_all for i in range(count)]) #总体平均数向量化 33 | 34 | # 3. 计算因素平方和sa,误差平方和se与平方和总和st 35 | sa=0.0 36 | se=0.0 37 | for i in range(size): 38 | se+=np.sum(np.square(np.add(data[i],-avg[i]))) 39 | sa+=np.sum(np.square(np.add(avg[i],-avg_all))) 40 | st=sa+se 41 | 42 | # 4. 计算因素自由度fa,误差自由度fe,自由度总和ft 43 | fa=size-1 44 | fe=count*size-size 45 | ft=fa+fe 46 | 47 | # 5. 计算因素均方_sa,误差均方_se,F比f 48 | _sa=sa/fa 49 | _se=se/fe 50 | f=_sa/_se 51 | 52 | return (sa,se,st,fa,fe,ft,_sa,_se,f) 53 | 54 | def double_anova(self, data): 55 | ''' 56 | :double_anova: 双因素方差分析 57 | :param data: 因素A和因素B的样本集矩阵 58 | : 矩阵data格式: 假设因素A有r种水平,因素B有s种水平,则data为r×s矩阵,每一行代表因素A的一种水平,每一列代表因素B的一种水平,矩阵中的每个元素是一个由该水平组合下的全部重复观察值组成的向量(各水平组合的观察次数必须相同) 59 | : 矩阵data示例: np.array([[[58.2,52.6],[56.2,41.2],[65.3,60.8]], [[49.1,42.8],[54.1,50.5],[51.6,48.4]], [[60.1,58.3],[70.9,73.2],[39.2,40.7]], [[75.8,71.5],[58.2,51.0],[48.7,41.4]]]) 60 | :type data: np.array 61 | :return: 元组形式的方差分析试验表:(sa,sb,sab,se,st,ta,tb,tab,te,tt,_sa,_sb,_sab,_se,fa,fb,fab) 62 | : sa=因素A平方和,sb=因素B平方和,sab=交互作用平方和,se=误差平方和,st=平方和总和=sa+sb+sab+se 63 | : ta=因素A自由度,tb=因素B自由度,tab=交互作用自由度,te=误差自由度,tt=自由度总和=ta+tb+tab+te 64 | : _sa=因素A均方,_sb=因素B均方,_sab=交互作用均方,_se=误差均方 65 | : fa=因素A的F比,fb=因素B的F比,fab=交互作用F比 66 | :rtype: tuple 67 | ''' 68 | # 1. 
--------------------------------------------------------------------------------
/SCM/图片1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/Happyxianyueveryday/statslibrary/01494043bc7fb82d4aa6d7d550a4e7dc2ac0503a/SCM/图片1.png
--------------------------------------------------------------------------------
/VarAnaly/VarAnaly.py:
--------------------------------------------------------------------------------
import numpy as np

class VarAnaly:

    def single_anova(self, data, rowvar=False):
        '''
        :single_anova: one-way analysis of variance
        :param data: sample matrix; every level of the factor must have the same number of observations
        :type data: np.array
        :param rowvar: specifies whether each row or each column represents one level of the factor; rowvar=True means each row is one level (each column one observation), rowvar=False means each column is one level (each row one observation); the default is rowvar=False
        :type rowvar: bool
        :return: the ANOVA table as the tuple (sa,se,st,fa,fe,ft,_sa,_se,f)
        : sa = factor sum of squares, se = error sum of squares, st = total sum of squares = sa+se
        : fa = factor degrees of freedom, fe = error degrees of freedom, ft = total degrees of freedom = fa+fe
        : _sa = factor mean square, _se = error mean square
        : f = F ratio = _sa/_se
        :rtype: tuple
        '''
        # 1. Normalize the data so that each row represents one level of the factor, i.e. each column is one observation
        if rowvar==False:
            data=data.T

        # 2. Compute the sample mean of each level
        size=np.shape(data)[0]
        count=np.shape(data)[1]

        avg=[]   # mean of the observations at each level
        for i in range(size):
            avg.append(np.average(data[i]))
        avg=np.array(avg)
        avg_all=np.average(avg)                            # grand mean of all observations
        avg_all=np.array([avg_all for i in range(count)])  # grand mean as a vector

        # 3. Compute the factor sum of squares sa, the error sum of squares se, and the total st
        sa=0.0
        se=0.0
        for i in range(size):
            se+=np.sum(np.square(np.add(data[i],-avg[i])))
            sa+=np.sum(np.square(np.add(avg[i],-avg_all)))
        st=sa+se

        # 4. Compute the factor degrees of freedom fa, the error degrees of freedom fe, and the total ft
        fa=size-1
        fe=count*size-size
        ft=fa+fe

        # 5. Compute the factor mean square _sa, the error mean square _se, and the F ratio f
        _sa=sa/fa
        _se=se/fe
        f=_sa/_se

        return (sa,se,st,fa,fe,ft,_sa,_se,f)

    def double_anova(self, data):
        '''
        :double_anova: two-way analysis of variance
        :param data: sample matrix for factor A and factor B
        : format of data: assuming factor A has r levels and factor B has s levels, data is an r×s matrix in which each row represents one level of factor A and each column one level of factor B; each element data[i][j] is the array of observed values for that combination of levels
        : example of data: np.array([[[58.2,52.6],[56.2,41.2],[65.3,60.8]], [[49.1,42.8],[54.1,50.5],[51.6,48.4]], [[60.1,58.3],[70.9,73.2],[39.2,40.7]], [[75.8,71.5],[58.2,51.0],[48.7,41.4]]])
        :type data: np.array
        :return: the ANOVA table as the tuple (sa,sb,sab,se,st,ta,tb,tab,te,tt,_sa,_sb,_sab,_se,fa,fb,fab)
        : sa = factor A sum of squares, sb = factor B sum of squares, sab = interaction sum of squares, se = error sum of squares, st = total sum of squares = sa+sb+sab+se
        : ta = factor A degrees of freedom, tb = factor B degrees of freedom, tab = interaction degrees of freedom, te = error degrees of freedom, tt = total degrees of freedom = ta+tb+tab+te
        : _sa = factor A mean square, _sb = factor B mean square, _sab = interaction mean square, _se = error mean square
        : fa = F ratio of factor A, fb = F ratio of factor B, fab = F ratio of the interaction
        :rtype: tuple
        '''
        # 1. Compute the sample means needed in the calculation
        size1=np.shape(data)[0]   # number of levels r of factor A
        size2=np.shape(data)[1]   # number of levels s of factor B
        count=np.shape(data)[2]   # number of observations per level pair (A,B)=(a,b)

        avg=np.zeros((size1,size2))   # mean of each combination of levels of A and B (each cell), _Xij
        factor_avg_1=[]               # mean of each level of factor A (each row), _Xi
        factor_avg_2=[]               # mean of each level of factor B (each column), _Xj
        total_avg=0                   # grand mean of all observations, _X

        for i in range(size1):
            for k in range(size2):
                avg[i][k]=np.average(data[i][k])

        for i in range(size1):
            factor_avg_1.append(np.average(data[i]))
        factor_avg_1=np.array(factor_avg_1)

        for i in range(size2):
            factor_avg_2.append(np.average(data[:,i]))
        factor_avg_2=np.array(factor_avg_2)

        total_avg=np.average(data)
        a_total_avg=np.array([total_avg for i in range(size1)])
        b_total_avg=np.array([total_avg for i in range(size2)])

        # 2. Compute sa = factor A sum of squares, sb = factor B sum of squares, sab = interaction sum of squares, se = error sum of squares, st = total = sa+sb+sab+se
        sa=size2*count*np.sum(np.square(factor_avg_1-a_total_avg))
        sb=size1*count*np.sum(np.square(factor_avg_2-b_total_avg))

        sab=0
        for i in range(size1):
            for j in range(size2):
                sab+=count*np.square(avg[i][j]-factor_avg_1[i]-factor_avg_2[j]+total_avg)

        se=0
        for i in range(size1):
            for j in range(size2):
                for k in range(count):
                    se+=np.sum(np.square(data[i][j][k]-avg[i][j]))

        st=sa+sb+sab+se

        # 3. Compute ta = factor A degrees of freedom, tb = factor B degrees of freedom, tab = interaction degrees of freedom, te = error degrees of freedom, tt = total = ta+tb+tab+te
        ta=size1-1
        tb=size2-1
        tab=(size1-1)*(size2-1)
        te=size1*size2*(count-1)
        tt=ta+tb+tab+te

        # 4. Compute _sa = factor A mean square, _sb = factor B mean square, _sab = interaction mean square, _se = error mean square
        _sa=sa/ta
        _sb=sb/tb
        _sab=sab/tab
        _se=se/te

        # 5. Compute fa = F ratio of factor A, fb = F ratio of factor B, fab = F ratio of the interaction
        fa=_sa/_se
        fb=_sb/_se
        fab=_sab/_se

        return (sa,sb,sab,se,st,ta,tb,tab,te,tt,_sa,_sb,_sab,_se,fa,fb,fab)
--------------------------------------------------------------------------------
/VarAnaly/example.py:
--------------------------------------------------------------------------------
import VarAnaly as va
import numpy as np
from scipy.stats import f_oneway

if __name__ == "__main__":

    var=va.VarAnaly()

    # 1. One-way ANOVA
    data=np.array([[0.236, 0.238, 0.248, 0.245, 0.243],[0.257, 0.253, 0.255, 0.254, 0.261],[0.258, 0.264, 0.259, 0.267, 0.262]])
    res=var.single_anova(data,rowvar=True)

    print("One-way ANOVA:")
    print("ANOVA result from this library, res = ", res)
    print("ANOVA result from this library, F value = ", res[8])

    res=f_oneway(data[0],data[1],data[2])
    print("ANOVA test result from scipy.stats.f_oneway, F value = ", res[0])


    # 2. Two-way ANOVA
    data=np.array([[[58.2,52.6],[56.2,41.2],[65.3,60.8]], [[49.1,42.8],[54.1,50.5],[51.6,48.4]], [[60.1,58.3],[70.9,73.2],[39.2,40.7]], [[75.8,71.5],[58.2,51.0],[48.7,41.4]]])
    res=var.double_anova(data)

    print("Two-way ANOVA:")
    print("ANOVA result from this library, res = ", res)
    print("ANOVA result from this library, F value of factor A = ", res[14])
    print("ANOVA result from this library, F value of factor B = ", res[15])
    print("ANOVA result from this library, F value of the interaction A×B = ", res[16])
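    # Illustrative addition (not part of the original example): the library returns
    # only F ratios; matching p-values follow from the survival function of the F
    # distribution, using the degrees of freedom ta=res[5], tb=res[6], tab=res[7]
    # and te=res[8] from the returned table.
    from scipy.stats import f as f_dist
    print("p-value of factor A = ", f_dist.sf(res[14], res[5], res[8]))
    print("p-value of factor B = ", f_dist.sf(res[15], res[6], res[8]))
    print("p-value of the interaction A×B = ", f_dist.sf(res[16], res[7], res[8]))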
--------------------------------------------------------------------------------
/VarAnaly/readme.md:
--------------------------------------------------------------------------------
# VarAnaly Analysis of Variance Module

The VarAnaly module provides one-way and two-way analysis of variance.

## 1. Import the module file "VarAnaly.py"

    import VarAnaly as va

## 2. Create a VarAnaly object

> 1. Creating a VarAnaly object requires no arguments.

    var=va.VarAnaly()

## 3. Performing one-way ANOVA

> 0. Function prototype

    def single_anova(self, data, rowvar=False)

> 1. Use the single_anova member method to perform one-way analysis of variance.
> 2. The first parameter data is the two-dimensional sample matrix, of type np.array.
> 3. The second parameter rowvar specifies whether each row or each column represents one level of the factor, of type bool. rowvar=True means each row is one level of the factor (each column one observation); rowvar=False means each column is one level (each row one observation). The default is rowvar=False.
>> For example, consider the following factor-level table of aluminum-alloy sheet thicknesses.

>> | Machine I | Machine II | Machine III |
>> | ----- | ----- | ----- |
>> | 0.236 | 0.257 | 0.258 |
>> | 0.238 | 0.253 | 0.264 |
>> | 0.248 | 0.255 | 0.259 |
>> | 0.245 | 0.254 | 0.267 |
>> | 0.243 | 0.261 | 0.262 |

>> With rowvar=True, i.e. each row representing one level of the factor, the corresponding first parameter data is

    data=np.array([[0.236, 0.238, 0.248, 0.245, 0.243],[0.257, 0.253, 0.255, 0.254, 0.261],[0.258, 0.264, 0.259, 0.267, 0.262]])

> 4. The return value is the ANOVA table as the tuple (sa,se,st,fa,fe,ft,\_sa,\_se,f). The elements of the tuple correspond to the ANOVA table below.

> | Source | Sum of squares | Degrees of freedom | Mean square | F ratio |
> | ------ | ------ | ------ | ------ | ------ |
> | Factor A | sa | fa | \_sa | f |
> | Error | se | fe | \_se | |
> | Total | st | ft | | |

    # 1. One-way ANOVA
    data=np.array([[0.236, 0.238, 0.248, 0.245, 0.243],[0.257, 0.253, 0.255, 0.254, 0.261],[0.258, 0.264, 0.259, 0.267, 0.262]])
    res=var.single_anova(data,rowvar=True)

    print("One-way ANOVA:")
    print("ANOVA result from this library, res = ", res)
    print("ANOVA result from this library, F value = ", res[8])

    res=f_oneway(data[0],data[1],data[2])
    print("ANOVA test result from scipy.stats.f_oneway, F value = ", res[0])

    >>> Output
    One-way ANOVA:
    ANOVA result from this library, res = (0.0010533333333333367, 0.00019200000000000033, 0.001245333333333337, 2, 12, 14, 0.0005266666666666683, 1.6000000000000026e-05, 32.916666666666714)
    ANOVA result from this library, F value = 32.916666666666714
    ANOVA test result from scipy.stats.f_oneway, F value = 32.91666666666668
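single_anova reports the F ratio but no p-value. As an illustrative aside (assuming scipy is available; the name `table` below is local to this sketch), the p-value can be obtained from the survival function of the F distribution, using the factor and error degrees of freedom stored at positions 3 and 4 of the returned tuple:

    from scipy.stats import f as f_dist
    table=var.single_anova(data,rowvar=True)
    p=f_dist.sf(table[8], table[3], table[4])   # F ratio with (fa, fe) degrees of freedom
    print("p-value = ", p)                      # agrees with the p-value returned by scipy.stats.f_oneway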
## 4. Performing two-way ANOVA

> 0. Function prototype

    def double_anova(self, data)

> 1. Use the double_anova member method to perform two-way analysis of variance.
> 2. The first parameter data is a three-dimensional sample matrix with the following strict format: each row represents one level of factor A, each column one level of factor B, and data[i][j] holds the observed values for factor A at level i combined with factor B at level j.
>> For example, consider the following factor-level table of rocket ranges.

>> | | Booster B1 | Booster B2 | Booster B3 |
>> | ------ | ---------- | ---------- | ---------- |
>> | Fuel A1 | 58.2, 52.6 | 56.2, 41.2 | 65.3, 60.8 |
>> | Fuel A2 | 49.1, 42.8 | 54.1, 50.5 | 51.6, 48.4 |
>> | Fuel A3 | 60.1, 58.3 | 70.9, 73.2 | 39.2, 40.7 |
>> | Fuel A4 | 75.8, 71.5 | 58.2, 51.0 | 48.7, 41.4 |

>> The corresponding first parameter data is

    data=np.array([[[58.2,52.6],[56.2,41.2],[65.3,60.8]], [[49.1,42.8],[54.1,50.5],[51.6,48.4]], [[60.1,58.3],[70.9,73.2],[39.2,40.7]], [[75.8,71.5],[58.2,51.0],[48.7,41.4]]])

> 3. The return value is the ANOVA table as the tuple (sa,sb,sab,se,st,ta,tb,tab,te,tt,\_sa,\_sb,\_sab,\_se,fa,fb,fab). The elements of the tuple correspond to the ANOVA table below.

> | Source | Sum of squares | Degrees of freedom | Mean square | F ratio |
> | ------ | ------ | ------ | ------ | ------ |
> | Factor A | sa | ta | \_sa | fa |
> | Factor B | sb | tb | \_sb | fb |
> | Interaction | sab | tab | \_sab | fab |
> | Error | se | te | \_se | |
> | Total | st | tt | | |

    # 2. Two-way ANOVA
    data=np.array([[[58.2,52.6],[56.2,41.2],[65.3,60.8]], [[49.1,42.8],[54.1,50.5],[51.6,48.4]], [[60.1,58.3],[70.9,73.2],[39.2,40.7]], [[75.8,71.5],[58.2,51.0],[48.7,41.4]]])
    res=var.double_anova(data)

    print("Two-way ANOVA:")
    print("ANOVA result from this library, res = ", res)
    print("ANOVA result from this library, F value of factor A = ", res[14])
    print("ANOVA result from this library, F value of factor B = ", res[15])
    print("ANOVA result from this library, F value of the interaction A×B = ", res[16])

    >>> Output
    Two-way ANOVA:
    ANOVA result from this library, res = (261.67499999999984, 370.98083333333375, 1768.6925000000003, 236.9500000000001, 2638.298333333334, 3, 2, 6, 12, 23, 87.22499999999995, 185.49041666666687, 294.7820833333334, 19.74583333333334, 4.417387634522047, 9.393901667018365, 14.928824646549902)
    ANOVA result from this library, F value of factor A = 4.417387634522047
    ANOVA result from this library, F value of factor B = 9.393901667018365
    ANOVA result from this library, F value of the interaction A×B = 14.928824646549902

## Notes:
> 1. example.py provides sample code that uses the VarAnaly module.

--------------------------------------------------------------------------------