├── 使用说明书.docx ├── code ├── data.xlsx ├── minidata.xlsx ├── run.py ├── show_diff.py ├── alldata.py ├── main_interface.py ├── algorithme.py ├── main_interface.ui └── mainui.py └── README.md /使用说明书.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder23263/Machine_learning_prediction_system/HEAD/使用说明书.docx -------------------------------------------------------------------------------- /code/data.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder23263/Machine_learning_prediction_system/HEAD/code/data.xlsx -------------------------------------------------------------------------------- /code/minidata.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/coder23263/Machine_learning_prediction_system/HEAD/code/minidata.xlsx -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Machine_learning_prediction_system 2 | 机器学习预测系统汇总:包括贝叶斯网络、马尔科夫模型、线性回归、岭回归、多项式回归、决策树回归、深度神经网络预测 3 | # 具体介绍,这里主要是预测部分(包括GUI界面) 4 | 1.熟悉机器学习的完整流程,包括:问题建模,获取数据,特征工程,模型训练,模型调优,线上运行;或者分为三大块:数据准备与预处理,模型选择与训练,模型验证与参数调优。 5 | 2.绘制机器学习算法分类归纳思维导图,按照有监督学习、无监督学习和半监督学习,将学过的算法进行归纳总结。 6 | 3.自行选择学习任务,按照机器学习流程,分别设计分类、预测、聚类系统,每个系统务必选择不同的算法进行训练,采用多种方法进行模型验证与参数调优,选择适合的多个指标对模型进行评估,采用可视化方法对结果进行分析,最终形成一个完整的系统。 7 | (1)分类算法: 8 | k-近邻算法、贝叶斯分类器、支持向量机、决策树分类、神经网络、AdaBoost、GBDT、随机森林、逻辑回归、softmax回归等 9 | (2)预测:贝叶斯网络、马尔科夫模型、条件随机场、线性回归、XGBoost、岭回归、多项式回归、决策树回归、深度神经网络预测 10 | (3)聚类:K-means、层次聚类BIRCH、密度聚类DBSCAN算法、高斯混合聚类GMM、密度聚类的OPTICS算法、基于网格的聚类(STING、CLIQUE)、Mean Shift聚类算法 11 | -------------------------------------------------------------------------------- /code/run.py: -------------------------------------------------------------------------------- 1 | """ 2 | 这个文件是运行文件,是用来运行主程序代码的 3 | 4 | author: kai 5 | """ 6 | from alldata import datas 7 | from algorithme import all_algorithme 8 | from mainui import MyWindow 9 | import sys 10 | from PyQt5.QtWidgets import QApplication, QMainWindow, QMessageBox, QDialog 11 | from PyQt5.QtCore import QTimer 12 | from PyQt5.QtGui import * 13 | from PyQt5.QtWidgets import * 14 | from PyQt5.QtCore import * 15 | from PyQt5 import QtCore, QtGui, QtWidgets 16 | 17 | if __name__ == '__main__': 18 | 19 | app = QApplication(sys.argv) 20 | myWin = MyWindow() 21 | myWin.show() 22 | 23 | 24 | # d = datas() 25 | # # x_train, x_test, y_train, y_test = d.medium_data() 26 | # x_train, x_test, y_train, y_test = d.tremendous_data() 27 | # # x_train, x_test, y_train, y_test = d.mini_data() # c=a+a^2+6 28 | # a = all_algorithme(x_train, x_test, y_train, y_test) 29 | # a.sk_LinearRegression() 30 | # a.sk_ridge() 31 | # a.sk_PolynomialFeatures() 32 | # a.sk_DecisionTreeRegressor() 33 | # #a.sk_byes_network() 34 | # a.tf_dnn() 35 | 36 | #a.get_plot() 37 | #a.get_r2_score() 38 | 39 | 40 | 41 | sys.exit(app.exec_()) 42 | -------------------------------------------------------------------------------- /code/show_diff.py: -------------------------------------------------------------------------------- 1 | from matplotlib import pyplot as plt 2 | import random 3 | import matplotlib 4 | from matplotlib import font_manager 5 | 6 | #windws和linux设置字体的方式 7 | font = {'family' : 'MicroSoft YaHei', 8 | 'weight': 'bold'} 9 | 10 | matplotlib.rc("font",**font) 11 | matplotlib.rc("font",family='MicroSoft YaHei',weight="bold") 12 | 13 | #另外一种设置字体的方式 14 | # my_font = font_manager.FontProperties(fname="/System/Library/Fonts/PingFang.ttc") 15 | from matplotlib import pyplot as plt 16 | import random 17 | #from matplotlib import font_manage 18 | 19 | 20 | x = range(1,6) 21 | y = [0.647,0.640,0.816,0.878,0.799]#中等数据集 22 | y_little = [1, 0.987, 1.0, 0.976, 0.903] 23 | y_big = [0.706, 0.706, 0.817, 0.738, 0.886] 24 | plt.figure(figsize=(20,8),dpi=80) 25 | 26 | plt.plot(x,y_little, label="小数据集",color="blue",linestyle=":", linewidth=2, alpha=0.5) 27 | plt.plot(x,y, label="中数据集",color="red",linewidth=5, alpha=0.5) 28 | plt.plot(x,y_big, label="大数据集",color="black",linestyle="--", linewidth=10, alpha=0.5) 29 | 30 | #调整x轴的刻度 31 | _xtick_labels = ["线性回归", "岭回归", "多项式回归", "决策树回归", "DNN"] 32 | # _xtick_labels += ["11点{}分".format(i) for i in range(60)] 33 | #取步长,数字和字符串一一对应,数据的长度一样 34 | plt.xticks(list(x)[::1],_xtick_labels[::1],rotation=45) #rotaion旋转的度数 35 | plt.yticks([i/10 for i in range(11)]) 36 | #添加描述信息 37 | plt.xlabel("算法") 38 | plt.ylabel("评价指标R2:[0-1]") 39 | plt.title("不同算法在不同数据集的表现") 40 | 41 | #绘制网格 42 | plt.grid(alpha=0.4,linestyle=':') 43 | 44 | #添加图例 45 | plt.legend(prop=font, loc="upper left") 46 | 47 | plt.show() -------------------------------------------------------------------------------- /code/alldata.py: -------------------------------------------------------------------------------- 1 | """ 2 | 此文件用来调取数据集,数据集一共三份:小中大 3 | 4 | author: kai 5 | """ 6 | import numpy as np 7 | import pandas as pd 8 | from sklearn.preprocessing import MinMaxScaler 9 | from sklearn.model_selection import train_test_split 10 | from sklearn.preprocessing import StandardScaler 11 | 12 | class datas(object): 13 | """ 14 | data_type:默认为1 15 | 1. 导入小数据 16 | 2. 导入中数据 17 | 3. 导入大数据 18 | 19 | data_standard:默认为1 20 | 1、最大最小方法 21 | 2、标准化方法 22 | 23 | mini_data: 24 | 读取小数据,为波士顿房价数据集 25 | """ 26 | def __init__(self): 27 | # self.data_type 28 | self.data_standard = 1 29 | #self.choose_data() 30 | pass 31 | 32 | 33 | # def choose_data(self): 34 | # if (self.data_type == 1): 35 | # self.mini_data() 36 | # elif (self.data_type == 2): 37 | # self.medium_data() 38 | # elif (self.data_type == 3): 39 | # self.tremendous_data() 40 | 41 | 42 | 43 | def mini_data(self): 44 | # 读取数据文件 45 | data = pd.read_excel('minidata.xlsx', encoding='utf-8') 46 | # 获取特征和标签值,总共有13组标签和1组特征 47 | train = data.iloc[:, 0:2] # 48 | target = data.iloc[:, 2] # 获取标签 49 | 50 | scaler = StandardScaler() 51 | scaler.fit(train) 52 | train = scaler.transform(train) 53 | target = np.array(target) # 将y_data转换成数组 54 | x_train, x_test, y_train, y_test = train_test_split(train, target, test_size=0.2) 55 | return x_train, x_test, y_train, y_test 56 | 57 | 58 | def medium_data(self): 59 | # 读取数据文件 60 | data = pd.read_excel('data.xlsx', encoding='utf-8') 61 | # 获取特征和标签值,总共有13组标签和1组特征 62 | train = data.iloc[:, :-1] # 获取前13组特征 63 | target = data.iloc[:, -1] # 获取标签 64 | 65 | if (self.data_standard == 1):# 数据归一化(最大最小方法) 66 | scaler = MinMaxScaler() 67 | scaler.fit(train) 68 | train = scaler.transform(train) # 此时输出的x_data就是数组了 69 | elif (self.data_standard == 1): 70 | #数据归一化(标准化方法) 71 | scaler = StandardScaler() 72 | scaler.fit(train) 73 | train = scaler.transform(train) 74 | target = np.array(target) # 将y_data转换成数组 75 | x_train, x_test, y_train, y_test = train_test_split(train, target, test_size=0.2) 76 | return x_train, x_test, y_train, y_test 77 | 78 | 79 | def tremendous_data(self): #相对大的数据 80 | # 读取数据文件 81 | data = pd.read_csv('kc_house_data.csv', encoding='utf-8') 82 | # 获取特征和标签值,总共有13组标签和1组特征 83 | train = data.iloc[:, 3:] # 获取前13组特征 84 | target = data.iloc[:, 2] # 获取标签 85 | 86 | scaler = StandardScaler() 87 | scaler.fit(train) 88 | train = scaler.transform(train) 89 | target = np.array(target) # 将y_data转换成数组 90 | x_train, x_test, y_train, y_test = train_test_split(train, target, test_size=0.2) 91 | return x_train, x_test, y_train, y_test 92 | -------------------------------------------------------------------------------- /code/main_interface.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | # Form implementation generated from reading ui file 'main_interface.ui' 4 | # 5 | # Created by: PyQt5 UI code generator 5.13.0 6 | # 7 | # WARNING! All changes made in this file will be lost! 8 | 9 | 10 | from PyQt5 import QtCore, QtGui, QtWidgets 11 | 12 | 13 | class Ui_MainWindow(object): 14 | def setupUi(self, MainWindow): 15 | MainWindow.setObjectName("MainWindow") 16 | MainWindow.resize(842, 754) 17 | font = QtGui.QFont() 18 | font.setPointSize(18) 19 | MainWindow.setFont(font) 20 | self.centralwidget = QtWidgets.QWidget(MainWindow) 21 | self.centralwidget.setObjectName("centralwidget") 22 | self.title = QtWidgets.QLabel(self.centralwidget) 23 | self.title.setGeometry(QtCore.QRect(260, 30, 191, 31)) 24 | self.title.setObjectName("title") 25 | self.about_us = QtWidgets.QPushButton(self.centralwidget) 26 | self.about_us.setGeometry(QtCore.QRect(650, 180, 80, 35)) 27 | font = QtGui.QFont() 28 | font.setPointSize(12) 29 | self.about_us.setFont(font) 30 | self.about_us.setObjectName("about_us") 31 | self.choose_orient = QtWidgets.QComboBox(self.centralwidget) 32 | self.choose_orient.setGeometry(QtCore.QRect(50, 120, 69, 23)) 33 | font = QtGui.QFont() 34 | font.setPointSize(14) 35 | self.choose_orient.setFont(font) 36 | self.choose_orient.setObjectName("choose_orient") 37 | self.choose_orient.addItem("") 38 | self.choose_orient.addItem("") 39 | self.choose_orient.addItem("") 40 | self.choose_orient_lable = QtWidgets.QLabel(self.centralwidget) 41 | self.choose_orient_lable.setGeometry(QtCore.QRect(30, 80, 131, 21)) 42 | font = QtGui.QFont() 43 | font.setPointSize(15) 44 | font.setBold(True) 45 | font.setWeight(75) 46 | self.choose_orient_lable.setFont(font) 47 | self.choose_orient_lable.setObjectName("choose_orient_lable") 48 | self.choose_data_lable = QtWidgets.QLabel(self.centralwidget) 49 | self.choose_data_lable.setGeometry(QtCore.QRect(220, 80, 151, 21)) 50 | font = QtGui.QFont() 51 | font.setPointSize(15) 52 | font.setBold(True) 53 | font.setWeight(75) 54 | self.choose_data_lable.setFont(font) 55 | self.choose_data_lable.setObjectName("choose_data_lable") 56 | self.choose_data = QtWidgets.QComboBox(self.centralwidget) 57 | self.choose_data.setGeometry(QtCore.QRect(260, 120, 69, 23)) 58 | font = QtGui.QFont() 59 | font.setPointSize(14) 60 | self.choose_data.setFont(font) 61 | self.choose_data.setObjectName("choose_data") 62 | self.choose_data.addItem("") 63 | self.choose_data.addItem("") 64 | self.choose_data.addItem("") 65 | self.choose_data.addItem("") 66 | self.choose_algorithme_lable = QtWidgets.QLabel(self.centralwidget) 67 | self.choose_algorithme_lable.setGeometry(QtCore.QRect(450, 80, 121, 21)) 68 | font = QtGui.QFont() 69 | font.setPointSize(15) 70 | font.setBold(True) 71 | font.setWeight(75) 72 | self.choose_algorithme_lable.setFont(font) 73 | self.choose_algorithme_lable.setObjectName("choose_algorithme_lable") 74 | self.choose_algorithme = QtWidgets.QComboBox(self.centralwidget) 75 | self.choose_algorithme.setGeometry(QtCore.QRect(420, 120, 151, 23)) 76 | font = QtGui.QFont() 77 | font.setPointSize(14) 78 | self.choose_algorithme.setFont(font) 79 | self.choose_algorithme.setObjectName("choose_algorithme") 80 | self.run = QtWidgets.QPushButton(self.centralwidget) 81 | self.run.setGeometry(QtCore.QRect(650, 80, 80, 35)) 82 | self.run.setObjectName("run") 83 | self.ans_lable = QtWidgets.QLabel(self.centralwidget) 84 | self.ans_lable.setGeometry(QtCore.QRect(40, 200, 91, 21)) 85 | font = QtGui.QFont() 86 | font.setPointSize(15) 87 | font.setBold(True) 88 | font.setWeight(75) 89 | self.ans_lable.setFont(font) 90 | self.ans_lable.setObjectName("ans_lable") 91 | self.ans = QtWidgets.QLabel(self.centralwidget) 92 | self.ans.setGeometry(QtCore.QRect(40, 270, 531, 391)) 93 | self.ans.setObjectName("ans") 94 | self.pushButton = QtWidgets.QPushButton(self.centralwidget) 95 | self.pushButton.setGeometry(QtCore.QRect(650, 130, 80, 35)) 96 | font = QtGui.QFont() 97 | font.setPointSize(11) 98 | font.setBold(False) 99 | font.setWeight(50) 100 | self.pushButton.setFont(font) 101 | self.pushButton.setObjectName("pushButton") 102 | self.ans_lable_2 = QtWidgets.QLabel(self.centralwidget) 103 | self.ans_lable_2.setGeometry(QtCore.QRect(650, 250, 91, 21)) 104 | font = QtGui.QFont() 105 | font.setPointSize(15) 106 | font.setBold(True) 107 | font.setWeight(75) 108 | self.ans_lable_2.setFont(font) 109 | self.ans_lable_2.setObjectName("ans_lable_2") 110 | self.textEdit = QtWidgets.QTextEdit(self.centralwidget) 111 | self.textEdit.setGeometry(QtCore.QRect(630, 300, 131, 311)) 112 | self.textEdit.setObjectName("textEdit") 113 | self.all_ans = QtWidgets.QPushButton(self.centralwidget) 114 | self.all_ans.setGeometry(QtCore.QRect(620, 630, 161, 31)) 115 | font = QtGui.QFont() 116 | font.setPointSize(14) 117 | self.all_ans.setFont(font) 118 | self.all_ans.setObjectName("all_ans") 119 | MainWindow.setCentralWidget(self.centralwidget) 120 | self.menubar = QtWidgets.QMenuBar(MainWindow) 121 | self.menubar.setGeometry(QtCore.QRect(0, 0, 842, 23)) 122 | self.menubar.setObjectName("menubar") 123 | MainWindow.setMenuBar(self.menubar) 124 | self.statusbar = QtWidgets.QStatusBar(MainWindow) 125 | self.statusbar.setObjectName("statusbar") 126 | MainWindow.setStatusBar(self.statusbar) 127 | 128 | self.retranslateUi(MainWindow) 129 | QtCore.QMetaObject.connectSlotsByName(MainWindow) 130 | 131 | def retranslateUi(self, MainWindow): 132 | _translate = QtCore.QCoreApplication.translate 133 | MainWindow.setWindowTitle(_translate("MainWindow", "MainWindow")) 134 | self.title.setText(_translate("MainWindow", "机器学习算法汇总")) 135 | self.about_us.setText(_translate("MainWindow", "关于我们")) 136 | self.choose_orient.setItemText(0, _translate("MainWindow", "聚类")) 137 | self.choose_orient.setItemText(1, _translate("MainWindow", "预测")) 138 | self.choose_orient.setItemText(2, _translate("MainWindow", "分类")) 139 | self.choose_orient_lable.setText(_translate("MainWindow", "选择算法方向")) 140 | self.choose_data_lable.setText(_translate("MainWindow", "选择数据集大小")) 141 | self.choose_data.setItemText(0, _translate("MainWindow", "小")) 142 | self.choose_data.setItemText(1, _translate("MainWindow", "中")) 143 | self.choose_data.setItemText(2, _translate("MainWindow", "大")) 144 | self.choose_data.setItemText(3, _translate("MainWindow", "自定义")) 145 | self.choose_algorithme_lable.setText(_translate("MainWindow", "选择算法")) 146 | self.run.setText(_translate("MainWindow", "Run")) 147 | self.ans_lable.setText(_translate("MainWindow", "运行效果")) 148 | self.ans.setText(_translate("MainWindow", "pic")) 149 | self.pushButton.setText(_translate("MainWindow", "刷新算法")) 150 | self.ans_lable_2.setText(_translate("MainWindow", "运行日志")) 151 | self.all_ans.setText(_translate("MainWindow", "查看所有运行结果")) 152 | -------------------------------------------------------------------------------- /code/algorithme.py: -------------------------------------------------------------------------------- 1 | """ 2 | 这个文件是用来写各种算法的 3 | 4 | author: kai 5 | """ 6 | import matplotlib.pyplot as plt 7 | from sklearn.neighbors import KNeighborsRegressor 8 | from sklearn.linear_model import LinearRegression 9 | from sklearn.linear_model import Ridge 10 | from sklearn.linear_model import Lasso 11 | from sklearn.tree import DecisionTreeRegressor 12 | from sklearn.svm import SVR 13 | from sklearn.metrics import r2_score 14 | from sklearn.linear_model import Ridge 15 | from sklearn.tree import DecisionTreeRegressor 16 | from sklearn.preprocessing import PolynomialFeatures 17 | import numpy as np 18 | from sklearn.naive_bayes import MultinomialNB # 从sklean.naive_bayes里导入朴素贝叶斯模型 19 | from sklearn.metrics import classification_report 20 | from sklearn.naive_bayes import GaussianNB 21 | import tensorflow as tf 22 | #print('Tensorflow Version: {}'.format(tf.__version__)) 23 | import pandas as pd 24 | import os 25 | os.environ["TF_CPP_MIN_LOG_LEVEL"]='2'# 只显示 Error 26 | import warnings 27 | warnings.filterwarnings("ignore") 28 | from random import seed 29 | 30 | 31 | class all_algorithme(object): 32 | """ 33 | sk_LinearRegression:线性模型 34 | 35 | 36 | """ 37 | 38 | def __init__(self, x_train, x_test, y_train, y_test): 39 | self.x_train, self.x_test, self.y_train, self.y_test = x_train, x_test, y_train, y_test 40 | 41 | 42 | def sk_LinearRegression(self): #线性模型 43 | linear = LinearRegression() 44 | linear.fit(self.x_train, self.y_train) 45 | self.y_pre_linear = linear.predict(self.x_test) 46 | self.linear_score = r2_score(self.y_test, self.y_pre_linear) 47 | print("在线性模型下,R2 决定系数-拟合优度为", self.linear_score) 48 | return "在线性模型下,R2 决定系数-拟合优度为: {:.2f}".format(self.linear_score), self.y_pre_linear, self.y_test 49 | 50 | 51 | def sk_ridge(self): #岭回归 52 | ridge = Ridge() 53 | ridge.fit(self.x_train, self.y_train) 54 | self.y_pre_ridge = ridge.predict(self.x_test) 55 | self.ridge_score = r2_score(self.y_test, self.y_pre_ridge) 56 | print("在岭回归的模型下,R2 决定系数-拟合优度为:", self.ridge_score) 57 | return "在岭回归模型下,R2 决定系数-拟合优度为: {:.2f}".format(self.ridge_score), self.y_pre_ridge, self.y_test 58 | 59 | 60 | def sk_DecisionTreeRegressor(self): #决策树回归 61 | decision = DecisionTreeRegressor() 62 | decision.fit(self.x_train, self.y_train) 63 | self.y_pre_decision = decision.predict(self.x_test) 64 | self.decision_score = r2_score(self.y_test, self.y_pre_decision) 65 | print("在决策树回归的模型下,R2 决定系数-拟合优度为", self.decision_score) 66 | return "在决策树回归模型下,R2 决定系数-拟合优度为: {:.2f}".format(self.decision_score), self.y_pre_decision, self.y_test 67 | 68 | 69 | def sk_PolynomialFeatures(self): #多项式回归,默认二项式回归 参数degree=3,即为三项式回归 70 | lr = LinearRegression() 71 | quadratic = PolynomialFeatures() 72 | X_train_quad = quadratic.fit_transform(self.x_train)#X_train_quad为用于训练的x的数据 73 | 74 | lr.fit(X_train_quad, self.y_train) 75 | #self.y_pre_quad = lr.predict(X_train_quad) 76 | self.y_pred_quad = lr.predict(quadratic.fit_transform(self.x_test))#x_test 为用于测试的x的数据,需要转变形式后再预测 77 | self.quadratic_score = r2_score(self.y_test, self.y_pred_quad) 78 | print("在多项式回归模型下,R2 决定系数-拟合优度为", self.quadratic_score) 79 | return "在多项式回归模型下,R2 决定系数-拟合优度为: {:.2f}".format(self.quadratic_score), self.y_pred_quad, self.y_test 80 | 81 | 82 | def sk_byes_network(self):# 贝叶斯网络 83 | # 3.使用朴素贝叶斯进行训练 84 | Gnb = GaussianNB() # 使用默认配置初始化朴素贝叶斯 85 | Gnb.fit(self.x_train, self.y_train) # 利用训练数据对模型参数进行估计 86 | y_predict = Gnb.predict(self.x_test) # 对参数进行预测 87 | 88 | # 4.获取结果报告 89 | print('The Accuracy of Naive Bayes Classifier is:', Gnb.score(self.x_test, self.y_test)) 90 | #print(classification_report(self.y_test, y_predict, target_names="boston")) 91 | 92 | 93 | def tf_dnn(self): #tensorflow dnn 94 | print("tf_dnn运行开始") 95 | model = tf.keras.Sequential([ 96 | tf.keras.layers.Dense(10, activation='relu', input_shape=(self.x_train.shape[1],)), 97 | tf.keras.layers.Dense(64, activation='relu'), 98 | tf.keras.layers.Dense(64, activation='relu'), 99 | tf.keras.layers.Dense(64, activation='relu'), 100 | tf.keras.layers.Dense(64, activation='relu'), 101 | tf.keras.layers.Dense(64, activation='relu'), 102 | tf.keras.layers.Dense(1) 103 | ]) 104 | 105 | """查看网络结构""" 106 | #model.summary() 107 | 108 | """编译,配置""" 109 | model.compile(optimizer='adam', 110 | loss='mse', 111 | metrics=['mae', 'mse'] 112 | ) 113 | 114 | """训练数据""" 115 | history = model.fit(self.x_train, self.y_train, epochs=100, verbose=0) # verbose=0禁止打印日志,1为进度条,2为打印日志 116 | hist = pd.DataFrame(history.history) # hist为训练过程的参数,每一轮训练的loss,mae,mse,epoch 117 | 118 | 119 | hist['epoch'] = history.epoch 120 | # 将训练轮数的索引改为从1开始 121 | hist['epoch'] = hist['epoch'] + 1 122 | self.y_pred_dnn = model.predict(self.x_test) 123 | 124 | self.dnn_score = r2_score(self.y_test, self.y_pred_dnn) 125 | print("在dnn模型下,R2 决定系数-拟合优度为:", self.dnn_score) 126 | 127 | return "在dnn模型下,R2 决定系数-拟合优度为: {:.2f}".format(self.dnn_score), self.y_pred_dnn, self.y_test 128 | 129 | 130 | def markf(self):#马尔科夫模型 131 | P = np.array([[0.2, 0.7, 0.1], 132 | [0.9, 0.0, 0.1], 133 | [0.2, 0.8, 0.0]]) 134 | stateChangeHist = np.array([[0.0, 0.0, 0.0], 135 | [0.0, 0.0, 0.0], 136 | [0.0, 0.0, 0.0]]) 137 | state = np.array([[1.0, 0.0, 0.0]]) 138 | currentState = 0 139 | stateHist = state 140 | dfStateHist = pd.DataFrame(state) 141 | distr_hist = [[0, 0, 0]] 142 | seed(4) 143 | 144 | # Simulate from multinomial distribution 145 | def simulate_multinomial(vmultinomial): 146 | r = np.random.uniform(0.0, 1.0) 147 | CS = np.cumsum(vmultinomial) 148 | CS = np.insert(CS, 0, 0) 149 | m = (np.where(CS < r))[0] 150 | nextState = m[len(m) - 1] 151 | return nextState 152 | 153 | for x in range(1000): 154 | currentRow = np.ma.masked_values((P[currentState]), 0.0) 155 | nextState = simulate_multinomial(currentRow) 156 | # Keep track of state changes 157 | stateChangeHist[currentState, nextState] += 1 158 | # Keep track of the state vector itself 159 | state = np.array([[0, 0, 0]]) 160 | state[0, nextState] = 1.0 161 | # Keep track of state history 162 | stateHist = np.append(stateHist, state, axis=0) 163 | currentState = nextState 164 | # calculate the actual distribution over the 3 states so far 165 | totals = np.sum(stateHist, axis=0) 166 | gt = np.sum(totals) 167 | distrib = totals / gt 168 | distrib = np.reshape(distrib, (1, 3)) 169 | distr_hist = np.append(distr_hist, distrib, axis=0) 170 | print(distrib) 171 | P_hat = stateChangeHist / stateChangeHist.sum(axis=1)[:, None] 172 | # Check estimated state transition probabilities based on history so far: 173 | print(P_hat) 174 | dfDistrHist = pd.DataFrame(distr_hist) 175 | # Plot the distribution as the simulation progresses over time 176 | dfDistrHist.plot(title="Simulation History") 177 | plt.show() 178 | 179 | 180 | def byes(self):#贝叶斯网络 181 | pass 182 | 183 | def get_log(self): 184 | return None 185 | 186 | 187 | def get_plot(self):# 作图函数 188 | #Linear 189 | plt.plot(self.y_test,label='true') 190 | plt.plot(self.y_pre_linear,label='linear') 191 | plt.legend() 192 | #plt.show() 193 | 194 | 195 | # def get_r2_score(self): #打印预测结果的平方差 196 | # print("在线性模型的条件下,平方差R^2为:", self.linear_score) 197 | # # print("在线性模型的条件下,平方差R^2为:", self.linear_score) 198 | # # print("在线性模型的条件下,平方差R^2为:", self.linear_score) -------------------------------------------------------------------------------- /code/main_interface.ui: -------------------------------------------------------------------------------- 1 | 2 | 3 | MainWindow 4 | 5 | 6 | 7 | 0 8 | 0 9 | 842 10 | 754 11 | 12 | 13 | 14 | 15 | 18 16 | 17 | 18 | 19 | MainWindow 20 | 21 | 22 | 23 | 24 | 25 | 260 26 | 30 27 | 191 28 | 31 29 | 30 | 31 | 32 | 机器学习算法汇总 33 | 34 | 35 | 36 | 37 | 38 | 650 39 | 180 40 | 80 41 | 35 42 | 43 | 44 | 45 | 46 | 12 47 | 48 | 49 | 50 | 关于我们 51 | 52 | 53 | 54 | 55 | 56 | 50 57 | 120 58 | 69 59 | 23 60 | 61 | 62 | 63 | 64 | 14 65 | 66 | 67 | 68 | 69 | 聚类 70 | 71 | 72 | 73 | 74 | 预测 75 | 76 | 77 | 78 | 79 | 分类 80 | 81 | 82 | 83 | 84 | 85 | 86 | 30 87 | 80 88 | 131 89 | 21 90 | 91 | 92 | 93 | 94 | 15 95 | 75 96 | true 97 | 98 | 99 | 100 | 选择算法方向 101 | 102 | 103 | 104 | 105 | 106 | 220 107 | 80 108 | 151 109 | 21 110 | 111 | 112 | 113 | 114 | 15 115 | 75 116 | true 117 | 118 | 119 | 120 | 选择数据集大小 121 | 122 | 123 | 124 | 125 | 126 | 260 127 | 120 128 | 69 129 | 23 130 | 131 | 132 | 133 | 134 | 14 135 | 136 | 137 | 138 | 139 | 140 | 141 | 142 | 143 | 144 | 145 | 146 | 147 | 148 | 149 | 150 | 151 | 152 | 153 | 154 | 自定义 155 | 156 | 157 | 158 | 159 | 160 | 161 | 450 162 | 80 163 | 121 164 | 21 165 | 166 | 167 | 168 | 169 | 15 170 | 75 171 | true 172 | 173 | 174 | 175 | 选择算法 176 | 177 | 178 | 179 | 180 | 181 | 420 182 | 120 183 | 151 184 | 23 185 | 186 | 187 | 188 | 189 | 14 190 | 191 | 192 | 193 | 194 | 195 | 196 | 650 197 | 80 198 | 80 199 | 35 200 | 201 | 202 | 203 | Run 204 | 205 | 206 | 207 | 208 | 209 | 40 210 | 200 211 | 91 212 | 21 213 | 214 | 215 | 216 | 217 | 15 218 | 75 219 | true 220 | 221 | 222 | 223 | 运行效果 224 | 225 | 226 | 227 | 228 | 229 | 40 230 | 270 231 | 531 232 | 391 233 | 234 | 235 | 236 | pic 237 | 238 | 239 | 240 | 241 | 242 | 650 243 | 130 244 | 80 245 | 35 246 | 247 | 248 | 249 | 250 | 11 251 | 50 252 | false 253 | 254 | 255 | 256 | 刷新算法 257 | 258 | 259 | 260 | 261 | 262 | 650 263 | 250 264 | 91 265 | 21 266 | 267 | 268 | 269 | 270 | 15 271 | 75 272 | true 273 | 274 | 275 | 276 | 运行日志 277 | 278 | 279 | 280 | 281 | 282 | 630 283 | 300 284 | 131 285 | 311 286 | 287 | 288 | 289 | 290 | 291 | 292 | 620 293 | 630 294 | 161 295 | 31 296 | 297 | 298 | 299 | 300 | 14 301 | 302 | 303 | 304 | 查看所有运行结果 305 | 306 | 307 | 308 | 309 | 310 | 311 | 0 312 | 0 313 | 842 314 | 23 315 | 316 | 317 | 318 | 319 | 320 | 321 | 322 | 323 | -------------------------------------------------------------------------------- /code/mainui.py: -------------------------------------------------------------------------------- 1 | """ 2 | UI文件 3 | 4 | author:kai 5 | """ 6 | import matplotlib.pyplot as plt 7 | import sys 8 | from PyQt5.QtWidgets import QApplication, QMainWindow, QMessageBox, QDialog 9 | from PyQt5.QtCore import QTimer 10 | from PyQt5.QtGui import * 11 | from PyQt5.QtWidgets import * 12 | from PyQt5.QtCore import * 13 | from PyQt5 import QtCore, QtGui, QtWidgets 14 | from main_interface import Ui_MainWindow 15 | from alldata import datas 16 | from algorithme import all_algorithme 17 | 18 | 19 | class MyWindow(QMainWindow, Ui_MainWindow): 20 | def __init__(self, parent=None): 21 | super(MyWindow, self).__init__(parent) 22 | self.setupUi(self) 23 | #self.Run.clicked.connect(self.label_2) 24 | pix = QPixmap('0.jpg') 25 | pix = pix.scaled(1920, 1080)#1920, 1080 26 | lb = self.ans 27 | lb.setPixmap(pix) 28 | self.pushButton.clicked.connect(self.refresh_algorithme) 29 | self.run.clicked.connect(self.run_all_func) 30 | self.about_us.clicked.connect(self.show_message) 31 | self.all_ans.clicked.connect(self.pic_detect) 32 | 33 | 34 | def run_all_func(self): #选择大方向,分类,聚类还是预测 35 | if(self.choose_orient.currentText() == "聚类"): 36 | self.run_culster_algorithme() 37 | elif(self.choose_orient.currentText() == "预测"): 38 | self.run_predict_algorithme() 39 | else: 40 | self.run_classify_algorithme() 41 | 42 | 43 | def run_classify_algorithme(self): 44 | pass 45 | 46 | 47 | def refresh_a(self): 48 | if(self.choose_orient.currentIndex() == 0): 49 | self.choose_algorithme.addItem("AGNES") 50 | self.choose_algorithme.addItem("BIRCH") 51 | self.choose_algorithme.addItem("DBSCAN") 52 | self.choose_algorithme.addItem("GMM") 53 | self.choose_algorithme.addItem("k-means") 54 | self.choose_algorithme.addItem("Mean Shift") 55 | 56 | elif(self.choose_orient.currentIndex() == 1): 57 | self.choose_algorithme.addItem("线性回归") 58 | self.choose_algorithme.addItem("岭回归") 59 | self.choose_algorithme.addItem("多项式回归") 60 | self.choose_algorithme.addItem("决策树回归") 61 | self.choose_algorithme.addItem("DNN") 62 | self.choose_algorithme.addItem("贝叶斯网络") 63 | self.choose_algorithme.addItem("马尔科夫模型") 64 | #self.run_predict_algorithme() 65 | else: 66 | self.choose_algorithme.addItem("KNN") 67 | self.choose_algorithme.addItem("SVM支持向量机") 68 | self.choose_algorithme.addItem("朴素贝叶斯") 69 | self.choose_algorithme.addItem("AdaBoost") 70 | # self.choose_algorithme.addItem("随机森林") 71 | # self.choose_algorithme.addItem("逻辑回归") 72 | # self.choose_algorithme.addItem("GBDT") 73 | # print(self.choose_orient.currentIndex()) 74 | # print(self.choose_orient.currentText()) 75 | 76 | 77 | def choose_data_func(self): #选择数据集大小函数 78 | data_size = self.choose_data.currentText() 79 | if(data_size == "小"): 80 | x_train, x_test, y_train, y_test = self.d.mini_data() # c=a+a^2+6 81 | elif(data_size == "大"): 82 | x_train, x_test, y_train, y_test = self.d.tremendous_data() 83 | else: 84 | x_train, x_test, y_train, y_test = self.d.medium_data() 85 | return x_train, x_test, y_train, y_test 86 | 87 | 88 | def run_predict_algorithme(self): #运行预测算法函数 89 | self.d = datas() 90 | x_train, x_test, y_train, y_test = self.choose_data_func() 91 | a = all_algorithme(x_train, x_test, y_train, y_test) 92 | current_algo = self.choose_algorithme.currentText() 93 | if(current_algo == "线性回归"): 94 | ans, y_pre, y_test = a.sk_LinearRegression() 95 | elif(current_algo == "岭回归"): 96 | ans, y_pre, y_test = a.sk_ridge() 97 | elif(current_algo == "多项式回归"): 98 | ans, y_pre, y_test = a.sk_PolynomialFeatures() 99 | elif (current_algo == "决策树回归"): 100 | ans, y_pre, y_test = a.sk_DecisionTreeRegressor() 101 | elif(current_algo == "DNN"): 102 | ans, y_pre, y_test = a.tf_dnn() 103 | # else: 104 | # QMessageBox.information(self, "请先选择算法", QMessageBox.Warning) 105 | # elif(current_algo == "决策树回归"): 106 | # ans, y_pre_linear, y_test = a.sk_DecisionTreeRegressor() 107 | self.plot_and_log(ans, y_pre, y_test) # 画图打印日志 108 | 109 | def plot_and_log_for_cluster(self): 110 | 111 | pix = QPixmap(self.culster_ans_path) 112 | self.ans.setScaledContents(True) 113 | self.ans.setPixmap(pix) 114 | 115 | 116 | def run_culster_algorithme(self): 117 | current_algo = self.choose_algorithme.currentText() 118 | current_data = self.choose_data.currentText() 119 | if(current_algo == "AGNES"): 120 | self.culster_ans_path = r'./cluster/AGNES/鸢尾花数据集3.png' 121 | self.textEdit.setText("") 122 | # elif(current_data == "中"): 123 | # self.culster_ans_path = r'./cluster/AGNES/鸢尾花数据集2.png' 124 | # self.textEdit.setText("AGNES均值 = \ 125 | # [[5.006 3.428 1.462 0.246 ]\ 126 | # [5.9016129 2.7483871 4.39354839 1.43387097]\ 127 | # [6.85 3.07368421 5.74210526 2.07105263]] \ 128 | # 分类正确率为 0.8933333333333333") 129 | # else: 130 | # self.culster_ans_path = r'./cluster/AGNES/鸢尾花数据集3.png' 131 | # self.textEdit.setText("AGNES均值 = \ 132 | # [[5.006 3.428 1.462 0.246 ]\ 133 | # [5.9016129 2.7483871 4.39354839 1.43387097]\ 134 | # [6.85 3.07368421 5.74210526 2.07105263]] \ 135 | # 分类正确率为 0.8933333333333333") 136 | 137 | elif(current_algo == "BIRCH"): 138 | if (current_data == "小"): 139 | self.culster_ans_path = './cluster/BIRCH/综合聚类数据集2.png' 140 | self.textEdit.setText("BIRCH,轮廓系数为0.4243692758642153 。") 141 | if (current_data == "中"): 142 | self.culster_ans_path = './cluster/BIRCH/鸢尾花数据集2.png' 143 | self.textEdit.setText("BIRCH,聚类正确率为0.23333333333334 。") 144 | if (current_data == "大"): 145 | self.culster_ans_path = './cluster/BIRCH/随机数据集3.png' 146 | self.textEdit.setText("BIRCH,Calinski-Harabasz Score为3301.8023 。") 147 | 148 | elif(current_algo == "DBSCAN"): 149 | if (current_data == "小"): 150 | self.culster_ans_path = './cluster/DBSCAN/综合聚类数据集2.png' 151 | self.textEdit.setText("DBSCAN,轮廓系数为0.4230767949856645 。") 152 | if (current_data == "中"): 153 | self.culster_ans_path = './cluster/DBSCAN/鸢尾花数据集3.png' 154 | self.textEdit.setText("DBSCAN,聚类正确率为0.41333333333 。") 155 | if (current_data == "大"): 156 | self.culster_ans_path = './cluster/DBSCAN/啤酒数据集4.png' 157 | self.textEdit.setText("DBSCAN,轮廓系数为0.49530955296776086") 158 | 159 | 160 | elif(current_algo == "GMM"): 161 | if (current_data == "小"): 162 | self.culster_ans_path = './cluster/GMM/综合聚类数据集2.png' 163 | self.textEdit.setText("高斯混合聚类GMM,轮廓系数为0.42967355323274 。") 164 | if (current_data == "中"): 165 | self.culster_ans_path = './cluster/GMM/鸢尾花数据集3.png' 166 | self.textEdit.setText("DBSCAN,聚类正确率为0.793333333 。") 167 | if (current_data == "大"): 168 | self.culster_ans_path = './cluster/GMM/高斯混合模型4.png' 169 | self.textEdit.setText(" mu:\ 170 | [[0.80381015 0.52303569]\ 171 | [0.26065489 0.60734094]]\ 172 | cov:\ 173 | [[[0.00987706 0.0008622 ]\ 174 | [0.0008622 0.04811051]]\ 175 | \ 176 | [[0.01417906 0.00216207]\ 177 | [0.00216207 0.00822912]]]\ 178 | alpha:\ 179 | [0.6894792 0.3105208]") 180 | 181 | 182 | elif(current_algo == "k-means"): 183 | if (current_data == "小"): 184 | self.culster_ans_path = './cluster/k-means/综合聚类数据集2.png' 185 | self.textEdit.setText("K-means,轮廓系数为0.4270730877908618。 ") 186 | if (current_data == "中"): 187 | self.culster_ans_path = './cluster/k-means/鸢尾花数据集2.png' 188 | self.textEdit.setText("K-means,聚类正确率为0.23333333333334 。") 189 | if (current_data == "大"): 190 | self.culster_ans_path = './cluster/k-means/啤酒数据集5.png' 191 | self.textEdit.setText("K-means,轮廓系数为0.6917656034079486") 192 | 193 | 194 | elif(current_algo == "Mean Shift"): 195 | if (current_data == "小"): 196 | self.culster_ans_path = './cluster/Mean Shift/综合聚类数据集2.png' 197 | self.textEdit.setText("Mean Shift,轮廓系数为0.3994440156335262 。") 198 | if (current_data == "中"): 199 | self.culster_ans_path = './cluster/Mean Shift/鸢尾花数据集2.png' 200 | self.textEdit.setText("Mean Shift,聚类正确率为0.00666666667 。") 201 | if (current_data == "大"): 202 | self.culster_ans_path = './cluster/Mean Shift/三簇数据集1 .png' 203 | self.textEdit.setText("Mean Shift,估计的群集数 : 3\ 204 | 十大样本: [[ 1.35567205 1.03426149 2. ]\ 205 | [ 1.84313781 -0.46642773 0. ]\ 206 | [-1.46430267 -0.86316999 1. ]\ 207 | [ 1.44972653 -0.29907961 0. ]\ 208 | [ 0.46646329 -1.28829248 0. ]\ 209 | [ 0.09852578 1.61534244 2. ]\ 210 | [ 1.35865115 -2.04268199 0. ]\ 211 | [ 0.33726529 -1.41196828 0. ]\ 212 | [-0.2340017 -0.17775738 1. ]\ 213 | [ 0.77906683 -0.64267692 0. ]]") 214 | 215 | self.plot_and_log_for_cluster() 216 | 217 | def plot_and_log(self, ans, y_test, y_pre_linear):#画图打印以及生成日志 218 | self.textEdit.setText(ans) 219 | # 画图 220 | plt.figure(dpi=200) 221 | plt.plot(y_test, label='true') 222 | plt.plot(y_pre_linear, label='linear') 223 | plt.legend() 224 | 225 | plt.savefig('线性回归.jpg') 226 | pix = QPixmap('线性回归.jpg') 227 | self.ans.setScaledContents(True) 228 | #pix = pix.scaled(600, 400) # 1920, 1080 229 | # self.ans.setPixmap() 230 | self.ans.setPixmap(pix) 231 | 232 | def pic_detect(self): 233 | #打开文件夹选择待检测文件 234 | fileName, filetype = QFileDialog.getOpenFileName(self, "选择文件", r"C:\Users\kai\Desktop\m_l\code\integrate\cluster", "All Files (*);;Text Files (*.txt)") 235 | print(fileName) # 打印文件全部路径(包括文件名和后缀名) 236 | 237 | 238 | def show_message(self): 239 | QMessageBox.information(self, "关于我们", "华北理工大学17智能一班:赵国庆,董润玺,梁帅凯", 240 | QMessageBox.Yes) 241 | 242 | def refresh_algorithme(self): 243 | count = self.choose_algorithme.count() 244 | 245 | while(count!=0): 246 | self.choose_algorithme.removeItem(count-1) 247 | count = self.choose_algorithme.count() 248 | 249 | self.refresh_a() 250 | # print(self.choose_orient.currentIndex()) 251 | # print(self.choose_orient.currentText()) 252 | 253 | 254 | def run_classify_algorithme(self): 255 | print("分类 run_classify_algorithme") 256 | current_algo = self.choose_algorithme.currentText() 257 | if(current_algo == "KNN"): 258 | self.classify_ans_path = r'./classify/KNN/图片1.png' 259 | self.textEdit.setText("") 260 | elif(current_algo == "SVM支持向量机"): 261 | self.classify_ans_path = './classify/SVM支持向量机/图片3.png' 262 | self.textEdit.setText("SVM支持向量机\ 263 | 分类正确率为 0.7833333") 264 | elif(current_algo == "朴素贝叶斯"): 265 | self.classify_ans_path = r'./classify/朴素贝叶斯/图片4.png' 266 | self.textEdit.setText("['apple' 'mandarin' 'orange' 'lemon']\ 267 | fruit_name\ 268 | apple 19\ 269 | lemon 16\ 270 | mandarin 5\ 271 | orange 19\ 272 | dtype: int64") 273 | # elif(current_algo == "AdaBoost"): 274 | # self.classify_ans_path = './classify/AdaBoost/' 275 | # self.textEdit.setText("高斯混合聚类GMM 分类正确率为 0.7933333333333333") 276 | print("分类jieshu run_classify_algorithme") 277 | # elif(current_algo == "k-means"): 278 | # self.culster_ans_path = './cluster/k-means/综合聚类数据集2.png' 279 | # self.textEdit.setText("K-Means均值 = \ 280 | # [[5.77358491 2.69245283]\ 281 | # [5.006 3.428 ]\ 282 | # [6.81276596 3.07446809]]\ 283 | # 分类正确率为 0.23333333333333334") 284 | # elif(current_algo == "Mean Shift"): 285 | # self.culster_ans_path = './cluster/Mean Shift/综合聚类数据集2.png' 286 | # self.textEdit.setText("Mean Shift均值 = \ 287 | # [[6.27373737 2.87575758 4.92525253 1.68181818]\ 288 | # [5.00784314 3.40980392 1.49215686 0.2627451 ]\ 289 | # [ 0.001 0.001 0.001 0.001]]\ 290 | # 分类正确率为 0.006666666666666667") 291 | 292 | self.plot_and_log_for_classify() 293 | 294 | 295 | def plot_and_log_for_classify(self): 296 | pix = QPixmap(self.classify_ans_path) 297 | self.ans.setScaledContents(True) 298 | self.ans.setPixmap(pix) --------------------------------------------------------------------------------