├── README.md ├── loginPage.py ├── main.py ├── mainPage.py ├── multiReduct.py ├── reduct.py └── user_info.pickle /README.md: -------------------------------------------------------------------------------- 1 | This is a complete program with GUI.This section contains two algorithms. 2 | 3 | ### CHARA 4 | Classic Heuristic Attribute Reduction Algorithm based on knowledge granularity for a decision system. 5 | ### HRAM 6 | A Heuristic Reduction Algorithm based on knowledge granularity with a Multi-granulation view. 7 | 8 | How to run it? 9 | ```python 10 | python main.py 11 | ``` 12 | 13 | 这是一个完整的GUI程序,包括了两个算法。 14 | 如何运行它? 15 | ```python 16 | python main.py 17 | ``` 18 | 19 | 对其中两个算法进行了改进,参考新仓库: 20 | 21 | [https://github.com/Gedanke/rough_set_algorithms](https://github.com/Gedanke/rough_set_algorithms) 22 | -------------------------------------------------------------------------------- /loginPage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import pickle 3 | import tkinter.messagebox 4 | from mainPage import * 5 | import re 6 | 7 | 8 | class LoginPage(object): 9 | def __init__(self, master=None): 10 | """ 11 | :param master: tkinter.Tk 12 | """ 13 | self.window = master 14 | '''设置登陆窗口的大小''' 15 | self.window.geometry('800x500') 16 | self.var_user_name = tkinter.StringVar() 17 | self.var_user_password = tkinter.StringVar() 18 | self.signup = False 19 | ''''注册与登录界面''' 20 | '''名称''' 21 | self.label_title = tkinter.Label(self.window, text='基于粗糙集的增量式特征选择系统', bg='green', font=('', 16)) 22 | self.label_title.place(relwidth=0.5, relheight=0.15, relx=0.25, rely=0.1) 23 | '''输入用户名''' 24 | self.label_name = tkinter.Label(self.window, text='用户名:', font=('', 16)) 25 | self.label_name.place(relwidth=0.1, relheight=0.06, relx=0.23, rely=0.35) 26 | self.var_user_name.set('') 27 | self.entry_name = tkinter.Entry(self.window, textvariable=self.var_user_name, font=('', 18)) 28 | self.entry_name.place(relwidth=0.4, relheight=0.09, relx=0.35, rely=0.335) 29 | '''输入密码''' 30 | self.label_password = tkinter.Label(self.window, text='密码:', font=('', 16)) 31 | self.label_password.place(relwidth=0.1, relheight=0.06, relx=0.23, rely=0.525) 32 | self.entry_password = tkinter.Entry(self.window, textvariable=self.var_user_password, 33 | font=('', 18), show='*') 34 | self.entry_password.place(relwidth=0.4, relheight=0.09, relx=0.35, rely=0.50) 35 | '''登录操作''' 36 | self.button_login = tkinter.Button(self.window, text='登录', font=('', 17), command=self.user_login) 37 | self.button_login.place(relwidth=0.15, relheight=0.1, relx=0.25, rely=0.72) 38 | '''注册操作''' 39 | self.button_signup = tkinter.Button(self.window, text='注册', font=('', 17), command=self.user_signup) 40 | self.button_signup.place(relwidth=0.15, relheight=0.1, relx=0.6, rely=0.72) 41 | 42 | def user_login(self): 43 | """ 44 | 定义用户登录功能 45 | :return: NULL 46 | """ 47 | user_name = self.var_user_name.get() 48 | user_pwd = self.var_user_password.get() 49 | '''这里设置异常捕获,当我们第一次访问用户信息文件时是不存在的,所以这里设置异常捕获。 50 | 中间的两行就是我们的匹配,即程序将输入的信息和文件中的信息匹配。''' 51 | try: 52 | with open('user_info.pickle', 'rb') as user_file: 53 | user_info = pickle.load(user_file) 54 | except FileNotFoundError: 55 | '''这里就是我们在没有读取到user_file的时候,程序会创建一个'user_file'这个文件,并将管理员 56 | 的用户和密码写入,即用户名为'admin'密码为'admin'。''' 57 | with open('user_info.pickle', 'wb') as user_file: 58 | user_info = {'admin': 'admin'} 59 | pickle.dump(user_info, user_file) 60 | # 必须先关闭,否则pickle.load()会出现EOFError: Ran out of input 61 | user_file.close() 62 | '''如果用户名和密码与文件中的匹配成功,则会登录成功 63 | 同时销毁登录窗口,转到主窗口''' 64 | if user_name in user_info: 65 | if user_pwd == user_info[user_name]: 66 | self.window.destroy() 67 | MainPage() 68 | # 如果用户名匹配成功,而密码输入错误,则会弹出‘您的密码错误,请继续尝试!' 69 | else: 70 | tkinter.messagebox.showerror(message='您的密码错误,请继续尝试!') 71 | # 如果发现用户名不存在 72 | else: 73 | is_sign_up = tkinter.messagebox.askyesno('欢迎您', '您还未注册,现在注册吗?') 74 | '''提示需不需要注册新用户''' 75 | if is_sign_up: 76 | self.user_signup() 77 | 78 | def user_signup(self): 79 | """ 80 | 定义用户注册功能 81 | 对注册的用户名和密码进行限制 82 | 用户名:只允许使用26个英文字母(大写,小写都可以),10个数字的组合 83 | 长度没有限制,不允许使用纯数字,可以是中文,纯字母,数字或者组合 84 | 密码:字母和数字的组合,至少8个字符,不能有中文--isalpha() 85 | 确认密码:字母和数字的组合,至少8个字符--isalpha(),和密码一致 86 | :return: NULL 87 | """ 88 | 89 | def signup(): 90 | """ 91 | 注册操作 92 | :return: 93 | """ 94 | # 以下三行就是获取我们注册时所输入的信息 95 | np = new_pwd.get() 96 | npf = new_pwd_confirm.get() 97 | nn = new_name.get() 98 | mes_np = self.pwd_check(np)[1] 99 | mes_npf = self.pwd_check(npf)[1] 100 | mes_nn = self.name_check(nn)[1] 101 | '''对用户名,密码,确认密码进行限制''' 102 | if self.name_check(nn)[0] and self.pwd_check(np)[0] and self.pwd_check(npf): 103 | # 这里是打开我们记录数据的文件,将注册信息读出 104 | with open('user_info.pickle', 'rb') as user_file: 105 | exist_user_info = pickle.load(user_file) 106 | '''这里就是判断,如果两次密码输入不一致,则提示‘密码和确认密码必须相同!’''' 107 | if np != npf: 108 | tkinter.messagebox.showerror('错误', '密码和确认密码必须相同!') 109 | else: 110 | '''如果用户名已经在我们的数据文件中,则提示该用户已经被注册了!''' 111 | if nn in exist_user_info: 112 | tkinter.messagebox.showerror('错误', '该用户已经被注册了!') 113 | # 最后如果输入无以上错误,则将注册输入的信息记录到文件当中,并提示注册成功 114 | else: 115 | exist_user_info[nn] = np 116 | with open('user_info.pickle', 'wb') as user_file: 117 | pickle.dump(exist_user_info, user_file) 118 | tkinter.messagebox.showinfo('欢迎您', '您已注册成功!') 119 | '''销毁弹窗''' 120 | self.signup = False 121 | window_signup.destroy() 122 | else: 123 | tkinter.messagebox.showerror(title='错误', message=mes_nn + '\n' + mes_np + '\n' + mes_npf) 124 | 125 | '''定义长在窗口上的窗口''' 126 | window_signup = tkinter.Toplevel(self.window) 127 | window_signup.geometry('400x300') 128 | window_signup.title('注册窗口') 129 | '''将输入的注册名赋值给变量''' 130 | new_name = tkinter.StringVar() 131 | new_name.set('') 132 | tkinter.Label(window_signup, text='用户名: ', font=('', 10)).place(x=20, y=20) 133 | entry_new_name = tkinter.Entry(window_signup, textvariable=new_name) 134 | entry_new_name.place(x=130, y=20) 135 | new_pwd = tkinter.StringVar() 136 | tkinter.Label(window_signup, text='密码: ', font=('', 10)).place(x=20, y=70) 137 | entry_user_pwd = tkinter.Entry(window_signup, textvariable=new_pwd, show='*') 138 | entry_user_pwd.place(x=130, y=70) 139 | new_pwd_confirm = tkinter.StringVar() 140 | tkinter.Label(window_signup, text='确认密码: ', font=('', 10)).place(x=20, y=120) 141 | entry_user_pwd_confirm = tkinter.Entry(window_signup, textvariable=new_pwd_confirm, show='*') 142 | entry_user_pwd_confirm.place(x=130, y=120) 143 | button_comfirm_signup = tkinter.Button(window_signup, text='注册', font=('', 10), command=signup) 144 | button_comfirm_signup.place(x=180, y=180) 145 | 146 | def name_check(self, name): 147 | """ 148 | 用户名核对 149 | :param name: 用户名 150 | :return: 151 | @result:用户名核对结果 152 | @mes_result:弹窗中的提示信息 153 | """ 154 | result = False 155 | mes_result = '' 156 | if name.isdigit(): 157 | mes_result = '用户名不能是纯数字!' 158 | else: 159 | if name.isalnum(): 160 | result = True 161 | else: 162 | mes_result = '用户名只能是中文,纯字母,字母和数字的组合!' 163 | return result, mes_result 164 | 165 | def pwd_check(self, pwd): 166 | """ 167 | 密码核对 168 | :param pwd:密码 169 | :return: 170 | @result:密码核对结果 171 | @mes_result:弹窗中的提示信息 172 | """ 173 | result = False 174 | mes_result = '' 175 | '''正则表达式,检验字符串中是否含有中文''' 176 | zh_pattern = re.compile(u'[\u4e00-\u9fa5]+') 177 | word = pwd.encode('utf-8').decode('utf-8') 178 | if zh_pattern.search(word): 179 | mes_result = '密码中不能有中文!' 180 | else: 181 | if len(pwd) < 8: 182 | mes_result = '密码至少要8位!' 183 | else: 184 | if not pwd.isalnum(): 185 | mes_result = '密码必须是字母和数字的组合!' 186 | else: 187 | result = True 188 | return result, mes_result 189 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from loginPage import * 3 | 4 | """登陆界面""" 5 | root = tkinter.Tk() 6 | root.title('基于粗糙集的增量式特征选择系统V1.0') 7 | LoginPage(root) 8 | root.mainloop() 9 | -------------------------------------------------------------------------------- /mainPage.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | import tkinter 3 | import tkinter.filedialog 4 | import tkinter.messagebox 5 | import os.path 6 | import time 7 | from reduct import Reduct 8 | from multiReduct import mulReduct 9 | import re 10 | import threading 11 | from tkinter.scrolledtext import ScrolledText 12 | 13 | 14 | class MainPage(object): 15 | def __init__(self): 16 | """"主窗口""" 17 | self.window = tkinter.Tk() 18 | self.window.title('基于粗糙集的增量式特征选择系统') 19 | self.window.geometry('1040x650') 20 | """数据成员""" 21 | self.al1_hit = False 22 | self.var_al = tkinter.StringVar() 23 | self.var_label_granularity = tkinter.StringVar() 24 | self.file_path = '' 25 | self.file_name = '' 26 | self.al_name = '' 27 | self.error_line = 0 28 | self.separator = '' 29 | self.reduction_set = [] 30 | self.attr_B = '' 31 | self.now_time = tkinter.StringVar() 32 | """左侧布局""" 33 | '''打开文件''' 34 | self.button_open = tkinter.Button(self.window, text='打开', bg='#a3cf62', font=('Arial', 12), 35 | command=self.button_open_click) 36 | self.button_open.place(relwidth=0.13, relheight=0.17, relx=0, rely=0) 37 | '''检查数据''' 38 | self.button_check = tkinter.Button(self.window, text='检查数据', font=('Arial', 12), 39 | command=self.button_check_click) 40 | self.button_check.place(relwidth=0.122, relheight=0.18, relx=0, rely=0.16) 41 | '''保存特征选择结果''' 42 | self.button_save = tkinter.Button(self.window, text='保存结果', bg='#a3cf62', font=('Arial', 12), 43 | command=self.button_save_click) 44 | self.button_save.place(relwidth=0.13, relheight=0.18, relx=0, rely=0.33) 45 | '''算法一''' 46 | self.button_al1 = tkinter.Button(self.window, text='算法 1', bg='#f58220', font=('Arial', 12), 47 | command=self.button_al1_click) 48 | self.button_al1.place(relwidth=0.13, relheight=0.18, relx=0, rely=0.50) 49 | '''算法二''' 50 | self.button_al2 = tkinter.Button(self.window, text='算法 2', bg='#a3cf62', font=('Arial', 12), 51 | command=self.button_al12_click) 52 | self.button_al2.place(relwidth=0.13, relheight=0.18, relx=0, rely=0.66) 53 | '''帮助''' 54 | self.button_help_hit = False 55 | self.button_help = tkinter.Button(self.window, text='帮助', bg='#f58220', font=('Arial', 12), 56 | command=self.button_help_click) 57 | self.button_help.place(relwidth=0.13, relheight=0.18, relx=0, rely=0.83) 58 | """"中间布局""" 59 | '''显示当前算法名称''' 60 | self.label_content = tkinter.Label(self.window, textvariable=self.var_al, bg='green', font=('Arial', 16)) 61 | self.label_content.place(relwidth=0.602, relheight=0.24, relx=0.12, rely=0) 62 | '''结果展示''' 63 | self.label_show = tkinter.Label(self.window, text='结果展示', bg='#a3cf62', font=('Arial', 12)) 64 | self.label_show.place(relwidth=0.24, relheight=0.07, relx=0.34, rely=0.24) 65 | '''显示结果''' 66 | self.text_result = ScrolledText(self.window, bg='green', 67 | font=('Arial', 16)) 68 | self.text_result.place(relwidth=0.68, relheight=0.69, relx=0.12, rely=0.31) 69 | """"右侧布局""" 70 | '''时间''' 71 | self.label_time = tkinter.Label(self.window, text='时间', bg='#a3cf62') 72 | self.label_time.place(relwidth=0.28, relheight=0.04, relx=0.722, rely=0) 73 | '''显示当前时间''' 74 | self.text_time = tkinter.Label(self.window, font=('Arial', 16)) 75 | self.text_time.place(relwidth=0.28, relheight=0.16, relx=0.722, rely=0.04) 76 | self.show_time() 77 | '''了解''' 78 | self.button_learn_hit = False 79 | self.button_learn = tkinter.Button(self.window, text='了解', font=('Arial', 11), command=self.button_learn_click) 80 | self.button_learn.place(relwidth=0.28, relheight=0.052, relx=0.722, rely=0.19) 81 | '''参数设置''' 82 | self.label_set = tkinter.Label(self.window, text='参数设置', bg='#a3cf62', font=('Arial', 12)) 83 | self.label_set.place(relwidth=0.20, relheight=0.07, relx=0.80, rely=0.24) 84 | '''决策属性数目''' 85 | self.var_decision_num = tkinter.StringVar() 86 | self.label_decision_num = tkinter.Label(self.window, text='决策属性数 ', font=('Arial', 11)) 87 | self.label_decision_num.place(relwidth=0.08, relheight=0.06, relx=0.81, rely=0.355) 88 | self.entry_decision_num = tkinter.Entry(self.window, textvariable=self.var_decision_num, font=('Arial', 16)) 89 | self.entry_decision_num.place(relwidth=0.10, relheight=0.08, relx=0.89, rely=0.335) 90 | '''精度 1 ''' 91 | self.var_precision_one = tkinter.StringVar() 92 | self.label_precision_one = tkinter.Label(self.window, text='参数 1', font=('Arial', 11)) 93 | self.label_precision_one.place(relwidth=0.05, relheight=0.06, relx=0.82, rely=0.47) 94 | self.entry_precision_one = tkinter.Entry(self.window, textvariable=self.var_precision_one, font=('Arial', 16)) 95 | self.entry_precision_one.place(relwidth=0.10, relheight=0.08, relx=0.89, rely=0.45) 96 | '''精度 2 ''' 97 | self.var_precision_two = tkinter.StringVar() 98 | self.label_precision_two = tkinter.Label(self.window, text='参数 2', font=('Arial', 11)) 99 | self.label_precision_two.place(relwidth=0.05, relheight=0.06, relx=0.82, rely=0.59) 100 | self.entry_precision_two = tkinter.Entry(self.window, textvariable=self.var_precision_two, font=('Arial', 16)) 101 | self.entry_precision_two.place(relwidth=0.10, relheight=0.08, relx=0.89, rely=0.57) 102 | '''精度 3 ''' 103 | self.var_precision_three = tkinter.StringVar() 104 | self.label_precision_three = tkinter.Label(self.window, text='参数 3', font=('Arial', 11)) 105 | self.label_precision_three.place(relwidth=0.05, relheight=0.06, relx=0.82, rely=0.705) 106 | self.entry_precision_three = tkinter.Entry(self.window, textvariable=self.var_precision_three, 107 | font=('Arial', 16)) 108 | self.entry_precision_three.place(relwidth=0.10, relheight=0.08, relx=0.89, rely=0.685) 109 | '''粒度 ''' 110 | self.var_granularity = tkinter.StringVar() 111 | self.label_granularity = tkinter.Label(self.window, textvariable=self.var_label_granularity, 112 | font=('Arial', 11)) 113 | self.label_granularity.place(relwidth=0.05, relheight=0.06, relx=0.82, rely=0.82) 114 | self.entry_granularity = tkinter.Entry(self.window, textvariable=self.var_granularity, font=('Arial', 16)) 115 | self.entry_granularity.place(relwidth=0.10, relheight=0.08, relx=0.89, rely=0.80) 116 | '''确定''' 117 | self.button_sure = tkinter.Button(self.window, text='开始', bg='#f58220', font=('Arial', 15), 118 | command=self.button_sure_click) 119 | self.button_sure.place(relwidth=0.205, relheight=0.105, relx=0.80, rely=0.9) 120 | self.window.mainloop() 121 | 122 | """ 123 | button_open_click(),button_check_click(),button_save_click(),button_sure_click() 124 | 这四个事件(函数)所花费的时间,占用的资源比较多 125 | 使用多线程,创建一个线程用于处理这些时间,防止主界面假死 126 | """ 127 | 128 | def button_open_click(self): 129 | """ 130 | 使用多线程,创建一个线程用于打开并读取文件 131 | :return: NULL 132 | """ 133 | thread = threading.Thread(target=self.button_open_click_thread) 134 | thread.setDaemon(True) 135 | thread.start() 136 | 137 | def button_open_click_thread(self): 138 | """ 139 | 打开txt文件,目前只设置了打开txt文件 140 | 解析文件路径,对未选择文件进行错误提示 141 | 同时得到数据集的分隔符 142 | 分隔符得到的方法的前提是,数据对象是字母或者数字 143 | 而分隔符是数字和字母之外的字符 144 | :return: NULL 145 | """ 146 | self.file_path = tkinter.filedialog.askopenfilename(title='请选择 .txt 结尾的文本文件', filetypes=[('TXT', '*.txt')]) 147 | full_file_name = os.path.split(self.file_path)[1] 148 | self.file_name, ext_name = os.path.splitext(full_file_name) 149 | if self.file_path == '': 150 | tkinter.messagebox.showwarning(title='警告', message='您未选择任何文件!') 151 | else: 152 | file = open(self.file_path, 'r') 153 | lines = file.readlines() 154 | line_one = lines[0] 155 | for i in line_one: 156 | if not i.isalnum(): 157 | self.separator = i 158 | break 159 | tkinter.messagebox.showinfo(title='已选择文件', message='该文件的路径是 ' + self.file_path) 160 | 161 | def button_check_click(self): 162 | """ 163 | 使用多线程,创建一个线程用于打开并核对数据集 164 | :return: NULL 165 | """ 166 | self.thread = threading.Thread(target=self.check_data_thread) 167 | self.thread.setDaemon(True) 168 | self.thread.start() 169 | 170 | def check_data_thread(self): 171 | """ 172 | 数据校验部分,对未选择文件进行预警 173 | 以分隔符作为校验,即统计每一个对象的属性数 174 | 每一列有多少个分隔符 175 | 若每一列的分隔符数目相同,数据无误 176 | 否则指出可能有误所在的行数 177 | :return: NULL 178 | """ 179 | if self.file_path == '': 180 | tkinter.messagebox.showwarning(title='警告', message='您未选择任何文件!') 181 | else: 182 | full_file_name = os.path.split(self.file_path)[1] 183 | self.file_name, ext_name = os.path.splitext(full_file_name) 184 | if self.check_data(): 185 | tkinter.messagebox.showinfo(title='数据', message='数据校验成功!') 186 | else: 187 | """暂时提示有缺失,后期可能加入那部分缺失""" 188 | tkinter.messagebox.showerror(title='错误', 189 | message='数据校验失败,该数据集部分内容不完整!\n' + '可能出现在第 ' + str( 190 | self.error_line) + ' 行') 191 | 192 | def check_data(self): 193 | """ 194 | 数据检验部分,针对完备数据集 195 | :return: 196 | 返回判断结果 197 | @result 198 | """ 199 | result = True 200 | list_separator = [] 201 | file = open(self.file_path, 'r') 202 | lines = file.readlines() 203 | line_one = lines[0] 204 | count_separator = line_one.count(self.separator) 205 | for line in range(len(lines)): 206 | list_separator.append(lines[line].count(self.separator)) 207 | for i in range(len(list_separator)): 208 | if list_separator[i] != count_separator: 209 | self.error_line = int(i) + 1 210 | result = False 211 | break 212 | return result 213 | 214 | def button_save_click(self): 215 | """ 216 | 使用多线程,创建一个子线程用来保存文件 217 | :return: 218 | """ 219 | self.thread = threading.Thread(target=self.button_save_click_thread) 220 | self.thread.setDaemon(True) 221 | self.thread.start() 222 | 223 | def button_save_click_thread(self): 224 | """ 225 | 保存文件操作 226 | 保存文件的格式是*.txt , *.data , *.* 三种方式 227 | 若未选择文件,提示先选择文件 228 | 选择文件后 229 | 若结果未生成,则提示先选择算法 230 | 否则将结果写入文件中 231 | :return: NULL 232 | """ 233 | file_save_path = tkinter.filedialog.asksaveasfilename(defaultextension='.txt', 234 | filetypes=[('txt Files', '*.txt'), 235 | ('data Files', '*.data'), 236 | ('All Files', '*.*')]) 237 | if file_save_path != '': 238 | if len(self.reduction_set) == 0: 239 | tkinter.messagebox.showerror(title='错误', message='结果还未生成,请先选择算法,设置参数然后点击确定') 240 | else: 241 | file = open(file_save_path, 'w') 242 | for i in range(len(self.reduction_set)): 243 | string = '' 244 | j = 0 245 | for j in range(len(self.reduction_set[i]) - 1): 246 | string += str(self.reduction_set[i][j]) + self.separator 247 | if i != len(self.reduction_set) - 1: 248 | string += str(self.reduction_set[i][j]) + '\n' 249 | else: 250 | string += str(self.reduction_set[i][j]) 251 | file.write(string) 252 | file.close() 253 | tkinter.messagebox.showinfo(title='成功', message='特征选择结果已经成功写入文件中\n' + '路径是 ' + file_save_path) 254 | else: 255 | tkinter.messagebox.showerror(title='保存失败', message='您得先选择您的文件!') 256 | 257 | def button_al1_click(self): 258 | """ 259 | 点击算法一,在中央横栏出显示算法一的名称 260 | :return: NULL 261 | """ 262 | self.al_name = '算法一' 263 | self.var_al.set('算法一\n' + '\n基于知识粒度的决策系统经典的启发式属性约简算法') 264 | self.var_label_granularity.set('') 265 | 266 | def button_al12_click(self): 267 | """ 268 | 点击算法二,在中央横栏出显示算法二的名称 269 | :return: NULL 270 | """ 271 | self.al_name = '算法二' 272 | self.var_al.set('算法二\n' + '\n基于知识粒度和多粒度视角的启发式约简算法') 273 | self.var_label_granularity.set('粒度') 274 | 275 | def button_help_click(self): 276 | """ 277 | 点击帮厨后,显示对程序的介绍 278 | :return: 279 | """ 280 | help_mes = tkinter.Tk() 281 | help_mes.title('帮助') 282 | help_mes.geometry('640x400') 283 | help_mes_text = '\t\t程序简介\n' + '\t本系统中有两个特征选择算法,算法理论层面上参考了' \ 284 | 'Yunge Jing, Tianrui Li, Hamido Fujita , Zeng Yu, Bin Wang,' \ 285 | 'An incremental attribute reduction approach based on knowledge granularity with a multi-granulation view,' \ 286 | 'Information Sciences 411 (2017) 23–38 中的算法一,算法二。\n' + \ 287 | '\n在实际实现过程中,加入了一些自己的理解,一部分体现在参数设置部分,点击了解更多可以了解到具体原因\n' + \ 288 | '\n使用时,选择完文件后,建议检查数据,其他的使用信息会以弹窗形式给出' 289 | button_mes = tkinter.Button(help_mes, text='了解更多', font=('Arial', 11), command=self.button_mes_click) 290 | button_mes.place(relwidth=0.15, relheight=0.1, relx=0.25, rely=0.78) 291 | button_exit = tkinter.Button(help_mes, text='退出', font=('Arial', 11), command=help_mes.destroy) 292 | button_exit.place(relwidth=0.15, relheight=0.1, relx=0.6, rely=0.78) 293 | message_help = tkinter.Message(help_mes, text=help_mes_text, font=('Arial', 12)) 294 | message_help.pack() 295 | help_mes.mainloop() 296 | 297 | def button_mes_click(self): 298 | """ 299 | 点击弹出窗口中的更多,显示对程序的更多介绍 300 | :return: NULL 301 | """ 302 | learn_more = tkinter.Tk() 303 | learn_more.title('了解更多') 304 | learn_more.geometry('750x450') 305 | text_title = '\t\t\t\t为什么会有这些参数\n' 306 | text_head = '\n在算法的理论层次上并不需要一部分参数,但为什么需要它们呢\n' + '' \ 307 | '首先,介绍下各个参数的作用,有默认值,可以不输入\n\n' 308 | text_1 = '\n决策属性个数: 默认为1\n' 309 | text_2 = '\n各个精度设置的原因是,该系统选择了python语言,其定义的变量精度很高,' \ 310 | '而在其他语言中如果使用基本的变量,会有精度限制,或者精度截断,即计算机实际得到的结果与预期结果可能会有出路。此处将精度控制权交给使用者,' \ 311 | '让其选择自己需要的精度参数,从而得到预期结果。\n' 312 | text_3 = '\n参数一: 默认为0.0,在原算法选择核属性时,计算内部属性重要度,其值要大于0,此处可以设置为0-1之间的浮点数,即超过该值则认为大于0\n' 313 | text_4 = '\n参数二: 默认为1e-7,此处是判断约简集和条件属性集的知识粒度是否相等,不相等满足条件,此处可以设置二者差的' \ 314 | '绝对值在什么样的范围内默认是相等的,超过该范围则认为不相等,不建议设置成0.0\n' 315 | text_5 = '\n参数三: 默认为0,判断条件属性集和约简集去除约简集中的一个元素的知识粒度是否相等,' \ 316 | '相等则满足条件,此处认为二者差的绝对值在该范围内就是相等\n' 317 | text_6 = '\n粒度: 只在第二个算法中出现并使用,必须输入正整数\n' 318 | text_mine = '\n\n作者邮箱:13767927306@163.com\n\n' + '使用过程中,对该系统的程序或者其他问题可以与本人联系哦!' 319 | learn_more_text = text_title + text_head + text_1 + text_2 + text_3 + text_4 + text_5 + text_6 + text_mine 320 | mes_learn_more = ScrolledText(learn_more, font=('Arial', 12)) 321 | mes_learn_more.insert('insert', learn_more_text) 322 | mes_learn_more.place(relwidth=1, relheight=1, relx=0, rely=0) 323 | learn_more.mainloop() 324 | 325 | def show_time(self): 326 | """ 327 | 显示当前时间 328 | 循环以一秒钟的间隔显示时间 329 | :return: NULL 330 | """ 331 | self.now_time = time.strftime("%Y-%m-%d %H:%M:%S") 332 | self.text_time.configure(text=self.now_time) 333 | self.window.after(1000, self.show_time) 334 | 335 | def button_learn_click(self): 336 | """ 337 | 根据选择的算法显示对该算法的介绍 338 | :return: NULL 339 | """ 340 | text_learn_al1 = '算法一简介\n\n' + \ 341 | '\n基于知识粒度的经典启发式属性约简算法\n' + \ 342 | '\n针对决策系统的基于知识粒度的经典启发式属性约简算法\n' + '' \ 343 | '\n详细介绍可参考相关论文' 344 | text_learn_al2 = '算法二简介\n\n' + '' \ 345 | '\n基于知识粒度的多粒度启发式约简算法\n' \ 346 | + '\n简单来说,就是将一个大的数据集划分成若干个小的数据集\n' + \ 347 | '\n在每个子数据集上求得约简结果后,合并\n' + \ 348 | '\n分而治之的方法,会使特征选择的时间复杂度明显减少,程序耗时减少\n' + \ 349 | '\n详细介绍可参考相关论文' 350 | if self.al_name == '算法一': 351 | self.show_name(text_learn_al1) 352 | elif self.al_name == '算法二': 353 | self.show_name(text_learn_al2) 354 | else: 355 | tkinter.messagebox.showinfo(title='提示', message='您还未选择任何算法!') 356 | 357 | def show_name(self, text_learn_al): 358 | """ 359 | :param text_learn_al: 显示当前中央横栏处算法的详细介绍。 360 | :return: NULL 361 | """ 362 | learn_mes = tkinter.Tk() 363 | learn_mes.title('了解该算法') 364 | learn_mes.geometry('640x400') 365 | label_learn = tkinter.Label(learn_mes, text=text_learn_al, font=('Arial', 12)) 366 | label_learn.pack() 367 | button_exit = tkinter.Button(learn_mes, text='退出', font=('Arial', 11), command=learn_mes.destroy) 368 | button_exit.place(relwidth=0.15, relheight=0.1, relx=0.425, rely=0.8) 369 | learn_mes.mainloop() 370 | 371 | def button_sure_click(self): 372 | """ 373 | 使用多线程,创建一个子线程用来处理特征选择过程 374 | :return: NULL 375 | """ 376 | self.thread = threading.Thread(target=self.button_sure_click_thread) 377 | self.thread.setDaemon(True) 378 | self.thread.start() 379 | 380 | def button_sure_click_thread(self): 381 | """ 382 | 该部分首先得到五个参数输入框的内容 383 | 注意,选择算法一时,粒度参数不出现 384 | 对五个参数进行校验,返回其相应的逻辑值和提示信息 385 | 当框内不输入参数时,使用并显示默认参数 386 | 仅仅是选择算法二时,粒度参数必须强制输入,其他任何参数可以不输入 387 | 首先先判断是否选择了文件以及是否选择了算法 388 | 若没有,发出对应的提示信息 389 | 符合条件后,根据选择的算法 390 | 调用相应的算法类,以及得到的参数 391 | 得到结果,显示在文本框中 392 | :return: NULL 393 | """ 394 | if self.file_path == '' or self.separator == '': 395 | tkinter.messagebox.showerror(title='错误', message='未导入文件,请先导入文件,并核对数据') 396 | else: 397 | self.text_result.delete(1.0, 'end') 398 | result_decision_num, mes_decision_num = self.decision_num_check() 399 | result_precision_one, mes_precision_one = self.precision_one_check() 400 | result_precision_two, mes_precision_two = self.precision_two_check() 401 | result_precision_three, mes_precision_three = self.precision_three_check() 402 | result_granularity, mes_granularity = self.granularity_check() 403 | decision_num = int(self.var_decision_num.get()) 404 | precision_set = [float(self.var_precision_one.get()), float(self.var_precision_two.get()), 405 | float(self.var_precision_three.get())] 406 | result = result_decision_num and result_precision_one and result_precision_two and result_precision_three 407 | if not self.check_data(): 408 | tkinter.messagebox.showerror(title='错误', message='数据集可能有误,请核对数据!') 409 | else: 410 | # 选择算法一进行特征选择 411 | if self.al_name == '算法一': 412 | if result: 413 | algorithm1 = Reduct(self.file_path, self.separator, decision_num, precision_set) 414 | tkinter.messagebox.showinfo(title='选择成功', message='您已经选择算法一进行特征选择!') 415 | self.reduction_set = [] 416 | self.attr_B = '' 417 | time_start = time.time() 418 | algorithm1.gainReduct() 419 | time_end = time.time() 420 | self.reduction_set = algorithm1.attrB 421 | for i in range(len(algorithm1.yuejian) - 1): 422 | self.attr_B += str(algorithm1.yuejian[i]) + ' , ' 423 | self.attr_B += str(algorithm1.yuejian[len(algorithm1.yuejian) - 1]) 424 | content = self.file_name + '数据集\n' + '有 ' + str(algorithm1.rowsNum) + ' 个对象,有 ' + str( 425 | algorithm1.listsNum) + ' 个属性\n' + '特征选择耗时是 ' + str( 426 | time_end - time_start) + ' s\n特征选择的结果(列):\n' + self.attr_B 427 | self.text_result.insert('insert', content) 428 | del algorithm1 429 | 430 | else: 431 | tkinter.messagebox.showerror(title='错误', 432 | message='1. ' + mes_decision_num + '\n' + '2. ' + mes_precision_one + '\n' + '3. ' + mes_precision_two + '\n' + '4. ' + mes_precision_three) 433 | # 选择算法二进行特征选择 434 | elif self.al_name == '算法二': 435 | if result and result_granularity: 436 | algorithm2 = mulReduct(self.file_path, self.separator, decision_num, 437 | int(self.var_granularity.get()), 438 | precision_set) 439 | tkinter.messagebox.showinfo(title='选择成功', message='您已经选择算法二进行特征选择!') 440 | self.reduction_set = [] 441 | self.attr_B = '' 442 | time_start = time.time() 443 | algorithm2.multiReduct() 444 | time_end = time.time() 445 | self.reduction_set = algorithm2.attrB 446 | for i in range(len(algorithm2.result) - 1): 447 | self.attr_B += str(algorithm2.result[i]) + ' , ' 448 | self.attr_B += str(algorithm2.result[len(algorithm2.result) - 1]) 449 | content = self.file_name + '数据集\n' + '有 ' + str(algorithm2.rowsNum) + ' 个对象,有 ' + str( 450 | algorithm2.listsNum) + ' 个属性\n' + '特征选择耗时是 ' + str( 451 | time_end - time_start) + ' s\n' + '该数据集被划分为 ' + str( 452 | self.var_granularity.get()) + ' 个粒度\n特征选择的结果(列):\n' + self.attr_B 453 | self.text_result.insert('insert', content) 454 | del algorithm2 455 | else: 456 | tkinter.messagebox.showerror(title='错误', message='您未新选择任何算法!') 457 | 458 | def decision_num_check(self): 459 | """ 460 | 先判断决策属性数,输入的数必须为正整数,未输入则默认为1 461 | :return: 462 | @result:判断结果 463 | @mes_decision_num:弹窗中的提示信息 464 | """ 465 | result = False 466 | decision_num = self.var_decision_num.get() 467 | mes_decision_num = '' 468 | if decision_num != '': 469 | if not str(decision_num).isdigit(): 470 | mes_decision_num = decision_num + '不是正整数' 471 | else: 472 | if int(decision_num) <= 0: 473 | mes_decision_num = decision_num + '不是正整数' 474 | else: 475 | result = True 476 | mes_decision_num = '您已经成功设置决策属性为 ' + decision_num 477 | else: 478 | result = True 479 | self.var_decision_num.set('1') 480 | mes_decision_num = '您使用了默认值 ' + self.var_decision_num.get() 481 | return result, mes_decision_num 482 | 483 | def precision_one_check(self): 484 | """ 485 | 参数1 486 | 范围在0-1之间的浮点数,默认值为0.0 487 | :return: 488 | @result:判断结果 489 | @mes_decision_one:弹窗中的提示信息 490 | """ 491 | result = False 492 | mes_precision_one = '' 493 | precision_one_num = self.var_precision_one.get() 494 | '''判断条件的正则表达式''' 495 | value = re.compile(r'^[-+]?[0-9]+\.[0-9]+$') 496 | result_one = value.match(precision_one_num) 497 | if precision_one_num != '': 498 | if result_one: 499 | if 0 <= float(precision_one_num) and float(precision_one_num) < 1: 500 | result = True 501 | mes_precision_one = '您已经设置了第一个精度 ' + precision_one_num 502 | else: 503 | mes_precision_one = precision_one_num + ' 该参数不在合理范围内' 504 | else: 505 | mes_precision_one = precision_one_num + ' 不是浮点数' 506 | else: 507 | self.var_precision_one.set('0.0') 508 | mes_precision_one = '您使用了默认值' + precision_one_num 509 | result = True 510 | return result, mes_precision_one 511 | 512 | def precision_two_check(self): 513 | """ 514 | 参数2 515 | 范围在0-1之间的浮点数,越接近0越好,默认值是1e-7 516 | :return: 517 | @result:判断结果 518 | @mes_decision_two:弹窗中的提示信息 519 | """ 520 | result = False 521 | mes_precision_two = '' 522 | precision_two_num = self.var_precision_two.get() 523 | '''判断条件的正则表达式''' 524 | value = re.compile(r'^[-+]?[0-9]+\.[0-9]+$') 525 | result_two = value.match(precision_two_num) 526 | if precision_two_num != '': 527 | if result_two: 528 | if 0 <= float(precision_two_num) and float(precision_two_num) < 1: 529 | result = True 530 | mes_precision_two = '您已经设置了第二个精度 ' + precision_two_num 531 | else: 532 | mes_precision_two = precision_two_num + '该参数不在合理范围内' 533 | else: 534 | mes_precision_two = precision_two_num + ' 不是浮点数' 535 | else: 536 | self.var_precision_two.set('0.0000001') 537 | mes_precision_two = '您使用了默认值' + precision_two_num 538 | result = True 539 | return result, mes_precision_two 540 | 541 | def precision_three_check(self): 542 | """ 543 | 参数3 544 | 范围在0-1之间的浮点数,默认值为0.0 545 | :return: 546 | @result:判断结果 547 | @mes_decision_three:弹窗中的提示信息 548 | """ 549 | result = False 550 | mes_precision_three = '' 551 | precision_three_num = self.var_precision_three.get() 552 | '''判断条件的正则表达式''' 553 | value = re.compile(r'^[-+]?[0-9]+\.[0-9]+$') 554 | result_one = value.match(precision_three_num) 555 | if precision_three_num != '': 556 | if result_one: 557 | if 0 <= float(precision_three_num) and float(precision_three_num) < 1: 558 | result = True 559 | mes_precision_three = '您已经设置了第三个精度 ' + precision_three_num 560 | else: 561 | mes_precision_three = precision_three_num + '该参数不在合理范围内' 562 | else: 563 | mes_precision_three = precision_three_num + ' 不是浮点数' 564 | else: 565 | self.var_precision_three.set('0.0') 566 | mes_precision_three = '您使用了默认值' + precision_three_num 567 | result = True 568 | return result, mes_precision_three 569 | 570 | def granularity_check(self): 571 | """ 572 | 粒度值,选择算法一时,固定为1 573 | 选择算法二时,需要用户输入 574 | :return: 575 | @result:判断结果 576 | @mes_granularity:弹窗中的提示信息 577 | """ 578 | mes_granularity = '' 579 | result = False 580 | if self.al_name == '算法一': 581 | self.var_granularity.set('') 582 | else: 583 | if self.var_granularity.get() == '': 584 | mes_granularity = '您未设置粒度数!' 585 | else: 586 | if not str(self.var_granularity.get()).isdigit(): 587 | mes_granularity = self.var_granularity.get() + '不是正整数' 588 | else: 589 | if int(self.var_granularity.get()) < 0: 590 | mes_granularity = self.var_granularity.get() + '不是正整数' 591 | else: 592 | result = True 593 | mes_granularity = '您已经设置了粒度!' + self.var_granularity.get() 594 | return result, mes_granularity 595 | -------------------------------------------------------------------------------- /multiReduct.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import Counter 3 | import operator 4 | 5 | 6 | class mulReduct(object): 7 | rowsNum = 0 # 行数,已经除以了粒度 8 | listsNum = 0 # 列数 9 | attrTable = [] # 全信息系统,包含条件属性集和决策属性集 10 | attrD = [] # 子决策属性集 11 | attrB = [] # 子约简属性集 12 | rule = [] # 条件属性集与约简集的差集 13 | yuejian = [] # 约简集的下标 14 | attrGranularity = [] # 子决策系统 15 | result = [] 16 | remainder = 0 17 | 18 | def __init__(self, filePath, separator=';', decisionNum=1, granularity=1, precision=[0, 1e-6, 0]): 19 | """ 20 | 基于知识粒度的多粒度启发式约简算法 21 | :param filePath: 文件路径 22 | :param separator: 分割符,默认为; 23 | :param decisionNum: 决策属性个数,默认为1 24 | :param granularity: 粒度数目 25 | :param precision: 算法二中三个可能的参数(精度) 26 | """ 27 | self.filePath = filePath 28 | self.separator = separator 29 | self.decisionNum = decisionNum 30 | self.granularity = granularity 31 | self.precision = precision 32 | mulReduct.rowsNum = 0 33 | mulReduct.listsNum = 0 34 | mulReduct.attrTable = [] 35 | mulReduct.attrD = [] 36 | mulReduct.attrB = [] 37 | mulReduct.rule = [] 38 | mulReduct.yuejian = [] 39 | mulReduct.attrGranularity = [] 40 | mulReduct.result = [] 41 | mulReduct.remainder = 0 42 | file = open(filePath, 'r') 43 | lines = file.readlines() 44 | for line in lines: 45 | line = line.strip('\n') 46 | line = line.split(separator) 47 | mulReduct.attrTable.append(line) 48 | self.remainder = int(len(mulReduct.attrTable) % self.granularity) 49 | mulReduct.rowsNum = int(len(mulReduct.attrTable) / self.granularity) 50 | mulReduct.listsNum = len(mulReduct.attrTable[0]) 51 | self.attrGranularity = self.attrTable 52 | 53 | def listToString(self, attr): 54 | """ 55 | 将二维列表中的每一维的列表转化为字符串 56 | :param attr: 属性,为二维列表 57 | :return: 58 | 返回一个一维的字符串列表 59 | @attrTemp 60 | """ 61 | attrTemp = [] 62 | for i in range(len(attr)): 63 | string = '' 64 | string = string.join(attr[i]) 65 | attrTemp.append(string) 66 | return attrTemp 67 | 68 | def gainKonwledge(self, attr): 69 | """ 70 | 求知识粒度 71 | :param attr: 传入的参数是二维列表 72 | :return: 73 | 返回知识粒度 74 | @divide / float(self.rowsNum * self.rowsNum) 75 | """ 76 | divide = 0 77 | attrString = self.listToString(attr) # 调用list_to_string,将二维列表转化维一维列表 78 | count = Counter(attrString) # 类型: 79 | countDict = dict(count) # 类型: 80 | for key in countDict: 81 | divide += countDict.get(key) * countDict.get(key) 82 | return divide / float(self.rowsNum * self.rowsNum) 83 | 84 | def gainCondiction(self): 85 | """ 86 | 得到条件属性 87 | :return: 88 | 返回的是含条件属性的二维列表 89 | @attrC 90 | """ 91 | attrC = [] 92 | for i in range(self.rowsNum): 93 | attrC.append(self.attrGranularity[i][0:self.listsNum - self.decisionNum]) 94 | return attrC 95 | 96 | def gainReduct(self): 97 | """ 98 | 属性约简过程 99 | :return: NULL 100 | """ 101 | '''选择核属性''' 102 | for i in range(1, self.listsNum - self.decisionNum + 1): 103 | self.rule.append(i) 104 | attrC = self.gainCondiction() 105 | sub = self.gainKonwledge(attrC) - self.gainKonwledge(self.attrGranularity) 106 | for j in range(1, self.listsNum - self.decisionNum + 1): 107 | result = (self.gainInner(attrC, j) - self.gainInner(self.attrGranularity, j)) - sub 108 | if result > self.precision[0]: 109 | self.yuejian.append(j) # 选择核属性 110 | '''根据下标求得约简集''' 111 | for i in range(self.rowsNum): 112 | attrBTemp = [self.attrGranularity[i][j - 1] for j in self.yuejian] 113 | self.attrB.append(attrBTemp) 114 | attrBD = self.gainBandD() 115 | '''求c-b的差集下标''' 116 | for i in self.yuejian: 117 | self.rule.remove(i) 118 | '''在c-b中选择外部重要度大的元素往约简集中添加''' 119 | while abs((self.gainKonwledge(self.attrB) - self.gainKonwledge(attrBD)) - sub) >= self.precision[1]: 120 | maxSigOuter = {} 121 | for i in self.rule: # C-B的差集下标 122 | preOuter = self.gainKonwledge(self.attrB) - self.gainKonwledge(attrBD) 123 | nextOuter = self.gainOuter(self.attrB, i) - self.gainOuter(attrBD, i) 124 | aResult = preOuter - nextOuter 125 | maxSigOuter[i] = aResult 126 | maxSigOuter = sorted(maxSigOuter.items(), key=operator.itemgetter(1), reverse=True) 127 | a0 = maxSigOuter[0][0] 128 | self.yuejian.append(a0) 129 | self.rule.remove(a0) 130 | self.gainBanda0(a0) 131 | attrBD = self.gainBandD() 132 | 133 | '''去冗余''' 134 | for i in self.yuejian: 135 | if abs(self.gainInner(self.attrB, i) - self.gainInner(attrBD, i) - sub) <= self.precision[2]: 136 | self.gainBsubai(i) 137 | attrBD = self.gainBandD() 138 | self.yuejian.remove(i) 139 | 140 | def gainInner(self, attr, j): 141 | """ 142 | 计算((C条件属性-j)或者(C条件属性和D决策属性-j)),取决于attr 143 | :param attr: 属性集 144 | :param j: 第j个特征(属性) 145 | :return: 146 | 返回知识粒度 147 | @self.gainKonwledge(attrTemp) 148 | """ 149 | attrTemp = [] 150 | length = len(attr[0]) 151 | for i in range(self.rowsNum): 152 | '''下标为j-1的元素被去除,即第j个元素''' 153 | attrTemp.append(attr[i][0:j - 1] + attr[i][j:length]) 154 | return self.gainKonwledge(attrTemp) 155 | 156 | def gainBandD(self): 157 | """ 158 | 求约简集和决策属性的并集 159 | :return: 160 | 返回并集 161 | @attrBD 162 | """ 163 | attrBD = [] 164 | for i in range(self.rowsNum): 165 | if len(self.attrB) == 0: 166 | attrBD.append((self.attrD[i])) 167 | else: 168 | attrBD.append(self.attrB[i] + (self.attrD[i])) 169 | return attrBD 170 | 171 | def gainOuter(self, attr, j): 172 | """ 173 | :param attr: 属性集 174 | :param j: 第j个特征(属性) 175 | :return: 176 | 返回知识粒度 177 | @self.gainKonwledge(attrInclude) 178 | """ 179 | attrInclude = [] 180 | for i in range(self.rowsNum): 181 | if len(attr[0]) == 0: 182 | attrInclude.append(list(self.attrGranularity[i][j - 1:j])) 183 | else: 184 | attrInclude.append(attr[i] + (self.attrGranularity[i][j - 1:j])) 185 | return self.gainKonwledge(attrInclude) 186 | 187 | def gainBanda0(self, a0): 188 | """ 189 | 求约简集和第a0个属性的并集 190 | :param a0: 第a0个特征(属性) 191 | :return: NULL 192 | """ 193 | for i in range(self.rowsNum): 194 | self.attrB[i].append(self.attrGranularity[i][a0 - 1]) 195 | 196 | def gainBsubai(self, j): 197 | """ 198 | 求约简集和第a0个属性的并集 199 | :param j: 第j个属性 200 | :return: NULL 201 | """ 202 | for i in range(self.rowsNum): 203 | self.attrB[i].remove(self.attrGranularity[i][j - 1]) 204 | 205 | def multiReduct(self): 206 | """ 207 | 基于知识粒度的多粒度启发式约简算法 208 | :return: NULL 209 | """ 210 | resultTemp = [] 211 | for k in range(0, self.granularity): 212 | self.attrGranularity = [] 213 | self.yuejian = [] 214 | self.attrD = [] 215 | self.rule = [] 216 | self.attrB = [] 217 | if k != self.granularity - 1: 218 | for i in range(k * self.rowsNum, (k + 1) * self.rowsNum): 219 | self.attrGranularity.append(self.attrTable[i]) 220 | self.attrD.append(self.attrTable[i][self.listsNum - self.decisionNum:self.listsNum]) 221 | else: 222 | for i in range(k * self.rowsNum, (k + 1) * self.rowsNum + self.remainder): 223 | self.attrGranularity.append(self.attrTable[i]) 224 | self.attrD.append(self.attrTable[i][self.listsNum - self.decisionNum:self.listsNum]) 225 | self.rowsNum = self.rowsNum + self.remainder 226 | self.gainReduct() 227 | resultTemp.append(self.yuejian) 228 | self.result = resultTemp[0] 229 | for i in range(1, self.granularity): 230 | self.result = list(set(self.result).union(set(resultTemp[i]))) 231 | -------------------------------------------------------------------------------- /reduct.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | from collections import Counter 3 | import operator 4 | 5 | 6 | class Reduct(object): 7 | rowsNum = 0 # 行数 8 | listsNum = 0 # 列数 9 | attrTable = [] # 信息系统,包含条件属性集和决策属性集 10 | attrD = [] # 决策属性集 11 | attrB = [] # 约简属性集 12 | rule = [] # 条件属性集与约简集的差集 13 | yuejian = [] # 约简集的下标 14 | 15 | def __init__(self, filePath, separator=';', decisionNum=1, precision=[0, 1e-7, 0]): 16 | """ 17 | 基于知识粒度的经典启发式属性约简算法 18 | :param filePath: 文件路径 19 | :param separator: 文件中的分割符,默认为; 20 | :param decisionNum: 决策属性个数,默认为1 21 | :param precision: 算法一中三个可能的参数(精度) 22 | """ 23 | self.filePath = filePath 24 | self.separator = separator 25 | self.decisionNum = decisionNum 26 | self.precision = precision 27 | Reduct.rowsNum = 0 28 | Reduct.listsNum = 0 29 | Reduct.attrTable = [] 30 | Reduct.attrD = [] 31 | Reduct.attrB = [] 32 | Reduct.rule = [] 33 | Reduct.yuejian = [] 34 | file = open(filePath, 'r') 35 | lines = file.readlines() 36 | for line in lines: 37 | line = line.strip('\n') 38 | line = line.split(separator) 39 | Reduct.attrTable.append(line) 40 | Reduct.rowsNum = len(Reduct.attrTable) 41 | Reduct.listsNum = len(Reduct.attrTable[0]) 42 | for i in range(1, Reduct.listsNum - self.decisionNum + 1): 43 | Reduct.rule.append(i) 44 | 45 | def __del__(self): 46 | class_name = self.__class__.__name__ 47 | 48 | def listToString(self, attr): 49 | """ 50 | 将二维列表中的每维个列表转化为字符串 51 | :param attr: 属性,为二维列表 52 | :return: 53 | 返回一个一维的字符串列表 54 | @attrTemp 55 | """ 56 | attrTemp = [] 57 | for i in range(len(attr)): 58 | string = '' 59 | string = string.join(attr[i]) 60 | attrTemp.append(string) 61 | return attrTemp 62 | 63 | def gainKonwledge(self, attr): 64 | """ 65 | 求知识粒度 66 | :param attr: 传入的参数是二维列表 67 | :return: 68 | 返回知识粒度 69 | @divide / float(self.rowsNum * self.rowsNum) 70 | """ 71 | divide = 0 72 | attrString = self.listToString(attr) # 调用list_to_string,将二维列表转化维一维列表 73 | count = Counter(attrString) # 类型: 74 | countDict = dict(count) # 类型: 75 | for key in countDict: 76 | divide += countDict.get(key) * countDict.get(key) 77 | return divide / float(self.rowsNum * self.rowsNum) 78 | 79 | def gainCondiction(self): 80 | """ 81 | 得到条件属性 82 | :return: 83 | 返回的是含条件属性的二维列表 84 | @attrC 85 | """ 86 | attrC = [] 87 | for i in range(self.rowsNum): 88 | attrC.append(self.attrTable[i][0:self.listsNum - self.decisionNum]) 89 | self.attrD.append(self.attrTable[i][self.listsNum - self.decisionNum:self.listsNum]) # 得到决策属性 90 | return attrC 91 | 92 | def gainReduct(self): 93 | """ 94 | 属性约简过程 95 | :return: NULL 96 | """ 97 | '''选择核属性''' 98 | attrC = self.gainCondiction() 99 | sub = self.gainKonwledge(attrC) - self.gainKonwledge(self.attrTable) 100 | for j in range(1, self.listsNum - self.decisionNum + 1): 101 | result = (self.gainInner(attrC, j) - self.gainInner(self.attrTable, j)) - sub 102 | if result > self.precision[0]: 103 | self.yuejian.append(j) 104 | '''根据下标求得约简集''' 105 | for i in range(self.rowsNum): 106 | attrBTemp = [self.attrTable[i][j - 1] for j in self.yuejian] 107 | self.attrB.append(attrBTemp) 108 | attrBD = self.gainBandD() 109 | '''求c-b的差集下标''' 110 | for i in self.yuejian: 111 | self.rule.remove(i) 112 | '''在c-b中选择外部重要度大的元素往约简集中添加''' 113 | while abs((self.gainKonwledge(self.attrB) - self.gainKonwledge(attrBD)) - sub) >= self.precision[1]: 114 | maxSigOuter = {} 115 | for i in self.rule: # C-B的差集下标 116 | preOuter = self.gainKonwledge(self.attrB) - self.gainKonwledge(attrBD) 117 | nextOuter = self.gainOuter(self.attrB, i) - self.gainOuter(attrBD, i) 118 | aResult = preOuter - nextOuter 119 | maxSigOuter[i] = aResult 120 | maxSigOuter = sorted(maxSigOuter.items(), key=operator.itemgetter(1), reverse=True) 121 | a0 = maxSigOuter[0][0] 122 | self.yuejian.append(a0) 123 | self.rule.remove(a0) 124 | self.gainBanda0(a0) 125 | attrBD = self.gainBandD() 126 | '''去冗余''' 127 | for i in self.yuejian: 128 | if abs(self.gainInner(self.attrB, i) - self.gainInner(attrBD, i) - sub) <= self.precision[2]: 129 | self.gainBsubai(i) 130 | attrBD = self.gainBandD() 131 | self.yuejian.remove(i) 132 | 133 | def gainInner(self, attr, j): 134 | """ 135 | 计算((C条件属性-j)或者(C条件属性和D决策属性-j)),取决于attr 136 | :param attr: 属性集 137 | :param j: 第j个特征(属性) 138 | :return: 139 | 返回知识粒度 140 | @self.gainKonwledge(attrTemp) 141 | """ 142 | attrTemp = [] 143 | length = len(attr[0]) 144 | for i in range(self.rowsNum): 145 | '''下标为j-1的元素被去除,即第j个元素''' 146 | attrTemp.append(attr[i][0:j - 1] + attr[i][j:length]) 147 | return self.gainKonwledge(attrTemp) 148 | 149 | def gainBandD(self): 150 | """ 151 | 求约简集和决策属性的并集 152 | :return: 153 | 返回并集 154 | @attrBD 155 | """ 156 | attrBD = [] 157 | for i in range(self.rowsNum): 158 | if len(Reduct.attrB) == 0: 159 | attrBD.append((self.attrD[i])) 160 | else: 161 | attrBD.append(self.attrB[i] + (self.attrD[i])) 162 | return attrBD 163 | 164 | def gainOuter(self, attr, j): 165 | """ 166 | :param attr: 属性集 167 | :param j: 第j个特征(属性) 168 | :return: 169 | 返回知识粒度 170 | @self.gainKonwledge(attrInclude) 171 | """ 172 | attrInclude = [] 173 | for i in range(self.rowsNum): 174 | if len(attr[0]) == 0: 175 | attrInclude.append(list(self.attrTable[i][j - 1])) 176 | else: 177 | attrInclude.append(attr[i] + list(self.attrTable[i][j - 1])) 178 | return self.gainKonwledge(attrInclude) 179 | 180 | def gainBanda0(self, a0): 181 | """ 182 | 求约简集和第a0个属性的并集 183 | :param a0: 第a0个特征(属性) 184 | :return: NULL 185 | """ 186 | for i in range(self.rowsNum): 187 | self.attrB[i].append(self.attrTable[i][a0 - 1]) 188 | 189 | def gainBsubai(self, j): 190 | """ 191 | 求约简集和第a0个属性的并集 192 | :param j: 第j个属性 193 | :return: NULL 194 | """ 195 | for i in range(self.rowsNum): 196 | self.attrB[i].remove(self.attrTable[i][j - 1]) 197 | -------------------------------------------------------------------------------- /user_info.pickle: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Gedanke/two_rough_set_algorithms/14971added4779d926d09bae49e0633f6d6cf48b/user_info.pickle --------------------------------------------------------------------------------