"""excelRelated: merge recruitment tables from a folder of Excel workbooks.

Reads every .xls / .xlsx workbook found under a folder, pulls the target
columns out of each non-empty sheet, and writes the combined rows to a new
workbook, ``result.xlsx``.

Repository layout:
    README.md
    excel.py
    excel文件夹/2018.xlsx
    excel文件夹/2019.xls

Sample workbooks:
    https://raw.githubusercontent.com/pengfexue2/excelRelated/ecdd800c73ba362246623115e8edc927f5a4fef0/excel文件夹/2018.xlsx
    https://raw.githubusercontent.com/pengfexue2/excelRelated/ecdd800c73ba362246623115e8edc927f5a4fef0/excel文件夹/2019.xls

Usage:
    python excel.py [folder]

NOTE(review): xlrd 2.0 and later read only the legacy .xls format; opening
the .xlsx inputs requires xlrd < 2.0 -- confirm the installed version.
"""
import os
import sys

# Default folder holding the .xls / .xlsx workbooks to consolidate.
xpath = "C:\\Users\\TED\\Desktop\\excel\\excel文件夹"

# File-name suffixes (compared case-insensitively) treated as Excel workbooks.
EXCEL_SUFFIXES = (".xls", ".xlsx")

# Header row of the generated workbook; the last column is the source path.
OUTPUT_HEADER = ['部门名称', '招考职位', '职位简介', '招考人数', '专业', '学历', '来源']

# The department column is labelled differently depending on the workbook.
DEPARTMENT_LABELS = ("部门名称", "招录机关")

# Labels used by every supported workbook, in output order.
COMMON_LABELS = ("招考职位", "职位简介", "招考人数", "专业", "学历")

# Some workbooks carry a title row above the labels, so the label row is
# looked for in the first two rows of a sheet.
HEADER_SEARCH_ROWS = 2


def collect_xls(list_collect):
    """Return the Excel file names found in a (possibly nested) list.

    Args:
        list_collect: list whose elements are file-name strings or further
            lists of the same shape.

    Returns:
        A new list with every name ending in ``.xls`` or ``.xlsx`` (case
        ignored), in the reverse of the order in which they were met.
        Nothing is shared between calls.
    """
    matches = []

    def _gather(items):
        for element in items:
            if isinstance(element, list):
                _gather(element)
            elif element.lower().endswith(EXCEL_SUFFIXES):
                matches.append(element)

    _gather(list_collect)
    # Reverse-of-encounter order is the historical contract of this helper.
    matches.reverse()
    return matches


def read_xls(path):
    """Return the paths of all Excel workbooks under ``path``.

    The folder is walked recursively with ``os.walk``. Each path is built as
    ``directory + "/" + file_name``; the forward slash is kept on every
    platform because the path is also written to the output's source column.

    Args:
        path: folder to search.

    Returns:
        Workbook paths in ``os.walk`` order, each listed once; an empty list
        when the folder is missing or holds no workbooks.
    """
    found = []
    for directory, _subdirs, file_names in os.walk(path):
        found.extend(directory + "/" + file_name for file_name in file_names)
    # collect_xls reverses what it is given; pre-reverse to keep walk order.
    found.reverse()
    return collect_xls(found)


def _locate_columns(candidate_rows):
    """Find the label row among ``candidate_rows`` and the wanted columns.

    Returns:
        ``(row_index, column_indices)`` for the first row that holds one of
        DEPARTMENT_LABELS together with all COMMON_LABELS; the indices follow
        the output column order.

    Raises:
        ValueError: if no candidate row holds the expected labels.
    """
    for row_index, labels in enumerate(candidate_rows):
        department = next(
            (label for label in DEPARTMENT_LABELS if label in labels), None
        )
        if department is None:
            continue
        if any(label not in labels for label in COMMON_LABELS):
            continue
        wanted = (department,) + COMMON_LABELS
        return row_index, [labels.index(label) for label in wanted]
    raise ValueError(
        "no header row with the expected labels in the first "
        f"{HEADER_SEARCH_ROWS} rows"
    )


def extract_rows(table, source):
    """Pull the target columns out of one worksheet.

    Args:
        table: sheet object exposing ``nrows`` and ``row_values(index)``
            (an ``xlrd`` sheet in normal use).
        source: value appended to every record as its source column.

    Returns:
        One list per data row below the label row: the six target values in
        output order followed by ``source``.

    Raises:
        ValueError: if the label row cannot be found.
    """
    probe_count = min(table.nrows, HEADER_SEARCH_ROWS)
    header_index, columns = _locate_columns(
        [table.row_values(index) for index in range(probe_count)]
    )
    records = []
    for row_index in range(header_index + 1, table.nrows):
        values = table.row_values(row_index)  # read each row only once
        records.append([values[column] for column in columns] + [source])
    return records


def build_table(paths):
    """Read every workbook in ``paths`` and return header plus records.

    Empty sheets are skipped.

    Raises:
        ValueError: if a non-empty sheet lacks the expected label row; the
            message names the workbook and the sheet.
    """
    # Imported here so the path helpers stay usable without the Excel
    # libraries installed.
    import xlrd

    total = [list(OUTPUT_HEADER)]
    for workbook_path in paths:
        workbook = xlrd.open_workbook(workbook_path)
        for sheet in workbook.sheets():
            if sheet.nrows == 0:
                continue
            try:
                total.extend(extract_rows(sheet, workbook_path))
            except ValueError as err:
                raise ValueError(
                    f"{workbook_path} (sheet {sheet.name!r}): {err}"
                ) from err
    return total


def write_result(rows, output_path="result.xlsx"):
    """Write ``rows`` (a list of lists) to the first sheet of a new workbook."""
    import xlsxwriter

    # The context manager closes the workbook even if a write fails.
    with xlsxwriter.Workbook(output_path) as workbook:
        worksheet = workbook.add_worksheet()
        for row_index, row in enumerate(rows):
            for column_index, value in enumerate(row):
                # Zero-based (row, column) addressing has no A..G limit.
                worksheet.write(row_index, column_index, value)


def main(folder=xpath):
    """Consolidate the workbooks under ``folder`` into ``result.xlsx``."""
    src = read_xls(folder)
    print(src)
    write_result(build_table(src))


if __name__ == "__main__":
    main(sys.argv[1] if len(sys.argv) > 1 else xpath)