# Repository layout:
#   ├── header_remake_Multisheet.py
#   ├── mytest.xlsx
#   └── show.mp4
#
# /header_remake_Multisheet.py
"""Promote the first fully populated text row of each worksheet to a header.

Every ``*.xlsx`` workbook in the current working directory is read with no
header row. For each sheet other than ``hiddenSheet`` the first row that has
no missing cells and contains at least one string is used as the column
labels; the rows above it (and the label row itself) are dropped and the
result is written to ``<sheet name><workbook file name>`` in the current
working directory.
"""
import glob
import os
from typing import Optional

import numpy as np
import pandas as pd

# Sheet name that is never processed.
HIDDEN_SHEET = "hiddenSheet"


def find_label_row(df: pd.DataFrame) -> Optional[int]:
    """Return the position of the first row usable as column labels.

    A row qualifies when none of its cells is missing and at least one cell
    is a string. Returns ``None`` when no row qualifies.
    """
    # Compute the "row has no missing cell" mask once for the whole sheet
    # instead of re-evaluating it for every cell.
    complete = df.notna().all(axis=1).to_numpy()
    for position in np.flatnonzero(complete):
        if any(isinstance(cell, str) for cell in df.iloc[position]):
            return int(position)
    return None


def promote_label_row(df: pd.DataFrame) -> Optional[pd.DataFrame]:
    """Return a copy of *df* whose label row has become the column header.

    Rows up to and including the label row are removed and the index is
    reset. Returns ``None`` when *df* has no qualifying label row. The input
    frame is left unmodified.
    """
    position = find_label_row(df)
    if position is None:
        return None
    body = df.iloc[position + 1:].reset_index(drop=True)
    body.columns = df.iloc[position].tolist()
    return body


def output_name(sheet_name: str, workbook_path: str) -> str:
    """Return the output file name: sheet name followed by the workbook name.

    ``os.path.basename`` is used so the directory part is stripped on every
    platform, not only where the path separator is a backslash.
    """
    return sheet_name + os.path.basename(workbook_path)


def process_workbook(file: str) -> None:
    """Rewrite every visible sheet of the workbook at *file* to its own file."""
    # sheet_name=None loads all sheets into a dict keyed by sheet name.
    df_all = pd.read_excel(file, header=None, sheet_name=None)
    print(type(df_all))
    for key, df in df_all.items():
        if key == HIDDEN_SHEET:
            continue
        print(key)
        print(df.head())
        cleaned = promote_label_row(df)
        if cleaned is None:
            # Nothing to promote; skip this sheet instead of aborting the run.
            print(f"{key}: no label row found, sheet skipped")
            continue
        cleaned.to_excel(output_name(key, file), index=False)


def main() -> None:
    """Process every ``*.xlsx`` workbook found in the current directory."""
    path = os.getcwd()
    excel_files = glob.glob(os.path.join(path, "*.xlsx"))
    print(excel_files)
    for file in excel_files:
        process_workbook(file)


if __name__ == "__main__":
    main()

# /mytest.xlsx:
#   https://raw.githubusercontent.com/apollo000104/Excelfiles_preprocessing/05412e7d2baf75756d3c3a1a55f844f37d66b726/mytest.xlsx
# /show.mp4:
#   https://raw.githubusercontent.com/apollo000104/Excelfiles_preprocessing/05412e7d2baf75756d3c3a1a55f844f37d66b726/show.mp4