# Split Files Into Chunks /Split Files Into Chunks
import sys
import os
import shutil
import pandas as pd


class Split_Files:
    '''
    Split a csv/txt file into numbered chunk files of a fixed row count.

    The chunks are written to the "file_split" directory (relative to the
    current working directory) as split_file1, split_file2, ... using the
    same extension as the input ('.txt' if the input name ends with '.txt',
    otherwise '.csv').
    '''

    def __init__(self, filename, split_number):
        '''
        Store the input file name and chunk size and prepare the output
        directory.

        filename     -- path of the csv/txt file to split
        split_number -- number of rows per output file; anything accepted
                        by int() (e.g. a command-line string)

        Raises ValueError if split_number is not an integer or is < 1.
        An already existing output directory is deleted and recreated, so
        chunks from a previous run never mix with the new ones.
        '''
        self.file_name = filename
        self.directory = "file_split"
        self.split = int(split_number)
        # Validate before touching the filesystem: a chunk size of 0 would
        # otherwise only fail later, after the old output was wiped.
        if self.split < 1:
            raise ValueError(
                f"split_number must be a positive integer, got {split_number!r}"
            )
        if os.path.exists(self.directory):
            shutil.rmtree(self.directory)
        os.mkdir(self.directory)
        if self.file_name.endswith('.txt'):
            self.file_extension = '.txt'
        else:
            self.file_extension = '.csv'
        self.file_number = 1

    def split_data(self):
        '''
        Read the input file and write it out in chunks of self.split rows.

        The input is parsed with pandas.read_csv (no header row, default
        comma separator). Chunks for a '.txt' input are written
        space-separated, all others comma-separated; no header or index is
        written. A final chunk with fewer than self.split rows is written
        with the same separator as the full chunks.
        '''
        data = pd.read_csv(self.file_name, header=None)
        separator = ' ' if self.file_extension == '.txt' else ','

        # Slice the frame instead of appending row by row: DataFrame.append
        # no longer exists in pandas 2.x, and slicing keeps column dtypes.
        for start in range(0, len(data), self.split):
            chunk = data.iloc[start:start + self.split]
            output_file = os.path.join(
                self.directory,
                f"split_file{self.file_number}{self.file_extension}",
            )
            chunk.to_csv(output_file, header=False, index=False, sep=separator)
            # The counter only advances after a full chunk, so after the
            # run it equals (number of full chunks + 1), as before.
            if len(chunk) == self.split:
                self.file_number += 1


if __name__ == '__main__':
    if len(sys.argv) < 3:
        sys.exit(f"usage: {sys.argv[0]} <file> <rows_per_chunk>")
    file, split_number = sys.argv[1], sys.argv[2]
    sp = Split_Files(file, split_number)
    sp.split_data()
--------------------------------------------------------------------------------