# Repository layout:
# ├── mbox_parser.py
# ├── stripmail.py
# ├── add_header.py
# └── split_date.py
#
# Each section below is the content of one standalone script; the sections
# are independent of each other and are meant to be run one file at a time.
#
# NOTE: the csv files are opened in binary mode ("rb"/"wb"), which is what
# the Python 2 csv module expects. Under Python 3 these opens would have to
# become text mode with newline="".

# /mbox_parser.py:
# --------------------------------------------------------------------------------
# Dump the subject, sender and date headers of every message in an mbox
# file into clean_mail.csv, one row per message.
import mailbox
import csv

# The with-block guarantees the CSV is flushed and closed even if reading
# the mailbox fails part-way through.
with open("clean_mail.csv", "wb") as out_file:
    writer = csv.writer(out_file)
    mbox = mailbox.mbox('your_mbox_name')
    try:
        for message in mbox:
            # A header that is absent from the message is returned as None.
            writer.writerow(
                [message['subject'], message['from'], message['date']]
            )
    finally:
        # Release the file handle held by the mailbox object.
        mbox.close()

# --------------------------------------------------------------------------------
# /stripmail.py:
# --------------------------------------------------------------------------------
# Copy cleanermail.csv to stripmail.csv with all field quoting removed.
import csv

# The output is opened first, as in the original script, so an existing
# stripmail.csv is truncated before the input is opened.
with open('stripmail.csv', "wb") as dst, open('cleanermail.csv', "rb") as src:
    # QUOTE_NONE never wraps fields in quotes; a character that would
    # otherwise need quoting is preceded by the escape character (a space).
    writer = csv.writer(
        dst, quotechar='"', escapechar=' ', quoting=csv.QUOTE_NONE
    )
    # skipinitialspace drops whitespace that directly follows a delimiter.
    reader = csv.reader(src, skipinitialspace=True)
    writer.writerows(reader)

# --------------------------------------------------------------------------------
# /add_header.py:
# --------------------------------------------------------------------------------
# Copy dated_mail.csv to final_mail.csv, prepending a header row.
import csv

FIELDNAMES = ['subject', 'from', 'date']

with open('dated_mail.csv', 'rb') as src, open('final_mail.csv', 'wb') as dst:
    # Field names are supplied explicitly, so the first input line is read
    # as data rather than as a header.
    file_read = csv.DictReader(src, FIELDNAMES)
    file_write = csv.DictWriter(dst, FIELDNAMES)
    file_write.writeheader()
    file_write.writerows(file_read)

# --------------------------------------------------------------------------------
# /split_date.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
# Usage: split_date.py INFILE OUTFILE
#
# Read INFILE line by line and split each line on commas. A line that
# yields exactly four pieces is written to OUTFILE as its first three
# pieces; every other line is written as a single "None" cell. Each output
# row is also echoed to stdout.
import sys
import csv

# Both files are closed by the with-block, including when the loop raises.
with open(sys.argv[1]) as infile, open(sys.argv[2], 'w') as outfile:
    writer = csv.writer(outfile)

    for line in infile:
        # Plain str.split, not csv parsing: quotes in the input are not
        # interpreted here.
        # NOTE(review): presumably the dropped fourth piece is the tail of
        # a date that itself contains a comma -- confirm against the data.
        splits = line.split(",")
        if len(splits) == 4:
            newline = splits[:3]
        else:
            newline = ["None"]
        # Parenthesised single-argument print behaves the same on Python 2
        # and is also valid Python 3 syntax.
        print(newline)
        writer.writerow(newline)

# --------------------------------------------------------------------------------