├── README.md └── mail2csv.py /README.md: -------------------------------------------------------------------------------- 1 | # mail2csv 2 | 3 | This command-line utility converts the contents of a `Maildir` to a CSV file. Each header becomes a CSV column, and each email becomes a row. 4 | 5 | ``` 6 | $ mail2csv example/ 7 | Date,Subject,From 8 | "Wed, 16 May 2018 20:05:16 +0000",An email,Bob 9 | "Wed, 16 May 2018 20:07:52 +0000",Also an email,Alice 10 | ``` 11 | 12 | By default, only `Date`, `Subject` and `From` headers are shown. Use `--headers` to specify which other headers to include, and `--all-headers` to include them all. 13 | 14 | ## Requirements 15 | 16 | - Python 2 or 3. 17 | 18 | ## Installation 19 | 20 | ``` 21 | cp mail2csv.py /usr/local/bin/mail2csv 22 | ``` 23 | 24 | ## Full usage 25 | 26 | ``` 27 | usage: mail2csv.py [-h] [--outfile OUTFILE] [--headers HEADERS [HEADERS ...]] 28 | [--all-headers] 29 | maildir 30 | 31 | Convert maildir to CSV. 32 | 33 | positional arguments: 34 | maildir Directory to read from 35 | 36 | optional arguments: 37 | -h, --help show this help message and exit 38 | --outfile OUTFILE File to output to. Standard output is used if this is 39 | not specified 40 | --headers HEADERS [HEADERS ...] 41 | Headers to include 42 | --all-headers Include all headers. 
#!/usr/bin/env python
"""Convert the messages of a Maildir into a CSV file, one row per message."""
from __future__ import print_function

import argparse
import mailbox
import csv
import email
import re
import fnmatch
import sys


class Maildir2Csv:
    """Namespace grouping the maildir-to-CSV conversion routines."""

    @staticmethod
    def print_err(*args, **kwargs):
        """Print to standard error.

        Takes the same positional and keyword arguments as ``print()``,
        except ``file``, which is always ``sys.stderr``.
        """
        # Declared static so that it can be called on the class under
        # Python 2 (where a plain function in a class body is an unbound
        # method) and on instances without ``self`` being printed.
        print(*args, file=sys.stderr, **kwargs)

    @staticmethod
    def run(maildir_path, outp_file, header_globs):
        """Write the headers of every message in a maildir as CSV rows.

        Args:
            maildir_path: Path handed to ``mailbox.Maildir`` with
                ``create=False``.
            outp_file: Writable file object the CSV is written to.
            header_globs: Iterable of ``fnmatch``-style patterns selecting
                which header names become columns. Columns appear in the
                order of the patterns; headers matched by one pattern are
                ordered alphabetically.

        Nothing at all is written when the maildir holds no messages.
        A warning is printed to standard error for every pattern without a
        ``*`` that matches no header.
        """
        maildir = mailbox.Maildir(maildir_path, create=False)
        keys = maildir.keys()
        if not keys:
            # No emails = no output
            return

        # Convert each message to a dictionary of header name -> value.
        messages = []
        all_headers_set = set()
        for key in keys:
            # Re-parse the stored message's string form with the email package.
            msg = email.message_from_string(str(maildir[key]))
            msg_dict = {}
            for header_name, header_value in msg.items():
                # A header occurring more than once gets numbered columns:
                # "Received", "Received-2", "Received-3", ...
                numbered_header_name = header_name
                header_number = 1
                while numbered_header_name in msg_dict:
                    header_number += 1
                    numbered_header_name = "{}-{}".format(header_name, header_number)
                msg_dict[numbered_header_name] = header_value
                all_headers_set.add(numbered_header_name)
            messages.append(msg_dict)
        all_headers = sorted(all_headers_set)

        # Determine which headers to use: treat requested header names as
        # globs against all known headers.
        use_headers_set = set()
        use_headers = []
        for header_glob in header_globs:
            header_pattern = re.compile(fnmatch.translate(header_glob))
            matches = 0
            for header_name in all_headers:
                if header_pattern.match(header_name) and header_name not in use_headers_set:
                    # Track list (in order of matches) with no duplicates
                    use_headers_set.add(header_name)
                    use_headers.append(header_name)
                    matches += 1
            if matches == 0 and '*' not in header_glob:
                # Usually triggers if a typo has occurred
                Maildir2Csv.print_err(
                    "WARNING: Header '{}' does not appear in any email messages, "
                    "and will not be included in the output".format(header_glob))

        # Write out to CSV; headers that were not selected are dropped and
        # headers missing from a message are left as empty cells.
        dw = csv.DictWriter(outp_file, fieldnames=use_headers, extrasaction='ignore')
        dw.writeheader()
        dw.writerows(messages)


def main(argv=None):
    """Parse command-line arguments and run the conversion.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.
    """
    parser = argparse.ArgumentParser(description='Convert maildir to CSV.')
    parser.add_argument('--outfile',
                        type=argparse.FileType('w'),
                        default=sys.stdout,
                        help="File to output to. Standard output is used if this is not specified")
    # NOTE(review): argparse only applies a positional's default when nargs
    # is '?' or '*', so this default is currently never used -- confirm
    # whether the argument was meant to be optional.
    parser.add_argument('maildir',
                        help="Directory to read from",
                        default="mail/")
    parser.add_argument('--headers',
                        help="Headers to include",
                        default=['Date', 'Subject', 'From'],
                        nargs='+')
    parser.add_argument('--all-headers',
                        help="Include all headers. Alias for --headers '*'",
                        action='store_true')
    args = parser.parse_args(argv)
    header_globs = ['*'] if args.all_headers else args.headers
    Maildir2Csv.run(args.maildir, args.outfile, header_globs)


if __name__ == "__main__":
    main()