├── README.md ├── LICENSE └── gmail_extractor.py /README.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | This is just a tool I built in an afternoon, and I am no Python expert. Things could probably be done in a much nicer, more Pythonic way. 3 | 4 | # gmail_extractor 5 | A Python-powered Gmail mailbox extractor tool. Use this to extract all emails and attachments of a Gmail mailbox. 6 | 7 | # How it works 8 | This is a simple, quickly developed tool to allow extraction of emails from a Gmail mailbox onto your local computer. This tool will extract some default information such as "subject", "date" and "from" into a convenient JSON format, as well as all the attachments available in the email. Afterward, it will store all of this in a folder with the UID of the email as its name. 9 | 10 | # How to run 11 | Just run `python3 gmail_extractor.py`. Use a version of Python newer than 3.7.5 if you can, although this may also work with Python 3.3 or greater. 12 | 13 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 andreiaugustin 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
# --- LICENSE (continued; text preserved from the original dump) ---
#
# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
# SOFTWARE.
#
# --- /gmail_extractor.py ---

import email
import getpass
import imaplib
import json
import os
import re
import sys


class GMAIL_EXTRACTOR():
    """Interactive Gmail mailbox extractor.

    Instantiating the class runs the whole interactive session: it asks for
    credentials, logs in over IMAP, asks for a mailbox and a destination
    folder, then writes one sub-folder per message (named after the message
    UID) containing ``<uid>.json`` (subject / from / date / body) plus every
    attachment of that message.
    """

    def helloWorld(self):
        """Print the welcome banner."""
        print("\nWelcome to Gmail extractor,\ndeveloped by A. Augustin.")

    def initializeVariables(self):
        """Reset every piece of session state to its empty value."""
        self.usr = ""
        self.pwd = ""
        self.mail = None       # imaplib.IMAP4_SSL connection once attemptLogin() ran
        self.mailbox = ""
        self.mailCount = 0
        self.destFolder = ""
        self.data = []         # raw response of the last SEARCH command
        self.ids = []          # space-separated message numbers (bytes) from SEARCH
        self.idsList = []      # the same message numbers, split into a list

    def getLogin(self):
        """Prompt for the account name and password."""
        print("\nPlease enter your Gmail login details below.")
        self.usr = input("Email: ")
        # getpass keeps the password from being echoed to the terminal.
        self.pwd = getpass.getpass("Password: ")

    def attemptLogin(self):
        """Connect to Gmail and log in.

        Returns:
            True when the login succeeded (the destination folder has then
            also been asked for), False when the server rejected it.
        """
        self.mail = imaplib.IMAP4_SSL("imap.gmail.com", 993)
        try:
            # login() signals failure by raising, not by its return value.
            self.mail.login(self.usr, self.pwd)
        except imaplib.IMAP4.error:
            print("\nLogon FAILED")
            return False

        print("\nLogon SUCCESSFUL")
        self.destFolder = input("\nPlease choose a destination folder in the form of /Users/username/dest/ (do not forget trailing slash!): ")
        if not self.destFolder.endswith("/"):
            self.destFolder += "/"
        return True

    def checkIfUsersWantsToContinue(self):
        """Show the message count and ask for confirmation.

        Returns:
            True only when the answer starts with "y" (case-insensitive).
        """
        print("\nWe have found "+str(self.mailCount)+" emails in the mailbox "+self.mailbox+".")
        answer = input("Do you wish to continue extracting all the emails into "+self.destFolder+"? (y/N) ")
        return answer.lower().strip()[:1] == "y"

    def selectMailbox(self):
        """Ask for a mailbox name and select it on the server.

        Returns:
            True when the mailbox exists and holds at least one message.
        """
        self.mailbox = input("\nPlease type the name of the mailbox you want to extract, e.g. Inbox: ")
        self.mailCount = 0
        status, response = self.mail.select(self.mailbox)
        if status != "OK":
            # For a failed SELECT the payload is an error text, not a count.
            print("\nCould not open the mailbox "+self.mailbox+".")
            return False
        try:
            self.mailCount = int(response[0].decode("utf-8"))
        except (AttributeError, IndexError, ValueError):
            return False
        return self.mailCount > 0

    def searchThroughMailbox(self):
        """Collect the message numbers of every email in the selected mailbox."""
        _status, self.data = self.mail.search(None, "ALL")
        self.ids = self.data[0] or b""
        self.idsList = self.ids.split()

    def parseEmails(self):
        """Fetch every message found by the search and write it to disk.

        Reads the message numbers from ``self.data[0]`` (the SEARCH response).
        Messages that cannot be fetched, or whose response carries no UID,
        are reported on stderr and skipped instead of aborting the run.
        """
        for anEmail in (self.data[0] or b"").split():
            status, fetched = self.mail.fetch(anEmail, '(UID RFC822)')
            if status != "OK" or not fetched or not isinstance(fetched[0], tuple):
                print("Skipping message "+anEmail.decode("ascii", "replace")+": fetch failed.", file=sys.stderr)
                continue

            fetchHeader, raw = fetched[0]
            uid = self._uidFromFetchHeader(fetchHeader)
            if uid is None:
                print("Skipping message "+anEmail.decode("ascii", "replace")+": no UID in response.", file=sys.stderr)
                continue

            self._saveEmail(uid, raw)

    @staticmethod
    def _uidFromFetchHeader(fetchHeader):
        """Return the UID (as str) named in a FETCH response header, or None."""
        # Search for the "UID <n>" pair rather than relying on its position:
        # the order of the data items in the response is not fixed.
        match = re.search(br"UID (\d+)", fetchHeader)
        return match.group(1).decode("ascii") if match else None

    @staticmethod
    def _headerText(value):
        """Return a header value as a JSON-serialisable str (None stays None)."""
        return None if value is None else str(value)

    @staticmethod
    def _decodeText(part):
        """Return the text of a non-multipart part, transfer-decoded and decoded with its charset."""
        payload = part.get_payload(decode=True)
        if payload is None:
            return ""
        charset = part.get_content_charset() or "utf-8"
        try:
            return payload.decode(charset, errors="replace")
        except LookupError:
            # The message declared a charset Python does not know.
            return payload.decode("utf-8", errors="replace")

    @staticmethod
    def _safeFilename(name):
        """Reduce an attachment name to a bare file name ("" when nothing usable is left)."""
        # The name comes from the email, so directory components are dropped
        # to keep the file inside the message folder.
        candidate = os.path.basename(str(name).replace("\\", "/"))
        return "" if candidate in ("", ".", "..") else candidate

    def _saveEmail(self, uid, raw):
        """Write one message (metadata JSON + attachments) below the destination folder.

        Args:
            uid: UID of the message; used as folder and JSON file name.
            raw: the complete RFC 822 message as bytes.

        Returns:
            The dict that was written to ``<uid>.json``.
        """
        # Parse the bytes directly; the message declares its own charsets per part.
        msg = email.message_from_bytes(raw)

        # A fresh dict per message, so a body never leaks from one email to the next.
        jsonOutput = {
            'subject': self._headerText(msg['subject']),
            'from': self._headerText(msg['from']),
            'date': self._headerText(msg['date']),
        }

        emailFolder = os.path.join(str(self.destFolder), str(uid))
        os.makedirs(emailFolder, exist_ok=True)

        if msg.is_multipart():
            for part in msg.walk():
                if part.is_multipart():
                    continue  # containers hold no payload of their own

                ## Get Body ##
                isAttachment = part.get_content_disposition() == "attachment"
                if part.get_content_type() == "text/plain" and not isAttachment:
                    jsonOutput['body'] = self._decodeText(part)

                ## Get Attachments ##
                attchName = part.get_filename()
                if not attchName:
                    continue
                safeName = self._safeFilename(attchName)
                payload = part.get_payload(decode=True)
                if not safeName or payload is None:
                    continue
                with open(os.path.join(emailFolder, safeName), "wb") as f:
                    f.write(payload)
        else:
            # Non-multipart email, perhaps no attachments or just text.
            jsonOutput['body'] = self._decodeText(msg)

        emailInfoFilePath = os.path.join(emailFolder, str(uid) + ".json")
        with open(emailInfoFilePath, "w", encoding="utf-8") as f:
            f.write(json.dumps(jsonOutput))
        return jsonOutput

    def _logout(self):
        """Close the IMAP session if one was opened (best effort)."""
        if self.mail is None:
            return
        try:
            self.mail.logout()
        except (imaplib.IMAP4.error, OSError):
            # The connection is being discarded anyway; nothing to recover.
            pass

    def __init__(self):
        """Run the whole interactive extraction session."""
        self.initializeVariables()
        self.helloWorld()
        self.getLogin()
        try:
            if not self.attemptLogin():
                sys.exit()
            if not self.selectMailbox():
                sys.exit()
            if not self.checkIfUsersWantsToContinue():
                sys.exit()
            self.searchThroughMailbox()
            self.parseEmails()
        finally:
            # Runs on sys.exit() too, so the server session is always released.
            self._logout()


if __name__ == "__main__":
    run = GMAIL_EXTRACTOR()