├── .gitignore ├── README.md ├── common.py ├── mbdb.py ├── iphone_history.py ├── sms.py └── whatsapp.py /.gitignore: -------------------------------------------------------------------------------- 1 | *.pyc 2 | output_* 3 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | iphone_history 2 | ============== 3 | 4 | Get the full chat history from Whatsapp and SMS/iMessage. 5 | 6 | Usage 7 | ----- 8 | 9 | * back up your iPhone using iTunes 10 | * run `python iphone_history.py` 11 | 12 | This script will look for the backup folder created by iTunes, extract the WhatsApp and SMS databases, 13 | and create folders containing the conversation history with each of your contacts. 14 | -------------------------------------------------------------------------------- /common.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | from time import strftime 3 | import os 4 | import sys 5 | import shutil 6 | from datetime import datetime 7 | 8 | COLORS = ["#f8ff78", "#85d7ff", "cornsilk", "lightpink", "lightgreen", "yellowgreen", "lightgrey", "khaki", "mistyrose"] 9 | 10 | TEMPLATEBEGINNING = u""" 11 | 12 | 13 | %s Conversation 14 | 15 | 31 | 32 | 33 | 34 | 35 | """ 36 | 37 | TEMPLATEEND = u""" 38 | 39 |
40 | 41 | """ 42 | 43 | ROWTEMPLATE = u"""%s%s%s\n""" 44 | 45 | def get_output_dirs(name): 46 | OUTPUT_DIR = "output_%s_%s" % (strftime("%Y_%m_%d"), name) 47 | if os.path.exists(OUTPUT_DIR): 48 | shutil.rmtree(OUTPUT_DIR) 49 | MEDIA_DIR = os.path.join(OUTPUT_DIR, "media") 50 | os.makedirs(MEDIA_DIR) 51 | return OUTPUT_DIR, MEDIA_DIR 52 | 53 | cached_colors = {} 54 | next_color = 0 55 | def get_color(contact): 56 | global next_color 57 | if contact in cached_colors: 58 | return cached_colors[contact] 59 | cached_colors[contact] = COLORS[1:][next_color % (len(COLORS) - 1)] 60 | next_color += 1 61 | return cached_colors[contact] 62 | 63 | def reset_colors(): 64 | global next_color, cached_colors 65 | cached_colors = {} 66 | next_color = 0 67 | 68 | def get_date(mdate): 69 | # convert apple's "reference date" to unix timestamp 70 | # (seconds between Jan 1 1970 and Jan 1 2001) 71 | # http://stackoverflow.com/questions/6998541 72 | mdate = int(mdate) + 978307200 73 | mdatetime = datetime.fromtimestamp(mdate) 74 | mdatetime = mdatetime.strftime("%Y-%m-%d %H:%M:%S") 75 | return mdatetime 76 | 77 | def sanitize_filename(f): 78 | invalid_chars = "?*/\\:\"<>|" 79 | for char in invalid_chars: 80 | f = f.replace(char, "-") 81 | return f 82 | 83 | def find_nonexisting_path(p): 84 | # used to avoid duplicates running over each other 85 | i = 1 86 | basename = os.path.basename(p) 87 | basename, ext = os.path.splitext(basename) 88 | dirname = os.path.dirname(p) 89 | while os.path.exists(p): 90 | i += 1 91 | p = os.path.join(dirname, "%s-%d%s" % (basename, i, ext)) 92 | return p 93 | 94 | def iterate_with_progress(iterator, count, name): 95 | previouspercent = 0 96 | for index, value in enumerate(iterator): 97 | yield value 98 | percent = round((float(index+1) / count*100)) 99 | if percent != previouspercent: 100 | bar = "[%s%s]" % ("#"*int(percent/10),"-"*(10-int(percent/10))) 101 | print("{:10s} {} {}% done".format(name, bar, percent), end="\r") 102 | sys.stdout.flush() 103 | previouspercent = percent 104 | print() 105 | 106 | -------------------------------------------------------------------------------- /mbdb.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from __future__ import print_function 4 | import sys 5 | import hashlib 6 | 7 | mbdx = {} 8 | 9 | def o(b): 10 | try: 11 | return ord(b) 12 | except: 13 | return b 14 | 15 | def getint(data, offset, intsize): 16 | """Retrieve an integer (big-endian) and new offset from the current offset""" 17 | value = 0 18 | while intsize > 0: 19 | value = (value << 8) + o(data[offset]) 20 | offset = offset + 1 21 | intsize = intsize - 1 22 | return value, offset 23 | 24 | def getstring(data, offset): 25 | """Retrieve a string and new offset from the current offset into the data""" 26 | if o(data[offset]) == 0xFF and o(data[offset+1]) == 0xFF: 27 | return '', offset + 2 # Blank string 28 | length, offset = getint(data, offset, 2) # 2-byte length 29 | value = data[offset:offset+length] 30 | return value, (offset + length) 31 | 32 | def process_mbdb_file_internal(filename): 33 | mbdb = {} # Map offset of info in this file => file info 34 | data = open(filename, "rb").read() 35 | if data[0:4] != b"mbdb": raise Exception("This does not look like an MBDB file") 36 | offset = 4 37 | offset = offset + 2 # value x05 x00, not sure what this is 38 | while offset < len(data): 39 | fileinfo = {} 40 | fileinfo['start_offset'] = offset 41 | fileinfo['domain'], offset = getstring(data, offset) 42 | fileinfo['filename'], offset = getstring(data, offset) 43 | fileinfo['linktarget'], offset = getstring(data, offset) 44 | fileinfo['datahash'], offset = getstring(data, offset) 45 | fileinfo['unknown1'], offset = getstring(data, offset) 46 | fileinfo['mode'], offset = getint(data, offset, 2) 47 | fileinfo['unknown2'], offset = getint(data, offset, 4) 48 | fileinfo['unknown3'], offset = getint(data, offset, 4) 49 | fileinfo['userid'], offset = getint(data, offset, 4) 50 | fileinfo['groupid'], offset = getint(data, offset, 4) 51 | fileinfo['mtime'], offset = getint(data, offset, 4) 52 | fileinfo['atime'], offset = getint(data, offset, 4) 53 | fileinfo['ctime'], offset = getint(data, offset, 4) 54 | fileinfo['filelen'], offset = getint(data, offset, 8) 55 | fileinfo['flag'], offset = getint(data, offset, 1) 56 | fileinfo['numprops'], offset = getint(data, offset, 1) 57 | fileinfo['properties'] = {} 58 | for ii in range(fileinfo['numprops']): 59 | propname, offset = getstring(data, offset) 60 | propval, offset = getstring(data, offset) 61 | fileinfo['properties'][propname] = propval 62 | mbdb[fileinfo['start_offset']] = fileinfo 63 | fullpath = fileinfo['domain'] + b'-' + fileinfo['filename'] 64 | id = hashlib.sha1(fullpath) 65 | mbdx[fileinfo['start_offset']] = id.hexdigest() 66 | return mbdb 67 | 68 | def process_mbdb_file(fname): 69 | mbdb = process_mbdb_file_internal(fname) 70 | for offset, fileinfo in mbdb.items(): 71 | if offset in mbdx: 72 | fileinfo['fileID'] = mbdx[offset] 73 | else: 74 | fileinfo['fileID'] = "" 75 | print("No fileID found for %s" % fileinfo_str(fileinfo), file=sys.stderr) 76 | yield fileinfo 77 | 78 | def modestr(val): 79 | def mode(val): 80 | if (val & 0x4): r = 'r' 81 | else: r = '-' 82 | if (val & 0x2): w = 'w' 83 | else: w = '-' 84 | if (val & 0x1): x = 'x' 85 | else: x = '-' 86 | return r+w+x 87 | return mode(val>>6) + mode((val>>3)) + mode(val) 88 | 89 | def fileinfo_str(f, verbose=False): 90 | if not verbose: return "(%s)%s::%s" % (f['fileID'], f['domain'], f['filename']) 91 | if (f['mode'] & 0xE000) == 0xA000: type = 'l' # symlink 92 | elif (f['mode'] & 0xE000) == 0x8000: type = '-' # file 93 | elif (f['mode'] & 0xE000) == 0x4000: type = 'd' # dir 94 | else: 95 | print("Unknown file type %04x for %s" % (f['mode'], fileinfo_str(f, False)), file=sys.stderr) 96 | type = '?' # unknown 97 | info = ("%s%s %08x %08x %7d %10d %10d %10d (%s)%s::%s" % 98 | (type, modestr(f['mode']&0x0FFF) , f['userid'], f['groupid'], f['filelen'], 99 | f['mtime'], f['atime'], f['ctime'], f['fileID'], f['domain'], f['filename'])) 100 | if type == 'l': info = info + ' -> ' + f['linktarget'] # symlink destination 101 | for name, value in f['properties'].items(): # extra properties 102 | info = info + ' ' + name + '=' + repr(value) 103 | return info 104 | 105 | verbose = True 106 | if __name__ == '__main__': 107 | for fileinfo in process_mbdb_file("Manifest.mbdb"): 108 | print(fileinfo_str(fileinfo, verbose)) -------------------------------------------------------------------------------- /iphone_history.py: -------------------------------------------------------------------------------- 1 | from __future__ import print_function 2 | import os 3 | import shutil 4 | import sys 5 | import re 6 | import datetime 7 | import mbdb 8 | import whatsapp 9 | import sms 10 | 11 | 12 | class BackupExtractor(object): 13 | """ object representing a single backup directory. used to retrieve the device name and 14 | date of the backup, and convert file paths from the device filesytem to the actual filesytem """ 15 | def __init__(self, dir): 16 | self._dir = dir 17 | self._file_index = None 18 | self._date = datetime.datetime.fromtimestamp(0) 19 | self._device_name = "" 20 | self._parse_info_plist() 21 | 22 | def _parse_info_plist(self): 23 | info_file = os.path.join(self._dir, "Info.plist") 24 | if not os.path.exists(info_file): 25 | print("WARNING: no Info.plist file found in backup folder %s" % (self._dir,)) 26 | return 27 | info_data = open(info_file, "r").read() 28 | match_obj = re.search("([^<]*)", info_data) 29 | if match_obj is not None: 30 | time_str = match_obj.group(1) 31 | self._date = datetime.datetime.strptime(time_str, "%Y-%m-%dT%H:%M:%SZ") 32 | else: 33 | print("WARNING: no date found for backup folder %s" % (self._dir,)) 34 | match_obj = re.search("Device Name\s*([^<]*)", info_data) 35 | if match_obj is not None: 36 | self._device_name = match_obj.group(1) 37 | else: 38 | print("WARNING: no device name found in backup folder %s" % (self._dir,)) 39 | 40 | def get_date(self): 41 | return self._date 42 | 43 | def get_device_name(self): 44 | return self._device_name 45 | 46 | def _get_file_index(self): 47 | if self._file_index is not None: 48 | return self._file_index 49 | 50 | mbdb_file = os.path.join(self._dir, "Manifest.mbdb") 51 | 52 | files_in_backup = mbdb.process_mbdb_file(mbdb_file) 53 | 54 | # file index: map domain+filename to physical file in backup directory 55 | self._file_index = dict() 56 | for f in files_in_backup: 57 | domain = f['domain'].decode("utf-8", errors="ignore") 58 | filename = f['filename'].decode("utf-8", errors="ignore") 59 | file_path = os.path.join(self._dir, str(f['fileID'])) 60 | self._file_index[(domain, filename)] = file_path 61 | return self._file_index 62 | 63 | def get_file_path(self, domain, filename): 64 | return self._get_file_index().get((domain, filename), None) 65 | 66 | 67 | def get_latest_backup(): 68 | backups_root = None 69 | if sys.platform == "win32": 70 | backups_root = os.path.expandvars(r"%appdata%\Apple Computer\MobileSync\Backup") 71 | elif sys.platform == "darwin": 72 | backups_root = os.path.expanduser("~/Library/Application Support/MobileSync/Backup") 73 | else: 74 | print("Unsupported system: %s" % sys.platform) 75 | return None 76 | 77 | list_of_backups = os.listdir(backups_root) 78 | if not list_of_backups: 79 | return None 80 | list_of_backups = [os.path.join(backups_root, backup) for backup in list_of_backups] 81 | list_of_backups = [BackupExtractor(backup) for backup in list_of_backups if os.path.isdir(backup)] 82 | 83 | list_of_backups.sort(key=lambda backup: backup.get_date()) 84 | 85 | choose = len(sys.argv) == 2 and sys.argv[-1] == "--choose" 86 | if choose: 87 | print("Choose backup:") 88 | for i, backup in enumerate(list_of_backups, 1): 89 | print("%d. %s %s" % (i, backup.get_device_name(), backup.get_date())) 90 | index = int(input()) - 1 91 | else: 92 | index = -1 # latest backup 93 | 94 | return list_of_backups[index] 95 | 96 | def lib_main(backup_extractor, lib): 97 | files_to_copy = [] 98 | for phone_path, file_path in lib.FILES: 99 | # phone_path is either a tuple (domain, filename) or a list of such 100 | # tuples (in which case, only one of the domain/filename should be copied) 101 | if type(phone_path) == list: 102 | existing_file_path = [backup_extractor.get_file_path(*one_path) for one_path in phone_path] 103 | existing_file_path = [path for path in existing_file_path if path is not None] 104 | existing_file_path = None if len(existing_file_path) == 0 else existing_file_path[0] 105 | else: 106 | existing_file_path = backup_extractor.get_file_path(*phone_path) 107 | if existing_file_path is None: 108 | print("Could not find file in backup: {}/{}".format(*phone_path)) 109 | return 110 | files_to_copy.append((existing_file_path, file_path)) 111 | 112 | for existing_file_path, file_path in files_to_copy: 113 | shutil.copy(existing_file_path, file_path) 114 | 115 | lib.main(backup_extractor) 116 | 117 | for existing_file_path, file_path in files_to_copy: 118 | os.remove(file_path) 119 | 120 | def main(): 121 | backup_extractor = get_latest_backup() 122 | if backup_extractor is None: 123 | print("Could not find backup folder") 124 | sys.exit() 125 | 126 | for lib in [whatsapp, sms]: 127 | lib_main(backup_extractor, lib) 128 | 129 | if __name__ == "__main__": 130 | main() 131 | -------------------------------------------------------------------------------- /sms.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import os 3 | import shutil 4 | from io import open 5 | 6 | from common import COLORS, TEMPLATEBEGINNING, TEMPLATEEND, ROWTEMPLATE 7 | from common import get_color, reset_colors, get_date, iterate_with_progress, get_output_dirs 8 | from common import sanitize_filename, find_nonexisting_path 9 | 10 | OUTPUT_DIR, MEDIA_DIR = get_output_dirs("sms") 11 | 12 | CHAT_STORAGE_FILE = os.path.join(OUTPUT_DIR, "sms.db") 13 | CONTACTS_FILE = os.path.join(OUTPUT_DIR, "AddressBook.sqlitedb") 14 | 15 | FILES = [(("HomeDomain", "Library/SMS/sms.db"), CHAT_STORAGE_FILE), 16 | (("HomeDomain", "Library/AddressBook/AddressBook.sqlitedb"), CONTACTS_FILE)] 17 | 18 | FIELDS = "ROWID, text, date, is_from_me, handle_id, cache_has_attachments" 19 | 20 | OBJ_MARKER = "\ufffc" 21 | 22 | contact_cache = {} 23 | def get_contact_name(conn, contact_conn, contact_id): 24 | if contact_id in contact_cache: 25 | return contact_cache[contact_id] 26 | c = conn.cursor() 27 | c.execute("SELECT id FROM handle WHERE ROWID=?;", (contact_id,)) 28 | try: 29 | handle_id = next(c)[0] # this is either a phone number or an iMessage address 30 | except StopIteration: 31 | handle_id = "UNKNOWN" 32 | if handle_id.startswith("+"): 33 | c = contact_conn.cursor() 34 | p = handle_id.replace("+972", "0") 35 | phone_options = (handle_id, p, 36 | "{}-{}-{}".format(p[-10:-7], p[-7:-4], p[-4:]), 37 | "({}) {} {}".format(p[-10:-7], p[-7:-4], p[-4:])) 38 | c.execute("SELECT record_id FROM ABMultiValue WHERE value=? or value=? or value=? or value=?", phone_options) 39 | for i in c: 40 | c2 = contact_conn.cursor() 41 | c2.execute("SELECT first, last FROM ABPerson WHERE ROWID=?", i) 42 | handle_id = " ".join((s for s in next(c2) if s)) 43 | contact_cache[contact_id] = handle_id 44 | return handle_id 45 | 46 | def copy_media_file(backup_extractor, path_in_backup): 47 | if path_in_backup.startswith("/var/mobile/"): 48 | path_in_backup = path_in_backup[12:] 49 | elif path_in_backup.startswith("~/"): 50 | path_in_backup = path_in_backup[2:] 51 | filepath = backup_extractor.get_file_path("MediaDomain", path_in_backup) 52 | if filepath is None: 53 | return "" 54 | new_media_path = os.path.join(MEDIA_DIR, os.path.basename(path_in_backup)) 55 | shutil.copy(filepath, new_media_path) 56 | return new_media_path 57 | 58 | def handle_media(conn, backup_extractor, message_id, mtext): 59 | c = conn.cursor() 60 | c.execute("SELECT filename, mime_type FROM attachment WHERE ROWID in "\ 61 | "(SELECT attachment_id FROM message_attachment_join WHERE message_id=?);", (message_id,)) 62 | for row in c: 63 | new_media_path = copy_media_file(backup_extractor, row[0]) 64 | tag_format = '<{0} src="media/{1}" style="width:200px;"{2}>' 65 | media_type = row[1].split("/")[0] 66 | tag = {"video": "video", "image": "img"}.get(media_type, None) 67 | if tag is None: 68 | media_element = "[unknown attachment type: {}]".format(media_type) 69 | else: 70 | controls = " controls" if tag in ["audio", "video"] else "" 71 | media_element = tag_format.format(tag, os.path.basename(new_media_path), controls) 72 | if OBJ_MARKER in mtext: 73 | mtext = mtext.replace(OBJ_MARKER, media_element, 1) 74 | else: 75 | mtext = mtext + media_element 76 | return mtext 77 | 78 | def get_filename(conn, contact_conn, chat_id): 79 | c = conn.cursor() 80 | c.execute("SELECT handle_id FROM chat_handle_join WHERE chat_id=?;", (chat_id,)) 81 | names_in_chat = [] 82 | for row in c: 83 | names_in_chat.append(get_contact_name(conn, contact_conn, row[0])) 84 | filename = sanitize_filename(" & ".join(names_in_chat)) 85 | file_path = os.path.join(OUTPUT_DIR, '%s.html' % filename) 86 | file_path = find_nonexisting_path(file_path) 87 | return file_path 88 | 89 | def output_contact(conn, contact_conn, backup_extractor, chat_id, your_name): 90 | reset_colors() 91 | contact_name = str(chat_id) 92 | html = open(get_filename(conn, contact_conn, chat_id), 'w', encoding="utf-8") 93 | html.write(TEMPLATEBEGINNING % ("SMS/iMessage",)) 94 | c = conn.cursor() 95 | c.execute("SELECT {} FROM message WHERE ROWID in ".format(FIELDS) + \ 96 | "(SELECT message_id FROM chat_message_join WHERE chat_id=?);", (chat_id,)) 97 | for row in c: 98 | mid, mtext, mdate, is_from_me, handle_id, has_attachment = row 99 | if mtext is None: 100 | mtext = "" 101 | if has_attachment: 102 | mtext = handle_media(conn, backup_extractor, mid, mtext) 103 | mtext = mtext.replace("\n", "
\n") 104 | mdatetime = get_date(mdate) 105 | mfrom = your_name if is_from_me else get_contact_name(conn, contact_conn, handle_id) 106 | color = COLORS[0] if is_from_me else get_color(handle_id) 107 | html.write((ROWTEMPLATE % (color, mdatetime, mfrom, mtext))) 108 | html.write(TEMPLATEEND) 109 | html.close() 110 | 111 | def main(backup_extractor): 112 | contact_conn = sqlite3.connect(CONTACTS_FILE) 113 | conn = sqlite3.connect(CHAT_STORAGE_FILE) 114 | c = conn.cursor() 115 | c.execute("SELECT COUNT(*) FROM chat") 116 | total_contacts = next(c)[0] 117 | c = conn.cursor() 118 | c.execute("SELECT ROWID FROM chat") 119 | for chat_id in iterate_with_progress(c, total_contacts, "SMS"): 120 | output_contact(conn, contact_conn, backup_extractor, chat_id[0], "me") 121 | -------------------------------------------------------------------------------- /whatsapp.py: -------------------------------------------------------------------------------- 1 | import sqlite3 2 | import os 3 | import shutil 4 | import codecs 5 | from io import open 6 | import sys 7 | 8 | from common import COLORS, TEMPLATEBEGINNING, TEMPLATEEND, ROWTEMPLATE 9 | from common import get_color, reset_colors, get_date, iterate_with_progress, get_output_dirs 10 | from common import sanitize_filename, find_nonexisting_path 11 | 12 | if sys.version[0] == "3": 13 | unicode = str 14 | 15 | OUTPUT_DIR, MEDIA_DIR = get_output_dirs("whatsapp") 16 | 17 | CHAT_STORAGE_FILE = os.path.join(OUTPUT_DIR, "ChatStorage.sqlite") 18 | FILES = [([("AppDomainGroup-group.net.whatsapp.WhatsApp.shared", "ChatStorage.sqlite"), 19 | ("AppDomain-net.whatsapp.WhatsApp", "Documents/ChatStorage.sqlite")], CHAT_STORAGE_FILE)] 20 | 21 | FIELDS = "ZFROMJID, ZTEXT, ZMESSAGEDATE, ZMESSAGETYPE, ZGROUPEVENTTYPE, ZGROUPMEMBER, ZMEDIAITEM" 22 | 23 | cached_members = {} 24 | def get_group_member_name(conn, id): 25 | if id in cached_members: 26 | return cached_members[id] 27 | c = conn.cursor() 28 | c.execute("SELECT ZCONTACTNAME FROM ZWAGROUPMEMBER WHERE Z_PK=?", (id,)) 29 | cached_members[id] = next(c)[0] 30 | return cached_members[id] 31 | 32 | def get_media_data(conn, mediaid, cols): 33 | c = conn.cursor() 34 | c.execute("SELECT {} FROM ZWAMEDIAITEM WHERE Z_PK=?".format(cols), (mediaid,)) 35 | return next(c) 36 | 37 | def copy_media_file(backup_extractor, path_in_backup): 38 | path_in_backup = "Library" + ("" if path_in_backup.startswith("/") else "/") + path_in_backup 39 | filepath = backup_extractor.get_file_path("AppDomain-net.whatsapp.WhatsApp", path_in_backup) 40 | new_media_path = os.path.join(MEDIA_DIR, os.path.basename(path_in_backup)) 41 | shutil.copy(filepath, new_media_path) 42 | return new_media_path 43 | 44 | def handle_media(conn, backup_extractor, mtype, mmediaitem): 45 | mediadata = ["ZMEDIALOCALPATH", "ZMEDIALOCALPATH", "ZMEDIALOCALPATH", "ZVCARDNAME", 46 | "ZLATITUDE, ZLONGITUDE"][mtype-1] 47 | data = get_media_data(conn, mmediaitem, mediadata) 48 | mtypestr = {1: "image", 2: "video", 3: "audio", 4: "contact", 5: "location"}[mtype] 49 | if data[0] is None: 50 | return "[missing {}]".format(mtypestr) 51 | data = u", ".join([unicode(x) for x in data]) 52 | if mtype in [1, 2, 3]: 53 | new_media_path = copy_media_file(backup_extractor, data) 54 | tag_format = '<{0} src="media/{1}" style="width:200px;"{2}>' 55 | tag = ["img", "video", "audio"][mtype-1] 56 | controls = " controls" if tag in ["audio", "video"] else "" 57 | return tag_format.format(tag, os.path.basename(new_media_path), controls) 58 | if mtype == 4 and data.startswith("="): 59 | # if the vCard has no contact image the format of the row in the db is a little different, 60 | # and name is encoded using quopri encoding 61 | try: 62 | data = codecs.decode(data.decode("ascii"), "quopri").encode("utf-8") 63 | except: 64 | pass 65 | return u"[{} - {}]".format(mtypestr, data) 66 | 67 | def get_text(conn, backup_extractor, row): 68 | mfrom, mtext, mdate, mtype, mgroupeventtype, mgroupmember, mmediaitem = row 69 | if mtype == 0: 70 | return mtext 71 | if mtype == 6: 72 | mgroupmember = "you" if mgroupmember is None else get_group_member_name(conn, mgroupmember) 73 | if mgroupeventtype not in [1, 2, 3, 4]: 74 | return u"[group event {} by {}]".format(mgroupeventtype, mgroupmember) 75 | change_text = {1: u"changed the group subject to {}".format(mtext), 76 | 2: u"joined", 3: u"left", 4: u"changed the group photo"} 77 | return u"[{} {}]".format(mgroupmember, change_text[mgroupeventtype]) 78 | if mtype in [1, 2, 3, 4, 5]: 79 | return handle_media(conn, backup_extractor, mtype, mmediaitem) 80 | return u"[message type %d]" % mtype 81 | 82 | def get_from(conn, is_group, contact_id, contact_name, your_name, row): 83 | mfrom, mtext, mdate, mtype, mgroupeventtype, mgroupmember, mmediaitem = row 84 | if mfrom != contact_id: 85 | if is_group: 86 | return contact_name + " - " + your_name, COLORS[0] 87 | else: 88 | return your_name, COLORS[0] 89 | mfrom = contact_name 90 | if is_group: 91 | if mgroupmember is not None and mtype != 6: 92 | mfrom += " - " + get_group_member_name(conn, mgroupmember) 93 | color = get_color(mfrom) 94 | return mfrom, color 95 | 96 | def output_contact(conn, backup_extractor, is_group, contact_id, contact_name, your_name): 97 | reset_colors() 98 | file_path = os.path.join(OUTPUT_DIR, '%s.html' % sanitize_filename(contact_name)) 99 | file_path = find_nonexisting_path(file_path) 100 | html = open(file_path, 'w', encoding="utf-8") 101 | html.write(TEMPLATEBEGINNING % ("WhatsApp",)) 102 | c = conn.cursor() 103 | c.execute("SELECT {} FROM ZWAMESSAGE WHERE ZFROMJID=? OR ZTOJID=?;".format(FIELDS), (contact_id, contact_id)) 104 | for row in c: 105 | mdatetime = get_date(row[2]) 106 | mtext = get_text(conn, backup_extractor, row) 107 | mtext = mtext.replace("\n", "
\n") 108 | mfrom, color = get_from(conn, is_group, contact_id, contact_name, your_name, row) 109 | html.write((ROWTEMPLATE % (color, mdatetime, mfrom, mtext))) 110 | html.write(TEMPLATEEND) 111 | html.close() 112 | 113 | def main(backup_extractor): 114 | conn = sqlite3.connect(CHAT_STORAGE_FILE) 115 | c = conn.cursor() 116 | c.execute("SELECT COUNT(*) FROM ZWACHATSESSION") 117 | total_contacts = next(c)[0] 118 | c = conn.cursor() 119 | c.execute("SELECT ZCONTACTJID, ZPARTNERNAME, ZSESSIONTYPE FROM ZWACHATSESSION") 120 | for contact_id, contact_name, is_group in iterate_with_progress(c, total_contacts, "WhatsApp"): 121 | output_contact(conn, backup_extractor, is_group, contact_id, contact_name, "me") 122 | --------------------------------------------------------------------------------