├── .gitignore
├── README.md
├── common.py
├── mbdb.py
├── iphone_history.py
├── sms.py
└── whatsapp.py
/.gitignore:
--------------------------------------------------------------------------------
1 | *.pyc
2 | output_*
3 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | iphone_history
2 | ==============
3 |
4 | Get the full chat history from WhatsApp and SMS/iMessage.
5 |
6 | Usage
7 | -----
8 |
9 | * back up your iPhone using iTunes
10 | * run `python iphone_history.py`
11 |
12 | This script will look for the backup folder created by iTunes, extract the WhatsApp and SMS databases,
13 | and create folders containing the conversation history with each of your contacts.
14 |
--------------------------------------------------------------------------------
/common.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | from time import strftime
3 | import os
4 | import sys
5 | import shutil
6 | from datetime import datetime
7 |
8 | COLORS = ["#f8ff78", "#85d7ff", "cornsilk", "lightpink", "lightgreen", "yellowgreen", "lightgrey", "khaki", "mistyrose"]
9 |
10 | TEMPLATEBEGINNING = u"""
11 |
12 |
13 | %s Conversation
14 |
15 |
31 |
32 |
33 |
34 |
35 | """
36 |
37 | TEMPLATEEND = u"""
38 |
39 |
40 |
41 | """
42 |
43 | ROWTEMPLATE = u"""| %s | %s | %s |
\n"""
44 |
def get_output_dirs(name):
    """Create a fresh, dated output directory tree for one exporter.

    The directory is called ``output_<YYYY_MM_DD>_<name>`` and is resolved
    relative to the current working directory.  If it already exists it is
    deleted first, then recreated together with a ``media`` sub-directory.

    Returns a ``(output_dir, media_dir)`` tuple of paths.
    """
    output_dir = "output_%s_%s" % (strftime("%Y_%m_%d"), name)
    media_dir = os.path.join(output_dir, "media")
    # Start from a clean slate so files from an earlier run never linger.
    if os.path.exists(output_dir):
        shutil.rmtree(output_dir)
    # makedirs creates the parent output directory as well.
    os.makedirs(media_dir)
    return output_dir, media_dir
52 |
# contact -> colour already handed out, and how many colours were assigned.
cached_colors = {}
next_color = 0


def get_color(contact):
    """Return the background colour assigned to *contact*.

    The first request for a contact picks the next entry of ``COLORS``,
    skipping ``COLORS[0]`` and wrapping around when the palette is exhausted.
    Later requests for the same contact return the same colour.
    """
    global next_color
    try:
        return cached_colors[contact]
    except KeyError:
        pass
    palette = COLORS[1:]
    chosen = palette[next_color % (len(COLORS) - 1)]
    cached_colors[contact] = chosen
    next_color += 1
    return chosen
62 |
def reset_colors():
    """Forget every contact-to-colour assignment and restart the rotation.

    The cache is rebound to a brand-new dict rather than cleared in place.
    """
    global cached_colors, next_color
    next_color = 0
    cached_colors = {}
67 |
def get_date(mdate):
    """Format an Apple "reference date" as ``YYYY-MM-DD HH:MM:SS``.

    *mdate* counts seconds since Jan 1 2001; it is truncated to an integer,
    shifted to the Unix epoch and rendered in the machine's local time zone.
    See http://stackoverflow.com/questions/6998541
    """
    # Seconds between Jan 1 1970 (Unix epoch) and Jan 1 2001 (Apple epoch).
    apple_epoch_offset = 978307200
    unix_seconds = apple_epoch_offset + int(mdate)
    return datetime.fromtimestamp(unix_seconds).strftime("%Y-%m-%d %H:%M:%S")
76 |
def sanitize_filename(f):
    """Return *f* with every character from ``?*/\\:"<>|`` replaced by ``-``."""
    invalid_chars = "?*/\\:\"<>|"
    return "".join("-" if char in invalid_chars else char for char in f)
82 |
def find_nonexisting_path(p):
    """Return *p*, or a numbered variant of it, that does not exist yet.

    Used to keep duplicate names from overwriting each other.  When *p* is
    taken, ``-2``, ``-3``, ... is inserted before the extension until a free
    path is found.
    """
    folder, leaf = os.path.split(p)
    stem, suffix = os.path.splitext(leaf)
    candidate = p
    attempt = 1
    while os.path.exists(candidate):
        attempt += 1
        candidate = os.path.join(folder, "%s-%d%s" % (stem, attempt, suffix))
    return candidate
93 |
def iterate_with_progress(iterator, count, name):
    """Yield each item of *iterator* while drawing a text progress bar.

    *count* is the expected number of items and *name* labels the bar.  The
    bar is redrawn on the same terminal line (carriage return) whenever the
    rounded percentage changes, and a newline is printed once the iterator
    is exhausted.
    """
    last_shown = 0
    for position, item in enumerate(iterator, 1):
        yield item
        percent = round(float(position) / count * 100)
        if percent == last_shown:
            continue
        filled = int(percent / 10)
        bar = "[%s%s]" % ("#" * filled, "-" * (10 - filled))
        print("{:10s} {} {}% done".format(name, bar, percent), end="\r")
        sys.stdout.flush()
        last_shown = percent
    print()
105 |
106 |
--------------------------------------------------------------------------------
/mbdb.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python
2 |
3 | from __future__ import print_function
4 | import sys
5 | import hashlib
6 |
7 | mbdx = {}
8 |
def o(b):
    """Return the integer value of a single byte.

    Indexing ``bytes`` yields an ``int`` on Python 3 but a one-character
    ``str`` on Python 2; this normalises both to an ``int``.  Anything that
    ``ord`` rejects (for example a value that is already an int) is returned
    unchanged.
    """
    try:
        return ord(b)
    except TypeError:
        return b


def getint(data, offset, intsize):
    """Retrieve an integer (big-endian) and new offset from the current offset.

    Reads *intsize* bytes of *data* starting at *offset* and returns
    ``(value, offset_after_the_integer)``.  Raises ``IndexError`` when the
    data ends before *intsize* bytes were read.
    """
    end = offset + max(intsize, 0)
    value = 0
    for position in range(offset, end):
        value = (value << 8) + o(data[position])
    return value, end


def getstring(data, offset):
    """Retrieve a string and new offset from the current offset into the data.

    A string is stored as a 2-byte big-endian length followed by that many
    bytes; the two bytes ``FF FF`` stand for a blank string.  Returns
    ``(value, offset_after_the_string)`` where *value* is a slice of *data*,
    so blank and non-blank strings always have the same type as *data*.
    """
    if o(data[offset]) == 0xFF and o(data[offset + 1]) == 0xFF:
        # Blank string: an empty slice keeps the bytes/str type of `data`.
        return data[offset:offset], offset + 2
    length, offset = getint(data, offset, 2)  # 2-byte length
    end = offset + length
    return data[offset:end], end
31 |
def process_mbdb_file_internal(filename):
    """Parse a ``Manifest.mbdb`` file into a dict of file records.

    Returns a dict mapping each record's start offset in the file to a dict
    of its fields (``domain``, ``filename``, ``mode``, ``properties``, ...).
    As a side effect the module-level ``mbdx`` dict is filled with
    ``start offset -> SHA-1 hex digest of "<domain>-<filename>"``.

    Raises ``Exception`` if the file does not start with the ``mbdb`` magic.
    """
    # Read everything up front and close the handle straight away.
    with open(filename, "rb") as manifest:
        data = manifest.read()
    if data[0:4] != b"mbdb":
        raise Exception("This does not look like an MBDB file")

    string_fields = ('domain', 'filename', 'linktarget', 'datahash', 'unknown1')
    int_fields = (('mode', 2), ('unknown2', 4), ('unknown3', 4),
                  ('userid', 4), ('groupid', 4),
                  ('mtime', 4), ('atime', 4), ('ctime', 4),
                  ('filelen', 8), ('flag', 1), ('numprops', 1))

    mbdb = {}  # Map offset of info in this file => file info
    # Skip the 4-byte magic plus 2 more bytes (value x05 x00, meaning unknown).
    offset = 4 + 2
    while offset < len(data):
        fileinfo = {'start_offset': offset}
        for key in string_fields:
            fileinfo[key], offset = getstring(data, offset)
        for key, size in int_fields:
            fileinfo[key], offset = getint(data, offset, size)
        properties = {}
        for _ in range(fileinfo['numprops']):
            propname, offset = getstring(data, offset)
            propval, offset = getstring(data, offset)
            properties[propname] = propval
        fileinfo['properties'] = properties
        mbdb[fileinfo['start_offset']] = fileinfo
        # `or b''` keeps the concatenation in bytes even if a blank field
        # came back as an empty text string.
        fullpath = (fileinfo['domain'] or b'') + b'-' + (fileinfo['filename'] or b'')
        mbdx[fileinfo['start_offset']] = hashlib.sha1(fullpath).hexdigest()
    return mbdb
67 |
def process_mbdb_file(fname):
    """Yield every file record of the MBDB file *fname*, tagged with its ID.

    Each record gets a ``fileID`` entry taken from ``mbdx``; when no ID is
    known for the record's offset, ``fileID`` is set to ``""`` and a note is
    written to stderr.  Nothing is read until the first record is requested.
    """
    for start, info in process_mbdb_file_internal(fname).items():
        try:
            info['fileID'] = mbdx[start]
        except KeyError:
            info['fileID'] = ""
            print("No fileID found for %s" % fileinfo_str(info), file=sys.stderr)
        yield info
77 |
def modestr(val):
    """Render the low nine permission bits of *val* as ``rwxrwxrwx`` text."""
    def triplet(bits):
        return "".join(flag if bits & mask else "-"
                       for mask, flag in ((0x4, 'r'), (0x2, 'w'), (0x1, 'x')))
    return "".join(triplet(val >> shift) for shift in (6, 3, 0))


def _display(value):
    """Decode raw manifest bytes to text for printing; pass text through."""
    if isinstance(value, bytes) and not isinstance(value, str):
        return value.decode("utf-8", "replace")
    return value


def fileinfo_str(f, verbose=False):
    """Format the file record *f* for display.

    The short form is ``(<fileID>)<domain>::<filename>``.  With *verbose*
    the result is an ``ls -l`` style line: type character, permissions,
    user id, group id, length, the three timestamps, the short form, the
    link target for symlinks, and every extra property as ``name=repr(value)``.
    Byte-string fields are decoded so the text is the same on Python 2 and 3.
    An unrecognised file type is reported on stderr and shown as ``?``.
    """
    summary = "(%s)%s::%s" % (f['fileID'], _display(f['domain']), _display(f['filename']))
    if not verbose:
        return summary

    kind_bits = f['mode'] & 0xE000
    if kind_bits == 0xA000:
        kind = 'l'  # symlink
    elif kind_bits == 0x8000:
        kind = '-'  # file
    elif kind_bits == 0x4000:
        kind = 'd'  # dir
    else:
        print("Unknown file type %04x for %s" % (f['mode'], summary), file=sys.stderr)
        kind = '?'  # unknown

    info = ("%s%s %08x %08x %7d %10d %10d %10d %s" %
            (kind, modestr(f['mode'] & 0x0FFF), f['userid'], f['groupid'], f['filelen'],
             f['mtime'], f['atime'], f['ctime'], summary))
    if kind == 'l':
        info += ' -> ' + _display(f['linktarget'])  # symlink destination
    for name, value in f['properties'].items():  # extra properties
        info += ' ' + _display(name) + '=' + repr(value)
    return info
104 |
# Whether the command-line dump below prints the long per-file listing.
verbose = True

if __name__ == '__main__':
    # Run as a script: dump the Manifest.mbdb found in the current directory.
    for entry in process_mbdb_file("Manifest.mbdb"):
        print(fileinfo_str(entry, verbose))
--------------------------------------------------------------------------------
/iphone_history.py:
--------------------------------------------------------------------------------
1 | from __future__ import print_function
2 | import os
3 | import shutil
4 | import sys
5 | import re
6 | import datetime
7 | import mbdb
8 | import whatsapp
9 | import sms
10 |
11 |
class BackupExtractor(object):
    """Represent a single backup directory.

    Used to retrieve the device name and the date of the backup (both read
    from ``Info.plist`` with regular expressions), and to convert file paths
    from the device filesystem to the actual filesystem via ``Manifest.mbdb``.
    """

    # The first <date> element in Info.plist is taken as the backup date.
    _DATE_RE = re.compile(r"<date>([^<]*)</date>")
    _DEVICE_NAME_RE = re.compile(r"<key>Device Name</key>\s*<string>([^<]*)</string>")
    _DATE_FORMAT = "%Y-%m-%dT%H:%M:%SZ"

    def __init__(self, dir):
        """Remember the backup directory *dir* and read its ``Info.plist``."""
        self._dir = dir
        self._file_index = None
        # Defaults used when Info.plist is missing or incomplete.
        self._date = datetime.datetime.fromtimestamp(0)
        self._device_name = ""
        self._parse_info_plist()

    def _parse_info_plist(self):
        """Fill in the date and device name, warning about anything missing."""
        info_file = os.path.join(self._dir, "Info.plist")
        if not os.path.exists(info_file):
            print("WARNING: no Info.plist file found in backup folder %s" % (self._dir,))
            return
        with open(info_file, "r") as handle:
            info_data = handle.read()

        date_match = self._DATE_RE.search(info_data)
        if date_match is None:
            print("WARNING: no date found for backup folder %s" % (self._dir,))
        else:
            try:
                self._date = datetime.datetime.strptime(date_match.group(1), self._DATE_FORMAT)
            except ValueError:
                # One odd backup must not stop the others from being listed.
                print("WARNING: unreadable date %r in backup folder %s"
                      % (date_match.group(1), self._dir))

        name_match = self._DEVICE_NAME_RE.search(info_data)
        if name_match is None:
            print("WARNING: no device name found in backup folder %s" % (self._dir,))
        else:
            self._device_name = name_match.group(1)

    def get_date(self):
        """Return the backup date, or the Unix epoch if none was found."""
        return self._date

    def get_device_name(self):
        """Return the device name, or ``""`` if none was found."""
        return self._device_name

    @staticmethod
    def _as_text(raw):
        """Decode manifest bytes as UTF-8 (dropping bad bytes); keep text as is."""
        if isinstance(raw, bytes):
            return raw.decode("utf-8", "ignore")
        return raw

    def _get_file_index(self):
        """Build, once, the map of ``(domain, filename)`` to a path on disk."""
        if self._file_index is not None:
            return self._file_index

        mbdb_file = os.path.join(self._dir, "Manifest.mbdb")

        # file index: map domain+filename to physical file in backup directory.
        # Built in a local first so a parse error never caches a partial index.
        index = dict()
        for f in mbdb.process_mbdb_file(mbdb_file):
            key = (self._as_text(f['domain']), self._as_text(f['filename']))
            index[key] = os.path.join(self._dir, str(f['fileID']))
        self._file_index = index
        return self._file_index

    def get_file_path(self, domain, filename):
        """Return the on-disk path of a device file, or ``None`` if absent."""
        return self._get_file_index().get((domain, filename), None)
65 |
66 |
def get_latest_backup():
    """Locate the iTunes backups of this machine and return one of them.

    Returns a ``BackupExtractor`` for the most recent backup, or for the one
    the user picks when the script's only argument is ``--choose``.  Returns
    ``None`` when the platform is unsupported, when the backup folder does
    not exist, or when it contains no backup directories.
    """
    if sys.platform == "win32":
        backups_root = os.path.expandvars(r"%appdata%\Apple Computer\MobileSync\Backup")
    elif sys.platform == "darwin":
        backups_root = os.path.expanduser("~/Library/Application Support/MobileSync/Backup")
    else:
        print("Unsupported system: %s" % sys.platform)
        return None

    # iTunes may never have made a backup on this machine.
    if not os.path.isdir(backups_root):
        return None

    candidates = (os.path.join(backups_root, entry) for entry in os.listdir(backups_root))
    list_of_backups = [BackupExtractor(path) for path in candidates if os.path.isdir(path)]
    # Checked after filtering: stray files must not count as backups.
    if not list_of_backups:
        return None

    list_of_backups.sort(key=lambda backup: backup.get_date())

    choose = len(sys.argv) == 2 and sys.argv[-1] == "--choose"
    if not choose:
        return list_of_backups[-1]  # latest backup

    print("Choose backup:")
    for i, backup in enumerate(list_of_backups, 1):
        print("%d. %s %s" % (i, backup.get_device_name(), backup.get_date()))
    index = int(input()) - 1
    return list_of_backups[index]
95 |
def lib_main(backup_extractor, lib):
    """Run one exporter module against the backup.

    ``lib.FILES`` lists ``(phone_path, file_path)`` pairs.  Each required
    file is copied out of the backup to ``file_path``, ``lib.main`` is run,
    and the copies are removed again, even when the exporter raises.  If a
    required file is not in the backup a message is printed and the exporter
    is skipped.
    """
    files_to_copy = []
    for phone_path, file_path in lib.FILES:
        # phone_path is either a tuple (domain, filename) or a list of such
        # tuples (in which case, only one of the domain/filename should be copied)
        candidates = phone_path if isinstance(phone_path, list) else [phone_path]
        existing_file_path = None
        for domain, filename in candidates:
            existing_file_path = backup_extractor.get_file_path(domain, filename)
            if existing_file_path is not None:
                break
        if existing_file_path is None:
            wanted = " or ".join("{}/{}".format(*candidate) for candidate in candidates)
            print("Could not find file in backup: " + wanted)
            return
        files_to_copy.append((existing_file_path, file_path))

    copied = []
    try:
        for existing_file_path, file_path in files_to_copy:
            shutil.copy(existing_file_path, file_path)
            copied.append(file_path)
        lib.main(backup_extractor)
    finally:
        # Never leave extracted databases behind in the output folder.
        for file_path in copied:
            if os.path.exists(file_path):
                os.remove(file_path)
119 |
def main():
    """Export WhatsApp and SMS history from the selected backup.

    Prints a message and exits when no backup could be located.
    """
    backup_extractor = get_latest_backup()
    if backup_extractor is not None:
        for lib in (whatsapp, sms):
            lib_main(backup_extractor, lib)
        return
    print("Could not find backup folder")
    sys.exit()


if __name__ == "__main__":
    main()
131 |
--------------------------------------------------------------------------------
/sms.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | import os
3 | import shutil
4 | from io import open
5 |
6 | from common import COLORS, TEMPLATEBEGINNING, TEMPLATEEND, ROWTEMPLATE
7 | from common import get_color, reset_colors, get_date, iterate_with_progress, get_output_dirs
8 | from common import sanitize_filename, find_nonexisting_path
9 |
# NOTE: importing this module (re)creates the dated SMS output folder.
OUTPUT_DIR, MEDIA_DIR = get_output_dirs("sms")

# Where the databases extracted from the backup are placed while exporting.
CHAT_STORAGE_FILE = os.path.join(OUTPUT_DIR, "sms.db")
CONTACTS_FILE = os.path.join(OUTPUT_DIR, "AddressBook.sqlitedb")

# ((domain, path on the phone), local copy) pairs the exporter needs.
FILES = [(("HomeDomain", "Library/SMS/sms.db"), CHAT_STORAGE_FILE),
         (("HomeDomain", "Library/AddressBook/AddressBook.sqlitedb"), CONTACTS_FILE)]

# Columns selected from the `message` table, in the order they are unpacked.
FIELDS = "ROWID, text, date, is_from_me, handle_id, cache_has_attachments"

# U+FFFC OBJECT REPLACEMENT CHARACTER, replaced by the attachment markup.
# The u prefix is required on Python 2, where a plain "\ufffc" literal is
# six ordinary characters and never matches.
OBJ_MARKER = u"\ufffc"
21 |
# handle ROWID -> resolved display name
contact_cache = {}


def get_contact_name(conn, contact_conn, contact_id):
    """Return a display name for the message handle *contact_id*.

    *conn* is the SMS database and *contact_conn* the address book.  The
    handle's id (a phone number or an iMessage address) is looked up first;
    a handle starting with ``+`` is then searched for in the address book in
    a few common spellings and, if a named contact matches, the contact's
    "first last" name is returned instead.  Falls back to the raw handle, or
    to ``"UNKNOWN"`` when the handle does not exist.  Results are cached.

    NOTE: only the ``+972`` country prefix is rewritten to the local ``0``
    form before matching; other countries match on the full number only.
    """
    if contact_id in contact_cache:
        return contact_cache[contact_id]

    cursor = conn.cursor()
    cursor.execute("SELECT id FROM handle WHERE ROWID=?;", (contact_id,))
    row = cursor.fetchone()
    # this is either a phone number or an iMessage address
    handle_id = row[0] if row is not None and row[0] is not None else "UNKNOWN"

    display_name = handle_id
    if handle_id.startswith("+"):
        p = handle_id.replace("+972", "0")
        phone_options = (handle_id, p,
                         "{}-{}-{}".format(p[-10:-7], p[-7:-4], p[-4:]),
                         "({}) {} {}".format(p[-10:-7], p[-7:-4], p[-4:]))
        cursor = contact_conn.cursor()
        cursor.execute("SELECT record_id FROM ABMultiValue "
                       "WHERE value=? or value=? or value=? or value=?", phone_options)
        for record in cursor.fetchall():
            person_cursor = contact_conn.cursor()
            person_cursor.execute("SELECT first, last FROM ABPerson WHERE ROWID=?", record)
            person = person_cursor.fetchone()
            if person is None:
                continue  # dangling reference to a deleted contact
            full_name = " ".join(part for part in person if part)
            if full_name:
                # Keep the number when the contact has no usable name.
                display_name = full_name

    contact_cache[contact_id] = display_name
    return display_name
45 |
def copy_media_file(backup_extractor, path_in_backup):
    """Copy one message attachment from the backup into the media folder.

    *path_in_backup* is the attachment path as stored in the database; a
    leading ``/var/mobile/`` or ``~/`` is dropped before it is looked up in
    the ``MediaDomain``.  Returns the path of the copy, or ``""`` when the
    path is empty or the file is not available in the backup.  A name that
    is already taken in the media folder gets a numeric suffix instead of
    being overwritten.
    """
    if not path_in_backup:
        return ""
    for prefix in ("/var/mobile/", "~/"):
        if path_in_backup.startswith(prefix):
            path_in_backup = path_in_backup[len(prefix):]
            break
    filepath = backup_extractor.get_file_path("MediaDomain", path_in_backup)
    # The manifest may list a file that is no longer present on disk.
    if filepath is None or not os.path.exists(filepath):
        return ""
    new_media_path = find_nonexisting_path(
        os.path.join(MEDIA_DIR, os.path.basename(path_in_backup)))
    shutil.copy(filepath, new_media_path)
    return new_media_path
57 |
def handle_media(conn, backup_extractor, message_id, mtext):
    """Embed the attachments of message *message_id* into its text.

    Every attachment is copied to the media folder and turned into an
    ``<img>``, ``<video>`` or ``<audio>`` element; other types, and files
    that could not be copied, become a bracketed note.  Each element
    replaces the next attachment marker (``OBJ_MARKER``) in *mtext*, or is
    appended when no marker is left.  Returns the resulting text.
    """
    tag_for_type = {"video": "video", "image": "img", "audio": "audio"}
    tag_format = '<{0} src="media/{1}" style="width:200px;"{2}>'

    c = conn.cursor()
    c.execute("SELECT filename, mime_type FROM attachment WHERE ROWID in "
              "(SELECT attachment_id FROM message_attachment_join WHERE message_id=?);",
              (message_id,))
    for filename, mime_type in c.fetchall():
        new_media_path = copy_media_file(backup_extractor, filename)
        # The mime type may be NULL in the database.
        media_type = mime_type.split("/")[0] if mime_type else "unknown"
        tag = tag_for_type.get(media_type)
        if tag is None:
            media_element = "[unknown attachment type: {}]".format(media_type)
        elif not new_media_path:
            media_element = "[missing {}]".format(media_type)
        else:
            controls = " controls" if tag in ("audio", "video") else ""
            media_element = tag_format.format(tag, os.path.basename(new_media_path), controls)
        if OBJ_MARKER in mtext:
            mtext = mtext.replace(OBJ_MARKER, media_element, 1)
        else:
            mtext = mtext + media_element
    return mtext
77 |
def get_filename(conn, contact_conn, chat_id):
    """Return an unused ``.html`` path in the output folder for chat *chat_id*.

    The file name is made of the display names of the chat's handles joined
    with `` & ``, with characters that are invalid in file names replaced.
    """
    cursor = conn.cursor()
    cursor.execute("SELECT handle_id FROM chat_handle_join WHERE chat_id=?;", (chat_id,))
    participants = [get_contact_name(conn, contact_conn, handle_row[0])
                    for handle_row in cursor]
    base_name = sanitize_filename(" & ".join(participants))
    return find_nonexisting_path(os.path.join(OUTPUT_DIR, '%s.html' % base_name))
88 |
def _escape_html(text):
    """Escape the characters that are special in HTML text (``&``, ``<``, ``>``)."""
    return text.replace(u"&", u"&amp;").replace(u"<", u"&lt;").replace(u">", u"&gt;")


def output_contact(conn, contact_conn, backup_extractor, chat_id, your_name):
    """Write the whole conversation of chat *chat_id* to its own HTML file.

    One table row is written per message, holding the row colour, the date,
    the sender (*your_name* for outgoing messages) and the text with any
    attachments embedded.  Message text and sender names are HTML-escaped so
    that message content cannot inject markup into the page.
    """
    reset_colors()
    c = conn.cursor()
    c.execute("SELECT {} FROM message WHERE ROWID in ".format(FIELDS) +
              "(SELECT message_id FROM chat_message_join WHERE chat_id=?);", (chat_id,))
    # `with` guarantees the file is closed even if a message fails to export.
    with open(get_filename(conn, contact_conn, chat_id), 'w', encoding="utf-8") as html:
        html.write(TEMPLATEBEGINNING % ("SMS/iMessage",))
        for mid, mtext, mdate, is_from_me, handle_id, has_attachment in c:
            # Escape before attachments are embedded: their markup must stay intact.
            mtext = _escape_html(mtext) if mtext is not None else u""
            if has_attachment:
                mtext = handle_media(conn, backup_extractor, mid, mtext)
            mtext = mtext.replace("\n", "<br>\n")
            mdatetime = get_date(mdate)
            mfrom = your_name if is_from_me else get_contact_name(conn, contact_conn, handle_id)
            color = COLORS[0] if is_from_me else get_color(handle_id)
            html.write(ROWTEMPLATE % (color, mdatetime, _escape_html(mfrom), mtext))
        html.write(TEMPLATEEND)
110 |
def main(backup_extractor):
    """Export every SMS/iMessage chat of the extracted databases to HTML.

    Both database connections are closed before returning, so the caller can
    delete the temporary database files afterwards (an open handle prevents
    that on Windows).
    """
    contact_conn = sqlite3.connect(CONTACTS_FILE)
    try:
        conn = sqlite3.connect(CHAT_STORAGE_FILE)
        try:
            c = conn.cursor()
            c.execute("SELECT COUNT(*) FROM chat")
            total_contacts = c.fetchone()[0]
            c = conn.cursor()
            c.execute("SELECT ROWID FROM chat")
            for chat_id in iterate_with_progress(c, total_contacts, "SMS"):
                output_contact(conn, contact_conn, backup_extractor, chat_id[0], "me")
        finally:
            conn.close()
    finally:
        contact_conn.close()
121 |
--------------------------------------------------------------------------------
/whatsapp.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | import os
3 | import shutil
4 | import codecs
5 | from io import open
6 | import sys
7 |
8 | from common import COLORS, TEMPLATEBEGINNING, TEMPLATEEND, ROWTEMPLATE
9 | from common import get_color, reset_colors, get_date, iterate_with_progress, get_output_dirs
10 | from common import sanitize_filename, find_nonexisting_path
11 |
# Python 3 has no separate `unicode` type; alias it so the module can call
# unicode(...) on either major version.
if sys.version.startswith("3"):
    unicode = str

# NOTE: importing this module (re)creates the dated WhatsApp output folder.
OUTPUT_DIR, MEDIA_DIR = get_output_dirs("whatsapp")

# Where the chat database extracted from the backup is placed while exporting.
CHAT_STORAGE_FILE = os.path.join(OUTPUT_DIR, "ChatStorage.sqlite")

# The chat database is looked up under either of these (domain, path) pairs;
# the first one found in the backup is used.
FILES = [
    (
        [
            ("AppDomainGroup-group.net.whatsapp.WhatsApp.shared", "ChatStorage.sqlite"),
            ("AppDomain-net.whatsapp.WhatsApp", "Documents/ChatStorage.sqlite"),
        ],
        CHAT_STORAGE_FILE,
    ),
]

# Columns selected from ZWAMESSAGE, in the order they are unpacked.
FIELDS = "ZFROMJID, ZTEXT, ZMESSAGEDATE, ZMESSAGETYPE, ZGROUPEVENTTYPE, ZGROUPMEMBER, ZMEDIAITEM"
22 |
# group member primary key -> contact name
cached_members = {}


def get_group_member_name(conn, id):
    """Return the contact name of the group member with primary key *id*.

    Results are cached.  ``"UNKNOWN"`` is returned when the member row does
    not exist or has no name, so callers can always concatenate the result.
    """
    if id in cached_members:
        return cached_members[id]
    cursor = conn.cursor()
    cursor.execute("SELECT ZCONTACTNAME FROM ZWAGROUPMEMBER WHERE Z_PK=?", (id,))
    row = cursor.fetchone()
    name = row[0] if row is not None and row[0] is not None else "UNKNOWN"
    cached_members[id] = name
    return name
31 |
def get_media_data(conn, mediaid, cols):
    """Return the columns *cols* of the media item *mediaid* as a tuple.

    *cols* is a comma-separated column list that is inserted into the SQL
    text, so it must be a trusted constant.  When no such media item exists
    a tuple of ``None`` values (one per requested column) is returned, which
    lets callers treat the media as missing instead of crashing.
    """
    cursor = conn.cursor()
    cursor.execute("SELECT {} FROM ZWAMEDIAITEM WHERE Z_PK=?".format(cols), (mediaid,))
    row = cursor.fetchone()
    if row is None:
        return (None,) * len(cols.split(","))
    return row
36 |
def copy_media_file(backup_extractor, path_in_backup):
    """Copy one WhatsApp media file from the backup into the media folder.

    *path_in_backup* is the media path as stored in the database; it is
    looked up below ``Library/`` in the ``AppDomain-net.whatsapp.WhatsApp``
    domain.  Returns the path of the copy, or ``""`` when the file is not
    available in the backup.
    """
    separator = "" if path_in_backup.startswith("/") else "/"
    path_in_backup = "Library" + separator + path_in_backup
    filepath = backup_extractor.get_file_path("AppDomain-net.whatsapp.WhatsApp", path_in_backup)
    # Media may be referenced by a message without being part of the backup.
    if filepath is None or not os.path.exists(filepath):
        return ""
    new_media_path = os.path.join(MEDIA_DIR, os.path.basename(path_in_backup))
    shutil.copy(filepath, new_media_path)
    return new_media_path
43 |
def handle_media(conn, backup_extractor, mtype, mmediaitem):
    """Return the HTML or text that represents a media message.

    *mtype* is the message type (1 image, 2 video, 3 audio, 4 contact,
    5 location) and *mmediaitem* the primary key of its media item.  Images,
    videos and audio are copied to the media folder and returned as an HTML
    element; contacts and locations are returned as a bracketed description.
    ``[missing <type>]`` is returned when the media data or file is absent.
    """
    mediadata = ["ZMEDIALOCALPATH", "ZMEDIALOCALPATH", "ZMEDIALOCALPATH", "ZVCARDNAME",
                 "ZLATITUDE, ZLONGITUDE"][mtype - 1]
    mtypestr = {1: "image", 2: "video", 3: "audio", 4: "contact", 5: "location"}[mtype]
    missing = "[missing {}]".format(mtypestr)

    data = get_media_data(conn, mmediaitem, mediadata)
    if data[0] is None:
        return missing
    data = u", ".join([unicode(x) for x in data])

    if mtype in [1, 2, 3]:
        new_media_path = copy_media_file(backup_extractor, data)
        if not new_media_path:
            return missing
        tag_format = '<{0} src="media/{1}" style="width:200px;"{2}>'
        tag = ["img", "video", "audio"][mtype - 1]
        controls = " controls" if tag in ["audio", "video"] else ""
        return tag_format.format(tag, os.path.basename(new_media_path), controls)

    if mtype == 4 and data.startswith("="):
        # if the vCard has no contact image the format of the row in the db is a little different,
        # and name is encoded using quopri encoding
        try:
            # quopri works on bytes: text -> ASCII bytes -> decoded bytes -> UTF-8 text.
            data = codecs.decode(data.encode("ascii"), "quopri").decode("utf-8")
        except (UnicodeError, ValueError):
            pass  # best effort: keep the raw value if it is not valid quoted-printable
    return u"[{} - {}]".format(mtypestr, data)
66 |
def _escape_html(text):
    """Escape the characters that are special in HTML text (``&``, ``<``, ``>``)."""
    return text.replace(u"&", u"&amp;").replace(u"<", u"&lt;").replace(u">", u"&gt;")


def get_text(conn, backup_extractor, row):
    """Return the HTML body for one message *row* (columns as in ``FIELDS``).

    Plain messages (type 0) yield their escaped text, or ``""`` when the
    text is NULL; group events (type 6) a bracketed description; media
    messages (types 1-5) whatever ``handle_media`` produces; anything else a
    ``[message type N]`` placeholder.  User-supplied text is HTML-escaped
    because the result is written into the page as markup.
    """
    mfrom, mtext, mdate, mtype, mgroupeventtype, mgroupmember, mmediaitem = row
    if mtype == 0:
        return _escape_html(mtext) if mtext is not None else u""
    if mtype == 6:
        if mgroupmember is None:
            mgroupmember = "you"
        else:
            mgroupmember = _escape_html(get_group_member_name(conn, mgroupmember))
        if mgroupeventtype not in [1, 2, 3, 4]:
            return u"[group event {} by {}]".format(mgroupeventtype, mgroupmember)
        subject = _escape_html(mtext) if mtext is not None else mtext
        change_text = {1: u"changed the group subject to {}".format(subject),
                       2: u"joined", 3: u"left", 4: u"changed the group photo"}
        return u"[{} {}]".format(mgroupmember, change_text[mgroupeventtype])
    if mtype in [1, 2, 3, 4, 5]:
        return handle_media(conn, backup_extractor, mtype, mmediaitem)
    return u"[message type %d]" % mtype
81 |
def get_from(conn, is_group, contact_id, contact_name, your_name, row):
    """Return ``(sender label, row colour)`` for one message *row*.

    A message whose sender JID differs from *contact_id* is treated as sent
    by you and gets ``COLORS[0]``.  Otherwise the label is the contact name,
    extended in group chats with the member's name (except for group-event
    messages, type 6), and the colour is the one assigned to that label.
    """
    sender_jid, _, _, message_type, _, member_id, _ = row
    if sender_jid != contact_id:
        label = contact_name + " - " + your_name if is_group else your_name
        return label, COLORS[0]
    label = contact_name
    if is_group and member_id is not None and message_type != 6:
        label += " - " + get_group_member_name(conn, member_id)
    return label, get_color(label)
95 |
def output_contact(conn, backup_extractor, is_group, contact_id, contact_name, your_name):
    """Write the whole conversation with one contact or group to an HTML file.

    The file is named after *contact_name* (falling back to *contact_id*
    when the chat has no name) and holds one table row per message sent to
    or received from *contact_id*: row colour, date, sender and body.
    """
    reset_colors()
    if contact_name is None:
        # Chat sessions without a partner name would otherwise crash below.
        contact_name = contact_id if contact_id is not None else "UNKNOWN"
    file_path = find_nonexisting_path(
        os.path.join(OUTPUT_DIR, '%s.html' % sanitize_filename(contact_name)))
    c = conn.cursor()
    c.execute("SELECT {} FROM ZWAMESSAGE WHERE ZFROMJID=? OR ZTOJID=?;".format(FIELDS),
              (contact_id, contact_id))
    # `with` guarantees the file is closed even if a message fails to export.
    with open(file_path, 'w', encoding="utf-8") as html:
        html.write(TEMPLATEBEGINNING % ("WhatsApp",))
        for row in c:
            mdatetime = get_date(row[2])
            mtext = get_text(conn, backup_extractor, row)
            mtext = mtext.replace("\n", "<br>\n")
            mfrom, color = get_from(conn, is_group, contact_id, contact_name, your_name, row)
            html.write(ROWTEMPLATE % (color, mdatetime, mfrom, mtext))
        html.write(TEMPLATEEND)
112 |
def main(backup_extractor):
    """Export every WhatsApp chat session of the extracted database to HTML.

    The database connection is closed before returning, so the caller can
    delete the temporary database file afterwards (an open handle prevents
    that on Windows).
    """
    conn = sqlite3.connect(CHAT_STORAGE_FILE)
    try:
        c = conn.cursor()
        c.execute("SELECT COUNT(*) FROM ZWACHATSESSION")
        total_contacts = c.fetchone()[0]
        c = conn.cursor()
        c.execute("SELECT ZCONTACTJID, ZPARTNERNAME, ZSESSIONTYPE FROM ZWACHATSESSION")
        for contact_id, contact_name, is_group in iterate_with_progress(c, total_contacts, "WhatsApp"):
            output_contact(conn, backup_extractor, is_group, contact_id, contact_name, "me")
    finally:
        conn.close()
122 |
--------------------------------------------------------------------------------