├── .gitignore ├── config-db.sample.php ├── contacts.php ├── archive.php ├── export-sql.php ├── export-csv.php ├── include.php └── README.md /.gitignore: -------------------------------------------------------------------------------- 1 | messages/* 2 | .DS_Store 3 | contacts.txt 4 | last.txt 5 | vendor/ 6 | config-db.php 7 | -------------------------------------------------------------------------------- /config-db.sample.php: -------------------------------------------------------------------------------- 1 | 'aaronpk', 4 | 'host' => '127.0.0.1', 5 | 'username' => 'root', 6 | 'password' => '' 7 | ); 8 | -------------------------------------------------------------------------------- /contacts.php: -------------------------------------------------------------------------------- 1 | query('SELECT * FROM handle'); 12 | while($q = $query->fetch(PDO::FETCH_ASSOC)) { 13 | if(!in_array($q['id'], $contacts)) { 14 | echo $q['id'] . "\n"; 15 | } 16 | } 17 | 18 | -------------------------------------------------------------------------------- /archive.php: -------------------------------------------------------------------------------- 1 | fetch(PDO::FETCH_ASSOC)) { 15 | $fn = filename_for_message($line['contact'], $line['date']); 16 | echo $fn."\n"; 17 | if(!file_exists(dirname($fn))) { 18 | mkdir(dirname($fn)); 19 | } 20 | if(!file_exists($fn)) { 21 | file_put_contents($fn, html_template()); 22 | } 23 | 24 | $attachment_query = $db->query('SELECT attachment.* 25 | FROM attachment 26 | JOIN message_attachment_join ON message_attachment_join.attachment_id=attachment.ROWID 27 | WHERE message_attachment_join.message_id = ' . 
$line['ROWID']); 28 | $attachments = array(); 29 | while($attachment = $attachment_query->fetch(PDO::FETCH_ASSOC)) { 30 | $attachments[] = $attachment; 31 | } 32 | 33 | if(!entry_exists($line, $attachments, $fn)) { 34 | $fp = fopen($fn, 'a'); 35 | $log = format_line($line, $attachments); 36 | fwrite($fp, $log."\n"); 37 | fclose($fp); 38 | echo date('c', $line['date']) . "\t" . $line['contact'] . "\t" . $line['text'] . "\n"; 39 | foreach($attachments as $at) { 40 | $imgsrc = attachment_folder($line['contact'], $line['date']) . $at['transfer_name']; 41 | if(!file_exists(dirname($imgsrc))) 42 | mkdir(dirname($imgsrc)); 43 | copy(str_replace('~/',$_SERVER['HOME'].'/',$at['filename']), $imgsrc); 44 | } 45 | } 46 | 47 | if($line['date'] > $last_timestamp) { 48 | $last_timestamp = $line['date']; 49 | } 50 | } 51 | 52 | if($last_timestamp > 0) 53 | file_put_contents($last_fn, $last_timestamp); 54 | 55 | -------------------------------------------------------------------------------- /export-sql.php: -------------------------------------------------------------------------------- 1 | exec('SET CHARACTER SET utf8mb4'); 8 | 9 | $query = $sql->prepare('SELECT * FROM messages ORDER BY date DESC LIMIT 1'); 10 | $query->execute(); 11 | $last = $query->fetch(PDO::FETCH_OBJ); 12 | if($last) { 13 | $last = $last->timestamp; 14 | } else { 15 | $last = 0; 16 | } 17 | 18 | $insert = $sql->prepare('INSERT INTO messages 19 | (`timestamp`, `date`, `time`, `from`, `from_name`, `to`, `to_name`, `message`, `num_emoji`, `num_attachments`) 20 | VALUES(?,?,?,?,?,?,?,?,?,?)'); 21 | 22 | $query = query_messages_since($db, $last); 23 | $last_timestamp = 0; 24 | while($line = $query->fetch(PDO::FETCH_ASSOC)) { 25 | 26 | $attachment_query = $db->query('SELECT attachment.* 27 | FROM attachment 28 | JOIN message_attachment_join ON message_attachment_join.attachment_id=attachment.ROWID 29 | WHERE message_attachment_join.message_id = ' . 
$line['ROWID']); 30 | $attachments = array(); 31 | while($attachment = $attachment_query->fetch(PDO::FETCH_ASSOC)) { 32 | $attachments[] = $attachment; 33 | } 34 | 35 | if($line['is_from_me']) { 36 | $from = $me; 37 | $to = $line['contact']; 38 | } else { 39 | $from = $line['contact']; 40 | $to = $me; 41 | } 42 | $from_name = contact_name($from); 43 | $to_name = contact_name($to); 44 | 45 | $num_emoji = 0; 46 | if(preg_match_all('/(?:[0-9|#][\x{20E3}])|[\x{00ae}|\x{00a9}|\x{203C}|\x{2047}|\x{2048}|\x{2049}|\x{3030}|\x{303D}|\x{2139}|\x{2122}|\x{3297}|\x{3299}][\x{FE00}-\x{FEFF}]?|[\x{2190}-\x{21FF}][\x{FE00}-\x{FEFF}]?|[\x{2300}-\x{23FF}][\x{FE00}-\x{FEFF}]?|[\x{2460}-\x{24FF}][\x{FE00}-\x{FEFF}]?|[\x{25A0}-\x{25FF}][\x{FE00}-\x{FEFF}]?|[\x{2600}-\x{27BF}][\x{FE00}-\x{FEFF}]?|[\x{2900}-\x{297F}][\x{FE00}-\x{FEFF}]?|[\x{2B00}-\x{2BF0}][\x{FE00}-\x{FEFF}]?|[\x{1F000}-\x{1F6FF}][\x{FE00}-\x{FEFF}]?/u', $line['text'], $matches)) { 47 | $num_emoji = count($matches[0]); 48 | } 49 | 50 | $insert->bindValue(1, $line['date']); 51 | $insert->bindValue(2, date('Y-m-d', $line['date'])); 52 | $insert->bindValue(3, date('H:i:s', $line['date'])); 53 | $insert->bindValue(4, $from); 54 | $insert->bindValue(5, $from_name); 55 | $insert->bindValue(6, $to); 56 | $insert->bindValue(7, $to_name); 57 | $insert->bindValue(8, trim($line['text'])); 58 | $insert->bindValue(9, $num_emoji); 59 | $insert->bindValue(10, count($attachments)); 60 | $insert->execute(); 61 | 62 | } 63 | 64 | -------------------------------------------------------------------------------- /export-csv.php: -------------------------------------------------------------------------------- 1 | fetch(PDO::FETCH_ASSOC)) { 48 | 49 | $attachment_query = $db->query('SELECT attachment.* 50 | FROM attachment 51 | JOIN message_attachment_join ON message_attachment_join.attachment_id=attachment.ROWID 52 | WHERE message_attachment_join.message_id = ' . 
$line['ROWID']); 53 | $attachments = array(); 54 | while($attachment = $attachment_query->fetch(PDO::FETCH_ASSOC)) { 55 | $attachments[] = $attachment; 56 | } 57 | 58 | if($line['is_from_me']) { 59 | $from = $me; 60 | $to = $line['contact']; 61 | } else { 62 | $from = $line['contact']; 63 | $to = $me; 64 | } 65 | $from_name = contact_name($from); 66 | $to_name = contact_name($to); 67 | 68 | $num_emoji = 0; 69 | if(preg_match_all('/(?:[0-9|#][\x{20E3}])|[\x{00ae}|\x{00a9}|\x{203C}|\x{2047}|\x{2048}|\x{2049}|\x{3030}|\x{303D}|\x{2139}|\x{2122}|\x{3297}|\x{3299}][\x{FE00}-\x{FEFF}]?|[\x{2190}-\x{21FF}][\x{FE00}-\x{FEFF}]?|[\x{2300}-\x{23FF}][\x{FE00}-\x{FEFF}]?|[\x{2460}-\x{24FF}][\x{FE00}-\x{FEFF}]?|[\x{25A0}-\x{25FF}][\x{FE00}-\x{FEFF}]?|[\x{2600}-\x{27BF}][\x{FE00}-\x{FEFF}]?|[\x{2900}-\x{297F}][\x{FE00}-\x{FEFF}]?|[\x{2B00}-\x{2BF0}][\x{FE00}-\x{FEFF}]?|[\x{1F000}-\x{1F6FF}][\x{FE00}-\x{FEFF}]?/u', $line['text'], $matches)) { 70 | $num_emoji = count($matches[0]); 71 | } 72 | 73 | fputcsv($fp, array( 74 | $line['date'], 75 | date('Y-m-d', $line['date']), 76 | date('H:i:s', $line['date']), 77 | $from, 78 | $from_name, 79 | $to, 80 | $to_name, 81 | trim($line['text']), 82 | $num_emoji, 83 | count($attachments) 84 | )); 85 | 86 | } 87 | 88 | fclose($fp); 89 | 90 | -------------------------------------------------------------------------------- /include.php: -------------------------------------------------------------------------------- 1 | $key) { 15 | $data[trim($key)] = trim($matches[2][$i]); 16 | } 17 | } 18 | return $data; 19 | } 20 | 21 | function contact($id) { 22 | $data = load_contacts(); 23 | 24 | if(preg_match('/.+@.+\..+/', $id)) { 25 | $href = 'mailto:' . $id; 26 | } else { 27 | $href = 'sms:' . $id; 28 | } 29 | 30 | if(array_key_exists($id, $data)) { 31 | return '' . $data[$id] . ''; 32 | } else { 33 | return '' . $id . 
''; 34 | } 35 | } 36 | 37 | function contact_name($id) { 38 | $data = load_contacts(); 39 | if(array_key_exists($id, $data)) { 40 | return $data[$id]; 41 | } else { 42 | return $id; 43 | } 44 | } 45 | 46 | function query_messages_since(&$db, $timestamp) { 47 | return $db->query('SELECT message.ROWID, substr(date,1,9)+978307200 AS date, 48 | message.text, is_from_me, handle.id AS contact 49 | FROM message 50 | LEFT JOIN handle ON message.handle_id = handle.ROWID 51 | WHERE cache_roomnames IS NULL 52 | AND substr(date,1,9)+978307200 > ' . $timestamp . ' 53 | ORDER BY date 54 | '); 55 | } 56 | 57 | function filename_for_message($contact, $ts) { 58 | $folder = contact_name($contact); 59 | return 'messages/' . $folder . '/' . date('Y-m', $ts) . '.html'; 60 | } 61 | 62 | function attachment_folder($contact, $ts, $relative=false) { 63 | $folder = contact_name($contact); 64 | return ($relative ? '' : 'messages/' . $folder . '/') . date('Y-m', $ts) . '/'; 65 | } 66 | 67 | function format_line($line, $attachments) { 68 | global $me; 69 | 70 | if($line['is_from_me']) 71 | $contact = $me; 72 | else 73 | $contact = $line['contact']; 74 | 75 | $attachments_html = ''; 76 | 77 | if(count($attachments)) { 78 | foreach($attachments as $at) { 79 | $imgsrc = attachment_folder($line['contact'], $line['date'], true) . $at['transfer_name']; 80 | $attachments_html .= ''; 81 | } 82 | } 83 | 84 | return '
' 85 | . ' ' 86 | . contact($contact) 87 | . ' ' . htmlentities(trim($line['text'])) . '' 88 | . $attachments_html 89 | . '
'; 90 | } 91 | 92 | function entry_exists($line, $attachments, $fn) { 93 | if(!file_exists($fn)) return false; 94 | $file = file_get_contents($fn); 95 | return strpos($file, format_line($line, $attachments)) !== false; 96 | } 97 | 98 | function html_template() { 99 | ob_start(); 100 | ?> 101 | 102 | 103 | 121 | > contacts.txt 22 | ``` 23 | 24 | ``` 25 | +15031234567 Your Name 26 | +15035551212 Cool Dude 27 | cooldude@gmail.com Cool Dude 28 | ``` 29 | 30 | You can have multiple entries per person, and they will be combined into a single log folder with that person's name. 31 | 32 | Running `php contacts.php` subsequently will output only new contacts that were not yet in the file. 33 | 34 | 35 | Export Formats 36 | -------------- 37 | 38 | Running `php archive.php` will export to HTML files sorted by contact. 39 | 40 | Running `php export-csv.php` will export to a single CSV file. See below for the structure of the file. 41 | 42 | 43 | HTML Folder Structure 44 | --------------------- 45 | 46 | Messages are saved in separate files per month under a folder of each person's name. If you don't have an entry for them in your `contacts.txt` file, the folder name will be their iMessage ID (phone number or email address). 47 | 48 | Photos that were sent in messages will also be archived in the folder. 49 | 50 | ``` 51 | messages/ 52 | 53 | # Individual chats 54 | messages/Cool Dude/ 55 | messages/Cool Dude/2014-04.html 56 | messages/Cool Dude/2014-05.html 57 | messages/Cool Dude/2014-05/photo.jpg 58 | ``` 59 | 60 | HTML Log Files 61 | -------------- 62 | 63 | Messages are stored as a minimal HTML page. Structured data is available by parsing out 64 | the [microformats markup](http://microformats.org/wiki/microformats2). 65 | 66 | Each message is an [h-entry](http://microformats.org/wiki/h-entry) containing the author, timestamp and text of the message. 
You can parse these into a JSON structure using a [Microformats parser](http://microformats.org/wiki/microformats2#Parsers). 67 | 68 | ```html 69 |
70 | 71 | Cool Dude 72 | Message text here 73 |
74 |
75 | 76 | Aaron Parecki 77 | Message text here 78 |
79 | ``` 80 | 81 | ```json 82 | { 83 | "items": [ 84 | { 85 | "type": ["h-entry"], 86 | "properties": { 87 | "author": [ 88 | { 89 | "type": ["h-card"], 90 | "properties": { 91 | "name": ["Cool Dude"], 92 | "url": ["sms:+15035551212"] 93 | }, 94 | "value": "Cool Dude" 95 | } 96 | ], 97 | "name": ["Message text here"], 98 | "published": ["2014-05-01T10:48:00+00:00"], 99 | "content": [ 100 | { 101 | "html": "Message text here", 102 | "value": "Message text here" 103 | } 104 | ] 105 | } 106 | }, 107 | { 108 | "type": ["h-entry"], 109 | "properties": { 110 | "author": [ 111 | { 112 | "type": ["h-card"], 113 | "properties": { 114 | "name": ["Aaron Parecki"], 115 | "url": ["mailto:aaron@parecki.com"] 116 | }, 117 | "value": "Aaron Parecki" 118 | } 119 | ], 120 | "name": ["Message text here"], 121 | "published": ["2014-05-01T10:49:00+00:00"], 122 | "content": [ 123 | { 124 | "html": "Message text here", 125 | "value": "Message text here" 126 | } 127 | ] 128 | } 129 | } 130 | ] 131 | } 132 | ``` 133 | 134 | Photos in the message thread are also included in the export and are stored in a subfolder with the same name as the file. They are embedded in the HTML with an img tag so they will be rendered by browsers. 135 | 136 | 137 | CSV Log File 138 | ------------ 139 | 140 | Only one file is created when exporting as csv. 
The CSV file will have the following columns: 141 | 142 | ``` 143 | Timestamp, Date, Time, From, From Name, To, To Name, Message, Emoji, Attachments 144 | ``` 145 | 146 | * `Timestamp`: The Unix timestamp of the message (seconds since 1970-01-01) 147 | * `Date`: The date will be in the format YYYY-MM-DD 148 | * `Time`: The time will be in 24-hour format HH:MM:SS 149 | * `From`, `To`: The iMessage ID of the sender and recipient 150 | * `From Name`, `To Name`: The name of the person as defined in your `contacts.txt` file (see above) 151 | * `Message`: This is the actual text of the message 152 | * `Emoji`: The number of emoji characters in the message 153 | * `Attachments`: The number of attachments (usually photos) sent in the message 154 | 155 | The messages are usually in chronological order, but because of delays in when your computer actually receives the messages, they might be slightly out of order. 156 | 157 | 158 | SQL Database 159 | ------------ 160 | 161 | If you want to quickly query your data, it may be faster to load the messages into a SQL database so you can write SQL queries. 162 | 163 | First create a table with the following SQL: 164 | 165 | ```sql 166 | CREATE TABLE `messages` ( 167 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT, 168 | `timestamp` int(11) DEFAULT NULL, 169 | `date` date DEFAULT NULL, 170 | `time` time DEFAULT NULL, 171 | `from` varchar(255) DEFAULT NULL, 172 | `from_name` varchar(255) DEFAULT NULL, 173 | `to` varchar(255) DEFAULT NULL, 174 | `to_name` varchar(255) DEFAULT NULL, 175 | `message` text CHARACTER SET utf8mb4, 176 | `num_emoji` int(11) DEFAULT NULL, 177 | `num_attachments` int(11) DEFAULT NULL, 178 | PRIMARY KEY (`id`), 179 | KEY `from,to` (`from`,`to`), 180 | KEY `timestamp` (`timestamp`), 181 | KEY `date` (`date`) 182 | ) ENGINE=InnoDB DEFAULT CHARSET=utf8; 183 | ``` 184 | 185 | To import into the database, first make sure you copy `config-db.sample.php` to `config-db.php` and define your own database credentials.
Then run: 186 | 187 | ``` 188 | php export-sql.php 189 | ``` 190 | 191 | Here are some example queries you can run! 192 | 193 | 194 | ### Who sent me the most messages in the past year? 195 | 196 | ```sql 197 | SELECT from_name, COUNT(1) AS num 198 | FROM messages 199 | WHERE date > "2013-07-07" 200 | AND date < "2014-07-07" 201 | GROUP BY from_name 202 | ORDER BY COUNT(1) DESC 203 | ``` 204 | 205 | (The top result will be you, because every message you sent is recorded with you as the sender, so just ignore it.) 206 | 207 | ### Who did I send the most messages to in the past year? 208 | 209 | ```sql 210 | SELECT to_name, COUNT(1) AS num 211 | FROM messages 212 | WHERE date > "2013-07-07" 213 | AND date < "2014-07-07" 214 | GROUP BY to_name 215 | ORDER BY COUNT(1) DESC 216 | ``` 217 | 218 | ### Most contacted people in the past year 219 | 220 | ```sql 221 | SELECT IF(from_name="Aaron Parecki", to_name, from_name) AS name, COUNT(1) AS num 222 | FROM messages 223 | WHERE date > "2013-07-07" 224 | AND date < "2014-07-07" 225 | GROUP BY IF(from_name="Aaron Parecki", to_name, from_name) 226 | ORDER BY COUNT(1) DESC; 227 | ``` 228 | 229 | Obviously you should replace my name with yours. This will count both sent and received messages.
230 | 231 | 232 | ### Number of messages sent and received per day 233 | 234 | ```sql 235 | SELECT date, COUNT(1) AS num 236 | FROM messages 237 | GROUP BY date 238 | ``` 239 | 240 | ### Days with the most messages (sent and received) in the past year 241 | 242 | ```sql 243 | SELECT date, COUNT(1) AS num 244 | FROM messages 245 | WHERE date > "2013-07-07" 246 | AND date < "2014-07-07" 247 | GROUP BY date 248 | ORDER BY COUNT(1) DESC 249 | ``` 250 | 251 | ### Number of messages per month in the past year 252 | 253 | ```sql 254 | SELECT date, COUNT(1) AS num 255 | FROM messages 256 | WHERE date > "2013-07-07" 257 | AND date < "2014-07-07" 258 | GROUP BY YEAR(date), MONTH(date) 259 | ORDER BY date DESC 260 | ``` 261 | 262 | ### Number of emoji used per month in the past year 263 | 264 | ```sql 265 | SELECT date, SUM(num_emoji) AS num 266 | FROM messages 267 | WHERE date > "2013-07-07" 268 | AND date < "2014-07-07" 269 | GROUP BY YEAR(date), MONTH(date) 270 | ORDER BY date DESC 271 | ``` 272 | 273 | ### Who sent me the most emoji in the past year? 274 | 275 | ```sql 276 | SELECT from_name, SUM(num_emoji) AS num 277 | FROM messages 278 | WHERE date > "2013-07-07" 279 | AND date < "2014-07-07" 280 | AND num_emoji > 0 281 | GROUP BY from_name 282 | ORDER BY SUM(num_emoji) DESC 283 | ``` 284 | 285 | ### Who did I send the most emoji to in the past year? 286 | 287 | ```sql 288 | SELECT to_name, SUM(num_emoji) AS num 289 | FROM messages 290 | WHERE date > "2013-07-07" 291 | AND date < "2014-07-07" 292 | AND num_emoji > 0 293 | GROUP BY to_name 294 | ORDER BY SUM(num_emoji) DESC 295 | ``` 296 | 297 | ### Do you send or receive more emoji? 298 | 299 | ```sql 300 | SELECT * FROM 301 | (SELECT "received" AS type, SUM(num_emoji) AS num 302 | FROM messages 303 | WHERE to_name = "Aaron Parecki") AS received 304 | UNION 305 | (SELECT "sent" AS type, SUM(num_emoji) AS num 306 | FROM messages 307 | WHERE from_name = "Aaron Parecki") 308 | ``` 309 | 310 | ### What hour is most active?
311 | 312 | ```sql 313 | SELECT HOUR(DATE_ADD(time, INTERVAL 24-7 HOUR)) % 24 AS local_hour, COUNT(1) AS num 314 | FROM messages 315 | GROUP BY HOUR(time) 316 | ORDER BY time 317 | ``` 318 | 319 | Change the `-7` to your local timezone offset in hours. Note that this query ignores DST, so the reported hour can be off by one for part of the year. 320 | 321 | --------------------------------------------------------------------------------