├── LICENSE ├── README.md └── mailtojson.py /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2013 Newsman App - www.newsmanapp.com 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # MailToJson 2 | 3 | Quick script to parse incoming mail and do a post with the content as JSON data. 4 | 5 | ## How to use 6 | 7 | The work flow is quite simple. The script reads the mail mime message from STDIN, parses 8 | the data and makes a POST call with RAW JSON to the url (passed as command line argument). 
9 | 10 | Example usage (command line): 11 | ```bash 12 | cat mail.eml | python mailtojson.py -u https://dev.url/autoreply/handle.php 13 | ``` 14 | 15 | Example usage (postfix aliases): 16 | ```bash 17 | mailtojson_autoreply: "|/nethosting/mailtojson/mailtojson.py -u https://dev.url/autoreply/handle.php" 18 | ``` 19 | 20 | ## JSON Format 21 | 22 | ```yaml 23 | json: 24 | headers: 25 | header_key1: value 26 | header_key2: value 27 | subject: "The email subject as utf-8 string" 28 | datetime: "2015-03-17 17:48:06" 29 | encoding: "utf-8" 30 | from: 31 | - { name: "Sender Name", email: "sender@email.com" } 32 | to: 33 | - { name: "Recipient Name", email: "recipient@email.com" } 34 | - { name: "Recipient Name 2", email: "recipient2@email.com" } 35 | cc: 36 | - { name: "Recipient Name", email: "recipient@email.com" } 37 | - { name: "Recipient Name 2", email: "recipient2@email.com" } 38 | parts: 39 | - { content_type: "text/plain", content: "body of this part", "headers": { "header_key1": value, "header_key2": value } } 40 | - { content_type: "text/html", content: "body of this part", "headers": { "header_key1": value, "header_key2": value } } 41 | attachments: 42 | - { filename: "invoice.pdf", content_type: "application/pdf", content: "base64 of binary data" } 43 | - { filename: "invoice2.pdf", content_type: "application/pdf", content: "base64 of binary data" } 44 | ``` 45 | 46 | ### Handling JSON data in PHP 47 | 48 | Here is a quick example of how to parse the JSON data posted by the mail to json script: 49 | 50 | ```php 51 | 58 | ``` 59 | 60 | # License 61 | 62 | This code is released under [MIT license](https://github.com/Newsman/MailToJson/blob/master/LICENSE) by [Newsman App - Smart Email Service Provider](https://www.newsman.app). 
#!/usr/bin/env python

## Open Sourced by - Newsman App www.newsmanapp.com
## (c) 2013 Newsman App
## https://github.com/Newsman/MailToJson

"""Parse a MIME mail message into a JSON-friendly dict and print or POST it.

When run as a script the message is read from STDIN and is either printed as
JSON (``-p``) or sent as the raw body of an HTTP POST to the URL given with
``-u``.

The module runs on both Python 2.7 and Python 3.  On Python 2 every string
stored in the result is a byte string encoded with ``MailJson.encoding``; on
Python 3 the strings are ``str`` and the encoding is only advertised through
the ``encoding`` key and the Content-Type header of the POST.
"""

import base64
import csv
import datetime
import email
import email.header
import email.utils
import json
import pprint
import re
import sys
from optparse import OptionParser

try:
    # Python 2 module names.
    import StringIO
    import urllib2
except ImportError:
    # Python 3: bind the same names to the modules that provide the
    # attributes used below (StringIO.StringIO, urllib2.Request/urlopen).
    import io as StringIO
    import urllib.request as urllib2

VERSION = "1.3.1"

ERROR_NOUSER = 67
ERROR_PERM_DENIED = 77
ERROR_TEMP_FAIL = 75

_PY2 = sys.version_info[0] == 2

# regular expresion from https://github.com/django/django/blob/master/django/core/validators.py
email_re = re.compile(
    r"(^[-!#$%&'*+/=?^_`{}|~0-9A-Z]+(\.[-!#$%&'*+/=?^_`{}|~0-9A-Z]+)*"  # dot-atom
    # quoted-string, see also http://tools.ietf.org/html/rfc2822#section-3.2.5
    r'|^"([\001-\010\013\014\016-\037!#-\[\]-\177]|\\[\001-\011\013\014\016-\177])*"'
    r')@((?:[A-Z0-9](?:[A-Z0-9-]{0,61}[A-Z0-9])?\.)+(?:[A-Z]{2,6}\.?|[A-Z0-9-]{2,}\.?)$)'  # domain
    r'|\[(25[0-5]|2[0-4]\d|[0-1]?\d?\d)(\.(25[0-5]|2[0-4]\d|[0-1]?\d?\d)){3}\]$', re.IGNORECASE)

# NOTE: "+-=" inside the character class is a range ("+" through "="), not
# three literals; the pattern is kept unchanged so the same addresses match.
email_extract_re = re.compile(r"<(([.0-9a-z_+-=]+)@(([0-9a-z-]+\.)+[0-9a-z]{2,9}))>", re.M|re.S|re.I)
# The quoted alternative is non-greedy so that a later quoted parameter
# (e.g. ``filename="a.pdf"; size="3"``) is not swallowed into the file name.
filename_re = re.compile("filename=\"(.+?)\"|filename=([^;\n\r\"\']+)", re.I|re.S)
contenttype_filename_re = re.compile("name=\"(.+?)\"|name=([^;\n\r\"\']+)", re.I|re.S)

begin_tab_re = re.compile("^\t{1,}", re.M)
begin_space_re = re.compile(r"^\s{1,}", re.M)


def _decode_bytes(raw, charset, errors="replace"):
    """Decode the byte string *raw* with *charset*, never failing on bad input.

    Undecodable bytes are handled according to *errors*.  If *charset* is not
    a codec known to Python the bytes are decoded as UTF-8 instead.
    """
    try:
        return raw.decode(charset, errors)
    except LookupError:
        return raw.decode("utf-8", errors)


def _dump_json(data):
    """Serialize the parsed mail *data* to a JSON string."""
    if _PY2:
        # Python 2 values are byte strings; tell json how to decode them.
        return json.dumps(data, encoding = data.get("encoding"))
    return json.dumps(data)


class MailJson:
    """Convert a raw MIME message into a dict that can be dumped as JSON.

    Typical use is ``MailJson(content).parse()``.  The result has the keys
    ``headers``, ``datetime``, ``subject``, ``to``, ``reply-to``, ``from``,
    ``cc``, ``attachments``, ``parts`` and ``encoding``.
    """

    def __init__(self, content = None):
        """Store the raw message *content* (text, or bytes on Python 3)."""
        self.data = {}
        self.raw_parts = []
        self.encoding = "utf-8" # output encoding
        self.setContent(content)

    def setEncoding(self, encoding):
        """Set the output encoding reported in (and, on Python 2, applied to) the result."""
        self.encoding = encoding

    def setContent(self, content):
        """Set the raw message that the next ``parse()`` call will read."""
        self.content = content

    def _to_output(self, text):
        """Return decoded *text* in the form stored in the result dict.

        On Python 2 this is a byte string in ``self.encoding`` (characters the
        encoding cannot represent are replaced instead of raising); on
        Python 3 the text is returned unchanged.
        """
        if _PY2 and not isinstance(text, bytes):
            return text.encode(self.encoding, "replace")
        return text

    def _fixEncodedSubject(self, subject):
        """Normalise a subject value, re-folding multi-line encoded subjects.

        ``None`` gives ``""``.  A subject that is shorter than two characters,
        is on a single line, or does not start with ``=?`` is returned
        stripped but otherwise unchanged.  Otherwise carriage returns and the
        leading whitespace of every line are removed and the lines are
        concatenated, keeping a line break (newline plus space) only after
        lines that end with ``=``.
        """
        if subject is None:
            return ""

        subject = ("%s" % subject).strip()

        if len(subject) < 2:
            # empty string or not encoded string ?
            return subject
        if subject.find("\n") == -1:
            # is on single line
            return subject
        if subject[0:2] != "=?":
            # not encoded
            return subject

        subject = subject.replace("\r", "")
        subject = begin_tab_re.sub("", subject)
        subject = begin_space_re.sub("", subject)

        new_subject = ""
        for line in subject.split("\n"):
            new_subject = "%s%s" % (new_subject, line)
            # endswith() is safe on an empty line, unlike indexing [-1]
            if line.endswith("="):
                new_subject = "%s\n " % new_subject

        return new_subject

    def _extract_email(self, s):
        """Return the first e-mail address found in *s*, or ``None``.

        An address in angle brackets wins; otherwise the first
        space-separated token that looks like an address is returned.
        """
        ret = email_extract_re.findall(s)
        if ret:
            return ret[0][0]

        for token in s.split(" "):
            token = token.strip()
            if email_re.match(token):
                return token

        return None

    def _decode_headers(self, v):
        """Decode RFC 2047 encoded header value(s) and return a list of strings.

        *v* is one raw header value or a list of them.  Each value is split
        into its chunks, every chunk is decoded with its declared charset
        (``ascii`` when none is declared), stripped, and the chunks are
        joined with a single space.  Bytes that cannot be decoded, or an
        unknown charset, no longer abort the parse: the offending bytes are
        replaced.
        """
        if not isinstance(v, list):
            v = [ v ]

        ret = []
        for h in v:
            chunks = []
            for value, charset in email.header.decode_header(h):
                # Python 3 returns already-decoded str for plain headers.
                if isinstance(value, bytes):
                    value = _decode_bytes(value, (charset or "ascii").lower())
                value = value.strip().strip("\t")
                chunks.append(self._to_output(value))

            ret.append(" ".join(chunks))

        return ret

    def _parse_recipients(self, v):
        """Split an address header into ``{"name": ..., "email": ...}`` dicts.

        *v* is a header string or a list of them (joined with commas);
        ``None`` is returned for ``None``.  Entries are separated with the
        csv reader so that a quoted display name may contain commas.  Empty
        entries (for example after a trailing comma) are skipped.
        """
        if v is None:
            return None

        ret = []

        # Sometimes a list is passed, which breaks .replace()
        if isinstance(v, list):
            v = ",".join(v)
        v = v.replace("\n", " ").replace("\r", " ").strip()
        reader = csv.reader(StringIO.StringIO(v))
        try:
            row = next(reader)
        except StopIteration:
            return ret

        for entry in row:
            entry = entry.strip()
            if not entry:
                continue

            if email_re.match(entry):
                e = entry
                entry = ""
            else:
                e = self._extract_email(entry)
                if e:
                    entry = entry.replace("<%s>" % e, "")
                entry = entry.strip()
                if e and entry.find(e) != -1:
                    entry = entry.replace(e, "").strip()

            # If all else has failed
            if entry and e is None:
                e_split = entry.split(" ")
                e = e_split[-1].replace("<", "").replace(">","")
                entry = " ".join(e_split[:-1])

            ret.append({"name": entry, "email": e})

        return ret

    def _parse_date(self, v):
        """Return the Date header *v* as a naive ``datetime`` in local time.

        The current time is returned when *v* is missing or cannot be parsed.
        If several Date headers were present (a list), the first one is used.
        """
        if isinstance(v, list):
            v = v[0] if v else None

        if v is None:
            return datetime.datetime.now()

        tt = email.utils.parsedate_tz(v)

        if tt is None:
            return datetime.datetime.now()

        timestamp = email.utils.mktime_tz(tt)
        return datetime.datetime.fromtimestamp(timestamp)

    def _get_content_charset(self, part, failobj = None):
        """Return the charset parameter of the Content-Type header.

        The returned string is always coerced to lower case.  If there is no
        Content-Type header, or if that header has no charset parameter,
        failobj is returned.

        This delegates to the standard library implementation, of which the
        previous body was a copy.
        """
        return part.get_content_charset(failobj)

    def _get_part_headers(self, part):
        """Return the decoded headers of *part* as a dict keyed by lower-case name.

        A header that occurs once maps to a string, a repeated header maps to
        the list of its values.
        """
        headers = {}
        for k in part.keys():
            k = k.lower()
            v = self._decode_headers(part.get_all(k))

            if len(v) == 1:
                headers[k] = v[0]
            else:
                headers[k] = v

        return headers

    def _attachment_filename(self, part, content_disposition):
        """Return the attachment file name of *part*, or ``"undefined"``.

        The name is taken from the Content-Disposition value and, when that
        has none, from the ``name`` parameter of Content-Type.
        """
        found = filename_re.findall(content_disposition)
        if not found:
            content_type = part.get("Content-Type", None)
            if content_type:
                found = contenttype_filename_re.findall("%s" % content_type)

        if found:
            # exactly one of the two alternatives captured something
            return found[0][0] or found[0][1]
        return "undefined"

    def parse(self):
        """Parse ``self.content`` and return the resulting data dict.

        Every non-multipart part that carries a Content-Disposition header is
        reported under ``attachments`` with its payload base64 encoded; every
        other part is reported under ``parts`` with its payload decoded to
        text (undecodable bytes are dropped, an unknown charset falls back to
        UTF-8 instead of discarding the part).
        """
        from_bytes = getattr(email, "message_from_bytes", None)
        if from_bytes is not None and isinstance(self.content, bytes):
            # Python 3 with raw bytes, e.g. read from sys.stdin.buffer
            self.msg = from_bytes(self.content)
        else:
            self.msg = email.message_from_string(self.content)

        # start clean so that calling parse() twice does not duplicate parts
        self.raw_parts = []

        headers = self._get_part_headers(self.msg)
        self.data["headers"] = headers
        self.data["datetime"] = self._parse_date(headers.get("date", None)).strftime("%Y-%m-%d %H:%M:%S")
        self.data["subject"] = self._fixEncodedSubject(headers.get("subject", None))
        self.data["to"] = self._parse_recipients(headers.get("to", None))
        self.data["reply-to"] = self._parse_recipients(headers.get("reply-to", None))
        self.data["from"] = self._parse_recipients(headers.get("from", None))
        self.data["cc"] = self._parse_recipients(headers.get("cc", None))

        attachments = []
        parts = []
        for part in self.msg.walk():
            if part.is_multipart():
                continue

            payload = part.get_payload(decode = True) or b""
            content_disposition = part.get("Content-Disposition", None)
            if content_disposition:
                # we have attachment
                encoded = base64.b64encode(payload)
                if not isinstance(encoded, str):
                    # Python 3 returns bytes, which json cannot serialize
                    encoded = encoded.decode("ascii")

                attachments.append({
                    "filename": self._attachment_filename(part, "%s" % content_disposition),
                    "content": encoded,
                    "content_type": part.get_content_type(),
                })
            else:
                charset = self._get_content_charset(part, "utf-8")
                text = _decode_bytes(payload, charset, "ignore")
                parts.append({
                    "content_type": part.get_content_type(),
                    "content": self._to_output(text),
                    "headers": self._get_part_headers(part),
                })
                self.raw_parts.append(part)

        self.data["attachments"] = attachments
        self.data["parts"] = parts
        self.data["encoding"] = self.encoding

        return self.get_data()

    def get_data(self):
        """Return the dict filled in by ``parse()``."""
        return self.data

    def get_raw_parts(self):
        """Return the message parts that ``parse()`` reported under ``parts``."""
        return self.raw_parts


def main():
    """Command line entry point: read a mail from STDIN, print or POST it as JSON.

    Exits with status 1 when neither ``--url`` nor ``--print`` is given and
    with ERROR_TEMP_FAIL when parsing or posting fails.
    """
    usage = "usage: %prog [options]"
    parser = OptionParser(usage)
    parser.add_option("-u", "--url", dest = "url", action = "store", help = "the url where to post the mail data as json")
    parser.add_option("-p", "--print", dest = "do_print", action = "store_true", help = "no json posting, just print the data")
    parser.add_option("-d", "--dump", dest = "do_dump", action = "store_true", help = "if present print to output the url post response")

    opt, args = parser.parse_args()

    if not opt.url and not opt.do_print:
        print(parser.format_help())
        sys.exit(1)

    # Read raw bytes where the stream offers them (Python 3) so that mails
    # which are not valid in the locale encoding can still be parsed.
    content = getattr(sys.stdin, "buffer", sys.stdin).read()

    try:
        mj = MailJson(content)
        mj.parse()
        data = mj.get_data()

        if opt.do_print:
            print(_dump_json(data))
        else:
            headers = { "Content-Type": "application/json; charset=%s" % data.get("encoding"), "User-Agent": "NewsmanApp/MailToJson %s - https://github.com/Newsman/MailToJson" % VERSION }
            body = _dump_json(data)
            if not isinstance(body, bytes):
                # Python 3 requires a bytes request body
                body = body.encode("utf-8")
            req = urllib2.Request(opt.url.replace("\n", "").replace("\r", ""), body, headers)
            resp = urllib2.urlopen(req)
            ret = resp.read()

            print("Parsed Mail Data sent to: %s\n" % opt.url)
            if opt.do_dump:
                if not isinstance(ret, str):
                    ret = ret.decode("utf-8", "replace")
                print(ret)
    except Exception as inst:
        print("ERR: %s" % inst)
        sys.exit(ERROR_TEMP_FAIL)


if __name__ == "__main__":
    main()