├── bin ├── __init__.py └── app.py ├── tests ├── __init__.py └── dmarc_parser_tests.py ├── dmarc_parser ├── __init__.py ├── .DS_Store ├── unzip.py └── parse_dmarc.py ├── Procfile ├── .gitignore ├── docs └── TODO.txt ├── requirements.txt ├── README.md ├── setup.py ├── LICENSE.txt └── schema └── schema.sql /bin/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dmarc_parser/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /Procfile: -------------------------------------------------------------------------------- 1 | web: python bin/app.py 2 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | venv 2 | dist 3 | build 4 | data 5 | config.ini 6 | *.pyc 7 | -------------------------------------------------------------------------------- /docs/TODO.txt: -------------------------------------------------------------------------------- 1 | 0. Use SQLAlchemy 2 | 1. Do not store duplicate reports 3 | 2. Add tests 4 | 3. Add reporting functionality -------------------------------------------------------------------------------- /dmarc_parser/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/thinkingserious/sendgrid-python-dmarc-parser/HEAD/dmarc_parser/.DS_Store -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | Flask==0.9 2 | Jinja2==2.6 3 | Werkzeug==0.8.3 4 | distribute==0.6.24 5 | wsgiref==0.1.2 6 | mysql-python 7 | requests 8 | simplejson 9 | Flask-SQLAlchemy==0.15 10 | configobj 11 | -------------------------------------------------------------------------------- /tests/dmarc_parser_tests.py: -------------------------------------------------------------------------------- 1 | from nose.tools import * 2 | import NAME 3 | 4 | def setup(): 5 | print "SETUP!" 6 | 7 | def teardown(): 8 | print "TEAR DOWN!" 9 | 10 | def test_basic(): 11 | print "I RAN!" 12 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | This code is running on a Heroku/Python/Flask stack using the SendGrid Parse and Web APIs. 2 | 3 | With this code, you can create a DMARC report aggregator parser that can accept and email containing a zipped DMARC aggregate report and store the data received in a DB. 4 | 5 | When completed, you will be able to send out a report analyzing the received DMARC aggregate data. -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | try: 2 | from setuptools import setup 3 | except ImportError: 4 | from distutils.core import setup 5 | 6 | config = { 7 | 'description': 'This is a DMARC report parser that accepts either an XML or zipped file as input at an attachment via email', 8 | 'author': 'Elmer Thomas', 9 | 'url': '', 10 | 'download_url': '', 11 | 'author_email': 'elmer.thomas@sendgrid.com', 12 | 'version': '0.1', 13 | 'install_requires': ['nose', 'Flask', 'Flask-SQLAlchemy', 'Jinja2', 'Werkzeug', 'distribute', 'wsgiref', 'mysql-python', 'requests', 'simplejson', 'configobj'], 14 | 'packages': ['dmarc_parser'], 15 | 'scripts': [], 16 | 'name': 'DMARC Parser' 17 | } 18 | 19 | setup(**config) 20 | -------------------------------------------------------------------------------- /dmarc_parser/unzip.py: -------------------------------------------------------------------------------- 1 | import zipfile 2 | import sys 3 | import os 4 | import os.path 5 | 6 | __author__ = 'Elmer Thomas' 7 | __version__ = '0.1' 8 | 9 | class unzip: 10 | """Unzip a file to a specified output directory""" 11 | def __init__(self, input_filename, dir): 12 | """Attributes needed for file processing 13 | 14 | Keyword arguements: 15 | input_filename -- file that needs to be unzipped 16 | dir -- directory where the file is located 17 | zipped_files -- arroay of the unzipped filenames 18 | """ 19 | self.input_filename = input_filename 20 | self.dir = dir 21 | self.zipped_files = [] 22 | 23 | def extract(self): 24 | """Perform the unzip action""" 25 | unzipped_file = zipfile.ZipFile(self.input_filename) 26 | """Write the file(s) to specified directory""" 27 | for i, name in enumerate(unzipped_file.namelist()): 28 | file = open(os.path.join(self.dir,name), 'wb') 29 | self.zipped_files.append(name) 30 | file.write(unzipped_file.read(name)) 31 | file.flush() 32 | file.close() 33 | 34 | def get_unzipped_filenames(self): 35 | return self.zipped_files 36 | -------------------------------------------------------------------------------- /LICENSE.txt: -------------------------------------------------------------------------------- 1 | Copyright (c) 2012 SendGrid 2 | 3 | Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated 4 | documentation files (the "Software"), to deal in the Software without restriction, including without limitation 5 | the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, 6 | and to permit persons to whom the Software is furnished to do so, subject to the following conditions: 7 | 8 | The above copyright notice and this permission notice shall be included in all copies or substantial portions of 9 | the Software. 10 | 11 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO 12 | THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 13 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF 14 | CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 15 | DEALINGS IN THE SOFTWARE. 16 | -------------------------------------------------------------------------------- /schema/schema.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE `policy_published` ( 2 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT, 3 | `domain` text, 4 | `aspf` tinytext, 5 | `adkim` tinytext, 6 | `p` text, 7 | `pct` int(3) DEFAULT NULL, 8 | PRIMARY KEY (`id`) 9 | ) ENGINE=InnoDB AUTO_INCREMENT=11243 DEFAULT CHARSET=latin1; 10 | 11 | CREATE TABLE `report_metadata` ( 12 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT, 13 | `organization` text, 14 | `email` text, 15 | `extra_contact_information` text, 16 | `report_id` text, 17 | `date_range_begin` int(11) DEFAULT NULL, 18 | `date_range_end` int(11) DEFAULT NULL, 19 | PRIMARY KEY (`id`) 20 | ) ENGINE=InnoDB AUTO_INCREMENT=11263 DEFAULT CHARSET=latin1; 21 | 22 | CREATE TABLE `records` ( 23 | `id` int(11) unsigned NOT NULL AUTO_INCREMENT, 24 | `source_ip` char(15) DEFAULT NULL, 25 | `count` int(11) DEFAULT NULL, 26 | `disposition` char(11) DEFAULT NULL, 27 | `dkim` char(11) DEFAULT NULL, 28 | `spf` char(11) DEFAULT NULL, 29 | `type` char(20) DEFAULT NULL, 30 | `comment` text, 31 | `header_from` char(255) DEFAULT NULL, 32 | `dkim_domain` char(255) DEFAULT NULL, 33 | `dkim_result` char(11) DEFAULT NULL, 34 | `dkim_hresult` char(255) DEFAULT NULL, 35 | `spf_domain` char(255) DEFAULT NULL, 36 | `spf_result` char(11) DEFAULT NULL, 37 | `metadata_fk` int(11) unsigned NOT NULL, 38 | `published_fk` int(11) unsigned NOT NULL, 39 | PRIMARY KEY (`id`), 40 | KEY `report_metadata_fk` (`metadata_fk`), 41 | KEY `policy_published_fk` (`published_fk`), 42 | CONSTRAINT `policy_published_fk` FOREIGN KEY (`published_fk`) REFERENCES `policy_published` (`id`), 43 | CONSTRAINT `report_metadata_fk` FOREIGN KEY (`metadata_fk`) REFERENCES `report_metadata` (`id`) 44 | ) ENGINE=InnoDB AUTO_INCREMENT=6575763 DEFAULT CHARSET=latin1; -------------------------------------------------------------------------------- /bin/app.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/python 2 | 3 | import os 4 | import sys 5 | """Allow imports from the dmarc_parser directory""" 6 | sys.path.append('./dmarc_parser') 7 | import parse_dmarc 8 | from parse_dmarc import parse_dmarc 9 | import requests 10 | import simplejson 11 | import unzip 12 | from unzip import unzip 13 | from flask import Flask, Response, request 14 | from flaskext.sqlalchemy import SQLAlchemy 15 | from sqlalchemy import create_engine, MetaData, Table 16 | from sqlalchemy.orm import mapper, sessionmaker 17 | import requests 18 | from configobj import ConfigObj 19 | 20 | __author__ = 'Elmer Thomas' 21 | __version__ = '0.1' 22 | 23 | app = Flask(__name__) 24 | config = ConfigObj('./config.ini') 25 | app.config['SQLALCHEMY_DATABASE_URI'] = config['mysql_db_url'] 26 | db = SQLAlchemy(app) 27 | 28 | """Default route""" 29 | @app.route('/') 30 | def default(): 31 | return "Welcome to the SendGrid.com DMARC parser." 32 | 33 | """Unzip an emailed DMARC report, parse it, store in DB and if applicable return a report""" 34 | def process(file): 35 | """Attributes needed for the DMARC processing 36 | 37 | Keyword arguements: 38 | file -- location of the DMARC file that has been already received and saved on the local server 39 | """ 40 | 41 | """Unzip the first attached DMARC report""" 42 | app = unzip(file, "./") 43 | app.extract() 44 | unzipped_filenames = app.get_unzipped_filenames() 45 | 46 | """Parse the DMARC records""" 47 | app = parse_dmarc(unzipped_filenames[0], config) 48 | app.parser() 49 | app.get_report_metadata() 50 | app.get_policy_published() 51 | app.get_records() 52 | 53 | """Convert MySQL tables to Python objects -- WORK IN PROGRESS""" 54 | def load_session(): 55 | """""" 56 | engine = create_engine(app.config['SQLALCHEMY_DATABASE_URI'], echo=True) 57 | metadata = MetaData(engine) 58 | sg_policy_published = Table('policy_published', metadata, autoload=True) 59 | mapper(Policy_Published, sg_policy_published) 60 | sg_records = Table('records', metadata, autoload=True) 61 | mapper(Records, sg_records) 62 | sg_report_metadata = Table('report_metadata', metadata, autoload=True) 63 | mapper(Report_Metadata, sg_report_metadata) 64 | Session = sessionmaker(bind=engine) 65 | session = Session() 66 | return session 67 | 68 | """Process an incoming request via email using SendGrid.com's Parse API""" 69 | @app.route('/parse', methods=('GET', 'POST')) 70 | def sendgrid_parser(): 71 | if request.method == 'POST': 72 | """Required response to SendGrid.com's Parse API""" 73 | print "HTTP/1.1 200 OK" 74 | print 75 | 76 | """Parse the incoming email using SendGrid's Parse API and identify the DMARC report attachement""" 77 | envelope = simplejson.loads(request.form.get('envelope')) 78 | to_address = envelope['to'][0] 79 | from_address = envelope['from'] 80 | text = request.form.get('text') 81 | subject = request.form.get('subject') 82 | num_attachments = int(request.form.get('attachments', 0)) 83 | 84 | if num_attachments == 1: 85 | """Grab the attachment and process it""" 86 | attachment = request.files.get('attachment1') 87 | attachment.save("./data/tmp.zip") 88 | process("./data/tmp.zip") 89 | 90 | if to_address != config['slurp_email']: 91 | """ Take the domain in the subject line to find DMARC reported issues """ 92 | domain = subject 93 | """ Send the results via email using SendGrid.com's REST API: http://docs.sendgrid.com/documentation/api/web-api/""" 94 | payload = {'to': config['report_receiver_email'], 'from': config['report_sending_email'], 'subject': 'DMARC Results sent via SendGrid.com', 'text': from_address, 'html': from_address, 'api_user': config['sendgrid_api_user'], 'api_key': config['sendgrid_api_key']} 95 | r = requests.get("http://sendgrid.com/api/mail.send.json", params=payload) 96 | 97 | return "HTTP/1.1 200 OK" 98 | 99 | if __name__ == "__main__": 100 | """ Bind to PORT if defined, otherwise default to 5000 via Flask """ 101 | port = int(os.environ.get('PORT', 5000)) 102 | """ Turn this flag to False when in production """ 103 | app.debug = True 104 | app.run(host='0.0.0.0', port=port) -------------------------------------------------------------------------------- /dmarc_parser/parse_dmarc.py: -------------------------------------------------------------------------------- 1 | import time 2 | import MySQLdb as mysql 3 | import xml.etree.ElementTree as ET 4 | 5 | __author__ = 'Elmer Thomas' 6 | __version__ = '0.1' 7 | 8 | class parse_dmarc: 9 | """Parse a DMARC XML formatted report""" 10 | def __init__(self, input_filename, config): 11 | """Attributes needed for file processing 12 | 13 | Keyword arguements: 14 | input_filename -- file that needs to be parsed 15 | dir -- directory where the parsed file will be stored 16 | """ 17 | self.input_filename = input_filename 18 | self.db = mysql.connect(config['mysql_host'], config['mysql_user'], config['mysql_pass'], config['mysql_db']) 19 | self.cursor = self.db.cursor() 20 | """These are foreign keys that link the reports to their metadata and policy information""" 21 | self.metadata_fk = "" 22 | self.policy_fk = "" 23 | 24 | def parser(self): 25 | """Open the file to be Parsed and input the XML into the DOM""" 26 | dom = ET.parse(self.input_filename) 27 | self.doc = dom.getroot() 28 | 29 | def get_report_metadata(self): 30 | """Extract the DMARC metadata as defined here: 31 | http://www.dmarc.org/draft-dmarc-base-00-02.txt in Appendix C 32 | If no data is found, return NA 33 | """ 34 | orgName = self.doc.findtext("report_metadata/org_name", default="NA") 35 | email = self.doc.findtext("report_metadata/email", default="NA") 36 | extraContactInfo = self.doc.findtext("report_metadata/extra_contact_info", default="NA") 37 | reportID = self.doc.findtext("report_metadata/report_id", default="NA") 38 | dateRangeBegin = self.doc.findtext("report_metadata/date_range/begin", default="NA") 39 | dateRangeBegin = int(dateRangeBegin) 40 | dateRangeEnd = self.doc.findtext("report_metadata/date_range/end", default="NA") 41 | dateRangeEnd = int(dateRangeEnd) 42 | 43 | """Insert report metadata""" 44 | sql = """INSERT INTO report_metadata(organization, email, extra_contact_information, report_id, date_range_begin, date_range_end) 45 | VALUES("%s", "%s", "%s", "%s", %d, %d)""" %(orgName, email, extraContactInfo, reportID, dateRangeBegin, dateRangeEnd) 46 | try: 47 | self.cursor.execute(sql) 48 | self.db.commit() 49 | except: 50 | self.db.rollback() 51 | 52 | self.metadata_fk = self.cursor.lastrowid 53 | 54 | def get_policy_published(self): 55 | """Extract the DMARC policy published information as defined here: 56 | http://www.dmarc.org/draft-dmarc-base-00-02.txt in Section 6.2 57 | If no data is found, return NA 58 | """ 59 | domain = self.doc.findtext("policy_published/domain", default="NA") 60 | adkim = self.doc.findtext("policy_published/adkim", default="NA") 61 | aspf = self.doc.findtext("policy_published/aspf", default="NA") 62 | p = self.doc.findtext("policy_published/p", default="NA") 63 | pct = self.doc.findtext("policy_published/pct", default="NA") 64 | pct = int(pct) 65 | sql = """INSERT INTO policy_published(domain, adkim, aspf, p, pct) 66 | VALUES("%s", "%s", "%s", "%s", %d)""" %(domain, adkim, aspf, p, pct) 67 | try: 68 | self.cursor.execute(sql) 69 | self.db.commit() 70 | except: 71 | self.db.rollback() 72 | 73 | self.policy_fk = self.cursor.lastrowid 74 | 75 | def get_records(self): 76 | """Extract the DMARC records as defined here: 77 | http://www.dmarc.org/draft-dmarc-base-00-02.txt in Appendix C 78 | If no data is found, return NA 79 | """ 80 | container = self.doc.findall("record") 81 | for elem in container: 82 | source_ip = elem.findtext("row/source_ip", default="NA") 83 | count = elem.findtext("row/count", default="NA") 84 | count = int(count) 85 | disposition = elem.findtext("row/policy_evaluated/disposition", default="NA") 86 | dkim = elem.findtext("row/policy_evaluated/dkim", default="NA") 87 | spf = elem.findtext("row/policy_evaluated/spf", default="NA") 88 | type = elem.findtext("row/policy_evaluated/reason/type", default="NA") 89 | comment = elem.findtext("row/policy_evaluated/reason/comment", default="NA") 90 | header_from = elem.findtext("identifiers/header_from", default="NA") 91 | dkim_domain = elem.findtext("auth_results/dkim/domain", default="NA") 92 | dkim_result = elem.findtext("auth_results/dkim/result", default="NA") 93 | dkim_hresult = elem.findtext("auth_results/dkim/human_result", default="NA") 94 | spf_domain = elem.findtext("auth_results/spf/domain", default="NA") 95 | spf_result = elem.findtext("auth_results/spf/result", default="NA") 96 | 97 | sql = """INSERT INTO records(source_ip, count, disposition, dkim, spf, type, comment, header_from, dkim_domain, dkim_result, dkim_hresult, spf_domain, spf_result, metadata_fk, published_fk) 98 | VALUES("%s", %d, "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", "%s", %d, %d)""" %(source_ip, count, disposition, dkim, spf, type, comment, header_from, dkim_domain, dkim_result, dkim_hresult, spf_domain, spf_result, self.metadata_fk, self.policy_fk) 99 | try: 100 | self.cursor.execute(sql) 101 | self.db.commit() 102 | except: 103 | self.db.rollback() 104 | 105 | self.db.close() --------------------------------------------------------------------------------