├── samples ├── clean.txt └── dirty.txt ├── rules └── simple ├── conf └── logging.conf ├── LICENSE ├── bulk ├── __init__.py ├── processors │ ├── __init__.py │ ├── basic.py │ └── yara_processor.py ├── helpers.py ├── proxy.py └── message.py ├── setup.py ├── scripts ├── smtp_server.py ├── get_attachments.py ├── send_email.py └── bulk_proxy.py └── README.md /samples/clean.txt: -------------------------------------------------------------------------------- 1 | This is a clean text file 2 | 3 | -------------------------------------------------------------------------------- /samples/dirty.txt: -------------------------------------------------------------------------------- 1 | This is a dirty text file 2 | 3 | -------------------------------------------------------------------------------- /rules/simple: -------------------------------------------------------------------------------- 1 | rule simple 2 | { 3 | strings: 4 | $a = "dirty" 5 | 6 | condition: 7 | $a 8 | } 9 | -------------------------------------------------------------------------------- /conf/logging.conf: -------------------------------------------------------------------------------- 1 | [loggers] 2 | keys=root,bulk 3 | 4 | [handlers] 5 | keys=console,infoFile 6 | 7 | [formatters] 8 | keys=console,infoFile 9 | 10 | [logger_root] 11 | level=DEBUG 12 | handlers=console,infoFile 13 | 14 | [logger_bulk] 15 | handlers=console,infoFile 16 | level=DEBUG 17 | propagate=0 18 | qualname=bulk 19 | 20 | [handler_console] 21 | class=StreamHandler 22 | level=DEBUG 23 | formatter=console 24 | args=(sys.stdout,) 25 | 26 | [handler_infoFile] 27 | class=FileHandler 28 | level=INFO 29 | formatter=console 30 | args=("logs/bulk.log",) 31 | 32 | [formatter_console] 33 | format=%(name)s: %(asctime)s %(levelname)s %(message)s 34 | datefmt=%Y-%m-%d %H:%M:%S 35 | 36 | [formatter_infoFile] 37 | format=%(name)s: %(asctime)s %(levelname)s %(message)s 38 | datefmt=%Y-%m-%d %H:%M:%S 39 | 
-------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | Copyright (c) 2014 The MITRE Corporation. All rights reserved. 2 | 3 | Redistribution and use in source and binary forms, with or without 4 | modification, are permitted provided that the following conditions 5 | are met: 6 | 1. Redistributions of source code must retain the above copyright 7 | notice, this list of conditions and the following disclaimer. 8 | 2. Redistributions in binary form must reproduce the above copyright 9 | notice, this list of conditions and the following disclaimer in the 10 | documentation and/or other materials provided with the distribution. 11 | 12 | THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 13 | ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 16 | FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 17 | DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 18 | OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 19 | HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 21 | OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 22 | SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /bulk/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014 The MITRE Corporation. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # 1. 
Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # 2. Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 13 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 16 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 17 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 18 | # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 19 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 21 | # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 22 | # SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /bulk/processors/__init__.py: -------------------------------------------------------------------------------- 1 | # Copyright (c) 2014 The MITRE Corporation. All rights reserved. 2 | # 3 | # Redistribution and use in source and binary forms, with or without 4 | # modification, are permitted provided that the following conditions 5 | # are met: 6 | # 1. Redistributions of source code must retain the above copyright 7 | # notice, this list of conditions and the following disclaimer. 8 | # 2. Redistributions in binary form must reproduce the above copyright 9 | # notice, this list of conditions and the following disclaimer in the 10 | # documentation and/or other materials provided with the distribution. 
11 | # 12 | # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 13 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 14 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 15 | # ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 16 | # FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 17 | # DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 18 | # OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 19 | # HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 20 | # LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 21 | # OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 22 | # SUCH DAMAGE. 23 | -------------------------------------------------------------------------------- /setup.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2014 The MITRE Corporation. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions 7 | # are met: 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 2. Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. 
from setuptools import setup

# One-line summary of the package.
DESCRIPTION = 'A simple, content-inspecting, SMTP proxy'

# Longer description handed to setuptools as long_description.
LONG_DESCRIPTION = """
Bulk is a content-inspecting SMTP proxy. It is designed to
function as an 'advanced content inspector' and inter-operate
in a production environment using Postfix as an MTA.

Its basic processor depends upon Yara, a malware classification tool.
"""

# Scripts installed alongside the package.
_SCRIPTS = [
    'scripts/bulk_proxy.py',
    'scripts/get_attachments.py',
]

# All package metadata gathered in one mapping and passed on in one call.
_METADATA = {
    'name': 'Bulk',
    'version': '0.1.0',
    'description': DESCRIPTION,
    'long_description': LONG_DESCRIPTION,
    'author': 'Stephen DiCato',
    'author_email': 'sdicato@mitre.org',
    'url': '',
    'packages': ['bulk', 'bulk.processors'],
    'scripts': _SCRIPTS,
}

setup(**_METADATA)
class Processor(object):
    """
    Basic processor that reports a match for every input.
    """

    def __init__(self, rule_files=None):
        """
        Default initializer.

        Keeps the rule files and fetches the 'bulk' logger.

        Keyword arguments:
        rule_files -- optional dictionary of rule files; only its
                      values are used, when printing the processor

        """
        self._rule_files = rule_files
        self.logger = logging.getLogger('bulk')

    def __str__(self):
        """
        Describe the processor and, if any were given, its rule files.
        """
        description = 'Processor ' + __name__
        if not self._rule_files:
            return description

        rule_paths = ' '.join(self._rule_files.values())
        return description + ' running with rules ' + rule_paths

    def match(self, data):
        """
        Report a match without looking at the data.

        Keyword arguments:
        data -- payload to analyze; unused by this processor

        Returns True, always.

        """
        self.logger.info('Always returning true in this basic processor!')
        return True
class CustomSMTPServer(smtpd.SMTPServer):
    """
    A simple SMTP server that prints a summary of each message it receives.
    """

    def process_message(self, peer, mailfrom, rcpttos, data, **kwargs):
        """
        Print the envelope, headers and attachments of an incoming message.

        Keyword arguments:
        peer -- address of the connecting client
        mailfrom -- envelope sender
        rcpttos -- envelope recipients
        data -- raw message, as text or bytes
        kwargs -- extra keyword arguments supplied by newer smtpd
                  versions; accepted and ignored

        Returns None.

        """
        # Single-argument print() behaves the same on Python 2 and 3.
        print('Receiving message from: %s' % (peer,))
        print('Message addressed from: %s' % (mailfrom,))
        print('Message addressed to : %s' % (rcpttos,))
        print('Message length : %s' % len(data))

        # On Python 2 'bytes' is 'str' and message_from_bytes does not
        # exist, so the text parser is used there.
        if isinstance(data, bytes) and hasattr(email, 'message_from_bytes'):
            msg = email.message_from_bytes(data)
        else:
            msg = email.message_from_string(data)

        for k, v in msg.items():
            print('%s -- %s' % (k, v))

        # Report attachments instead of collecting them into lists that
        # were never used.
        for part in msg.walk():
            fn = part.get_filename()
            if not fn:
                continue

            # get_payload(decode=True) returns None for multipart parts
            payload = part.get_payload(decode=True) or b''
            print('Attachment: %s (%d bytes)' % (fn, len(payload)))


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='A simple SMTP Server')

    parser.add_argument(
        '--bind_address',
        default='127.0.0.1',
        help='Address to bind to and listen on for incoming mail. '
             'Default is 127.0.0.1'
    )

    parser.add_argument(
        '--bind_port',
        default=1025,
        type=int,
        help='Port to bind to and to listen on for incoming mail. '
             'Default is 1025'
    )

    args = parser.parse_args()

    # Keep a reference so the server stays registered with asyncore
    server = CustomSMTPServer((args.bind_address, args.bind_port), None)

    try:
        print('Starting Server')
        asyncore.loop()

    except KeyboardInterrupt:
        print('Stopping Server')
class Processor(object):
    """
    A simple wrapper to yara.

    Ideally, this will provide interfaces for updating
    rules files, managing detailed logging, and any
    extra analysis needs (metrics, reporting, etc).
    """

    def __init__(self, rule_files):
        """
        Default initializer.

        Keyword arguments:
        rule_files -- dictionary of namespaces:/path/to/file

        Raises yara.Error (after logging it) when yara cannot compile
        the rule files.

        """
        # Handle logger
        self.logger = logging.getLogger('bulk')

        self._rule_files = rule_files
        self._rules = None

        # Try to load the rules into yara
        try:
            self.logger.debug('Loading rules into yara: %s', self._rule_files)
            self._rules = yara.compile(filepaths=self._rule_files)

        except yara.Error as e:
            # Include yara's own error text: the failure is not
            # necessarily a missing file.
            self.logger.error('Cannot load yara rules %s: %s',
                              self._rule_files, e)
            raise

    def __str__(self):
        """
        Pretty way to print processor.
        """
        s = 'Processor ' + __name__
        if self._rule_files:
            s += ' running with rules ' + ' '.join(self._rule_files.values())

        return s

    def match(self, data):
        """
        Run yara against a blob of data and report whether any rule matched.

        Keyword arguments:
        data -- blob of data to analyze

        Every match is logged with its rule, namespace and the MD5 of
        the data.

        Returns True or False.

        """
        self.logger.debug('Running yara against data')
        matches = self._rules.match(data=data)
        if not matches:
            return False

        # Hash only when there is something to report. hashlib needs
        # bytes, so text input is encoded first.
        if isinstance(data, bytes):
            digest_input = data
        else:
            digest_input = data.encode('utf-8')
        md5 = hashlib.md5(digest_input).hexdigest()

        for match in matches:
            self.logger.info('Match found; Rule: \'%s\';'
                             'Namespace: \'%s\'; MD5: %s',
                             match.rule, match.namespace, md5)

        return True
def get_message(filename):
    """
    Load message from file.

    Keyword arguments:
    filename -- path to message

    Returns the file's contents as one string, without the lines that
    start with 'BULKMSG:'.

    Raises IOError (after printing a notice) if the file cannot be read.

    """
    try:
        with open(filename) as f:
            # Drop every line starting with 'BULKMSG:' and glue the
            # remaining lines back together
            return ''.join(line for line in f
                           if not line.startswith('BULKMSG:'))

    except IOError:
        print('Cannot open email file %s, exiting!' % filename)
        raise


def save(name, contents):
    """
    Write contents to file.

    Keyword arguments:
    name -- file to write to
    contents -- contents to write to file

    A failed write is reported on stdout and otherwise ignored, so the
    remaining attachments can still be saved.

    """
    try:
        with open(name, 'wb') as f:
            f.write(contents)

    except IOError:
        print('Cannot write file %s to disk, skipping!' % name)


if __name__ == '__main__':
    # Local import: only the script entry point needs os.path
    import os

    parser = argparse.ArgumentParser(description='A simple tool to pull '
                                                 'attachments out of an email')

    parser.add_argument(
        '--infile',
        type=str,
        required=True,
        help='Email file to pull attachments out of'
    )

    parser.add_argument(
        '--output_path',
        default='./',
        type=str,
        help='Optional path to write attachments to. '
             'Default is current directory.'
    )

    args = parser.parse_args()

    print('Reading email from file %s' % args.infile)
    msg = message.Message(None, None, None, get_message(args.infile))

    (names, contents) = msg.get_attachments()
    output_dir = directory_name(args.output_path)

    for name, content in zip(names, contents):
        # Attachment names come from the email itself and cannot be
        # trusted: keep only the final path component so a name such as
        # '../../etc/passwd' cannot escape the output directory.
        safe_name = os.path.basename(name.replace('\\', '/'))
        if safe_name in ('', '.', '..'):
            print('Skipping attachment with unusable name %r' % name)
            continue

        print('Writing attachment %s to disk' % safe_name)
        save(output_dir + safe_name, content)
def directory_name(dn):
    """
    Normalizes a directory path and appends the path separator to it.

    Keyword arguments:
    dn -- the directory path

    Returns the normalized path followed by os.sep.

    """
    return os.path.normpath(dn) + os.sep


def convert_rules(rules):
    """
    Converts a list of rule files to a dict of rule files.

    Keyword arguments:
    rules -- list of rule files as fully qualified paths

    Argparse returns a list of files, use this to convert it
    to a dictionary in the format of:
    {'RuleFile0' : '/path/to/first', 'RuleFile1' : '/path/to/second', ... }

    Returns a dictionary of rule files.

    """
    return dict(('RuleFile%s' % i, fn) for i, fn in enumerate(rules))


def create_sub_directories(basedir):
    """
    Create the necessary Bulk sub directories inside
    of a base directory.

    Keyword arguments:
    basedir -- base path to use in creating directories

    Directories that already exist are left alone. Any other failure
    raises OSError.

    """
    for each in ('messages', 'quarantine', 'attachments'):
        # os.path.join supplies the separator when basedir has no
        # trailing one; plain concatenation would yield e.g.
        # '/tmp/bulkmessages'.
        path = directory_name(os.path.join(basedir, each))

        try:
            os.makedirs(path)

        except OSError as e:
            # Only an already existing directory is acceptable
            if e.errno != errno.EEXIST or not os.path.isdir(path):
                raise


def build_processor(module_name, rules=None):
    """
    Imports a module and instantiates a Processor.

    Keyword arguments:
    module_name -- name of the module to import
    rules -- dictionary of rule files

    Exits with status 1 if the module cannot be imported or does not
    define a 'Processor' attribute. Exceptions raised while constructing
    the Processor are propagated to the caller.

    Returns a processor instance.

    """
    try:
        mod = __import__(module_name, fromlist=['processors'])

    except ImportError:
        print('Cannot import %s as processor, exiting!' % module_name)
        sys.exit(1)

    # Look the class up separately from calling it, so an AttributeError
    # raised inside Processor.__init__ is not reported as a missing class.
    processor_class = getattr(mod, 'Processor', None)
    if processor_class is None:
        print("Module %s must define a 'Processor' class. "
              "See README" % module_name)
        sys.exit(1)

    return processor_class(rules)


def timeit(func):
    """
    Simple decorator to time functions.

    Prints the wrapped function's name and its run time in milliseconds
    after every call, then returns the function's result.
    """
    # Local import keeps the module's import block untouched
    import functools

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        t1 = time.time()
        res = func(*args, **kwargs)
        t2 = time.time()
        # __name__ exists on Python 2 and 3; func_name is Python 2 only
        print('%s took %0.3f ms' % (func.__name__, (t2 - t1) * 1000.0))
        return res

    return wrapper
14 | Bulk REQUIRES Python 2.7+ due to its use of argparse. Easy 15 | installation of bulk requires setuptools. 16 | There is no desire at this time to add compatibility for 17 | older versions of Python. 18 | 19 | 1. [PCRE](http://www.pcre.org/) 20 | 1. [Yara](http://plusvic.github.io/yara/) 21 | 1. [Yara-Python](https://github.com/plusvic/yara/tree/master/yara-python) 22 | 1. [setuptools](https://pypi.python.org/pypi/setuptools) 23 | 24 | ## Build & Install 25 | 26 | After installing the dependencies, install bulk by: 27 | 28 | ``` 29 | tar xzvf bulk-version.tar.gz 30 | cd bulk-version 31 | python setup.py build 32 | sudo python setup.py install 33 | ``` 34 | 35 | Bulk scripts, such as bulk_proxy.py, can be placed in non-default locations 36 | by using `--install-scripts /path/you/want` when calling `python setup.py install`. 37 | This is useful if you are managing multiple versions of Python on a machine. 38 | 39 | You can test your installation by invoking Python and trying to import bulk. 40 | 41 | ``` 42 | python 43 | Python 2.7.2 (default, Feb 9 2012, 21:50:01) 44 | [GCC 4.2.1 Compatible Apple Clang 3.0 (tags/Apple/clang-211.12)] on darwin 45 | Type "help", "copyright", "credits" or "license" for more information. 46 | >>> import bulk 47 | >>> 48 | ``` 49 | 50 | # Basic Usage 51 | 52 | Bulk is essentially an SMTP proxy, so you can run it as a listening service. 53 | Otherwise, you can daemonize it in your OS's fashion. Bulk comes with scripts 54 | to be run as services. 
55 | 56 | To run it as a service (with default options): 57 | 58 | `bulk_proxy.py` 59 | 60 | Or to check the help 61 | 62 | ``` 63 | bulk_proxy.py --help 64 | 65 | usage: bulk_proxy.py [-h] [--bind_address BIND_ADDRESS] 66 | [--bind_port BIND_PORT] [--remote_address REMOTE_ADDRESS] 67 | [--remote_port REMOTE_PORT] 68 | [--base_log_directory BASE_LOG_DIRECTORY] 69 | [--log_all_messages] [--block] [--always_block] 70 | [--save_attachments] [--log_config LOG_CONFIG] 71 | --processor PROCESSORS [PROCESSORS ...] 72 | 73 | A content inspecting mail relay built on smtpd 74 | 75 | optional arguments: 76 | -h, --help show this help message and exit 77 | --bind_address BIND_ADDRESS 78 | Address to bind to and listen on for incoming mail. 79 | Default is 127.0.0.1 80 | --bind_port BIND_PORT 81 | Port to bind to and to listen on for incoming mail. 82 | Default is 1025 83 | --remote_address REMOTE_ADDRESS 84 | Remote address to forward outbound mail. Default is 85 | 127.0.0.1 86 | --remote_port REMOTE_PORT 87 | Remote port to forward outbound mail. Default is 25 88 | --base_log_directory BASE_LOG_DIRECTORY 89 | Directory to write log files, messages, and 90 | attachments. Default is /tmp/bulk/ 91 | --log_all_messages Log all messages to /base_log_directory/messages/ 92 | --block Block mail with quarantined attachments. Default is 93 | False 94 | --always_block Turn the proxy into a server (block all). Default is 95 | false 96 | --save_attachments Experimental: Save all attachments as seperate files. 97 | Default is false. 98 | --log_config LOG_CONFIG 99 | Logging config file. Default is /etc/bulk/logging.conf 100 | 101 | required: 102 | --processor PROCESSORS [PROCESSORS ...] 103 | Choose a processing engine by supplying an import 104 | string as the first positional argument and multiple 105 | rules files as optional following arguments. 
For 106 | example: --processor bulk.processors.basic 107 | /etc/bulk/rules/simple 108 | ``` 109 | 110 | # Logging 111 | 112 | Logging is accomplished via [Python's logging module](http://docs.python.org/library/logging.html). 113 | 114 | To configure logging for bulk, use a configuration file. You can 115 | see the default configuration file in `conf/logging.conf`. You can 116 | pass a logging.conf file using the `--log_config` command line option. 117 | 118 | # Contributing 119 | We love to hear from people using our tools and code. 120 | Feel free to discuss issues on our issue tracker and make pull requests! 121 | -------------------------------------------------------------------------------- /scripts/send_email.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | # Copyright (c) 2014 The MITRE Corporation. All rights reserved. 4 | # 5 | # Redistribution and use in source and binary forms, with or without 6 | # modification, are permitted provided that the following conditions 7 | # are met: 8 | # 1. Redistributions of source code must retain the above copyright 9 | # notice, this list of conditions and the following disclaimer. 10 | # 2. Redistributions in binary form must reproduce the above copyright 11 | # notice, this list of conditions and the following disclaimer in the 12 | # documentation and/or other materials provided with the distribution. 13 | # 14 | # THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 | # ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 | # IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 | # ARE DISCLAIMED. 
# IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

import os
import sys
import argparse
import smtplib
import email
import email.utils
# The lowercase email.mime.* / email.encoders paths exist on Python 2.5+
# and Python 3; the old email.MIMEMultipart style is Python 2 only.
from email.mime.multipart import MIMEMultipart
from email.mime.base import MIMEBase
from email.mime.text import MIMEText
from email.utils import formatdate
from email import encoders


def build_message(send_from, send_to, subject, text, files=None):
    """
    Build the MIME message that send_mail() transmits.

    Keyword arguments:
    send_from -- sender's email address
    send_to -- recipient's email address
    subject -- email subject
    text -- email text body
    files -- list of paths to attach (optional)

    Returns a MIMEMultipart object: the text body first, followed by one
    base64 encoded application/octet-stream part per attachment.

    """
    msg = MIMEMultipart()
    msg['From'] = send_from
    msg['To'] = send_to
    msg['Date'] = formatdate(localtime=True)
    msg['Subject'] = subject

    msg.attach(MIMEText(text))

    for path in files or []:
        part = MIMEBase('application', "octet-stream")
        # Use a context manager so the attachment handle is always closed.
        with open(path, "rb") as handle:
            part.set_payload(handle.read())
        encoders.encode_base64(part)
        part.add_header('Content-Disposition',
                        'attachment; filename="%s"' % os.path.basename(path))
        msg.attach(part)

    return msg


def send_mail(send_from, send_to, subject, text,
              files=None, server="localhost", port=1024):
    """
    Send an email.

    Keyword arguments:
    send_from -- sender's email address
    send_to -- recipient's email address
    subject -- email subject
    text -- email text body
    files -- list (or tuple) of attachment paths; defaults to none
    server -- email server to use
    port -- server's listening port

    Raises TypeError when files is not a list or tuple (for example a
    bare path string, which would otherwise be iterated per character).

    NOTE(review): the default port here (1024) differs from the command
    line default below (1025); it is left unchanged for compatibility.

    """
    if files is None:
        files = []

    # Explicit validation instead of assert, which is stripped under -O.
    if not isinstance(files, (list, tuple)):
        raise TypeError('files must be a list of paths, got %r' % (files,))

    msg = build_message(send_from, send_to, subject, text, files)

    smtp = smtplib.SMTP(server, port)
    try:
        smtp.sendmail(send_from, send_to, msg.as_string())
        smtp.quit()

    finally:
        # Always release the socket, even when sending fails.
        smtp.close()


def parse_args(argv=None):
    """
    Parse the command line options of this script.

    Keyword arguments:
    argv -- argument list to parse; defaults to sys.argv[1:]

    Returns the populated argparse namespace.

    """
    parser = argparse.ArgumentParser(
        description='Send a test email through an SMTP server')

    parser.add_argument(
        '--server',
        default='127.0.0.1',
        type=str,
        help='Address of remote server. Default is 127.0.0.1'
    )

    parser.add_argument(
        '--port',
        default=1025,
        type=int,
        help='Remote port to connect to to send mail. Default is 1025'
    )

    parser.add_argument(
        '--to',
        default='example@example.com',
        type=str,
        help='Recipient for email, default is example@example.com'
    )

    # 'from' is a Python keyword, so the value needs an explicit dest.
    parser.add_argument(
        '--from',
        default='example@example.com',
        type=str,
        dest='sender',
        help='Sender for email, default is example@example.com'
    )

    parser.add_argument(
        '--subject',
        default='Test message',
        type=str,
        help='Subject for message, default is "Test message"'
    )

    parser.add_argument(
        '--text',
        default='This is a short email',
        type=str,
        help='Main message body. Default is "This is a short email"'
    )

    parser.add_argument(
        '--text_file',
        type=str,
        dest='textfile',
        help='Read email body from a text file'
    )

    parser.add_argument(
        '--attachment',
        type=str,
        help='Path to file to use as attachment'
    )

    return parser.parse_args(argv)


def main(argv=None):
    """
    Main

    Builds the message described by the command line and sends it.
    Exits with status 1 when --text_file cannot be found or read.

    """
    args = parse_args(argv)
    send_to = email.utils.formataddr(('Recipient', args.to))
    send_from = email.utils.formataddr(('Author', args.sender))

    files = [args.attachment] if args.attachment else []

    text = args.text
    if args.textfile:
        if not os.path.isfile(args.textfile):
            print('Cannot find text file %s, exiting' % args.textfile)
            sys.exit(1)

        try:
            with open(args.textfile, 'r') as f:
                text = f.read()

        except IOError:
            print('Cannot open and read text file %s, '
                  'exiting' % args.textfile)
            sys.exit(1)

    print('Trying to send mail...')
    send_mail(send_from, send_to, args.subject, text,
              files, args.server, args.port)
    print('Mail successfully sent!')


if __name__ == "__main__":
    main()

# --------------------------------------------------------------------------------
# /bulk/proxy.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2014 The MITRE Corporation. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# Standard Imports
import os
import smtpd
import logging
import hashlib

# Bulk Imports
from bulk import message


class BulkProxy(smtpd.PureProxy):
    """
    A simple message inspecting SMTP proxy.
    """

    def __init__(self, localaddress, remoteaddress, processors, **kwargs):
        """
        Default initializer.

        Sets up logging and the processing engines.

        Keyword arguments:
        localaddress -- (host, port) tuple to listen on
        remoteaddress -- (host, port) tuple to forward mail to
        processors -- iterable of engines exposing match(contents)

        Optional keyword arguments:
        base_directory -- root for stored data, default '/tmp/'
        block -- hold mail until analysed and drop matches, default False
        always_block -- never forward any mail upstream, default False
        save_attachments -- write every attachment to disk, default False
        log -- store every message for later analysis, default False

        """
        # Get a handle to the bulk logger
        self.logger = logging.getLogger('bulk')
        # Basic private variables
        self._localaddress = localaddress
        self._remoteaddress = remoteaddress
        # Processors are the active analysis engines; every one of them
        # is asked about every attachment.
        self._processors = processors

        # Optional keyword arguments
        self._basedir = kwargs.get('base_directory', '/tmp/')
        self._block = kwargs.get('block', False)
        # This flag changes functionality from a proxy to a server
        # I.E. no forwarding messages upstream, ever
        self._always_block = kwargs.get('always_block', False)
        # Do we want to save attachments?
        self._save_attachments = kwargs.get('save_attachments', False)
        # Do we want to store ALL messages for later analysis?
        self._log = kwargs.get('log', False)
        # These paths are hardcoded and based off the base directory.
        # os.path.join with a trailing '' yields a path ending in the
        # separator whether or not base_directory itself ends in one; the
        # message module appends file names directly to these strings.
        self._quarantine_directory = os.path.join(self._basedir,
                                                  'quarantine', '')
        self._message_directory = os.path.join(self._basedir,
                                               'messages', '')
        self._attachment_directory = os.path.join(self._basedir,
                                                  'attachments', '')

        # Call the base class
        smtpd.PureProxy.__init__(self, localaddress, remoteaddress)

    def process_message(self, peer, mailfrom, rcpttos, data):
        """
        process_message is called once per incoming message/email.

        Keyword arguments:
        peer -- tuple containing (ipaddr, port) of the client that made the
        socket connection to our smtp port.

        mailfrom -- raw address the client claims the message is coming
        from.

        rcpttos -- list of raw addresses the client wishes to deliver the
        message to.

        data -- string containing the entire full text of the message,
        headers (if supplied) and all. It has been `de-transparencied'
        according to RFC 821, Section 4.5.2. In other words, a line
        containing a `.' followed by other text has had the leading dot
        removed.

        Messages are handed over to a 'processor(s)' which uses yara or
        another engine to analyze the email attachments.
        """
        # Do some logging
        self.logger.info('Messaged received; From: %s; To: %s'
                         % (str(mailfrom), str(rcpttos)))

        msg = message.Message(peer, mailfrom, rcpttos, data)

        # Do we want to log all? Usually no
        if self._log:
            msg.save(self._message_directory)

        # If we don't want to block emails EVER, then
        # we send them along first, then analyze later
        if not self._block:
            self.deliver_message(peer, mailfrom, rcpttos, data)

        clean = self._attachments_are_clean(msg, mailfrom, rcpttos)

        # Once looking at all attachments, we can decide to deliver or not
        if clean:
            self.logger.info('Message clean; From: %s; To: %s'
                             % (str(mailfrom), str(rcpttos)))

            # In blocking mode delivery was postponed until now.
            if self._block:
                self.deliver_message(peer, mailfrom, rcpttos, data)

        else:
            if self._block:
                self.logger.info('Message blocked; From: %s; To: %s'
                                 % (str(mailfrom), str(rcpttos)))

            # Matches are quarantined whether or not they were blocked.
            msg.save(self._quarantine_directory)

        # Do we want to save attachments?
        if self._save_attachments:
            msg.save_attachments(self._attachment_directory)

    def _attachments_are_clean(self, msg, mailfrom, rcpttos):
        """
        Run every processor over every attachment of msg.

        Returns True when no processor matched any attachment.
        """
        # Pull the attachments out of the message
        attachment_names, attachment_contents = msg.get_attachments()

        clean = True
        for filename, contents in zip(attachment_names, attachment_contents):
            if contents is None:
                # get_payload(decode=True) yields None for container parts
                # (e.g. an attached message/rfc822); there are no bytes to
                # hash or scan, and hashlib.md5(None) would raise.
                self.logger.info('Skipping attachment without payload; '
                                 'Name: %s' % filename)
                continue

            md5 = hashlib.md5(contents).hexdigest()
            self.logger.info('Analyzing attachment; From: %s; To: %s; '
                             'Name: %s; MD5:%s' % (str(mailfrom),
                                                   str(rcpttos),
                                                   filename, md5))

            # Every processor is consulted; a single match marks the
            # whole message as dirty.
            for processor in self._processors:
                if processor.match(contents):
                    clean = False

        return clean

    def deliver_message(self, peer, mailfrom, rcpttos, data):
        """
        Delivers a message to final destination if allowed
        """
        if not self._always_block:
            self.logger.info('Sending message; From: %s; To: %s'
                             % (str(mailfrom), str(rcpttos)))
            self._deliver(mailfrom, rcpttos, data)

# --------------------------------------------------------------------------------
# /bulk/message.py:
# --------------------------------------------------------------------------------
# Copyright (c) 2014 The MITRE Corporation. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1. Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED.
# IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# Standard Imports
import os
import re
import uuid
import email
import datetime
import logging
import hashlib
import pickle

# Characters allowed to survive when a sender address becomes part of a
# file name. Everything else (notably path separators) is replaced.
_UNSAFE_FILENAME_CHARS = re.compile(r'[^A-Za-z0-9@._+=-]')


class Message(object):
    """
    A simple wrapper to access email data.
    """

    def __init__(self, peer, mailfrom, rcpttos, data):
        """
        Default initializer

        Sets up logging and parses pieces of the message.

        Keyword arguments:
        peer -- (ipaddr, port) of the connecting client
        mailfrom -- raw sender address claimed by the client
        rcpttos -- list of raw recipient addresses
        data -- full text of the message, headers included

        """
        self.logger = logging.getLogger('bulk')

        self._peer = peer
        self._mailfrom = mailfrom
        self._rcpttos = rcpttos
        self._data = data

        self._parsed_message = email.message_from_string(data)

        # Attachments, filled in by get_attachments()
        self._attachment_names = []
        self._attachment_contents = []

    def __str__(self):
        """
        A nice way to print a message.
        """
        s = 'BULKMSG: Received message from  : %s\n' % str(self._peer)
        s += 'BULKMSG: Message addressed from: %s\n' % str(self._mailfrom)
        s += 'BULKMSG: Message addressed to  : %s\n' % str(self._rcpttos)
        s += 'BULKMSG: Message length        : %s\n' % str(len(self._data))
        s += 'BULKMSG: Original message seen below\n'
        s += self._data

        return s

    def _log_write_failure(self, path):
        """
        Log that path could not be written.
        """
        self.logger.error('Cannot write to %s' % path)
        # Adjacent literals instead of a backslash inside the string,
        # which used to embed the source indentation in the log line.
        self.logger.error('Ensure the directory exists '
                          'and permissions are correct')

    def save(self, location):
        """
        Writes the message to a unique file in the
        supplied location.

        Keyword arguments:
        location -- path to write message to

        Write failures are logged, not raised.

        """
        fn = self.get_unique_filepath(location)
        self.logger.info('Saving message to %s' % fn)

        # The file is opened in binary mode, so hand it bytes. On
        # Python 2 str already is bytes and is written unchanged.
        payload = str(self)
        if not isinstance(payload, bytes):
            payload = payload.encode('utf-8', 'replace')

        try:
            with open(fn, 'wb') as f:
                f.write(payload)

        except IOError:
            self._log_write_failure(fn)

    def get_unique_filepath(self, basepath):
        """
        Given a base directory path, return a unique filename.

        Keyword arguments:
        basepath -- base directory path to append to

        The name is made of the sender address, a timestamp and a UUID.
        The sender address comes from the SMTP client and is untrusted,
        so any character outside [A-Za-z0-9@._+=-] is replaced by '_';
        a sender such as '../../x' can therefore not leave basepath.

        """
        # Don't print '<>' when there is no sender
        if self._mailfrom == '<>':
            mailfrom = 'None'

        else:
            mailfrom = _UNSAFE_FILENAME_CHARS.sub('_', str(self._mailfrom))

        fn = basepath
        fn += '%s' % mailfrom
        fn += datetime.datetime.now().strftime('_%Y-%m-%d_%H-%M-%S_')
        fn += str(uuid.uuid4())

        return fn

    def get_attachments(self):
        """
        Pull attachments from a message.

        Returns two lists, one of the attachment names
        and another of the attachment contents.

        Container parts (for instance an attached message/rfc822) carry
        no payload of their own and are skipped; the parts nested inside
        them are still visited by the walk.

        """

        attachment_names = []
        attachment_contents = []

        for part in self._parsed_message.walk():
            attachment_name = part.get_filename()

            if not attachment_name:
                continue

            if part.is_multipart():
                # get_payload(decode=True) returns None here, which
                # would later break hashing and writing.
                self.logger.debug('Skipping container part; Name: %s'
                                  % attachment_name)
                continue

            self.logger.info('Found attachment; Name: %s' % attachment_name)
            attachment_names.append(attachment_name)
            attachment_contents.append(part.get_payload(decode=True))

        self._attachment_names = attachment_names
        self._attachment_contents = attachment_contents
        return (attachment_names, attachment_contents)

    def save_attachments(self, location):
        """
        Save attachments and report to a directory.

        Keyword arguments:
        location -- directory to save attachments to

        Attachments are saved to <md5>.file.
        Reported info is saved to <md5>.pkl.

        Write failures are logged, not raised; when an attachment cannot
        be written its report is skipped as well.
        """
        self.logger.debug('Saving attachments to disk')
        # In case the attachments have not been
        # populated, we can try ourselves.
        if not self._attachment_names:
            self.logger.debug('No attachments found, trying to parse them now')
            self.get_attachments()

        for filename, contents in zip(self._attachment_names,
                                      self._attachment_contents):
            md5 = hashlib.md5(contents).hexdigest()
            report = location + md5 + '.pkl'
            fn = location + md5 + '.file'

            try:
                with open(fn, 'wb') as f:
                    f.write(contents)

            except IOError:
                self._log_write_failure(fn)
                # Nothing on disk to describe; asking the file system
                # for its size would only raise.
                continue

            # Going to pickle this data to disk for now.
            # NOTE(review): the report holds attacker-influenced strings;
            # only unpickle these files from a trusted directory.
            content = {}
            content['name'] = filename
            content['size'] = str(len(contents))
            content['from'] = str(self._peer)
            content['mailed_from'] = str(self._mailfrom)
            content['to'] = str(self._rcpttos)
            content['email'] = str(self)
            content['attachment'] = fn
            content['md5'] = md5

            try:
                with open(report, 'wb') as f:
                    pickle.dump(content, f)

            except IOError:
                self._log_write_failure(report)

# --------------------------------------------------------------------------------
# /scripts/bulk_proxy.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python

# Copyright (c) 2014 The MITRE Corporation. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# 1.
# Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
# 2. Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#
# THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
# ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
# ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
# FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
# DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
# OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
# HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
# LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
# OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
# SUCH DAMAGE.

# Standard Imports
import re
import os
import errno
import argparse
import asyncore
import logging
import logging.config
import tempfile

# Bulk Imports
from bulk.proxy import BulkProxy
from bulk.helpers import *

# Dotted-quad IPv4 address, each octet 0-255. Raw strings keep '\.' from
# being treated as a (deprecated) string escape.
_VALID_IPV4 = re.compile(
    r"^(([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])\.){3}"
    r"([0-9]|[1-9][0-9]|1[0-9]{2}|2[0-4][0-9]|25[0-5])$")


class CreateProcessor(argparse.Action):
    """
    A custom argparse action.

    Instantiates a processing engine to be used in Bulk
    and appends it to the list of actively used processing
    engines.

    """

    def __call__(self, parser, namespace, values, option_string=None):
        """
        Instantiate a processing engine and append it to the active set.

        values holds the import string of the processor module followed
        by zero or more rules files.

        Raises IOError (errno ENOENT or EACCES, filename set) when a
        rules file is missing or unreadable.
        """
        module_name = values[0]
        rule_files = values[1:]

        # check the rules files
        for fn in rule_files:
            if not os.path.isfile(fn):
                raise IOError(errno.ENOENT, 'Cannot find rules file.', fn)

            try:
                with open(fn):
                    pass

            except IOError:
                # Three positional arguments populate errno, strerror
                # and filename; a single tuple argument would not.
                raise IOError(errno.EACCES,
                              'Cannot open and read rules file.', fn)

        # Work on a copy so the list passed as the argparse default is
        # never mutated in place.
        current_processors = list(getattr(namespace, self.dest, None) or [])
        current_processors.append(build_processor(module_name,
                                                  convert_rules(rule_files)))
        setattr(namespace, self.dest, current_processors)


def setup_logging(config):
    """
    Configure logging for Bulk.

    Keyword arguments:
    config -- path to logging config file

    When configuration fails because the directory of a log file does
    not exist, the directory is created and configuration is retried.
    Any other failure is reported on stdout and re-raised.

    Returns a logger.

    """
    done = False
    while not done:
        try:
            logging.config.fileConfig(config)

        except IOError as e:
            if e.args[0] == errno.ENOENT and e.filename:

                print("The full path to the log file (%s) does not exist!"
                      " Trying to recover." % e.filename)
                fp = os.path.dirname(e.filename)

                if not os.path.exists(fp):
                    os.makedirs(fp)

                else:
                    # The directory is there, so retrying can not help.
                    print("Failed to setup logging, exiting.")
                    raise

            else:
                print("Failed to setup logging,"
                      " check permissions on log file.")
                raise

        except Exception:
            print("Something went wrong with the logging setup!")
            raise

        else:
            done = True

    logger = logging.getLogger('bulk')
    return logger


def validate_arguments(args):
    """
    Validate command line arguments.

    Keyword arguments:
    args -- a populated argument namespace from argparse

    Checks that both addresses are IPv4 dotted quads, that the logging
    config file can be read and that the base log directory is writable
    (it is created through create_sub_directories when missing).

    Returns error messages, or none upon success.

    """

    # Check the IP addresses first
    if not _VALID_IPV4.match(args.remote_address):
        return 'remote_address parameter must be in IPv4 format' \
               ' (Ex. 127.0.0.1)'

    if not _VALID_IPV4.match(args.bind_address):
        return 'bind_address parameter must be in IPv4 format (Ex. 127.0.0.1)'

    # Then check the logging config file
    if not os.path.isfile(args.log_config):
        return 'Cannot find the logging config file "%s",' \
               ' exiting' % args.log_config

    try:
        with open(args.log_config):
            pass

    except IOError:
        return 'Cannot open and read logging config file "%s",' \
               ' exiting' % args.log_config

    # Then check the directories
    # filter(None, list) simply returns all non-none entities
    for directory in filter(None, [args.base_log_directory]):
        if not os.path.isdir(directory):
            create_sub_directories(directory)
            continue

        # Probe write access with an anonymous temporary file: it can not
        # clobber an existing 'test.txt', cleans up after itself and does
        # not depend on the directory ending in a separator.
        try:
            with tempfile.TemporaryFile(dir=directory) as probe:
                probe.write(b'Testing write access')

        except (IOError, OSError):
            return 'Cannot write to directory "%s", exiting' % directory

    # Don't return an error string if we made it here
    return None


def run(argv=None):
    """
    Start Bulk.

    Handles all command line arguments, logging setup,
    and kicking off the network listener.

    Keyword arguments:
    argv -- argument list to parse; defaults to sys.argv[1:]

    Raises Exception carrying the validation message when the
    arguments are invalid.

    """
    parser = argparse.ArgumentParser(
        description='A content inspecting mail relay built on smtpd')

    parser.add_argument(
        '--bind_address',
        default='127.0.0.1',
        help='Address to bind to and listen on for incoming mail. '
             'Default is 127.0.0.1'
    )

    parser.add_argument(
        '--bind_port',
        default=1025,
        type=int,
        help='Port to bind to and to listen on for incoming mail. '
             'Default is 1025'
    )

    parser.add_argument(
        '--remote_address',
        default='127.0.0.1',
        help='Remote address to forward outbound mail. '
             'Default is 127.0.0.1'
    )

    parser.add_argument(
        '--remote_port',
        default=25,
        type=int,
        help='Remote port to forward outbound mail. Default is 25'
    )

    # Note that type can be a function
    parser.add_argument(
        '--base_log_directory',
        default='/tmp/bulk/',
        type=directory_name,
        help='Directory to write log files, messages, and attachments. '
             'Default is /tmp/bulk/'
    )

    parser.add_argument(
        '--log_all_messages',
        action='store_true',
        help='Log all messages to /base_log_directory/messages/'
    )

    parser.add_argument(
        '--block',
        action='store_true',
        help='Block mail with quarantined attachments. Default is False'
    )

    parser.add_argument(
        '--always_block',
        action='store_true',
        help='Turn the proxy into a server (block all). Default is false'
    )

    parser.add_argument(
        '--save_attachments',
        action='store_true',
        help='Experimental: Save all attachments as seperate files. '
             'Default is false.'
    )

    parser.add_argument(
        '--log_config',
        default='/etc/bulk/logging.conf',
        help='Logging config file. Default is /etc/bulk/logging.conf'
    )

    # add a group to mark certain arguments as required
    req = parser.add_argument_group('required')
    # the processor arg is the only required argument
    req.add_argument(
        '--processor',
        default=[],
        required=True,
        nargs='+',
        action=CreateProcessor,
        dest='processors',
        help='Choose a processing engine by supplying an import string as '
             'the first positional argument and multiple rules files as '
             'optional following arguments. For example: '
             '--processor bulk.processors.basic /etc/bulk/rules/simple'
    )

    args = parser.parse_args(argv)
    err = validate_arguments(args)

    if err:
        raise Exception(err)

    create_sub_directories(args.base_log_directory)

    # Setup logging
    logger = setup_logging(args.log_config)
    logger.info('Starting Bulk Proxy')

    logger.info('Listening on %s:%s', args.bind_address, args.bind_port)

    if not args.always_block:
        logger.info('Forwarding to %s:%s',
                    args.remote_address, args.remote_port)

    logger.info('Bulk matches will be logged to %squarantine/',
                args.base_log_directory)

    if args.block:
        logger.info('Emails that match a processor rule will be BLOCKED')

    if args.always_block:
        logger.info('Bulk set to BLOCK ALL mail')

    if args.log_all_messages:
        logger.info('Logging ALL messages to %smessages/',
                    args.base_log_directory)

    if args.save_attachments:
        logger.info('Saving attachments to %sattachments/',
                    args.base_log_directory)

    for p in args.processors:
        logger.info('Bulk using %s', p)

    # Constructing the proxy registers its listening socket with
    # asyncore; the name is kept only to make that explicit.
    server = BulkProxy((args.bind_address, args.bind_port),
                       (args.remote_address, args.remote_port),
                       args.processors,
                       base_directory=args.base_log_directory,
                       block=args.block,
                       always_block=args.always_block,
                       log=args.log_all_messages,
                       save_attachments=args.save_attachments)

    # Kick off the main process
    asyncore.loop()


def stop():
    """
    Responsible for safely stopping Bulk.
    """

    logger = logging.getLogger('bulk')
    logger.info('Received a keyboard interrupt, stopping Bulk')


if __name__ == '__main__':
    try:
        run()

    except KeyboardInterrupt:
        stop()

# --------------------------------------------------------------------------------