├── README.md ├── conf ├── keywords.cfg ├── reactor.cfg └── sources.cfg ├── console.py ├── data └── placeholder ├── docs ├── howto ├── install ├── license ├── notes └── readme ├── launch.py ├── lib ├── __init__.py ├── __init__.pyc ├── dispatch.py ├── exploits.py ├── facebook.py ├── kippo.py ├── knownbad.py ├── otx.py ├── pastebin.py ├── pastebin.pyc ├── reactor.py ├── reactor.pyc └── reddit.py └── standalone ├── otx-esm.py └── pb_scrape.py /README.md: -------------------------------------------------------------------------------- 1 | [![SayThanks](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg?style=flat)](https://saythanks.io/to/deadbits) [![Donate](https://img.shields.io/badge/donate-BTC-blue.svg?style=flat)](https://www.coinbase.com/deadbits) 2 | 3 | # ArcReactor 4 | ArcReactor is a free, open-source application used to collect OSINT (Open-Source Intelligence) data 5 | and send this data to a SIEM, such as ArcSight ESM or Splunk. This application is meant to be used by 6 | security analysts and/or engineers who want to expand and complement their event correlation, attack 7 | prediction and overall threat insight by making use of open-source sources such as known bad host lists, 8 | honeypot information, exploit and vulnerability trackers, and social media. 9 | 10 | **Full documentation is available in the `docs` directory.** 11 | 12 | ## Important Update 13 | This project is no longer maintained and has not been updated in many years. 14 | When it was originally created, there was a huge lack of these types of automated collection systems but now that gap has been filled by others who have done great work (_cough_ IntelMQ _cough_). 15 | I hope that in some small way this project, among others, started the path to the development of these newer and more robust OSINT collection systems. I want to thank everyone who has shown interest in this project over the years. 
Forks, stars, and watches are a big motivation to continue development on similar projects, and open source development in general. 16 | 17 | Thank you! :) 18 | -------------------------------------------------------------------------------- /conf/keywords.cfg: -------------------------------------------------------------------------------- 1 | ##################################################### 2 | # Keywords used within social media modules # 3 | ##################################################### 4 | # 5 | # This list should contain specific keywords you wish 6 | # to keep an eye out for. Try to make this list as 7 | # specific to your needs as possible. Using words like 8 | # 'password' or 'hack' will turn up dozens and dozens 9 | # of results, and most likely none of them will pertain 10 | # to you or your company. 11 | # This is also good for finding information that might 12 | # have been inadvertently leaked by employees. Often times 13 | # employees will use public services like pastebin, stack- 14 | # exchange, etc to trouble-shoot problems and in the process 15 | # publish internal or confidential information 16 | # Try things like: '@corp-domain.com', internal hostname schemes, 17 | # intranet domain names, etc. 18 | # 19 | # The keywords below are just some standard examples. Change em! 20 | # 21 | 22 | Anonymous 23 | antisec 24 | data leak 25 | sql injection 26 | password 27 | hacked 28 | sql dump 29 | zeroday 30 | exploit 31 | 32 | -------------------------------------------------------------------------------- /conf/reactor.cfg: -------------------------------------------------------------------------------- 1 | 2 | ########################################## 3 | # SIEM / Syslog receiver configuration # 4 | ########################################## 5 | # 6 | # Define settings for your SIEM connector here. 7 | # Host and port are obvious. 
The 'name' option 8 | # is used as an identifier only and serves no 9 | # purpose other than keeping track of what's being 10 | # sent where. Eventually this will be expanded to 11 | # allow for multiple connectors and sending collection 12 | # module results to specific connectors. For now, we 13 | # only have the one. 14 | # 15 | 16 | [syslog] 17 | name = ArcSight 18 | host = 192.168.56.101 19 | port = 7771 20 | #max = 1500 ; max events per second before connector starts dropping 21 | -------------------------------------------------------------------------------- /conf/sources.cfg: -------------------------------------------------------------------------------- 1 | # The majority of this list is from the arcosi / badharvest script. 2 | # I had a much larger list that I built upon but I accidentally deleted it :( 3 | # I haven't checked all of these sources in some time so a few might 4 | # be dead or moved at this point. I will be expanding on this list in 5 | # the very near future. It will be split up into domains, ips, proxies, 6 | # tor nodes and 'other' 7 | # for now, they are all classified as 'known malicious host' during the 8 | # syslog events. 
9 | # 10 | 11 | http://www.mtc.sri.com/live_data/attackers/ 12 | http://isc.sans.edu/reports.html 13 | https://zeustracker.abuse.ch/blocklist.php?download=ipblocklist 14 | https://spyeyetracker.abuse.ch/blocklist.php?download=ipblocklist 15 | http://www.projecthoneypot.org/list_of_ips.php 16 | http://intel.martincyber.com/ip 17 | https://www.openbl.org/lists/base.txt 18 | http://www.blocklist.de/lists/ssh.txt 19 | https://palevotracker.abuse.ch/ 20 | http://www.malwaregroup.com/ipaddresses 21 | http://www.ciarmy.com/list/ci-badguys.txt 22 | http://rules.emergingthreats.net/blockrules/rbn-malvertisers-ips.txt 23 | https://secure.mayhemiclabs.com/malhosts/malhosts.txt 24 | https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist 25 | https://spyeyetracker.abuse.ch/blocklist.php?download=domainblocklist 26 | http://mirror1.malwaredomains.com/files/BOOT 27 | http://www.malwaredomainlist.com/hostslist/hosts.txt 28 | http://www.malware.com.br/cgi/submit?action=list 29 | http://www.malwarepatrol.net/cgi/submit?action=list_xml 30 | -------------------------------------------------------------------------------- /console.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # part of ArcReactor [version 1.0] 4 | # https://github.com/ohdae/arcreactor 5 | # 6 | # Interactive console. 
7 | # this module handles all interactive console sessions 8 | # here we provide the basic console functionality such 9 | # as command input & output, editing configuration files, 10 | # preliminary command verification and sending commands 11 | # to the dispatch module 12 | # 13 | # TODO: 14 | # - config edit commands (configparser) 15 | # - fix keyword/source loading issues 16 | # - rewrite cmd verification and dispatch interaction 17 | # - create better environment check before starting new session 18 | 19 | from lib import reactor 20 | from lib import dispatch 21 | import os, sys 22 | import readline 23 | import signal 24 | import commands 25 | 26 | readline.parse_and_bind('tab: complete') 27 | keywords = [] 28 | sources = [] 29 | options = {} 30 | prompt = 'reactor >> ' 31 | help = { 32 | 'general': { 33 | 'help': 'display this menu', 34 | 'quit': 'exit the console', 35 | 'about': 'display about dialog', 36 | 'exec': 'execute os command', 37 | 'modules': 'show description of all modules', 38 | 'keywords': 'show current watchlist keywords' 39 | }, 40 | 'configuration': { 41 | 'cfg syslog': 'manage siem and syslog settings', 42 | 'cfg sources': 'manage external sources', 43 | 'cfg keywords': 'manage watchlist keywords' 44 | }, 45 | 'statistics': { 46 | 'info tasks': 'view stats on running and queued jobs', 47 | 'info reactor': 'view general ArcReactor stats', 48 | 'data ': 'view information on data collected by module' 49 | }, 50 | 'collection': { 51 | 'start all': 'start all collection modules', 52 | #'stop all': 'stop all running collection modules', 53 | 'start ': 'launch selected module', 54 | #'stop ': 'stop selected module', 55 | 'dashboard': 'start the web dashboard [experimental]' 56 | } 57 | } 58 | 59 | 60 | class Completer: 61 | def __init__(self): 62 | self.words = [ 'help', 'quit', 'exit', 'about', 'clear', 'cfg', 'sources', 'keywords', 'syslog', 'modules', 63 | 'start', 'stop', 'all', 'info', 'data', 'reactor', 'tasks', 'dashboard', 'pastebin', 
'otx', 'exploits', 'twitter' ] 64 | self.prefix = '' 65 | 66 | def complete(self, prefix, index): 67 | if prefix != self.prefix: 68 | self.matching_words = [w for w in cmds if w.startswith(prefix)] 69 | self.prefix = prefix 70 | else: pass 71 | try: 72 | return self.matching_words[index] 73 | except IndexError: 74 | return None 75 | 76 | class Session(object): 77 | def __init__(self): 78 | signal.signal(signal.SIGINT, reactor.signal_handler) 79 | 80 | def new(self): 81 | """ 82 | Initializes a new console session 83 | 84 | Perform some simple environment checks to ensure that we can properly 85 | start a new interactive session, load needed configuration files and if 86 | these pass, we start our console. 87 | 88 | """ 89 | reactor.status('info', 'arcreactor', 'initializing new console session') 90 | reactor.status('info', 'arcreactor', 'loading configuration files') 91 | keywords = reactor.load_keywords(reactor.PATH_CONF+'/keywords.cfg') 92 | sources = reactor.load_sources(reactor.PATH_CONF+'/sources.cfg') 93 | options = reactor.load_config(reactor.PATH_CONF+'/reactor.cfg') 94 | self.console() 95 | 96 | def kill_session(self): 97 | if len(dispatch.job_stats) > 0: 98 | print('[*] %d jobs are still running.' & len(dispatch.job_stats)) 99 | print('are you sure you want to exit?') 100 | self.answer = raw_input('[y/n]') 101 | if self.answer == 'y': 102 | pass 103 | elif self.answer == 'n': 104 | print('[*] returning to ArcReactor console') 105 | self.console() 106 | else: 107 | print('[!] invalid answer. returning to ArcReactor console.') 108 | self.console() 109 | reactor.status('info', 'arcreactor', 'shutting down ArcReactor console') 110 | sys.exit(0) 111 | 112 | def pre_command(self, cmd): 113 | """ 114 | Handle basic functions before we send command to dispatch. 
115 | 116 | Several commands do not need to be sent to dispatch to be executed, 117 | so we take care of the more basic/static commands here and only send 118 | the more actionable commands to the dispatch module. 119 | 120 | """ 121 | if cmd == 'quit' or cmd == 'exit': 122 | self.kill_session() 123 | elif cmd == 'help': 124 | print('\n\t general') 125 | for self.c, self.i in help['general'].iteritems(): 126 | print('{0:12} \t {1:26}'.format(self.c, self.i)) 127 | print('\n\t configuration') 128 | for self.c, self.i in help['configuration'].iteritems(): 129 | print('{0:12} \t {1:26}'.format(self.c, self.i)) 130 | print('\n\t statistics') 131 | for self.c, self.i in help['statistics'].iteritems(): 132 | print('{0:12} \t {1:26}'.format(self.c, self.i)) 133 | print('\n\t collection') 134 | for self.c, self.i in help['collection'].iteritems(): 135 | print('{0:12} \t {1:26}'.format(self.c, self.i)) 136 | elif cmd == 'clear': 137 | os.system('clear') 138 | elif cmd.startswith('exec'): 139 | self.exec_output = commands.getoutput(' '.join(cmd.split(' ')[1:])) 140 | print self.exec_output 141 | elif cmd == 'keywords': 142 | if len(keywords) > 0: 143 | print('\nWatch-List Keywords') 144 | for self.word in keywords: 145 | print self.word 146 | else: 147 | print('[*] keyword list is empty') 148 | elif cmd == 'sources': 149 | if len(sources) > 0: 150 | print('\nExternal Sources') 151 | for self.src in sources: 152 | print self.src 153 | else: 154 | print('[*] source list is empty') 155 | elif cmd == 'modules': 156 | print('\nAvailable Collection Modules') 157 | for self.mod_name, self.mod_info in reactor.modules.iteritems(): 158 | print('%s\t\t%s' % (self.mod_name, self.mod_info)) 159 | else: 160 | dispatch.receive(cmd) 161 | 162 | def check_command(self, args): 163 | try: 164 | cmd, arg = args.split(' ') 165 | except: 166 | cmd = args 167 | 168 | for title in help.keys(): 169 | if cmd in help[title].keys(): 170 | return True 171 | return False 172 | 173 | def console(self): 
174 | print reactor.ascii 175 | print('Welcome to the ArcReactor console!') 176 | print('type `help` to get started\n') 177 | 178 | while True: 179 | completer = Completer() 180 | readline.set_completer(completer.complete) 181 | cmd = raw_input(prompt) 182 | 183 | if self.check_command(cmd): 184 | self.pre_command(cmd) 185 | 186 | 187 | 188 | 189 | 190 | 191 | 192 | 193 | -------------------------------------------------------------------------------- /data/placeholder: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/data/placeholder -------------------------------------------------------------------------------- /docs/howto: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/docs/howto -------------------------------------------------------------------------------- /docs/install: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/docs/install -------------------------------------------------------------------------------- /docs/license: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/docs/license -------------------------------------------------------------------------------- /docs/notes: -------------------------------------------------------------------------------- 1 | Development Notes 2 | ================= 3 | 4 | 5 | Console Commands => 6 | 'help': 'display this menu', 7 | 'quit': 'exit the console', 8 | 'exit': 'exit the console', 9 | 'exec': 'execute os command', 10 | 'about': 'display basic information', 11 | 'clear': 'clears the screen', 12 | 'config 
sources': 'manage osint sources', 13 | 'config keywords': 'manage your keywords', 14 | 'config syslog': 'manage siem syslog settings', 15 | 'modules': 'list all collection module information', 16 | 'keywords': 'show loaded keywords', 17 | 'start all': 'start all available modules', 18 | 'stop all': 'stop all running modules', 19 | 'start ': 'launch the selected module', 20 | 'stop ': 'stop the selected module', 21 | 'info tasks': 'view stats on running and queued tasks', 22 | 'info reactor': 'view general ArcReactor stats', 23 | 'data ': 'view information on data collected by module', 24 | 'dashboard': 'launch web dashboard [experimental]' 25 | 26 | ===================================================================================== 27 | 28 | def processor(): 29 | if queue.empty() == True: 30 | print "the Queue is empty!" 31 | sys.exit(1) 32 | try: 33 | job = queue.get() 34 | print "I'm operating on job item: %s"%(job) 35 | queue.task_done() 36 | except: 37 | print "Failed to operate on job" 38 | 39 | '''set variables''' 40 | queue = Queue() 41 | threads = 4 42 | 43 | '''a list of job items. you would want this to be more advanced, 44 | like reading from a file or database''' 45 | jobs = [ "job1", "job2", "job3" ] 46 | 47 | ”’iterate over jobs and put each into the queue in sequence”’ 48 | for job in jobs: 49 | print “inserting job into the queue: %s”%(job) 50 | queue.put(job) 51 | 52 | ”’start some threads, each one will process one job from the queue”’ 53 | for i in range(threads): 54 | th = Thread(target=processor) 55 | th.setDaemon(True) 56 | th.start() 57 | 58 | ”’wait until all jobs are processed before quitting”’ 59 | queue.join() 60 | 61 | ========================= 62 | 63 | 64 | def daemonize(no_close=False, pidfile=None): 65 | """ 66 | Convert the calling process into a daemon. To make the current Python 67 | process into a daemon process, you need two lines of code: 68 | 69 | .. 
python:: 70 | 71 | from grizzled.os import daemonize 72 | daemonize.daemonize() 73 | 74 | If ``daemonize()`` fails for any reason, it throws a ``DaemonError``, 75 | which is a subclass of the standard ``OSError`` exception. also logs debug 76 | messages, using the standard Python ``logging`` package, to channel 77 | "grizzled.os.daemon". 78 | 79 | **Adapted from:** http://software.clapper.org/daemonize/ 80 | 81 | **See Also:** 82 | 83 | - Stevens, W. Richard. *Unix Network Programming* (Addison-Wesley, 1990). 84 | 85 | :Parameters: 86 | no_close : bool 87 | If ``True``, don't close the file descriptors. Useful if the 88 | calling process has already redirected file descriptors to an 89 | output file. **Warning**: Only set this parameter to ``True`` if 90 | you're *sure* there are no open file descriptors to the calling 91 | terminal. Otherwise, you'll risk having the daemon re-acquire a 92 | control terminal, which can cause it to be killed if someone logs 93 | off that terminal. 94 | 95 | pidfile : str 96 | Path to file to which to write daemon's process ID. The string may 97 | contain a ``${pid}`` token, which is replaced with the process ID 98 | of the daemon. e.g.: ``/var/run/myserver-${pid}`` 99 | 100 | :raise DaemonError: Error during daemonizing 101 | """ 102 | log = logging.getLogger('grizzled.os.daemon') 103 | 104 | def __fork(): 105 | try: 106 | return _os.fork() 107 | except OSError, e: 108 | raise DaemonError, ('Cannot fork', e.errno, e.strerror) 109 | 110 | def __redirect_file_descriptors(): 111 | import resource # POSIX resource information 112 | maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1] 113 | if maxfd == resource.RLIM_INFINITY: 114 | maxfd = MAXFD 115 | 116 | # Close all file descriptors. 117 | 118 | for fd in range(0, maxfd): 119 | # Only close TTYs. 120 | try: 121 | _os.ttyname(fd) 122 | except: 123 | continue 124 | 125 | try: 126 | _os.close(fd) 127 | except OSError: 128 | # File descriptor wasn't open. Ignore. 
129 | pass 130 | 131 | # Redirect standard input, output and error to something safe. 132 | # os.open() is guaranteed to return the lowest available file 133 | # descriptor (0, or standard input). Then, we can dup that 134 | # descriptor for standard output and standard error. 135 | 136 | _os.open(NULL_DEVICE, _os.O_RDWR) 137 | _os.dup2(0, 1) 138 | _os.dup2(0, 2) 139 | 140 | 141 | if _os.name != 'posix': 142 | import errno 143 | raise DaemonError, \ 144 | ('daemonize() is only supported on Posix-compliant systems.', 145 | errno.ENOSYS, _os.strerror(errno.ENOSYS)) 146 | 147 | try: 148 | # Fork once to go into the background. 149 | 150 | log.debug('Forking first child.') 151 | pid = __fork() 152 | if pid != 0: 153 | # Parent. Exit using os._exit(), which doesn't fire any atexit 154 | # functions. 155 | _os._exit(0) 156 | 157 | # First child. Create a new session. os.setsid() creates the session 158 | # and makes this (child) process the process group leader. The process 159 | # is guaranteed not to have a control terminal. 160 | log.debug('Creating new session') 161 | _os.setsid() 162 | 163 | # Fork a second child to ensure that the daemon never reacquires 164 | # a control terminal. 165 | log.debug('Forking second child.') 166 | pid = __fork() 167 | if pid != 0: 168 | # Original child. Exit. 169 | _os._exit(0) 170 | 171 | # This is the second child. Set the umask. 172 | log.debug('Setting umask') 173 | _os.umask(UMASK) 174 | 175 | # Go to a neutral corner (i.e., the primary file system, so 176 | # the daemon doesn't prevent some other file system from being 177 | # unmounted). 178 | log.debug('Changing working directory to "%s"' % WORKDIR) 179 | _os.chdir(WORKDIR) 180 | 181 | # Unless no_close was specified, close all file descriptors. 
182 | if not no_close: 183 | log.debug('Redirecting file descriptors') 184 | __redirect_file_descriptors() 185 | 186 | if pidfile: 187 | from string import Template 188 | t = Template(pidfile) 189 | pidfile = t.safe_substitute(pid=str(_os.getpid())) 190 | open(pidfile, 'w').write(str(_os.getpid()) + '\n') 191 | 192 | except DaemonError: 193 | raise 194 | 195 | except OSError, e: 196 | raise DaemonError, ('Unable to daemonize()', e.errno, e.strerror) 197 | 198 | ============================================= 199 | 200 | #!/usr/bin/env python 201 | # encoding: utf-8 202 | """Sending log output to a file and the console at the same time. 203 | """ 204 | 205 | import logging 206 | import logging.handlers 207 | import sys 208 | 209 | # Log verbosely 210 | root_logger = logging.getLogger('') 211 | root_logger.setLevel(logging.DEBUG) 212 | 213 | # Set up console output to stderr 214 | console = logging.StreamHandler(sys.stderr) 215 | console_format = '%(message)s' 216 | console.setFormatter(logging.Formatter(console_format)) 217 | console.setLevel(logging.INFO) # TODO: command line switch 218 | root_logger.addHandler(console) 219 | 220 | # Include debug messages when logging to a file 221 | file_handler = logging.handlers.RotatingFileHandler( 222 | 'logging_example.log', # use a full path 223 | ) 224 | file_format = '%(asctime)s %(levelname)6s %(name)s %(message)s' 225 | file_handler.setFormatter(logging.Formatter(file_format)) 226 | file_handler.setLevel(logging.DEBUG) 227 | root_logger.addHandler(file_handler) 228 | 229 | # Log sample messages with different levels 230 | log = logging.getLogger(__name__) 231 | log.info('on the console and in the file') 232 | log.debug('only in the file') 233 | log.error('simple error message') 234 | 235 | # Replace excepthook with logger 236 | def log_exception(exc_type, exc_value, traceback): 237 | logging.getLogger(__name__).error(exc_value) 238 | sys.excepthook = log_exception 239 | 240 | # Send exceptions to the logger automatically 
241 | raise RuntimeError('failure message') 242 | -------------------------------------------------------------------------------- /docs/readme: -------------------------------------------------------------------------------- 1 | 2 | app_info = { 3 | :name => 'ArcReactor', 4 | :version => '1.0 - beta', 5 | :author => 'ohdae [ams]', 6 | :website => 'https://github.com/ohdae/arcreactor', 7 | :contact => 'ohdae@zeroharbor.org' 8 | } 9 | 10 | 11 | Overview 12 | ======== 13 | ArcReactor is a free, open-source application used to collect OSINT (Open-Source Intelligence) data 14 | and send this data to a SIEM, such as ArcSight ESM or Splunk. This application is meant to be used by 15 | security analysts and/or engineers who want to expand and complement their event correlation, attack 16 | prediction and overall threat insight by making use of open-source sources such as known bad host lists, 17 | honeypot information, exploit and vulnerability trackers, and social media. 18 | 19 | 20 | Why OSINT? 21 | ========== 22 | Open-Source Intelligence can be highly useful in a SOC or NCSIRT type environment, where analysts and 23 | responders are required to track, analyze and react to threats. OSINT can often be a great source of 24 | information on emerging threats and even specific attacks before they happen. Proper collection, 25 | correlation and use of this information can be used to better predict future attacks, assist in attack 26 | attribution and provide a more in-depth view of threat actor groups. By feeding this information into 27 | your SIEM, you can have more tangible, actionable events. 28 | 29 | 30 | ArcReactor Design 31 | ================== 32 | ArcReactor collects information from dozens of public sources by using a combination of live monitoring 33 | and web-scraping. Each area of information is grouped into a module that can either be executed on its 34 | own or run as part of a larger data collection campaign. 
All of the information collected is parsed into 35 | CEF or JSON format and sent via Secure Syslog to your SIEM collectors. Everything is completely customizable 36 | to fit your use case- modules executed, how often collection occurs, keywords to monitor and SIEM settings. 37 | ArcReactor also has many API-like wrappers for commonly used functions throughout the application. 38 | This makes it very easy for users to write their own collection modules and expand on the program. 39 | 40 | 41 | Collection Modules 42 | ================== 43 | * Known Bad Hosts 44 | utilizes dozens of sources to pull down known malicious IP addresses, domain names, proxies, 45 | TOR exit nodes, known bad file hashes and other attacker information 46 | 47 | * OTX Reputation 48 | scrapes AlienVault's OTX (Open Threat Exchange) Reputation database for known attacker information 49 | 50 | * Twitter 51 | monitor Twitter feeds for custom keywords in your watch list 52 | 53 | * Pastebin 54 | monitor new Pastebin posts for custom keywords in your watch list 55 | 56 | * Facebook 57 | monitor Facebook posts for custom keywords in your watch list 58 | 59 | * Kippo 60 | collect logs and attacker information from your Kippo honeypots 61 | 62 | * Reddit 63 | monitor Reddit posts and users for custom keywords in your watch list 64 | 65 | * Exploits 66 | monitor exploit, malware and vulnerability trackers for new threats, CVEs and public exploits 67 | 68 | 69 | Running ArcReactor 70 | =================== 71 | There are three ways to run ArcReactor. 72 | 73 | * Standard command line script that will read the SIEM, sources and keyword settings from a config file, 74 | execute all of the collection modules and automatically send all data collected to your SIEM. 75 | It can either be run as a daemon or with the status output sent to your terminal. 76 | All the status, informational and error output is sent to a log file. 77 | 78 | * Interactive ArcReactor console. 
This is the preferred method of use as it allows much more fine-tuned 79 | control over the modules and the collection process as a whole. Using the interactive console, you can 80 | select which modules to run, modify configuration settings, interact and monitor running tasks, and setup 81 | collection as either a recurring process or as 'live', on-going collection. All information and errors are 82 | also logged. 83 | 84 | * Stand-alone collection scripts are available in the standalone/ directory. Each module has a counterpart 85 | that can be run on its own. These are useful if you want to launch a specific task quickly. 86 | 87 | 88 | Authors & Contributing 89 | ====================== 90 | Adam Swanda, @ohdae, is the core developer of ArcReactor. As this is a free and open-source project, 91 | public contributions are encouraged. Anything from feature requests, bug fixes, complaints or new modules. 92 | You can enter bugs and issues into the Github repo's issue tracker or fork your own repo for pull requests. 93 | 94 | 95 | Documentation & Support 96 | ======================= 97 | Full documentation will soon be available on the Github repo's wiki and also included in the docs/ directory of the application. 98 | 99 | -------------------------------------------------------------------------------- /launch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | from lib import reactor 4 | from lib import dispatch 5 | import console 6 | import argparse 7 | import sys, os 8 | 9 | # the usage block should explain itself. 10 | # you *must* run this when starting ArcReactor, you can't run console.py by itself. 11 | # eventually this whole application will be turned into an optional service and this 12 | # launcher will control the 'start', 'stop', 'restart', etc commands for the service. 13 | 14 | usage = """ 15 | 16 | This is the main launcher for ArcReactor. 
17 | 18 | Below is a full list of all available modules and explanations of both ArcReactor execution modes. 19 | For further documentation or assistance, please refer to the online Wiki or the 'docs' directory. 20 | 21 | Modules: 22 | malicious \tgathers known malicious ip addresses and hostnames from public sources 23 | kippo \tcollect attacker information from your kippo honeypots 24 | otx \tgathers known malicious ip addresses, hostnames and reputation data from AlienVault's OTX 25 | reddit \tscrapes reddit posts for keywords defined in the watchlist configuration 26 | twitter \tscrapes public twitter timelines for keywords defined in the watchlist configuration 27 | pastebin \tscrapes public pastebin archives for keywords defined in the watchlist configuration 28 | facebook \tscrapes public facebook posts for keywords defined in the watchlist configuration 29 | exploits \tscrapes exploit-db for information on newly released remote, local and dos exploits 30 | 31 | Options: 32 | --interactive 33 | This command will start an interactive console where you can launch and interact with 34 | specific collection tasks, edit configuration files and have fine-tuned control over 35 | your data collection. 36 | This is the suggested method of use. 37 | 38 | --collect 39 | This command will start all data collection tasks using the current configuration files. 40 | ArcReactor will attempt to execute each of the available modules and send syslog events for 41 | all data that is gathered. 42 | 43 | --daemon 44 | Daemonize the process when used with the --collect command. All status and error messages 45 | will be logged to the /opt/arcreactor/var/log/reactor.log file. 
46 | 47 | 48 | """ 49 | 50 | print usage 51 | parser = argparse.ArgumentParser() 52 | parser.add_argument("--interactive", help="start interactive console", action="store_true") 53 | parser.add_argument("--collect", help="execute all collection modules", action="store_true") 54 | parser.add_argument("--daemon", help="run --collect as background process", action="store_true") 55 | args = parser.parse_args() 56 | 57 | if args.interactive: 58 | session = console.Session() 59 | reactor.start_logger() 60 | session.new() 61 | elif args.collect: 62 | if args.daemon: 63 | background_job = True 64 | launcher = dispatch.Module() 65 | jobs = dispatch.Jobs() 66 | reactor.status('info', 'arcreactor', 'launching all collection modules') 67 | launcher.run_knownbad() 68 | launcher.run_pastebin() 69 | launcher.run_otx() 70 | reactor.status('info', 'arcreactor', 'all collection modules finished') 71 | print('[*] Collection Statistics: ') 72 | jobs.get_stats() 73 | sys.exit(0) 74 | else: 75 | print('[!] arcreactor - invalid argument!') 76 | sys.exit(1) 77 | 78 | 79 | 80 | 81 | 82 | 83 | 84 | 85 | 86 | 87 | -------------------------------------------------------------------------------- /lib/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/__init__.py -------------------------------------------------------------------------------- /lib/__init__.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/__init__.pyc -------------------------------------------------------------------------------- /lib/dispatch.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # part of the ArcReactor application 4 | # http://github.com/ohdae/ArcReactor 5 | # 6 | # 
# Module & task dispatch
# this module receives input from the interactive
# console. we take in requests to execute collection
# modules, verify the settings, execute the appropriate
# functions and return the output to be sent via syslog
# this is where the magic happens.
#

import os, sys
import commands
#import atexit
from datetime import datetime
from Queue import Queue
import threading
import reactor
import pastebin
import otx
import knownbad

#import exploits
#import twitter
#import facebook
#import reddit
#import kippo



job_stats = {}
#job_stats keeps track of which module is doing what.
#this hash is also used when we pull and output the job statistics using the 'info' cmd.
#example:
#job_stats = {
#    'pastebin': {
#        'status': 'running',
#        'message': 'retrieving newest archive',
#        'started': '2012-12-16 12:34:20',
#        'ended': '',
#        'events': 0,
#    }
#}

# Single Jobs instance shared by every receive() call, so the list of
# running modules survives between console commands. Created on first use
# because the Jobs class is defined further down in this module.
_jobs = None


def _get_jobs():
    """Return the shared Jobs instance, creating it on first use."""
    global _jobs
    if _jobs is None:
        _jobs = Jobs()
    return _jobs


def receive(command):
    """
    Dispatch one console command string.

    Commands handled here:
      start <module>  - run a collection module via Jobs.start_module()
      info tasks      - print job statistics via Jobs.get_stats()
      cfg <name>      - hand the name to Config.manage()

    Anything else is reported through reactor.status() as an invalid command.
    Nothing is returned.
    """
    parts = command.split(' ')
    if command.startswith('start'):
        # 'start' without a module name used to raise IndexError
        if len(parts) < 2 or not parts[1]:
            reactor.status('warn', 'arcreactor', 'no collection module given')
            return
        _get_jobs().start_module(parts[1])
    #elif command.startswith('kill'):
    #    task = command.split(' ')[1]
    #    if task == 'all':
    #        Jobs.kill_all()
    #    elif task in reactor.modules.keys():
    #        Jobs.kill_job(task)
    #    else:
    #        reactor.status('warn', 'arcreactor', '%s is not a valid option')
    elif command == 'info tasks':
        _get_jobs().get_stats()
    elif command.startswith('cfg'):
        if len(parts) < 2 or not parts[1]:
            reactor.status('warn', 'arcreactor', 'no configuration name given')
            return
        # NOTE(review): no Config class is defined or imported in this module;
        # until one exists the command is reported instead of crashing the console.
        try:
            manager = Config
        except NameError:
            reactor.status('warn', 'arcreactor', 'configuration management is not available')
            return
        manager.manage(parts[1])
    else:
        reactor.status('warn', 'arcreactor', '%s is not a valid command' % command)


class Jobs:
    def __init__(self):
        """
        Handles job interaction tasks.

        This class is responsible for the overall management and monitoring of any major tasks
        within ArcReactor. Smaller, static functions do not need to be included within this
        management class.
        Features:
            - start/stop collection modules
            - create job queues and assign workers
            - collect statistics on running jobs
            - ensure safe shutdown of jobs

        """
        # names of the collection modules that are executing right now
        self.running = []

    def start_module(self, module):
        """
        Start execution of collection modules

        Takes a module name ('pastebin', 'otx', 'knownbad' or 'all'), finds the
        matching run_* method on a Module() instance and executes it. 'all' runs
        pastebin, knownbad and otx in that order. Any other name, including
        names listed in reactor.modules that have no runner yet, is reported as
        an invalid collection module.

        A module name is kept in self.running only while its runner executes.
        """
        launcher = Module()
        runners = {
            'pastebin': launcher.run_pastebin,
            'otx': launcher.run_otx,
            'knownbad': launcher.run_knownbad,
        }
        if module == 'all':
            targets = ['pastebin', 'knownbad', 'otx']
        elif module in runners:
            targets = [module]
        else:
            reactor.status('warn', 'arcreactor', '%s is not a valid collection module' % module)
            return

        for name in targets:
            if name in self.running:
                reactor.status('info', 'arcreactor', 'collection module %s is all ready running' % name)
                continue
            reactor.status('info', 'arcreactor', 'starting collection module %s' % name)
            self.running.append(name)
            try:
                runners[name]()
            finally:
                # always clear the marker, even when the runner raises
                self.running.remove(name)

    #def kill_all(self):
    #    """
    #    Safely kill all running and queued jobs.
    #
    #    Ensures safe shutdown of ArcReactor jobs. This function is also registered as an
    #    atexit function so it will be called everytime ArcReactor exits - whether by user
    #    intervention or signal interrupts.
124 | # 125 | # """ 126 | # if len(job_stats) > 0: 127 | # for self.job in job_stats.keys(): 128 | # reactor.status('info', 'arcreactor', 'killing %s' % self.job) 129 | # if self.job in self.running or self.queue: 130 | # self.queue[self.job].stop() 131 | # job_stats.remove(job) 132 | # return True 133 | # else: 134 | # reactor.status('info', 'arcreactor', 'no running jobs') 135 | # return False 136 | 137 | 138 | #def kill_job(self, job): 139 | # """ Safely kill a specific running or queued job. """ 140 | # if job in job_stats.keys(): 141 | # reactor.status('info', 'arcreactor', 'stopping %s' % job) 142 | # if job in self.running or self.queue: 143 | # self.queue[job].stop() 144 | # job_stats.remove(job) 145 | # return True 146 | # reactor.status('info', 'arcreactor', '%s does not seem to exist' % job) 147 | # return False 148 | 149 | def get_stats(self, type='all'): 150 | """ 151 | Gather statistics on running and queued jobs. 152 | 153 | Jobs.get_stats() interacts with the job_stats hash to pull down information 154 | on running, paused and queued jobs. This function will only be called when the 155 | user passes the console comamnd 'info tasks'. 
156 | 157 | """ 158 | if len(job_stats) > 0: 159 | if type == 'all': 160 | for self.job_title in job_stats.keys(): 161 | print('\n%s => ' % self.job_title) 162 | for self.key, self.value in job_stats[self.title].iteritems(): 163 | print('{0:12}: \t {1:16}'.format(self.key, self.value)) 164 | elif type in job_stats.keys(): 165 | print('\n%s => ' % type) 166 | for self.key, self.value in job_stats[type].iteritems(): 167 | print('{0:12}: \t {1:16}'.format(self.key, self.value)) 168 | else: 169 | reactor.status('info', 'arcreactor', 'cannot find job %s' % type) 170 | else: 171 | reactor.status('info', 'arcreactor', 'no running or queued jobs') 172 | 173 | 174 | class Module: 175 | def __init__(self): 176 | self.running = 0 177 | self.queued = 0 178 | 179 | def run_knownbad(self): 180 | jobs_stats['knownbad'] = { 181 | 'status': 'running', 182 | 'started': str(datetime.now()).split('.')[0], 183 | 'message': 'loading sources', 184 | 'events': 0 185 | } 186 | if knownbad.load_sources(): 187 | for src in knownbad.sources: 188 | job_stats['knownbad'] = { 'message': 'gathering data from sources' } 189 | self.host, self.source = knownbad.gather_data(src) 190 | if not self.host == "": 191 | job_stats['knownbad'] = { 'message': 'sending syslog events' } 192 | self.cef = 'CEF:0|OSINT|ArcReactor|1.0|100|Known Malicious Host|1|src=%s msg=%s' % (self.host, self.source) 193 | reactor.send_syslog(self.cef) 194 | job_stats['knownbad'] = { 'events': job_stats['knownbad']['events'] + 1 } 195 | job_stats['knownbad'] = { 'status': 'finished', 'message': 'finished successfully', 'ended': str(datetime.now()).split('.')[0] } 196 | job_stats['knownbad'] = { 'message': 'finished with errors', 'ended': str(datetime.now()).split('.')[0] } 197 | 198 | def run_pastebin(self): 199 | job_stats['pastebin'] = { 200 | 'status': 'running', 201 | 'started': str(datetime.now()).split('.')[0], 202 | 'message': 'loading keywords', 203 | 'events': 0 204 | } 205 | reactor.status('info', 'pastebin', 'launching 
pastebin module') 206 | if pastebin.load_words(): 207 | job_stats['pastebin'] = { 'message': 'collecting post archive' } 208 | pastebin.gather_archive() 209 | if len(pastebin.queue) > 0: 210 | for post in pastebin.queue: 211 | job_stats['pastebin'] = { 'message': 'searching post %s' % post } 212 | # the search_raw function is called from within gather_content 213 | pastebin.gather_content(post) 214 | job_stats['pastebin'] = { 'events': len(pastebin.found) } 215 | if len(pastebin.found) > 0: 216 | for self.post_id, self.data in pastebin.found.iteritems(): 217 | job_stats['pastebin'] = { 'message': 'sending syslog events' } 218 | self.cef = 'CEF:0|OSINT|ArcReactor|1.0|100|Watchlist Keyword Found|1|src=%s msg=%s' % (self.post_id, self.data) 219 | reactor.send_syslog(self.cef) 220 | job_stats['pastebin'] = { 'status': 'finished', 'message': 'finished successfully', 'ended': str(datetime.now()).split('.')[0] } 221 | job_stats['pastebin'] = { 'status': 'finished', 'message': 'finished with errors', 'ended': str(datetime.now()).split('.')[0] } 222 | 223 | def run_otx(self): 224 | reactor.status('info', 'otx', 'launching otx module') 225 | job_stats['otx'] = { 226 | 'status': 'running', 227 | 'started': str(datetime.now()).split('.')[0], 228 | 'message': 'loading keywords', 229 | 'events': 0 230 | } 231 | if otx.gather_data(): 232 | job_stats['otx'] = {'status': 'finished', 'message': 'finished successfully', 'ended': str(datetime.now()).split('.')[0] } 233 | jobs_stats['otx'] = {'events': otx.count } 234 | else: 235 | job_stats['otx'] = { 'status': 'finished', 'message': 'finished with errors', 'ended': str(datetime.now()).split('.')[0] } 236 | 237 | 238 | 239 | 240 | 241 | 242 | 243 | 244 | 245 | 246 | 247 | 248 | -------------------------------------------------------------------------------- /lib/exploits.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | 3 | import reactor 4 | 5 | names = [] 6 | 7 | def 
gather_exploits(): 8 | sec_tracker = 'http://securitytracker.com/archives/summary/9000.html' 9 | try: 10 | reactor.status('info', 'exploits', 'retrieving exploits from securitytracker.com') 11 | req = reactor.http_request(sec_tracker) 12 | if req is not None: 13 | for line in req.split('\n'): 14 | if '')[1].split("")[0] 16 | names.append(name) 17 | 18 | -------------------------------------------------------------------------------- /lib/facebook.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/facebook.py -------------------------------------------------------------------------------- /lib/kippo.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/kippo.py -------------------------------------------------------------------------------- /lib/knownbad.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # 'Known Bad' collection module 4 | # part of ArcReactor 5 | # 6 | # https://github.com/ohdae/arcreactor 7 | # 8 | 9 | import reactor 10 | import re 11 | 12 | sources = [] 13 | 14 | ip_regex = re.compile(r"\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b") 15 | dom_regex = 
re.compile(r'([\d\w.][-\d\w.]{0,253}[\d\w.]+\.)+(AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN|XN|XN|XN|XN|XN|XN|XN|XN|XN|XN|YE|YT|YU|ZA|ZM|ZW)', re.IGNORECASE) 16 | comment_regex = re.compile ("#.*?\n") 17 | comment2_regex = re.compile ("//.*?\n") 18 | 19 | def load_sources(): 20 | sources = reactor.load_sources(reactor.PATH_CONF+'/sources.cfg') 21 | if len(sources) > 0: 22 | reactor.status('info', 'known bad', '%d sources added to queue' % len(sources)) 23 | return True 24 | return False 25 | 26 | def gather_data(source): 27 | try: 28 | reactor.status('info', 'known bad', 'retrieving hosts from %s' % source) 29 | raw = reactor.http_request(source) 30 | if raw is not None: 31 | data = re.findall(ip_regex, raw) 32 | if data == "": 33 | data = re.findall(dom_regex, raw) 34 | return data, source 35 | except: 36 | reactor.satus('warn', 'known bad', 'failed to retrieve hosts from %s' % source) 37 | 38 | 39 | 40 | -------------------------------------------------------------------------------- /lib/otx.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # 3 | # AlienVault OTX collection module 4 | # part of ArcReactor 5 | # 6 | # 
# http://github.com/ohdae/arcreactor/
#

import reactor

# number of events sent by the most recent gather_data() run
count = 0

def gather_data():
    """
    Download the AlienVault reputation list and send one CEF syslog event per entry.

    Each data line has the form "<address> # <description>". Comment lines,
    blank lines and lines without a '#' separator are skipped. The
    module-level count is reset and then incremented once per event sent.

    Returns True when the list was processed, False when the download
    returned nothing or an error occurred while parsing or sending.
    """
    # count is assigned here; without the global statement the first
    # "count += 1" raised UnboundLocalError and the run was reported as failed
    global count
    count = 0
    feed = 'http://reputation.alienvault.com/reputation.snort'
    try:
        data = reactor.http_request(feed)
        if data is None:
            return False
        reactor.status('info', 'OTX', 'attempting to parse reputation data')
        for line in data.split('\n'):
            # skip blank lines AND comments (the original "or" let both through)
            if not line or line.startswith('#'):
                continue
            fields = line.split('#')
            if len(fields) < 2:
                continue
            addr, info = fields[0].strip(), fields[1].strip()
            cef = 'CEF:0|OSINT|ArcReactor|1.0|100|%s|1|src=%s msg=%s' % (info, addr, feed)
            reactor.status('info', 'OTX', 'sending CEF syslog for %s - %s' % (info, addr))
            reactor.send_syslog(cef)
            count += 1
        reactor.status('info', 'OTX', 'sent %d total events' % count)
        return True
    except Exception:
        reactor.status('warn', 'OTX', 'failed to retrieve OTX database')
        return False









# --------------------------------------------------------------------------------
# /lib/pastebin.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
#
# pastebin collection module
# part of ArcReactor
#
# ohdae [ams]
# http://github.com/ohdae/arcreactor/
#

import reactor
import re

archive = "http://www.pastebin.com/archive"
raw = "http://pastebin.com/raw.php?i="

queue = []
found = {}
watch_list = []
# NOTE(review): as it stands this pattern has no literal anchors and matches
# empty strings; the HTML markers it originally contained appear to have been
# stripped when this text was extracted - restore from upstream before use.
regex = re.compile('(.*?).*?(.*?)', re.S)

def load_words():
    """
    Load the keywords from conf/keywords.cfg into the module-level watch_list.

    Returns True when at least one keyword was loaded, False otherwise. The
    list is updated in place so search_raw() sees the keywords; the original
    assigned to a local name and left watch_list empty.
    """
    words = reactor.load_keywords(reactor.PATH_CONF+'/keywords.cfg')
    # reactor.load_keywords() may hand back False when the file is missing
    if not words:
        return False
    watch_list[:] = words
    reactor.status('info', 'pastebin', '%d keywords added to watch list' % (len(watch_list)))
    return True

def gather_archive():
    """Fetch the pastebin archive page and append every new post id to queue."""
    try:
        page = reactor.http_request(archive)
        if page is None:
            reactor.status('warn', 'pastebin', 'failed to fetch pastebin archive')
            return
        for entry in regex.findall(page):
            post_id = entry[0]
            if post_id not in queue:
                reactor.status('info', 'pastebin', 'post id %s added to queue' % post_id)
                queue.append(post_id)
        reactor.status('info', 'pastebin', 'total posts added to queue: %d' % len(queue))
    except Exception:
        reactor.status('warn', 'pastebin', 'failed to fetch pastebin archive')

def gather_content(post_id):
    """
    Download the raw text of one post, remove the post from queue and search
    the text for watch-list keywords via search_raw().

    Text containing CRLF line endings is searched line by line; any other
    text is searched as a single string.
    """
    try:
        content = reactor.http_request('%s%s' % (raw, post_id))
        if post_id in queue:
            queue.remove(post_id)
        # check for None first; "x in None" raises TypeError
        if content is None or 'Unknown Paste ID!' in content:
            return
        reactor.status('info', 'pastebin', 'searching post id %s' % post_id)
        # split() returns the whole text as one element when no CRLF is present
        for line in content.split('\r\n'):
            search_raw(line, post_id)
    except Exception:
        reactor.status('warn', 'pastebin', 'failed to fetch post id %s' % post_id)

def search_raw(data, post_id):
    """Record data under found[post_id] for every watch-list word it contains."""
    for word in watch_list:
        if word in data:
            found[post_id] = data
            reactor.status('info', 'pastebin', 'found %s in pastebin post %s' % (word, post_id))




# --------------------------------------------------------------------------------
# /lib/pastebin.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/pastebin.pyc
# --------------------------------------------------------------------------------
# /lib/reactor.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
#
# part of ArcReactor application
#
# this module includes some of the
# core functionality ArcReactor uses
# throughout the application. handles
# things like logging, message output,
# syslog events, interacting with config
# files and some preliminary database
# interaction.
#
# TODO:
# - move over finished json format function (testing/data/json.py)
#


import logging
import socket
import time
import os, sys
import ConfigParser
import signal

# define all our needed paths
PATH_HOME = '/opt/arcreactor'
PATH_LOGS = '/opt/arcreactor/var/logs'
PATH_DATA = '/opt/arcreactor/data'
PATH_CONF = '/opt/arcreactor/conf'
PATH_MODS = '/opt/arcreactor/lib'
PATH_HIST = '/opt/arcreactor/.console_history'

modules = {
    'pastebin': 'monitor Pastebin archive for custom keywords from your watch list',
    'otx': 'collect known malicious hosts and information from AlienVaults OTX reputation database',
    'twitter': 'monitor Twitter feeds for custom keywords from your watch list',
    'facebook': 'monitor Facebook posts for custom keywords from your watch list',
    'knownbad': 'scrapes dozens of public sources for known malicious IP addresses, domain names, open proxies, TOR exit nodes and other attacker information',
    'exploits': 'monitor exploit, malware and vulnerability trackers for new threats, CVEs and recently released exploits',
    'kippo': 'collect log and attacker information from your Kippo honeypots',
    'reddit': 'monitor Reddit posts and users for custom keywords from your watch list',
    'malware': 'scrape public sources for known malicious websites, exploit kit domains, phishing domains, malware file hashes and other malware related information'
}

config = ConfigParser.ConfigParser()

# SIEM settings read by send_syslog(); filled in by load_config()
opts = {}

# Raw string: the backslashes are part of the drawing, and a trailing
# backslash must not act as a line continuation.
# NOTE(review): the column alignment of the banner was lost when this text
# was extracted; restore the spacing from upstream.
ascii = r'''


 ______ _______ __
 / \ / \ / |
 /$$$$$$ | ______ _______ $$$$$$$ | ______ ______ _______ _$$ |_ ______ ______
 $$ |__$$ | / \ / |$$ |__$$ | / \ / \ / |/ $$ | / \ / \
 $$ $$ |/$$$$$$ |/$$$$$$$/ $$ $$< /$$$$$$ | $$$$$$ |/$$$$$$$/ $$$$$$/ /$$$$$$ |/$$$$$$ |
 $$$$$$$$ |$$ | $$/ $$ | $$$$$$$ |$$ $$ | / $$ |$$ | $$ | __ $$ | $$ |$$ | $$/
 $$ | $$ |$$ | $$ \_____ $$ | $$ |$$$$$$$$/ /$$$$$$$ |$$ \_____ $$ |/ |$$ \__$$ |$$ |
 $$ | $$ |$$ | $$ |$$ | $$ |$$ |$$ $$ |$$ | $$ $$/ $$ $$/ $$ |
 $$/ $$/ $$/ $$$$$$$/ $$/ $$/ $$$$$$$/ $$$$$$$/ $$$$$$$/ $$$$/ $$$$$$/ $$/

 ArcReactor [version 1.0]
 ohdae - 2012
 https://github.com/ohdae/arcreactor

'''

def start_logger():
    """
    Configure file logging to PATH_LOGS/reactor.log (append mode, DEBUG level).

    Returns True when the log file exists and logging was configured, False
    (after a warning) when the file does not exist.
    """
    # setup our logger
    # TODO: add log rotation function
    debug_log = PATH_LOGS+'/reactor.log'
    if not os.path.exists(debug_log):
        status('warn', 'arcreactor', 'log file does not exist.')
        return False
    # remove this print. debug msg.
    print('[*] logs will be appened to %s' % debug_log)
    logging.basicConfig(filename=debug_log, filemode='a',
                        format='%(asctime)s %(levelname)s %(message)s',
                        datefmt='%H:%M:%S', level=logging.DEBUG)
    return True

def signal_handler(signal, frame):
    """Report that a Ctrl+C signal was caught."""
    # NOTE(review): the message announces a shutdown but nothing here exits;
    # confirm whether the code registering this handler performs the exit.
    status('info', 'arcreactor', 'Ctrl+C signal caught. shutting down ArcReactor')

def _read_entries(file_path):
    """
    Return the non-empty, non-comment lines of file_path with surrounding
    whitespace removed, or an empty list (after a warning) when the file
    does not exist.
    """
    if not os.path.exists(file_path):
        status('warn', 'arcreactor', 'unable to load %s' % file_path)
        return []
    status('info', 'arcreactor', 'loading contents of %s' % file_path)
    entries = []
    # the with block closes the file even when reading fails
    with open(file_path, 'r') as handle:
        for line in handle:
            text = line.strip()
            # skip empty lines and commented lines
            if not text or text.startswith('#'):
                continue
            entries.append(text)
    return entries

def load_keywords(file_path):
    """
    Load a keyword config file.

    Returns the list of keywords, one per non-comment line. The list is empty
    when the file is missing, so callers can safely take len() of the result.
    """
    # basic function for loading all keyword based config files
    return _read_entries(file_path)

def load_config(file_path):
    """
    Read the [syslog] section of the config file at file_path.

    Returns a dict with the keys siem_host, siem_port (int), siem_name and
    siem_max, or False when the file does not exist. The same values are
    stored in the module-level opts used by send_syslog().
    """
    # make sure the config file exists
    if not os.path.exists(file_path):
        return False
    # utilize the ConfigParser module for easier parsing
    config.read(file_path)
    loaded = {
        'siem_host': config.get('syslog', 'host'),
        'siem_port': config.getint('syslog', 'port'),
        'siem_name': config.get('syslog', 'name'),
        'siem_max': config.get('syslog', 'max'),
    }
    opts.clear()
    opts.update(loaded)
    return loaded

def load_sources(file_path):
    """
    Load a www source config file.

    Returns the list of entries that start with 'http'. The list is empty
    when the file is missing, so callers can safely take len() of the result.
    """
    # basic function for loading all www source config files
    return [entry for entry in _read_entries(file_path) if entry.startswith('http')]

def status(level, module, message):
    """
    Print "<module> - <message>" and write it to the log.

    level 'warn' prints with a '[!]' prefix and logs a warning; any other
    level prints with '[~]' and logs at info level.
    """
    msg = '%s - %s' % (module, message)
    if level == 'warn':
        print('[!] %s' % msg)
        logging.warning(msg)
    else:
        print('[~] %s' % msg)
        logging.info(msg)

def json_request(url):
    """
    GET url and return the decoded JSON body, or None (after a warning) when
    the request fails or the status code is not 200.
    """
    try:
        # imported here because this module never imported requests at the
        # top; a missing package is reported like any other failed request
        import requests
        headers = {'User-Agent': 'ArcReactor - 1.0 (https://github.com/ohdae/arcreactor)'}
        request = requests.get(url, headers=headers)
        if request.status_code == 200:
            # Response.json is a method in current requests releases; very
            # old releases exposed it as a plain attribute
            payload = request.json
            return payload() if callable(payload) else payload
        status('warn', 'arcreactor', 'http request failed for url %s. returned status code %s' % (url, request.status_code))
        return None
    except Exception:
        status('warn', 'arcreactor', 'http request failed for url %s' % url)
        return None

def http_request(url):
    """
    GET url and return the raw response body, or None (after a warning) when
    the request fails or the status code is not 200.
    """
    try:
        # see json_request() for why the import is local
        import requests
        headers = {'User-Agent': 'ArcReactor - 1.0 (https://github.com/ohdae/arcreactor)'}
        request = requests.get(url, headers=headers)
        if request.status_code == 200:
            return request.content
        status('warn', 'arcreactor', 'http request failed for url %s. returned status code %s' % (url, request.status_code))
        return None
    except Exception:
        status('warn', 'arcreactor', 'http request failed for url %s' % url)
        return None

def send_syslog(message):
    """
    Send message as one UDP syslog datagram to the host and port held in opts.

    Raises KeyError when load_config() has not populated opts yet, and lets
    socket errors propagate to the caller.
    """
    # priority 29 = facility 3 (daemon) * 8 + severity 5 (notice)
    # change this if need be
    data = '<%d>%s' % (29, message)
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    # create socket for sending syslog events
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        sock.sendto(data, (opts['siem_host'], int(opts['siem_port'])))
    finally:
        sock.close()

def test_syslog():
    """Send a debug event; return True on success, False when sending raised."""
    try:
        send_syslog('DEBUG MESSAGE')
        return True
    except Exception:
        return False






# --------------------------------------------------------------------------------
# /lib/reactor.pyc:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/reactor.pyc
# --------------------------------------------------------------------------------
# /lib/reddit.py:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/reddit.py
# --------------------------------------------------------------------------------
# /standalone/otx-esm.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
#
# osint collection for AlienVault OTX reputation db
# stand-alone version from ArcReactor
#
# ohdae [ams]
# http://github.com/ohdae/ArcReactor/
#

import requests
import re, sys
import socket

# define some stuff
config = {
    # alienvault's reputation db to use. i find snort format easier to parse
    'otx': 'http://reputation.alienvault.com/reputation.snort',
    # syslog host
    'host': '127.0.0.1',
    # syslog port
    'port': '512'
}
# number of events sent by gather_data()
count = 0

def send_syslog(msg):
    """Send msg as one UDP syslog datagram to config['host']:config['port']."""
    # %d in the syslog msg is the syslog level + facility * 8
    # 29 is the default for notice + daemon * 8 or 5 + 3 * 8
    # data = '<%d>%s' % (level + facility*8, message)
    # change this if you feel the need
    data = '<%d>%s' % (29, msg)
    if not isinstance(data, bytes):
        data = data.encode('utf-8')
    sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
    try:
        sock.sendto(data, (config['host'], int(config['port'])))
    finally:
        sock.close()

def gather_data():
    """
    Download the reputation list and send one CEF syslog event per entry,
    counting the events in the module-level count.

    Prints an error and exits with status 1 when downloading, parsing or
    sending fails.
    """
    # count is assigned here; without the global statement the first
    # "count += 1" raised UnboundLocalError and aborted the script
    global count
    try:
        # why anyone would use urllib when we have the requests lib, idk.
        data = requests.get(config['otx']).content
        print("[~] attempting to parse reputation data...")
        for line in data.split("\n"):
            # we really dont need that format checking function.
            # lets just look for comments and blank lines first then parse
            if line.startswith("#") or line == "":
                continue
            # snort format is: ip-address # message
            d = line.split("#")
            if len(d) < 2:
                continue
            addr, info = d[0], d[1]
            print("[~] sending syslog event for %s - %s" % (info, addr))
            cef = 'CEF:0|OSINT|ArcReactor|1.0|100|%s|1|src=%s msg=%s' % (info, addr, config['otx'])
            send_syslog(cef)
            count += 1
    except Exception:
        print("[!] error retrieving otx database")
        sys.exit(1)


# run the collection only when executed as a script, not on import
if __name__ == "__main__":
    print("\n\n")
    print("\t open-source data gathering ")
    print("\t source >> alienvault.com ")
    print("\t author: ohdae [ams] ")
    print("\n\thttp://github.com/ohdae/arcreactor")
    print("\n\n")

    print("[~] starting collecting of OTX reputation database...")
    # the alienvault otx db is updated every 60 minutes
    # if you want a constantly updated activelist in esm,
    # either run this script as a cronjob every hour or
    # change this to add a simple time.sleep(3600)
    # and repeat the gather_data() function
    gather_data()
    print("[*] collection complete.")
    print("[*] %d unique events sent." % count)


# --------------------------------------------------------------------------------
# /standalone/pb_scrape.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python
#
# osint collection for pastebin
# stand-alone version of the scraper from ArcReactor
#
# ohdae [ams]
# http://github.com/ohdae/ArcReactor/
#

import requests
import re, sys
import time

# define some stuff
pb_archive = "http://www.pastebin.com/archive"
pb_raw = "http://pastebin.com/raw.php?i="
# NOTE(review): as it stands this pattern has no literal anchors and matches
# empty strings; the HTML markers it originally contained appear to have been
# stripped when this text was extracted - restore from upstream before use.
pb_regex = re.compile('(.*?).*?(.*?)', re.S)
pb_queue = []
found = {}

# this will eventually be changed to a config file where the user can define
# keywords to look out for, individually or in pairs. until then, edit this
# list to your liking.
# e.g.:
# watch = 'hacked' + 'my company name'
# watch = 'myemail@address.here'
watch_words = ['hacked', 'corporate', 'owned', 'sql injection', 'password', 'leak', 'waffles']

def collect_posts():
    """Fetch the pastebin archive page and append every new post id to pb_queue."""
    try:
        posts = pb_regex.findall(requests.get(pb_archive).content)
        for p in posts:
            paste_id, paste_title = p[0], p[1]
            if paste_id not in pb_queue:
                print("[~] collected ")
                print(" id: %s | title: %s" % (paste_id, paste_title))
                pb_queue.append(paste_id)
        print("[~] total posts in queue: %d" % len(pb_queue))
    # Exception instead of a bare except, so Ctrl+C still interrupts
    except Exception:
        print("[!] problem fetching pastebin archive.")
        print(" check your network connection and try again.")

def search_content(post_id):
    """
    Download the raw text of one post, remove the post from pb_queue and
    search the text for watch words via search_raw().

    Text containing CRLF line endings is searched line by line; any other
    text is searched as a single string. A post whose download fails stays
    in the queue.
    """
    try:
        raw = requests.get("%s%s" % (pb_raw, post_id)).content
        if post_id in pb_queue:
            pb_queue.remove(post_id)
        # split() returns the whole text as one element when no CRLF is present
        for line in raw.split("\r\n"):
            search_raw(line, post_id)
    except Exception:
        print("[!] problem fetching post %s" % post_id)

def search_raw(data, post_id):
    """Record data under found[post_id] and print it for every watch word it contains."""
    for word in watch_words:
        if word in data:
            found[post_id] = data
            print("\n")
            print("[*] found:\t%s" % word)
            print(" post:\t%s" % post_id)
            print(" data:\t%s" % data)

def menu():
    """Print the list of console commands."""
    print(" help \tdisplay this command menu")
    print(" gather\tcollect new posts from pastebin.com/archive")
    print(" search\tsearch current post queue for keywords")
    print(" words \tview current keyword watch list")
    print(" posts \tview post queue information and entries")
    print(" found \tview all found data")
    print(" exit \texit this application\n")

def main():
    """
    Run the interactive console: read a command at the 'arc >> ' prompt and
    dispatch it, until 'exit' or 'quit' ends the process with status 0.
    """
    # raw_input exists on Python 2 only; fall back to input elsewhere
    try:
        read_line = raw_input
    except NameError:
        read_line = input
    menu()
    while True:
        r = read_line("arc >> ")
        if r == "gather":
            collect_posts()
        elif r == "search":
            # pb_queue is a list, so test emptiness by truthiness; the
            # original compared it with "" and never saw an empty queue
            if pb_queue:
                # search_content() removes entries from pb_queue, so walk a
                # snapshot; iterating the live list skipped every other post
                for post in list(pb_queue):
                    search_content(post)
                    print("\n[~] searched %s" % post)
                    time.sleep(0.5)
            else:
                print("[!] post queue is currently empty. try running 'gather' first.")
        elif r == "words":
            print("[~] keyword watch list: ")
            for word in watch_words:
                print(word)
        elif r == "posts":
            if pb_queue:
                print("[~] total queued posts: %d" % len(pb_queue))
            else:
                print("[!] post queue is currently empty. try running 'gather' first.")
        elif r == "exit" or r == "quit":
            print("[*] exiting application...")
            sys.exit(0)
        elif r == "found":
            for key, value in found.items():
                print("\n")
                print("post id: %s" % key)
                print(" data: %s" % value)
        elif r == "help":
            menu()
        else:
            print("[!] this is not a valid command.")


# start the console only when executed as a script, not on import
if __name__ == "__main__":
    print("\n\n")
    print("\t open-source data gathering ")
    print("\t source >> pastebin.com ")
    print("\t author: ohdae [ams] ")
    print("\n\thttp://github.com/ohdae/arcreactor")
    print("\n\n")
    main()
# --------------------------------------------------------------------------------