├── README.md
├── conf
    ├── keywords.cfg
    ├── reactor.cfg
    └── sources.cfg
├── console.py
├── data
    └── placeholder
├── docs
    ├── howto
    ├── install
    ├── license
    ├── notes
    └── readme
├── launch.py
├── lib
    ├── __init__.py
    ├── __init__.pyc
    ├── dispatch.py
    ├── exploits.py
    ├── facebook.py
    ├── kippo.py
    ├── knownbad.py
    ├── otx.py
    ├── pastebin.py
    ├── pastebin.pyc
    ├── reactor.py
    ├── reactor.pyc
    └── reddit.py
└── standalone
    ├── otx-esm.py
    └── pb_scrape.py


/README.md:
--------------------------------------------------------------------------------
 1 | [![SayThanks](https://img.shields.io/badge/Say%20Thanks-!-1EAEDB.svg?style=flat)](https://saythanks.io/to/deadbits)  [![Donate](https://img.shields.io/badge/donate-BTC-blue.svg?style=flat)](https://www.coinbase.com/deadbits)
 2 | 
 3 | # ArcReactor
 4 | ArcReactor is a free, open-source application used to collect OSINT (Open-Source Intelligence) data
 5 | and send this data to a SIEM, such as ArcSight ESM or Splunk. This application is meant to be used by
 6 | security analysts and/or engineers who want to expand and complement their event correlation, attack
 7 | prediction and overall threat insight by making use of open-source sources such as known bad host lists,
 8 | honeypot information, exploit and vulnerability trackers, and social media.
 9 | 
10 | **Full documentation is available in the `docs` directory.**
11 | 
12 | ## Important Update
13 | This project is no longer maintained and has not been updated in many years.  
14 | When it was originally created, there was a huge lack of these types of automated collection systems but now that gap has been filled by others who have done great work (_cough_ IntelMQ _cough_).  
15 | I hope that in some small way this project, among others, started the path to the development of these newer and more robust OSINT collection systems. I want to thank everyone who has shown interest in this project over the years. Forks, stars, and watches are a big motivation to continue development on similar projects, and open source development in general. 
16 |   
17 | Thank you! :)
18 | 


--------------------------------------------------------------------------------
/conf/keywords.cfg:
--------------------------------------------------------------------------------
 1 | #####################################################
 2 | # Keywords used within social media modules	    #	
 3 | #####################################################
 4 | #
 5 | # This list should contain specific keywords you wish
 6 | # to keep an eye out for. Try to make this list as 
 7 | # specific to your needs as possible. Using words like
 8 | # 'password' or 'hack' will turn up dozens and dozens
 9 | # of results, and most likely none of them will pertain
10 | # to you or your company.
11 | # This is also good for finding information that might
12 | # have been inadvertently leaked by employees. Often times
13 | # employees will use public services like pastebin, stack-
14 | # exchange, etc to trouble-shoot problems and in the process
15 | # publish internal or confidential information
16 | # Try things like: '@corp-domain.com', internal hostname schemes,
17 | # intranet domain names, etc.
18 | #
19 | # The keywords below are just some standard examples. Change em!
20 | #
21 | 
22 | Anonymous
23 | antisec
24 | data leak
25 | sql injection
26 | password
27 | hacked
28 | sql dump
29 | zeroday
30 | exploit
31 |  
32 | 


--------------------------------------------------------------------------------
/conf/reactor.cfg:
--------------------------------------------------------------------------------
 1 | 
 2 | ##########################################
 3 | # SIEM / Syslog receiver configuration	 #
 4 | ##########################################
 5 | # 
 6 | # Define settings for your SIEM connector here.
 7 | # Host and port are obvious. The 'name' option
 8 | # is used as an identifier only and serves no
 9 | # purpose other than keeping track of what's being
10 | # sent where. Eventually this will be expanded to
11 | # allow for multiple connectors and sending collection
12 | # module results to specific connectors. For now, we
13 | # only have the one.
14 | #
15 | 
16 | [syslog]
17 | name = ArcSight
18 | host = 192.168.56.101
19 | port = 7771
20 | #max = 1500 ; max events per second before connector starts dropping
21 | 


--------------------------------------------------------------------------------
/conf/sources.cfg:
--------------------------------------------------------------------------------
 1 | # The majority of this list is from the arcosi / badharvest script.
 2 | # I had a much larger list that I built upon but I accidentally deleted it :(
 3 | # I haven't checked all of these sources in some time so a few might
 4 | # be dead or moved at this point. I will be expanding on this list in
 5 | # the very near future. It will be split up into domains, ips, proxies,
 6 | # tor nodes and 'other'
 7 | # for now, they are all classified as 'known malicious host' during the
 8 | # syslog events.
 9 | #
10 | 
11 | http://www.mtc.sri.com/live_data/attackers/
12 | http://isc.sans.edu/reports.html
13 | https://zeustracker.abuse.ch/blocklist.php?download=ipblocklist
14 | https://spyeyetracker.abuse.ch/blocklist.php?download=ipblocklist
15 | http://www.projecthoneypot.org/list_of_ips.php
16 | http://intel.martincyber.com/ip
17 | https://www.openbl.org/lists/base.txt
18 | http://www.blocklist.de/lists/ssh.txt
19 | https://palevotracker.abuse.ch/
20 | http://www.malwaregroup.com/ipaddresses
21 | http://www.ciarmy.com/list/ci-badguys.txt
22 | http://rules.emergingthreats.net/blockrules/rbn-malvertisers-ips.txt
23 | https://secure.mayhemiclabs.com/malhosts/malhosts.txt
24 | https://zeustracker.abuse.ch/blocklist.php?download=domainblocklist
25 | https://spyeyetracker.abuse.ch/blocklist.php?download=domainblocklist
26 | http://mirror1.malwaredomains.com/files/BOOT
27 | http://www.malwaredomainlist.com/hostslist/hosts.txt
28 | http://www.malware.com.br/cgi/submit?action=list
29 | http://www.malwarepatrol.net/cgi/submit?action=list_xml
30 | 


--------------------------------------------------------------------------------
/console.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # part of ArcReactor [version 1.0]
  4 | # https://github.com/ohdae/arcreactor
  5 | # 
  6 | # Interactive console.
  7 | # this module handles all interactive console sessions
  8 | # here we provide the basic console functionality such
  9 | # as command input & output, editing configuration files,
 10 | # preliminary command verification and sending commands
 11 | # to the dispatch module
 12 | #
 13 | # TODO:
 14 | #   - config edit commands (configparser)
 15 | #   - fix keyword/source loading issues
 16 | #   - rewrite cmd verification and dispatch interaction
 17 | #   - create better environment check before starting new session
 18 | 
 19 | from lib import reactor
 20 | from lib import dispatch 
 21 | import os, sys
 22 | import readline
 23 | import signal
 24 | import commands
 25 | 
# Bind the Tab key to readline's completion hook for the interactive prompt.
readline.parse_and_bind('tab: complete') 
# Module-level session state. Session.new() loads values with these names
# from the files under reactor.PATH_CONF, and Session.pre_command() reads
# `keywords` and `sources` when the matching console commands are entered.
keywords = []
sources = []
options = {}
# Prompt string passed to raw_input() by Session.console().
prompt = 'reactor >> '
# Command reference shown by the `help` console command, grouped by section.
# Session.check_command() also consults these keys when validating input.
# NOTE: this name shadows the builtin help() inside this module.
help = {
    'general': {
        'help': 'display this menu',
        'quit': 'exit the console',
        'about': 'display about dialog',
        'exec': 'execute os command',
        'modules': 'show description of all modules',
        'keywords': 'show current watchlist keywords'
    },
    'configuration': {
        'cfg syslog': 'manage siem and syslog settings',
        'cfg sources': 'manage external sources',
        'cfg keywords': 'manage watchlist keywords'
    },
    'statistics': {
        'info tasks': 'view stats on running and queued jobs',
        'info reactor': 'view general ArcReactor stats',
        'data <module>': 'view information on data collected by module'
    },
    'collection': {
        'start all': 'start all collection modules',
        #'stop all': 'stop all running collection modules',
        'start <module>': 'launch selected module',
        #'stop <module>': 'stop selected module',
        'dashboard': 'start the web dashboard [experimental]'
    }
}
 58 | 
 59 | 
 60 | class Completer:
 61 |     def __init__(self):
 62 |         self.words = [ 'help', 'quit', 'exit', 'about', 'clear', 'cfg', 'sources', 'keywords', 'syslog', 'modules',
 63 |             'start', 'stop', 'all', 'info', 'data', 'reactor', 'tasks', 'dashboard', 'pastebin', 'otx', 'exploits', 'twitter' ]
 64 |         self.prefix = ''
 65 | 
 66 |     def complete(self, prefix, index):
 67 |         if prefix != self.prefix:
 68 |             self.matching_words = [w for w in cmds if w.startswith(prefix)]
 69 |             self.prefix = prefix
 70 |         else: pass
 71 |         try:
 72 |             return self.matching_words[index]
 73 |         except IndexError:
 74 |             return None
 75 | 
 76 | class Session(object):
 77 |     def __init__(self):
 78 |         signal.signal(signal.SIGINT, reactor.signal_handler)
 79 | 
 80 |     def new(self):
 81 |         """
 82 |         Initializes a new console session
 83 | 
 84 |         Perform some simple environment checks to ensure that we can properly
 85 |         start a new interactive session, load needed configuration files and if
 86 |         these pass, we start our console.
 87 | 
 88 |         """
 89 |         reactor.status('info', 'arcreactor', 'initializing new console session')
 90 |         reactor.status('info', 'arcreactor', 'loading configuration files')
 91 |         keywords = reactor.load_keywords(reactor.PATH_CONF+'/keywords.cfg')
 92 |         sources = reactor.load_sources(reactor.PATH_CONF+'/sources.cfg')
 93 |         options = reactor.load_config(reactor.PATH_CONF+'/reactor.cfg')
 94 |         self.console()
 95 | 
 96 |     def kill_session(self):
 97 |         if len(dispatch.job_stats) > 0:
 98 |             print('[*] %d jobs are still running.' & len(dispatch.job_stats))
 99 |             print('are you sure you want to exit?')
100 |             self.answer = raw_input('[y/n]')
101 |             if self.answer == 'y': 
102 |                 pass
103 |             elif self.answer == 'n':
104 |                 print('[*] returning to ArcReactor console')
105 |                 self.console()
106 |             else:
107 |                 print('[!] invalid answer. returning to ArcReactor console.')
108 |                 self.console()
109 |         reactor.status('info', 'arcreactor', 'shutting down ArcReactor console')
110 |         sys.exit(0)
111 | 
112 |     def pre_command(self, cmd):
113 |         """
114 |         Handle basic functions before we send command to dispatch.
115 | 
116 |         Several commands do not need to be sent to dispatch to be executed,
117 |         so we take care of the more basic/static commands here and only send
118 |         the more actionable commands to the dispatch module.
119 | 
120 |         """
121 |         if cmd == 'quit' or cmd == 'exit': 
122 |             self.kill_session()
123 |         elif cmd == 'help':
124 |             print('\n\t  general')
125 |             for self.c, self.i in help['general'].iteritems():
126 |                 print('{0:12} \t {1:26}'.format(self.c, self.i))
127 |             print('\n\t  configuration')
128 |             for self.c, self.i in help['configuration'].iteritems():
129 |                 print('{0:12} \t {1:26}'.format(self.c, self.i))
130 |             print('\n\t  statistics')
131 |             for self.c, self.i in help['statistics'].iteritems():
132 |                 print('{0:12} \t {1:26}'.format(self.c, self.i))
133 |             print('\n\t  collection')
134 |             for self.c, self.i in help['collection'].iteritems():
135 |                 print('{0:12} \t {1:26}'.format(self.c, self.i))
136 |         elif cmd == 'clear': 
137 |             os.system('clear')
138 |         elif cmd.startswith('exec'):
139 |             self.exec_output = commands.getoutput(' '.join(cmd.split(' ')[1:]))
140 |             print self.exec_output
141 |         elif cmd == 'keywords':
142 |             if len(keywords) > 0:
143 |                 print('\nWatch-List Keywords')
144 |                 for self.word in keywords:
145 |                     print self.word
146 |             else:
147 |                 print('[*] keyword list is empty')
148 |         elif cmd == 'sources':
149 |             if len(sources) > 0:
150 |                 print('\nExternal Sources')
151 |                 for self.src in sources:
152 |                     print self.src
153 |             else: 
154 |                 print('[*] source list is empty')
155 |         elif cmd == 'modules':
156 |             print('\nAvailable Collection Modules')
157 |             for self.mod_name, self.mod_info in reactor.modules.iteritems():
158 |                 print('%s\t\t%s' % (self.mod_name, self.mod_info))
159 |         else:
160 |             dispatch.receive(cmd)
161 | 
162 |     def check_command(self, args):
163 |         try:
164 |             cmd, arg = args.split(' ')
165 |         except:
166 |             cmd = args
167 | 
168 |         for title in help.keys():
169 |             if cmd in help[title].keys():
170 |                 return True
171 |         return False
172 | 
173 |     def console(self):
174 |         print reactor.ascii
175 |         print('Welcome to the ArcReactor console!')
176 |         print('type `help` to get started\n')
177 | 
178 |         while True:
179 |             completer = Completer()
180 |             readline.set_completer(completer.complete)
181 |             cmd = raw_input(prompt)
182 | 
183 |             if self.check_command(cmd):
184 |                 self.pre_command(cmd)
185 | 
186 | 
187 |             
188 | 
189 | 
190 | 
191 | 
192 | 
193 | 


--------------------------------------------------------------------------------
/data/placeholder:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/data/placeholder


--------------------------------------------------------------------------------
/docs/howto:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/docs/howto


--------------------------------------------------------------------------------
/docs/install:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/docs/install


--------------------------------------------------------------------------------
/docs/license:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/docs/license


--------------------------------------------------------------------------------
/docs/notes:
--------------------------------------------------------------------------------
  1 | Development Notes
  2 | =================
  3 | 
  4 | 
  5 | Console Commands =>
  6 |     'help': 'display this menu',
  7 |     'quit': 'exit the console',
  8 |     'exit': 'exit the console',
  9 |     'exec': 'execute os command',
 10 |     'about': 'display basic information',
 11 |     'clear': 'clears the screen',
 12 |     'config sources': 'manage osint sources',
 13 |     'config keywords': 'manage your keywords',
 14 |     'config syslog': 'manage siem syslog settings',
 15 |     'modules': 'list all collection module information',
 16 |     'keywords': 'show loaded keywords',
 17 |     'start all': 'start all available modules',
 18 |     'stop all': 'stop all running modules',
 19 |     'start <module>': 'launch the selected module',
 20 |     'stop <module>': 'stop the selected module',
 21 |     'info tasks': 'view stats on running and queued tasks',
 22 |     'info reactor': 'view general ArcReactor stats',
 23 |     'data <module>': 'view information on data collected by module',
 24 |     'dashboard': 'launch web dashboard [experimental]'
 25 | 
 26 | =====================================================================================
 27 | 
 28 | def processor():
 29 |     if queue.empty() == True:
 30 |         print "the Queue is empty!"
 31 |         sys.exit(1)
 32 |     try:
 33 |         job = queue.get()
 34 |         print "I'm operating on job item: %s"%(job)
 35 |         queue.task_done()
 36 |     except:
 37 |         print "Failed to operate on job"
 38 | 
 39 | '''set variables'''
 40 | queue = Queue()
 41 | threads = 4
 42 |     
 43 | '''a list of job items. you would want this to be more advanced,
 44 | like reading from a file or database'''
 45 | jobs = [ "job1", "job2", "job3" ]
 46 | 
 47 | '''iterate over jobs and put each into the queue in sequence'''
 48 | for job in jobs:
 49 |      print "inserting job into the queue: %s"%(job)
 50 |      queue.put(job)
 51 | 
 52 | '''start some threads, each one will process one job from the queue'''
 53 | for i in range(threads):
 54 |      th = Thread(target=processor)
 55 |      th.setDaemon(True)
 56 |      th.start()
 57 | 
 58 | '''wait until all jobs are processed before quitting'''
 59 | queue.join() 
 60 | 
 61 | =========================
 62 | 
 63 | 
 64 | def daemonize(no_close=False, pidfile=None):
 65 |     """
 66 |     Convert the calling process into a daemon. To make the current Python
 67 |     process into a daemon process, you need two lines of code:
 68 | 
 69 |     .. python::
 70 | 
 71 |         from grizzled.os import daemonize
 72 |         daemonize.daemonize()
 73 | 
 74 |     If ``daemonize()`` fails for any reason, it throws a ``DaemonError``,
 75 |     which is a subclass of the standard ``OSError`` exception. also logs debug
 76 |     messages, using the standard Python ``logging`` package, to channel
 77 |     "grizzled.os.daemon".
 78 | 
 79 |     **Adapted from:** http://software.clapper.org/daemonize/
 80 | 
 81 |     **See Also:**
 82 | 
 83 |     - Stevens, W. Richard. *Unix Network Programming* (Addison-Wesley, 1990).
 84 | 
 85 |     :Parameters:
 86 |         no_close : bool
 87 |             If ``True``, don't close the file descriptors. Useful if the
 88 |             calling process has already redirected file descriptors to an
 89 |             output file. **Warning**: Only set this parameter to ``True`` if
 90 |             you're *sure* there are no open file descriptors to the calling
 91 |             terminal. Otherwise, you'll risk having the daemon re-acquire a
 92 |             control terminal, which can cause it to be killed if someone logs
 93 |             off that terminal.
 94 | 
 95 |         pidfile : str
 96 |             Path to file to which to write daemon's process ID. The string may
 97 |             contain a ``${pid}`` token, which is replaced with the process ID
 98 |             of the daemon. e.g.: ``/var/run/myserver-${pid}``
 99 | 
100 |     :raise DaemonError: Error during daemonizing
101 |     """
102 |     log = logging.getLogger('grizzled.os.daemon')
103 | 
104 |     def __fork():
105 |         try:
106 |             return _os.fork()
107 |         except OSError, e:
108 |             raise DaemonError, ('Cannot fork', e.errno, e.strerror)
109 | 
110 |     def __redirect_file_descriptors():
111 |         import resource  # POSIX resource information
112 |         maxfd = resource.getrlimit(resource.RLIMIT_NOFILE)[1]
113 |         if maxfd == resource.RLIM_INFINITY:
114 |             maxfd = MAXFD
115 | 
116 |         # Close all file descriptors.
117 | 
118 |         for fd in range(0, maxfd):
119 |             # Only close TTYs.
120 |             try:
121 |                 _os.ttyname(fd)
122 |             except:
123 |                 continue
124 | 
125 |             try:
126 |                 _os.close(fd)
127 |             except OSError:
128 |                 # File descriptor wasn't open. Ignore.
129 |                 pass
130 | 
131 |             # Redirect standard input, output and error to something safe.
132 |             # os.open() is guaranteed to return the lowest available file
133 |             # descriptor (0, or standard input). Then, we can dup that
134 |             # descriptor for standard output and standard error.
135 | 
136 |             _os.open(NULL_DEVICE, _os.O_RDWR)
137 |             _os.dup2(0, 1)
138 |             _os.dup2(0, 2)
139 | 
140 | 
141 |     if _os.name != 'posix':
142 |         import errno
143 |         raise DaemonError, \
144 |               ('daemonize() is only supported on Posix-compliant systems.',
145 |                errno.ENOSYS, _os.strerror(errno.ENOSYS))
146 | 
147 |     try:
148 |         # Fork once to go into the background.
149 | 
150 |         log.debug('Forking first child.')
151 |         pid = __fork()
152 |         if pid != 0:
153 |             # Parent. Exit using os._exit(), which doesn't fire any atexit
154 |             # functions.
155 |             _os._exit(0)
156 | 
157 |         # First child. Create a new session. os.setsid() creates the session
158 |         # and makes this (child) process the process group leader. The process
159 |         # is guaranteed not to have a control terminal.
160 |         log.debug('Creating new session')
161 |         _os.setsid()
162 | 
163 |         # Fork a second child to ensure that the daemon never reacquires
164 |         # a control terminal.
165 |         log.debug('Forking second child.')
166 |         pid = __fork()
167 |         if pid != 0:
168 |             # Original child. Exit.
169 |             _os._exit(0)
170 | 
171 |         # This is the second child. Set the umask.
172 |         log.debug('Setting umask')
173 |         _os.umask(UMASK)
174 | 
175 |         # Go to a neutral corner (i.e., the primary file system, so
176 |         # the daemon doesn't prevent some other file system from being
177 |         # unmounted).
178 |         log.debug('Changing working directory to "%s"' % WORKDIR)
179 |         _os.chdir(WORKDIR)
180 | 
181 |         # Unless no_close was specified, close all file descriptors.
182 |         if not no_close:
183 |             log.debug('Redirecting file descriptors')
184 |             __redirect_file_descriptors()
185 | 
186 |         if pidfile:
187 |             from string import Template
188 |             t = Template(pidfile)
189 |             pidfile = t.safe_substitute(pid=str(_os.getpid()))
190 |             open(pidfile, 'w').write(str(_os.getpid()) + '\n')
191 | 
192 |     except DaemonError:
193 |         raise
194 | 
195 |     except OSError, e:
196 |         raise DaemonError, ('Unable to daemonize()', e.errno, e.strerror)
197 | 
198 | =============================================
199 | 
200 | #!/usr/bin/env python
201 | # encoding: utf-8
202 | """Sending log output to a file and the console at the same time.
203 | """
204 | 
205 | import logging
206 | import logging.handlers
207 | import sys
208 | 
209 | # Log verbosely
210 | root_logger = logging.getLogger('')
211 | root_logger.setLevel(logging.DEBUG)
212 | 
213 | # Set up console output to stderr
214 | console = logging.StreamHandler(sys.stderr)
215 | console_format = '%(message)s'
216 | console.setFormatter(logging.Formatter(console_format))
217 | console.setLevel(logging.INFO) # TODO: command line switch
218 | root_logger.addHandler(console)
219 | 
220 | # Include debug messages when logging to a file
221 | file_handler = logging.handlers.RotatingFileHandler(
222 |     'logging_example.log', # use a full path
223 |     )
224 | file_format = '%(asctime)s %(levelname)6s %(name)s %(message)s'
225 | file_handler.setFormatter(logging.Formatter(file_format))
226 | file_handler.setLevel(logging.DEBUG)
227 | root_logger.addHandler(file_handler)
228 | 
229 | # Log sample messages with different levels
230 | log = logging.getLogger(__name__)
231 | log.info('on the console and in the file')
232 | log.debug('only in the file')
233 | log.error('simple error message')
234 | 
235 | # Replace excepthook with logger
236 | def log_exception(exc_type, exc_value, traceback):
237 |     logging.getLogger(__name__).error(exc_value)
238 | sys.excepthook = log_exception
239 | 
240 | # Send exceptions to the logger automatically
241 | raise RuntimeError('failure message')
242 | 


--------------------------------------------------------------------------------
/docs/readme:
--------------------------------------------------------------------------------
 1 | 
 2 | app_info = {
 3 |     :name       => 'ArcReactor',
 4 |     :version    => '1.0 - beta',
 5 |     :author     => 'ohdae [ams]',
 6 |     :website    => 'https://github.com/ohdae/arcreactor',
 7 |     :contact    => 'ohdae@zeroharbor.org'
 8 | }
 9 | 
10 | 
11 | Overview
12 | ========
13 | ArcReactor is a free, open-source application used to collect OSINT (Open-Source Intelligence) data
14 | and send this data to a SIEM, such as ArcSight ESM or Splunk. This application is meant to be used by
15 | security analysts and/or engineers who want to expand and complement their event correlation, attack
16 | prediction and overall threat insight by making use of open-source sources such as known bad host lists,
17 | honeypot information, exploit and vulnerability trackers, and social media.
18 | 
19 | 
20 | Why OSINT?
21 | ==========
22 | Open-Source Intelligence can be highly useful in a SOC or NCSIRT type environment, where analysts and 
23 | responders are required to track, analyze and react to threats. OSINT can often be a great source of 
24 | information on emerging threats and even specific attacks before they happen. Proper collection, 
25 | correlation and use of this information can be used to better predict future attacks, assist in attack
26 | attribution and provide a more in-depth view of threat actor groups. By feeding this information into 
27 | your SIEM, you can have more tangible, actionable events.
28 | 
29 | 
30 | ArcReactor Design
31 | ==================
32 | ArcReactor collects information from dozens of public sources by using a combination of live monitoring
33 | and web-scraping. Each area of information is grouped into a module that can either be executed on its
34 | own or run as part of a larger data collection campaign. All of the information collected is parsed into
35 | CEF or JSON format and sent via Secure Syslog to your SIEM collectors. Everything is completely customizable
36 | to fit your use case- modules executed, how often collection occurs, keywords to monitor and SIEM settings.
37 | ArcReactor also has many API-like wrappers for commonly used functions throughout the application. 
38 | This makes it very easy for users to write their own collection modules and expand on the program.
39 | 
40 | 
41 | Collection Modules
42 | ==================
43 | * Known Bad Hosts
44 |     utilizes dozens of sources to pull down known malicious IP addresses, domain names, proxies, 
45 |     TOR exit nodes, known bad file hashes and other attacker information
46 | 
47 | * OTX Reputation
48 |     scrapes AlienVault's OTX (Open Threat Exchange) Reputation database for known attacker information
49 | 
50 | * Twitter
51 |     monitor Twitter feeds for custom keywords in your watch list
52 | 
53 | * Pastebin
54 |     monitor new Pastebin posts for custom keywords in your watch list
55 | 
56 | * Facebook
57 |     monitor Facebook posts for custom keywords in your watch list
58 | 
59 | * Kippo
60 |     collect logs and attacker information from your Kippo honeypots
61 | 
62 | * Reddit
63 |     monitor Reddit posts and users for custom keywords in your watch list
64 | 
65 | * Exploits
66 |     monitor exploit, malware and vulnerability trackers for new threats, CVEs and public exploits
67 | 
68 | 
69 | Running ArcReactor
70 | ===================
71 | There are three ways to run ArcReactor.
72 | 
73 | * Standard command line script that will read the SIEM, sources and keyword settings from a config file,
74 |   execute all of the collection modules and automatically send all data collected to your SIEM. 
75 |   It can either be run as a daemon or with the status output sent to your terminal.
76 |   All the status, informational and error output is sent to a log file.
77 | 
78 | * Interactive ArcReactor console. This is the preferred method of use as it allows much more fine-tuned
79 |   control over the modules and the collection process as a whole. Using the interactive console, you can
80 |   select which modules to run, modify configuration settings, interact and monitor running tasks, and setup
81 |   collection as either a recurring process or as 'live', on-going collection. All information and errors are 
82 |   also logged.
83 | 
84 | * Stand-alone collection scripts are available in the standalone/ directory. Each module has a counterpart
85 |   that can be run on its own. These are useful if you want to launch a specific task quickly.
86 | 
87 | 
88 | Authors & Contributing
89 | ======================
90 | Adam Swanda, @ohdae, is the core developer of ArcReactor. As this is a free and open-source project,
91 | public contributions are encouraged. Anything from feature requests, bug fixes, complaints or new modules.
92 | You can enter bugs and issues into the Github repo's issue tracker or fork your own repo for pull requests.
93 | 
94 | 
95 | Documentation & Support
96 | =======================
97 | Full documentation will soon be available on the Github repo's wiki and also included in the docs/ directory of the application.
98 | 
99 | 


--------------------------------------------------------------------------------
/launch.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | from lib import reactor
 4 | from lib import dispatch 
 5 | import console
 6 | import argparse
 7 | import sys, os
 8 | 
 9 | # the usage block should explain itself.
10 | # you *must* run this when starting ArcReactor, you can't run console.py by itself.
11 | # eventually this whole application will be turned into an optional service and this
12 | # launcher will control the 'start', 'stop', 'restart', etc commands for the service.
13 | 
14 | usage = """
15 | 
16 | This is the main launcher for ArcReactor.
17 | 
18 | Below is a full list of all available modules and explanations of both ArcReactor execution modes.
19 | For further documentation or assistance, please refer to the online Wiki or the 'docs' directory.
20 | 
21 | Modules:
22 | malicious  \tgathers known malicious ip addresses and hostnames from public sources
23 | kippo      \tcollect attacker information from your kippo honeypots
24 | otx        \tgathers known malicious ip addresses, hostnames and reputation data from AlienVault's OTX 
25 | reddit     \tscrapes reddit posts for keywords defined in the watchlist configuration
26 | twitter    \tscrapes public twitter timelines for keywords defined in the watchlist configuration
27 | pastebin   \tscrapes public pastebin archives for keywords defined in the watchlist configuration
28 | facebook   \tscrapes public facebook posts for keywords defined in the watchlist configuration
29 | exploits   \tscrapes exploit-db for information on newly released remote, local and dos exploits
30 | 
31 | Options:
32 | --interactive
33 |     This command will start an interactive console where you can launch and interact with
34 |     specific collection tasks, edit configuration files and have fine-tuned control over
35 |     your data collection.
36 |     This is the suggested method of use.
37 | 
38 | --collect
39 |     This command will start all data collection tasks using the current configuration files.
40 |     ArcReactor will attempt to execute each of the available modules and send syslog events for
41 |     all data that is gathered.
42 | 
43 | --daemon
44 |     Daemonize the process when used with the --collect command. All status and error messages
45 |     will be logged to the /opt/arcreactor/var/log/reactor.log file.
46 | 
47 | 
48 | """
49 | 
50 | print usage
51 | parser = argparse.ArgumentParser()
52 | parser.add_argument("--interactive", help="start interactive console", action="store_true")
53 | parser.add_argument("--collect", help="execute all collection modules", action="store_true")
54 | parser.add_argument("--daemon", help="run --collect as background process", action="store_true")
55 | args = parser.parse_args()
56 | 
57 | if args.interactive:
58 |     session = console.Session()
59 |     reactor.start_logger()
60 |     session.new()
61 | elif args.collect:
62 |     if args.daemon:
63 |         background_job = True
64 |     launcher = dispatch.Module()
65 |     jobs = dispatch.Jobs()
66 |     reactor.status('info', 'arcreactor', 'launching all collection modules')
67 |     launcher.run_knownbad()
68 |     launcher.run_pastebin()
69 |     launcher.run_otx()
70 |     reactor.status('info', 'arcreactor', 'all collection modules finished')
71 |     print('[*] Collection Statistics: ')
72 |     jobs.get_stats()
73 |     sys.exit(0)
74 | else:
75 |     print('[!] arcreactor - invalid argument!')
76 |     sys.exit(1)
77 | 
78 | 
79 | 
80 | 
81 | 
82 | 
83 | 
84 | 
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/lib/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/__init__.py


--------------------------------------------------------------------------------
/lib/__init__.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/__init__.pyc


--------------------------------------------------------------------------------
/lib/dispatch.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # part of the ArcReactor application
  4 | # http://github.com/ohdae/ArcReactor
  5 | #
  6 | # Module & task dispatch
  7 | # this module receives input from the interactive
  8 | # console. we take in requests to execute collection
  9 | # modules, verify the settings, execute the appropriate
 10 | # functions and return the output to be sent via syslog
 11 | # this is where the magic happens.
 12 | #
 13 | 
 14 | import os, sys
 15 | import commands
 16 | #import atexit
 17 | from datetime import datetime
 18 | from Queue import Queue
 19 | import threading
 20 | import reactor
 21 | import pastebin
 22 | import otx
 23 | import knownbad
 24 | 
 25 | #import exploits
 26 | #import twitter
 27 | #import facebook
 28 | #import reddit
 29 | #import kippo
 30 | 
 31 | 
 32 | 
 33 | job_stats = {}
 34 | #job_stats keeps track of which module is doing what. 
 35 | #this hash is also used when we pull and output the job statistics using the 'info' cmd. 
 36 | #example:
 37 | #job_stats = {
 38 | #    'pastebin': {
 39 | #        'status': 'running',
 40 | #        'message': 'retrieving newest archive',
 41 | #        'started': '2012-12-16 12:34:20',
 42 | #        'ended': '',
 43 | #        'events': 0,
 44 | #    }
 45 | #}
 46 | 
 47 | def receive(command):
 48 |     if command.startswith('start'):
 49 |         module = command.split(' ')[1]
 50 |         Jobs.start_module(module)
 51 |     #elif command.startswith('kill'):
 52 |     #    task = command.split(' ')[1]
 53 |     #    if task == 'all':
 54 |     #        Jobs.kill_all()
 55 |     #    elif task in reactor.modules.keys():
 56 |     #        Jobs.kill_job(task)
 57 |     #    else:
 58 |     #        reactor.status('warn', 'arcreactor', '%s is not a valid option')
 59 |     elif command == 'info tasks':
 60 |         Jobs.get_stats()
 61 |     elif command.startswith('cfg'):
 62 |         conf = command.split(' ')[1]
 63 |         Config.manage(conf)
 64 |     else:
 65 |         reactor.status('warn', 'arcreactor', '%s is not a valid command')
 66 | 
 67 | 
 68 | class Jobs:
 69 |     def __init__(self):
 70 |         """
 71 |         Handles job interaction tasks.
 72 | 
 73 |         This class is responsible for the overall management and monitoring of any major tasks
 74 |         within ArcReactor. Smaller, static functions do not need to be included within this
 75 |         management class.
 76 |         Features:
 77 |             - start/stop collection modules
 78 |             - create job queues and assign workers
 79 |             - collect statistics on running jobs
 80 |             - ensure safe shutdown of jobs
 81 | 
 82 |         """
 83 |         self.running = []
 84 | 
 85 |     def start_module(self, module):
 86 |         """
 87 |         Start execution of collection modules
 88 | 
 89 |         This functions only purpose is to take a module name as input, verify that the module
 90 |         exists, find the correct function from the Module() class and then execute. There are
 91 |         various checks to ensure only valid module names are being passed up until this point,
 92 |         so any input we receive here should be valid.
 93 | 
 94 |         """
 95 |         if module in self.running:
 96 |             arcreactor.status('info', 'arcreactor', 'collection module %s is all ready running' % module)
 97 |         
 98 |         elif module in reactor.modules.keys():
 99 |             if module == 'all':
100 |                 for self.name in reactor.modules.keys():
101 |                     reactor.statux('info', 'arcreactor', 'starting collection module %s' % module)
102 |                     self.running.append(self.name)
103 |                 Module.run_pastebin()
104 |                 Module.run_knownbad()
105 |                 Module.run_otx()
106 |             elif module == 'pastebin':
107 |                 Module.run_pastebin()
108 |             elif module == 'otx':
109 |                 Module.run_otx()
110 |             elif module == 'knownbad':
111 |                 Module.run_knownbad()
112 |             else:
113 |                 reactor.status('warn', 'arcreactor', '%s is not a valid collection module' % module)
114 |         else:
115 |             reactor.status('warn', 'arcreactor', '%s is not a valid collection module' % module)
116 | 
117 |     #def kill_all(self):
118 |     #    """
119 |     #    Safely kill all running and queued jobs.
120 |     #
121 |     #    Ensures safe shutdown of ArcReactor jobs. This function is also registered as an
122 |     #    atexit function so it will be called everytime ArcReactor exits - whether by user
123 |     #    intervention or signal interrupts.
124 |     #
125 |     #    """
126 |     #    if len(job_stats) > 0:
127 |     #        for self.job in job_stats.keys():
128 |     #            reactor.status('info', 'arcreactor', 'killing %s' % self.job)
129 |     #            if self.job in self.running or self.queue:
130 |     #                self.queue[self.job].stop()
131 |     #                job_stats.remove(job)
132 |     #                return True
133 |     #    else:
134 |     #        reactor.status('info', 'arcreactor', 'no running jobs')
135 |     #        return False
136 | 
137 | 
138 |     #def kill_job(self, job):
139 |     #    """ Safely kill a specific running or queued job. """
140 |     #    if job in job_stats.keys():
141 |     #        reactor.status('info', 'arcreactor', 'stopping %s' % job)
142 |     #        if job in self.running or self.queue:
143 |     #            self.queue[job].stop()
144 |     #            job_stats.remove(job)
145 |     #            return True
146 |     #    reactor.status('info', 'arcreactor', '%s does not seem to exist' % job)
147 |     #    return False
148 | 
149 |     def get_stats(self, type='all'):
150 |         """
151 |         Gather statistics on running and queued jobs.
152 | 
153 |         Jobs.get_stats() interacts with the job_stats hash to pull down information
154 |         on running, paused and queued jobs. This function will only be called when the
155 |         user passes the console comamnd 'info tasks'. 
156 | 
157 |         """
158 |         if len(job_stats) > 0:
159 |             if type == 'all':
160 |                 for self.job_title in job_stats.keys():
161 |                     print('\n%s => ' % self.job_title)
162 |                     for self.key, self.value in job_stats[self.title].iteritems():
163 |                         print('{0:12}: \t {1:16}'.format(self.key, self.value))
164 |             elif type in job_stats.keys():
165 |                 print('\n%s => ' % type)
166 |                 for self.key, self.value in job_stats[type].iteritems():
167 |                     print('{0:12}: \t {1:16}'.format(self.key, self.value))
168 |             else:
169 |                 reactor.status('info', 'arcreactor', 'cannot find job %s' % type)
170 |         else:
171 |             reactor.status('info', 'arcreactor', 'no running or queued jobs')
172 | 
173 | 
174 | class Module:
175 |     def __init__(self):
176 |         self.running = 0
177 |         self.queued = 0
178 | 
179 |     def run_knownbad(self):
180 |         jobs_stats['knownbad'] = {
181 |             'status': 'running',
182 |             'started': str(datetime.now()).split('.')[0],
183 |             'message': 'loading sources',
184 |             'events': 0
185 |         }
186 |         if knownbad.load_sources():
187 |             for src in knownbad.sources:
188 |                 job_stats['knownbad'] = { 'message': 'gathering data from sources' }
189 |                 self.host, self.source = knownbad.gather_data(src)
190 |                 if not self.host == "":
191 |                     job_stats['knownbad'] = { 'message': 'sending syslog events' }
192 |                     self.cef = 'CEF:0|OSINT|ArcReactor|1.0|100|Known Malicious Host|1|src=%s msg=%s' % (self.host, self.source)
193 |                     reactor.send_syslog(self.cef)
194 |                     job_stats['knownbad'] = { 'events': job_stats['knownbad']['events'] + 1 }
195 |             job_stats['knownbad'] = { 'status': 'finished', 'message': 'finished successfully', 'ended': str(datetime.now()).split('.')[0] }
196 |         job_stats['knownbad'] = { 'message': 'finished with errors', 'ended': str(datetime.now()).split('.')[0] }
197 | 
198 |     def run_pastebin(self):
199 |         job_stats['pastebin'] = {
200 |             'status': 'running',
201 |             'started': str(datetime.now()).split('.')[0],
202 |             'message': 'loading keywords',
203 |             'events': 0
204 |         }
205 |         reactor.status('info', 'pastebin', 'launching pastebin module')
206 |         if pastebin.load_words():
207 |             job_stats['pastebin'] = { 'message': 'collecting post archive' }
208 |             pastebin.gather_archive()
209 |             if len(pastebin.queue) > 0:
210 |                 for post in pastebin.queue:
211 |                     job_stats['pastebin'] = { 'message': 'searching post %s' % post }
212 |                     # the search_raw function is called from within gather_content
213 |                     pastebin.gather_content(post)
214 |             job_stats['pastebin'] = { 'events': len(pastebin.found) }
215 |             if len(pastebin.found) > 0:
216 |                 for self.post_id, self.data in pastebin.found.iteritems():
217 |                     job_stats['pastebin'] = { 'message': 'sending syslog events' }
218 |                     self.cef = 'CEF:0|OSINT|ArcReactor|1.0|100|Watchlist Keyword Found|1|src=%s msg=%s' % (self.post_id, self.data)
219 |                     reactor.send_syslog(self.cef)
220 |             job_stats['pastebin'] = { 'status': 'finished', 'message': 'finished successfully', 'ended': str(datetime.now()).split('.')[0] }
221 |         job_stats['pastebin'] = { 'status': 'finished', 'message': 'finished with errors', 'ended': str(datetime.now()).split('.')[0] }
222 | 
223 |     def run_otx(self):
224 |         reactor.status('info', 'otx', 'launching otx module')
225 |         job_stats['otx'] = {
226 |             'status': 'running',
227 |             'started': str(datetime.now()).split('.')[0],
228 |             'message': 'loading keywords',
229 |             'events': 0
230 |         }
231 |         if otx.gather_data():
232 |             job_stats['otx'] = {'status': 'finished', 'message': 'finished successfully', 'ended': str(datetime.now()).split('.')[0] }
233 |             jobs_stats['otx'] = {'events': otx.count }
234 |         else:
235 |             job_stats['otx'] = { 'status': 'finished', 'message': 'finished with errors', 'ended': str(datetime.now()).split('.')[0] }
236 | 
237 | 
238 | 
239 | 
240 | 
241 | 
242 | 
243 | 
244 | 
245 | 
246 | 
247 | 
248 | 


--------------------------------------------------------------------------------
/lib/exploits.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | 
 3 | import reactor
 4 | 
 5 | names = []
 6 | 
 7 | def gather_exploits():
 8 |     sec_tracker = 'http://securitytracker.com/archives/summary/9000.html'
 9 |     try:
10 |         reactor.status('info', 'exploits', 'retrieving exploits from securitytracker.com')
11 |         req = reactor.http_request(sec_tracker)
12 |         if req is not None:
13 |             for line in req.split('\n'):
14 |                 if '<a href="/id/' in line:
15 |                     name = line.split('">')[1].split("</a>")[0]
16 |                     names.append(name)
17 |                 
18 | 


--------------------------------------------------------------------------------
/lib/facebook.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/facebook.py


--------------------------------------------------------------------------------
/lib/kippo.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/kippo.py


--------------------------------------------------------------------------------
/lib/knownbad.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # 'Known Bad' collection module
 4 | # part of ArcReactor
 5 | #
 6 | # https://github.com/ohdae/arcreactor
 7 | #
 8 | 
 9 | import reactor
10 | import re
11 | 
12 | sources = []
13 | 
14 | ip_regex = re.compile(r"\b(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)\b")
15 | dom_regex = re.compile(r'([\d\w.][-\d\w.]{0,253}[\d\w.]+\.)+(AC|AD|AE|AERO|AF|AG|AI|AL|AM|AN|AO|AQ|AR|ARPA|AS|ASIA|AT|AU|AW|AX|AZ|BA|BB|BD|BE|BF|BG|BH|BI|BIZ|BJ|BM|BN|BO|BR|BS|BT|BV|BW|BY|BZ|CA|CAT|CC|CD|CF|CG|CH|CI|CK|CL|CM|CN|COM|COOP|CR|CU|CV|CX|CY|CZ|DE|DJ|DK|DM|DO|DZ|EC|EDU|EE|EG|ER|ES|ET|EU|FI|FJ|FK|FM|FO|FR|GA|GB|GD|GE|GF|GG|GH|GI|GL|GM|GN|GOV|GP|GQ|GR|GS|GT|GU|GW|GY|HK|HM|HN|HR|HT|HU|ID|IE|IL|IM|INFO|INT|IO|IQ|IR|IS|IT|JE|JM|JO|JOBS|JP|KE|KG|KH|KI|KM|KN|KP|KR|KW|KY|KZ|LA|LB|LC|LI|LK|LR|LS|LT|LU|LV|LY|MA|MC|MD|ME|MG|MH|MIL|MK|ML|MM|MN|MO|MOBI|MP|MQ|MR|MS|MT|MU|MUSEUM|MV|MW|MX|MY|MZ|NA|NAME|NC|NET|NF|NG|NI|NL|NO|NP|NR|NU|NZ|OM|ORG|PA|PE|PF|PG|PH|PK|PL|PM|PN|PR|PRO|PS|PT|PW|PY|QA|RE|RO|RS|RU|RW|SA|SB|SC|SD|SE|SG|SH|SI|SJ|SK|SL|SM|SN|SO|SR|ST|SU|SV|SY|SZ|TC|TD|TEL|TF|TG|TH|TJ|TK|TL|TM|TN|TO|TP|TR|TRAVEL|TT|TV|TW|TZ|UA|UG|UK|US|UY|UZ|VA|VC|VE|VG|VI|VN|VU|WF|WS|XN|XN|XN|XN|XN|XN|XN|XN|XN|XN|XN|YE|YT|YU|ZA|ZM|ZW)', re.IGNORECASE)
16 | comment_regex = re.compile ("#.*?\n")
17 | comment2_regex = re.compile ("//.*?\n")
18 | 
19 | def load_sources():
20 |     sources = reactor.load_sources(reactor.PATH_CONF+'/sources.cfg')
21 |     if len(sources) > 0:
22 |         reactor.status('info', 'known bad', '%d sources added to queue' % len(sources))
23 |         return True
24 |     return False
25 | 
26 | def gather_data(source):
27 |     try:
28 |         reactor.status('info', 'known bad', 'retrieving hosts from %s' % source)
29 |         raw = reactor.http_request(source)
30 |         if raw is not None:
31 |             data = re.findall(ip_regex, raw)
32 |             if data == "":
33 |                 data = re.findall(dom_regex, raw)
34 |             return data, source
35 |     except:
36 |         reactor.satus('warn', 'known bad', 'failed to retrieve hosts from %s' % source)
37 | 
38 | 
39 | 
40 | 


--------------------------------------------------------------------------------
/lib/otx.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # AlienVault OTX collection module
 4 | # part of ArcReactor
 5 | #
 6 | # http://github.com/ohdae/arcreactor/
 7 | #
 8 | 
 9 | import reactor
10 | 
11 | count = 0
12 | 
13 | def gather_data():
14 |     try:
15 |         data = reactor.http_request('http://reputation.alienvault.com/reputation.snort')
16 |         if data is not None:
17 |             reactor.status('info', 'OTX', 'attempting to parse reputation data')
18 |             for line in data.split('\n'):
19 |                 if not line.startswith('#') or not len(line) == 0:
20 |                     try:
21 |                         d = line.split('#')
22 |                         addr, info = d[0], d[1]
23 |                         cef = 'CEF:0|OSINT|ArcReactor|1.0|100|%s|1|src=%s msg=%s' % (info, addr, 'http://reputation.alienvault.com/reputation.snort')
24 |                         reactor.status('info', 'OTX', 'sending CEF syslog for %s - %s' % (info, addr))
25 |                         reactor.send_syslog(cef)
26 |                         count += 1
27 |                     except IndexError:
28 |                         continue
29 |             reactor.status('info', 'OTX', 'sent %d total events' % count)
30 |             return True
31 |     except:
32 |         reactor.status('warn', 'OTX', 'failed to retrieve OTX database')
33 |         return False
34 | 
35 | 
36 | 
37 | 
38 | 
39 | 
40 | 
41 | 
42 | 
43 | 


--------------------------------------------------------------------------------
/lib/pastebin.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # pastebin collection module
 4 | # part of ArcReactor
 5 | #
 6 | # ohdae [ams]
 7 | # http://github.com/ohdae/arcreactor/
 8 | #
 9 | 
10 | import reactor
11 | import re
12 | 
13 | archive = "http://www.pastebin.com/archive"
14 | raw = "http://pastebin.com/raw.php?i="
15 | 
16 | queue = []
17 | found = {}
18 | watch_list = []
19 | regex = re.compile('<td><img src="/i/t.gif" .*?<a href="/(.*?)">(.*?)</a></td>.*?<td>(.*?)</td>', re.S)
20 | 
21 | def load_words():
22 |     watch_list = reactor.load_keywords(reactor.PATH_CONF+'/keywords.cfg')
23 |     if len(watch_list) > 0:
24 |         reactor.status('info', 'pastebin', '%d keywords added to watch list' % (len(watch_list)))
25 |         return True
26 |     return False
27 | 
28 | def gather_archive():
29 |     try:
30 |         posts = reactor.http_request(archive)
31 |         posts = regex.findall(posts)
32 |         for p in posts:
33 |             post_id, post_title = p[0], p[1]
34 |             if post_id not in queue:
35 |                 reactor.status('info', 'pastebin', 'post id %s added to queue' % post_id)
36 |                 queue.append(post_id)
37 |         reactor.status('info', 'pastebin', 'total posts added to queue: %d' % len(queue))
38 |     except:
39 |         reactor.status('warn', 'pastebin', 'failed to fetch pastebin archive')
40 | 
41 | def gather_content(post_id):
42 |     try:
43 |         raw = reactor.http_request('http://pastebin.com/raw.php?i=%s' % post_id)
44 |         queue.remove(post_id)
45 |         if not 'Unknown Paste ID!' in raw and raw is not None:
46 |             reactor.status('info', 'pastebin', 'searching post id %s' % post_id)
47 |             if '\r\n' in raw:
48 |                 lines = raw.split('\r\n')
49 |                 for line in lines:
50 |                     search_raw(line, post_id)
51 |             else:
52 |                 search_raw(raw, post_id)
53 |     except:
54 |         reactor.status('warn', 'pastebin', 'failed to fetch post id %s' % post_id)
55 | 
56 | def search_raw(data, post_id):
57 |     for word in watch_list:
58 |         if word in data:
59 |             found[post_id] = data
60 |             reactor.status('info', 'pastebin', 'found %s in pastebin post %s' % (word, post_id))
61 | 
62 | 
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/lib/pastebin.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/pastebin.pyc


--------------------------------------------------------------------------------
/lib/reactor.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # part of ArcReactor application
  4 | #
  5 | # this module includes some of the 
  6 | # core functionality ArcReactor uses
  7 | # throughout the application. handles
  8 | # things like logging, message output,
  9 | # syslog events, interacting with config
 10 | # files and some preliminary database
 11 | # interaction.
 12 | #
 13 | # TODO:
 14 | #   - move over finished json format function (testing/data/json.py)
 15 | #
 16 | 
 17 | 
 18 | import logging
 19 | import socket
 20 | import time
 21 | import os, sys
 22 | import ConfigParser
 23 | import signal
 24 | 
 25 | # define all our needed paths
 26 | PATH_HOME = '/opt/arcreactor'
 27 | PATH_LOGS = '/opt/arcreactor/var/logs'
 28 | PATH_DATA = '/opt/arcreactor/data'
 29 | PATH_CONF = '/opt/arcreactor/conf'
 30 | PATH_MODS = '/opt/arcreactor/lib'
 31 | PATH_HIST = '/opt/arcreactor/.console_history'
 32 | 
 33 | modules = {
 34 |     'pastebin': 'monitor Pastebin archive for custom keywords from your watch list',
 35 |     'otx': 'collect known malicious hosts and information from AlienVaults OTX reputation database',
 36 |     'twitter': 'monitor Twitter feeds for custom keywords from your watch list',
 37 |     'facebook': 'monitor Facebook posts for custom keywords from your watch list',
 38 |     'knownbad': 'scrapes dozens of public sources for known malicious IP addresses, domain names, open proxies, TOR exit nodes and other attacker information',
 39 |     'exploits': 'monitor exploit, malware and vulnerability trackers for new threats, CVEs and recently released exploits',
 40 |     'kippo': 'collect log and attacker information from your Kippo honeypots',
 41 |     'reddit': 'monitor Reddit posts and users for custom keywords from your watch list',
 42 |     'malware': 'scrape public sources for known malicious websites, exploit kit domains, phishing domains, malware file hashes and other malware related information'
 43 | }
 44 | 
 45 | config = ConfigParser.ConfigParser()
 46 | 
 47 | ascii = '''
 48 | 
 49 | 
 50 |   ______                       _______                                   __                         
 51 |  /      \                     /       \                                 /  |                        
 52 | /$$$$$$  |  ______    _______ $$$$$$$  |  ______    ______    _______  _$$ |_     ______    ______  
 53 | $$ |__$$ | /      \  /       |$$ |__$$ | /      \  /      \  /       |/ $$   |   /      \  /      \ 
 54 | $$    $$ |/$$$$$$  |/$$$$$$$/ $$    $$< /$$$$$$  | $$$$$$  |/$$$$$$$/ $$$$$$/   /$$$$$$  |/$$$$$$  |
 55 | $$$$$$$$ |$$ |  $$/ $$ |      $$$$$$$  |$$    $$ | /    $$ |$$ |        $$ | __ $$ |  $$ |$$ |  $$/ 
 56 | $$ |  $$ |$$ |      $$ \_____ $$ |  $$ |$$$$$$$$/ /$$$$$$$ |$$ \_____   $$ |/  |$$ \__$$ |$$ |      
 57 | $$ |  $$ |$$ |      $$       |$$ |  $$ |$$       |$$    $$ |$$       |  $$  $$/ $$    $$/ $$ |      
 58 | $$/   $$/ $$/        $$$$$$$/ $$/   $$/  $$$$$$$/  $$$$$$$/  $$$$$$$/    $$$$/   $$$$$$/  $$/       
 59 |                                                                                                     
 60 |                                     ArcReactor [version 1.0]
 61 |                                         ohdae - 2012
 62 |                                 https://github.com/ohdae/arcreactor
 63 | 
 64 | '''   
 65 | 
 66 | def start_logger():
 67 |     # setup our logger
 68 |     # TODO: add log rotation function
 69 |     debug_log = PATH_LOGS+'/reactor.log'
 70 |     if os.path.exists(debug_log):
 71 |         # remove this print. debug msg.
 72 |         print('[*] logs will be appened to %s' % debug_log)
 73 |         logging.basicConfig(filename=debug_log, filemode='a',
 74 |                         format='%(asctime)s %(levelname)s %(message)s',
 75 |                         datefmt='%H:%M:%S', level=logging.DEBUG)
 76 |         return True
 77 |     else:
 78 |         status('warn', 'arcreactor', 'log file does not exist.')
 79 |         return False
 80 | 
 81 | def signal_handler(signal, frame):
 82 |     status('info', 'arcreactor', 'Ctrl+C signal caught. shutting down ArcReactor')
 83 | 
 84 | def load_keywords(file_path):
 85 |     # basic function for loading all keyword based config files
 86 |     file_data = []
 87 |     if os.path.exists(file_path) is False:
 88 |         status('warn', 'arcreactor', 'unable to load %s' % file_path)
 89 |         return False
 90 |     status('info', 'arcreactor', 'loading contents of %s' % file_path)
 91 |     f = open(file_path, 'rb')
 92 |     for line in f.readlines():
 93 |         # skip any commented lines
 94 |         if line.startswith('#'): continue
 95 |         # skip any empty lines
 96 |         text = line.strip('\n')
 97 |         if len(text) == 0: continue
 98 |         file_data.append(text)
 99 |     f.close()
100 |     return file_data
101 | 
102 | def load_config(file_path):
103 |     opts = {}
104 |     # make sure the config file exists
105 |     if not os.path.exists(file_path):
106 |         return False
107 |     # utilize the ConfigParser module for easier parsing
108 |     config.read(file_path)
109 |     opts['siem_host'] = config.get('syslog', 'host')
110 |     opts['siem_port'] = config.getint('syslog', 'port')
111 |     opts['siem_name'] = config.get('syslog', 'name')
112 |     opts['siem_max'] = config.get('syslog', 'max')
113 |     return opts 
114 | 
115 | def load_sources(file_path):
116 |     # basic function for loading all www source config files
117 |     file_data = []
118 |     if os.path.exists(file_path) is False:
119 |         status('warn', 'arcreactor', 'unable to load %s' % file_path)
120 |         return False
121 |     status('info', 'arcreactor', 'loading contents of %s' % file_path)
122 |     f = open(file_path, 'rb')
123 |     for line in f.readlines():
124 |         # skip all commented lines
125 |         if line.startswith('#'): continue
126 |         # skip all empty lines
127 |         text = line.strip('\n')
128 |         if len(text) == 0: continue
129 |         if text.startswith('http'):
130 |             file_data.append(text)
131 |     f.close()
132 |     return file_data
133 | 
134 | def status(level, module, message):
135 |     msg = '%s - %s' % (module, message)
136 |     if level == 'warn':
137 |         print('[!] %s' % msg)
138 |         logging.warn(msg)
139 |     else:
140 |         print('[~] %s' % msg)
141 |         logging.info(msg)
142 | 
143 | def json_request(url):
144 |     try:
145 |         headers = {'User-Agent': 'ArcReactor - 1.0 (https://github.com/ohdae/arcreactor)'}
146 |         request = requests.get(url, headers=headers)
147 |         if request.status_code == 200:
148 |             return request.json
149 |         else:
150 |             status('warn', 'arcreactor', 'http request failed for url %s. returned status code %s' % (url, request.status_code))
151 |             return None
152 |     except:
153 |         status('warn', 'arcreactor', 'http request failed for url %s' % url)
154 |         return None
155 | 
156 | def http_request(url):
157 |     try:
158 |         headers = {'User-Agent': 'ArcReactor - 1.0 (https://github.com/ohdae/arcreactor)'}
159 |         request = requests.get(url, headers=headers)
160 |         if request.status_code == 200:
161 |             return request.content
162 |         else:
163 |             status('warn', 'arcreactor', 'http request failed for url %s. returned status code %s' % (url, request.status_code))
164 |             return None
165 |     except:
166 |         status('warn', 'arcreactor', 'http request failed for url %s' % url)
167 |         return None
168 | 
169 | def send_syslog(message):
170 |     # create socket for sending syslog events
171 |     sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
172 |     # 'notice' is the default event. 3 + 5 * 8
173 |     # change this is need be
174 |     data = '<%d>%s' % (29, message)
175 |     sock.sendto(data, (opts['siem_host'], int(opts['siem_port'])))
176 |     sock.close()
177 | 
178 | def test_syslog():
179 |     try:
180 |         send_syslog('DEBUG MESSAGE')
181 |         return True
182 |     except:
183 |         return False
184 | 
185 | 
186 | 
187 | 
188 | 
189 | 
190 | 


--------------------------------------------------------------------------------
/lib/reactor.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/reactor.pyc


--------------------------------------------------------------------------------
/lib/reddit.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/deadbits/arcreactor/7386585be8de74a1a6842c49e5c2b791bbce7dd2/lib/reddit.py


--------------------------------------------------------------------------------
/standalone/otx-esm.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | #
 3 | # osint collection for AlienVault OTX reputation db
 4 | # stand-alone version from ArcReactor
 5 | #
 6 | # ohdae [ams]
 7 | # http://github.com/ohdae/ArcReactor/
 8 | #
 9 | 
10 | import requests
11 | import re, sys
12 | import socket
13 | 
14 | # define some stuff
15 | config = {
16 |     # alienvault's reputation db to use. i find snort format easier to parse
17 |     'otx': 'http://reputation.alienvault.com/reputation.snort',
18 |     # syslog host
19 |     'host': '127.0.0.1',
20 |     # syslog port
21 |     'port': '512'
22 | }
23 | count = 0
24 | 
25 | def send_syslog(msg):
26 |     sock = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
27 |     # %d in the syslog msg is the syslog level + facility * 8
28 |     # 29 is the default for notice + daemon * 8 or 5 + 3 * 8
29 |     # data = '<%d>%s' % (level + facility*8, message)
30 |     # change this if you feel the need
31 |     data = '<%d>%s' % (29, msg)
32 |     sock.sendto(data, (config['host'], int(config['port'])))
33 |     sock.close()
34 | 
def gather_data():
    """Download the OTX reputation list and send one CEF event per entry.

    Each non-comment line of the list ('ip-address # message') is turned
    into a CEF string and handed to send_syslog(); the module-level count
    is incremented for every event sent.

    Exits the process with status 1 if the list cannot be downloaded or a
    syslog event cannot be sent.
    """
    # count is rebound below, so it must be declared global; without this
    # the first 'count += 1' raises UnboundLocalError
    global count

    try:
        response = requests.get(config['otx'])
        # an http error page must not be parsed as if it were the list
        response.raise_for_status()
    except requests.RequestException:
        print("[!] error retrieving otx database")
        sys.exit(1)

    print("[~] attempting to parse reputation data...")
    try:
        for line in response.content.split("\n"):
            # skip comments and blank lines, then parse what is left
            if line.startswith("#") or line == "":
                continue
            # snort format is: ip-address # message
            # split once so a '#' inside the message is kept
            parts = line.split("#", 1)
            if len(parts) < 2:
                continue
            # drop the padding around the separator so the CEF fields are clean
            addr, info = parts[0].strip(), parts[1].strip()
            print("[~] sending syslog event for %s - %s" % (info, addr))
            cef = 'CEF:0|OSINT|ArcReactor|1.0|100|%s|1|src=%s msg=%s' % (info, addr, config['otx'])
            send_syslog(cef)
            count += 1
    except socket.error:
        print("[!] error sending syslog event")
        sys.exit(1)
57 | 
58 | 
# startup banner, written out in a single call
print("\n".join((
    "\n\n",
    "\t open-source data gathering ",
    "\t   source >> alienvault.com   ",
    "\t    author: ohdae [ams] ",
    "\n\thttp://github.com/ohdae/arcreactor",
    "\n\n"
)))

print("[~] starting collecting of OTX reputation database...")
# the alienvault otx db is refreshed every 60 minutes.
# for a constantly updated activelist in esm, either
# schedule this script as an hourly cronjob or wrap
# the gather_data() call in a loop with a
# time.sleep(3600) between runs
gather_data()
print("[*] collection complete.")
print("[*] %d unique events sent." % (count,))
75 | 
76 | 
77 | 


--------------------------------------------------------------------------------
/standalone/pb_scrape.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python
  2 | #
  3 | # osint collection for pastebin
  4 | # stand-alone version of the scraper from ArcReactor
  5 | #
  6 | # ohdae [ams]
  7 | # http://github.com/ohdae/ArcReactor/
  8 | #
  9 | 
 10 | import requests
 11 | import re, sys
 12 | import time
 13 | 
 14 | # define some stuff
 15 | pb_archive = "http://www.pastebin.com/archive"
 16 | pb_raw = "http://pastebin.com/raw.php?i="
 17 | pb_regex = re.compile('<td><img src="/i/t.gif" .*?<a href="/(.*?)">(.*?)</a></td>.*?<td>(.*?)</td>', re.S)
 18 | pb_queue = []
 19 | found = {}
 20 | 
 21 | # this will eventually be changed to a config file where the user can define
 22 | # keywords to look out for, individually or in pairs. until then, edit this
 23 | # list to your liking.
 24 | # e.g.: 
 25 | # watch = 'hacked' + 'my company name'
 26 | # watch = 'myemail@address.here'
 27 | watch_words = ['hacked', 'corporate', 'owned', 'sql injection', 'password', 'leak', 'waffles']
 28 | 
 29 | def collect_posts():
 30 |     try:
 31 |         posts = pb_regex.findall(requests.get(pb_archive).content)
 32 |         for p in posts:
 33 |             paste_id, paste_title = p[0], p[1]
 34 |             if paste_id not in pb_queue:
 35 |                 print("[~] collected ")
 36 |                 print("    id: %s | title: %s" % (paste_id, paste_title))
 37 |                 pb_queue.append(paste_id)
 38 |         print("[~] total posts in queue: %d" % len(pb_queue))
 39 |     except:
 40 |         print("[!] problem fetching pastebin archive.")
 41 |         print("    check your network connection and try again.")
 42 | 
 43 | def search_content(post_id):
 44 |     try:
 45 |         raw = requests.get("%s%s" % (pb_raw, post_id)).content
 46 |         pb_queue.remove(post_id)
 47 |         if "\r\n" in raw:
 48 |             data = raw.split("\r\n")
 49 |             for line in data:
 50 |                 search_raw(line, post_id)
 51 |         else:
 52 |             search_raw(raw, post_id)
 53 |     except:
 54 |         print("[!] problem fetching post %s" % post_id)
 55 | 
 56 | def search_raw(data, post_id):
 57 |     for word in watch_words:
 58 |         if word in data:
 59 |             found[post_id] = data
 60 |             print("\n")
 61 |             print("[*] found:\t%s" % word)
 62 |             print("     post:\t%s" % post_id)
 63 |             print("     data:\t%s" % data)
 64 | 
 65 | def menu():
 66 |     print(" help  \tdisplay this command menu")
 67 |     print(" gather\tcollect new posts from pastebin.com/archive")
 68 |     print(" search\tsearch current post queue for keywords")
 69 |     print(" words \tview current keyword watch list")
 70 |     print(" posts \tview post queue information and entries")
 71 |     print(" found \tview all found data")
 72 |     print(" exit  \texit this application\n")
 73 | 
 74 | def main():
 75 |     menu()
 76 |     while True:
 77 |         r = raw_input("arc >> ")
 78 |         if r == "gather":
 79 |             collect_posts()
 80 |         elif r == "search":
 81 |             if pb_queue != "":
 82 |                 start_queue = len(pb_queue)
 83 |                 for post in pb_queue:
 84 |                     search_content(post)
 85 |                     print("\n[~] searched %s" % post)
 86 |                     time.sleep(0.5)
 87 |             else:
 88 |                 print("[!] post queue is currently empty. try running 'gather' first.")
 89 |         elif r == "words":
 90 |             print("[~] keyword watch list: ")
 91 |             for word in watch_words:
 92 |                 print word
 93 |         elif r == "posts":
 94 |             if pb_queue != "":
 95 |                 print("[~] total queued posts: %d" % len(pb_queue))
 96 |             else:
 97 |                 print("[!] post queue is currently empty. try running 'gather' first.")
 98 |         elif r == "exit" or r == "quit":
 99 |             print("[*] exiting application...")
100 |             sys.exit(0)
101 |         elif r == "found":
102 |             if found != "":
103 |                 for key, value in found.iteritems():
104 |                     print("\n")
105 |                     print("post id: %s" % key)
106 |                     print("   data: %s" % value)
107 |         elif r == "help":
108 |             menu()
109 |         else:
110 |             print("[!] this is not a valid command.")
111 | 
112 | 
# startup banner, written out in a single call, then hand over to the prompt
print("\n".join((
    "\n\n",
    "\t open-source data gathering ",
    "\t   source >> pastebin.com   ",
    "\t    author: ohdae [ams] ",
    "\n\thttp://github.com/ohdae/arcreactor",
    "\n\n"
)))
main()


--------------------------------------------------------------------------------