├── .github
    └── FUNDING.yml
├── terraform
    ├── aws_config_file
    ├── boto_file
    ├── vars.tf
    ├── user-data.sh
    ├── example.yaml
    └── main.tf
├── docs
    └── images
    │   └── logo.png
├── gunslinger
    ├── rules
    │   └── example.py
    ├── launch.sh
    ├── requirements.txt
    ├── backends
    │   ├── outputs
    │   │   ├── slack_output.py
    │   │   └── http_output.py
    │   ├── sqs_backend.py
    │   ├── plugin_backend.py
    │   ├── processors
    │   │   ├── domain_processor.py
    │   │   └── urlscan_processor.py
    │   └── slack_backend.py
    ├── gunslinger.py
    └── inputs
    │   └── reloader.py
├── LICENSE
├── .gitignore
└── README.md


/.github/FUNDING.yml:
--------------------------------------------------------------------------------
1 | ko_fi: JacobPimental
2 | 


--------------------------------------------------------------------------------
/terraform/aws_config_file:
--------------------------------------------------------------------------------
1 | [default]
2 | region = ${region}
3 | output = json
4 | 


--------------------------------------------------------------------------------
/docs/images/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/JacobPimental/gunslinger/HEAD/docs/images/logo.png


--------------------------------------------------------------------------------
/terraform/boto_file:
--------------------------------------------------------------------------------
1 | [Credentials]
2 | aws_access_key_id = ${access_key}
3 | aws_secret_access_key = ${secret}
4 | 


--------------------------------------------------------------------------------
/gunslinger/rules/example.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | def run(**kwargs):
 4 |     reg1 = r'function ant_cockroach'
 5 |     reg2 = r'cc_number'
 6 |     reg3 = r'payment_checkout[0-9]'
 7 |     script = kwargs.get('script', '')
 8 |     ant_cockroach = len(re.findall(reg1, script)) > 0
 9 |     cc_number = len(re.findall(reg2, script)) > 0
10 |     checkout = len(re.findall(reg3, script)) > 0
11 |     return ant_cockroach and cc_number and checkout
12 | 


--------------------------------------------------------------------------------
/terraform/vars.tf:
--------------------------------------------------------------------------------
 1 | #DigitalOcean Variables
 2 | variable "digitalocean_token" {}
 3 | variable "server_pub_key" {}
 4 | variable "server_priv_key" {}
 5 | variable "server_region" { default="nyc1" }
 6 | 
 7 | # Whether or not to use Amazon SQS Message Queue
 8 | variable "use_sqs" { 
 9 | 	type=bool
10 | 	default=false 
11 | }
12 | 
13 | # Launch Script variables
14 | variable "rule_dir" { default="../gunslinger/rules/" }
15 | variable "num_workers" { default="" }
16 | 
17 | # Miscellaneous
18 | variable "aws_region" { default="us-east-1" }
19 | 


--------------------------------------------------------------------------------
/terraform/user-data.sh:
--------------------------------------------------------------------------------
 1 | #!/bin/bash
 2 | sudo apt update
 3 | sudo apt upgrade --yes --force-yes -o Dpkg::Options::="--force-confnew"
 4 | sudo apt install python3-pip --yes --force-yes
 5 | cd /opt/
 6 | while [[ ! -d gunslinger ]]
 7 | do
 8 | 	:
 9 | done
10 | cd gunslinger
11 | while [[ ! -d gunslinger_rules ]]
12 | do
13 | 	:
14 | done
15 | while [[ -z $(ls -A gunslinger_rules) ]]
16 | do
17 | 	:
18 | done
19 | sudo pip3 install -r requirements.txt
20 | sudo chmod +x launch.sh
21 | echo "Running ./launch.sh ${num_workers}"
22 | ./launch.sh ${num_workers}
23 | 


--------------------------------------------------------------------------------
/gunslinger/launch.sh:
--------------------------------------------------------------------------------
 1 | NUM_WORKERS=5
 2 | CONFIG_FILE="-c gunslinger.yaml"
 3 | 
 4 | while [[ $# -gt 0 ]]
 5 | do
 6 | 	case "$1" in
 7 | 		-c|--config-file)
 8 | 			QUEUE_CHANNEL="-c $2 "
 9 | 			shift
10 | 			shift
11 | 			;;
12 | 
13 | 		-t|--num_workers)
14 | 			NUM_WORKERS="$2"
15 | 			shift
16 | 			shift
17 | 			;;
18 | 	esac
19 | done
20 | 
21 | mkdir logs
22 | 
23 | chmod +x inputs/*
24 | 
25 | for f in inputs/*
26 | do
27 | 	cmd="./$f $CONFIG_FILE"
28 | 	bn=$(basename $f)
29 | 	nohup $cmd &
30 | done
31 | sleep 60
32 | gunslinger_cmd="python3 gunslinger.py $CONFIG_FILE"
33 | echo $gunslinger_cmd
34 | for i in $(seq 1 $NUM_WORKERS)
35 | do
36 | 	nohup $gunslinger_cmd &
37 | 	if [ -z "$SQS_URL" ]
38 | 	then
39 | 		sleep 60
40 | 	fi
41 | done
42 | 
43 | 


--------------------------------------------------------------------------------
/gunslinger/requirements.txt:
--------------------------------------------------------------------------------
 1 | aiohttp==3.7.4
 2 | appdirs==1.4.4
 3 | APScheduler==3.6.3
 4 | async-timeout==3.0.1
 5 | attrs==19.3.0
 6 | beautifulsoup4==4.9.1
 7 | boto3==1.12.31
 8 | botocore==1.15.31
 9 | bs4==0.0.1
10 | certifi==2019.11.28
11 | chardet==3.0.4
12 | cssselect==1.1.0
13 | docutils==0.15.2
14 | fake-useragent==0.1.11
15 | idna==2.9
16 | jmespath==0.9.5
17 | lxml==4.6.3
18 | multidict==4.7.5
19 | parse==1.15.0
20 | pluginbase==1.0.0
21 | pyee==7.0.2
22 | pyppeteer==0.2.2
23 | pyquery==1.4.1
24 | python-dateutil==2.8.1
25 | pytz==2019.3
26 | PyYAML==5.4
27 | requests==2.23.0
28 | s3transfer==0.3.3
29 | six==1.14.0
30 | slackclient==2.5.0
31 | soupsieve==2.0.1
32 | tqdm==4.46.0
33 | tzlocal==2.0.0
34 | urllib3==1.26.5
35 | w3lib==1.22.0
36 | websockets==9.1
37 | yarl==1.4.2
38 | 


--------------------------------------------------------------------------------
/terraform/example.yaml:
--------------------------------------------------------------------------------
 1 | processors:
 2 |   urlscan_processor:
 3 |     api_key: "<api-key-for-urlscan>"
 4 |   domain_processor:
 5 |     timeout: 10 # timeout value for requests
 6 | outputs:
 7 |   - name: "slack_output"
 8 |     slack_token: "<slack-api-key>"
 9 |     channel: "<channel to log hits to>"
10 | queue_data:
11 |   slack_token: "<slack-api-key for message queue>"
12 |   channel: "<channel for message queue>"
13 |   rate_limit: 15 # Amount in seconds to wait when hitting rate limit
14 | inputs:
15 |   urlscan_input:
16 |     query: "<query to run for urlscan>"
17 |     urlscan_key: "<urlscan api key>"
18 |     cron: "<cron schedule to run query>"
19 |     num_workers: 5 # number of gunslinger agents that will pull results from MQ
20 | rule_dir: '<path to directory containing rules>'
21 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2020 Jacob Pimental
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/gunslinger/backends/outputs/slack_output.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import slack
 3 | import yaml
 4 | 
 5 | class SlackHandler():
 6 | 
 7 |     def __init__(self, **kwargs):
 8 |         logging.getLogger(__name__)
 9 |         channel = kwargs.get('channel', 'mq')
10 |         slack_token = kwargs.get('slack_token', '')
11 |         self.client = slack.WebClient(token=slack_token)
12 |         logging.info(f'Channel is {channel}')
13 |         self.channel = self.get_channel(channel)
14 | 
15 | 
16 |     def get_channel(self, channel):
17 |         """Gets ID of Slack channel.
18 | 
19 |         Arguments:
20 |             channel (str): channel to get ID of
21 | 
22 |         Returns:
23 |             str: Channel ID
24 |         """
25 |         logging.info(f'Getting channel {channel}')
26 |         channels = self.client.conversations_list()
27 |         for slack_channel in channels['channels']:
28 |             if slack_channel['name'] == channel:
29 |                 return slack_channel['id']
30 |         raise Exception('Channel does not exist')
31 | 
32 | 
33 |     def post_message(self, text, **kwargs):
34 |         """Posts message to Slack
35 | 
36 |         Arguments:
37 |             text (str): message to send
38 |             channel (str, optional): channel to send the message to
39 |             reaction (str, optional): Slack reaction code to add to message
40 | 
41 |         Returns:
42 |             (dict): Message response object from Slack API
43 |         """
44 |         channel = kwargs.get('channel', self.channel)
45 |         reaction = kwargs.get('reaction', '')
46 |         message_response = self.client.chat_postMessage(channel=channel,
47 |                                                         text=text)
48 |         return message_response
49 | 
50 | 
def run(output_data, config_data):
    """Output plugin entry point: post a hit report to Slack.

    Arguments:
        output_data: Report to publish; it is serialised with yaml.dump.
        config_data (dict): Keyword arguments forwarded to SlackHandler.

    Errors raised while formatting or posting are logged, not propagated.
    Errors raised while constructing the handler do propagate.
    """
    handler = SlackHandler(**config_data)
    try:
        handler.post_message('Hit!:gun:\n' + yaml.dump(output_data))
    except Exception as err:
        logging.error(err)
58 | 


--------------------------------------------------------------------------------
/gunslinger/backends/sqs_backend.py:
--------------------------------------------------------------------------------
 1 | import boto3
 2 | import logging
 3 | 
 4 | class AWS_SQS():
 5 | 
 6 |     def __init__(self, **kwargs):
 7 |         logging.getLogger(__name__)
 8 |         self.sqs = boto3.client('sqs')
 9 |         self.url = kwargs.get('url', '')
10 | 
11 | 
12 |     def post_message(self, text, **kwargs):
13 |         """Posts message to SQS.
14 | 
15 |         Arguments:
16 |             text (str): Text to send to SQS
17 | 
18 |         Returns:
19 |             dict: Response object from SQS
20 |         """
21 |         while True:
22 |             try:
23 |                 response = self.sqs.send_message(QueueUrl=self.url,
24 |                                                  MessageBody=text,
25 |                                                  MessageGroupId='gunslinger_group')
26 |                 break
27 |             except Exception as e:
28 |                 logging.error(e)
29 |                 continue
30 |         return response
31 | 
32 | 
33 |     def get_next_message(self, **kwargs):
34 |         """Gets next message from the queue and deletes it.
35 | 
36 |         Returns:
37 |             list: list of results from SQS
38 |             int: numeric 0 to comply with Gunslinger logic
39 |         """
40 |         while True:
41 |             try:
42 |                 response = self.sqs.receive_message(QueueUrl=self.url,
43 |                                                     MaxNumberOfMessages=1)
44 |                 break
45 |             except Exception as e:
46 |                 logging.error(e)
47 |                 continue
48 | 
49 |         messages = response.get('Messages', [])
50 |         if len(messages) == 0:
51 |             return [], 0
52 |         message = messages[0]
53 |         self.sqs.delete_message(QueueUrl=self.url,
54 |                                 ReceiptHandle=message['ReceiptHandle'])
55 |         message_body = message['Body']
56 |         if 'gunslinger' in message_body:
57 |             return [], 0
58 |         dat = message_body.strip().split('\n')[1:]
59 |         return dat, 0
60 | 


--------------------------------------------------------------------------------
/gunslinger/backends/outputs/http_output.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import logging
 3 | import json
 4 | 
 5 | class HTTPOutputHandler():
 6 | 
 7 |     def __init__(self, **kwargs):
 8 |         logging.getLogger(__name__)
 9 |         try:
10 |             self.headers = kwargs.get('headers', {})
11 |             self.headers['content-type'] = 'application/json'
12 |             self.fields = kwargs.get('fields', [])
13 |             self.method = kwargs.get('method', 'POST')
14 |             self.endpoint = kwargs['url']
15 |         except Exception as exc:
16 |             logging.critical(f'HTTP Output configured incorrectly: {exc}')
17 | 
18 | 
19 |     def get_field(self, data, field):
20 |         if isinstance(data, dict):
21 |             if field in data.keys():
22 |                 yield data[field]
23 |             else:
24 |                 for key in data.keys():
25 |                     for val in self.get_field(data[key], field):
26 |                         yield val
27 |         elif isinstance(data, list):
28 |             for dat in data:
29 |                 for val in self.get_field(dat, field):
30 |                     yield val
31 | 
32 | 
33 |     def create_data(self, data):
34 |         if not 'results' in data:
35 |             return
36 |         num_results = len(data['results'])
37 |         report_data = {'results':[{} for _ in range(num_results)]}
38 |         for field in self.fields:
39 |             field_data = list(self.get_field(data, field))
40 |             print(field_data)
41 |             if len(field_data) < num_results:
42 |                 field_data += [''] * (num_results - len(field_data))
43 |             for i in range(num_results):
44 |                 print(field_data[i])
45 |                 report_data['results'][i][field] = field_data[i]
46 |         return report_data
47 | 
48 | 
49 |     def send_data(self, data):
50 |         report_data = self.create_data(data)
51 |         response = requests.request(self.method, self.endpoint,
52 |                                     data=json.dumps(report_data),
53 |                                     headers=self.headers)
54 |         if not response.ok:
55 |             status = response.status_code
56 |             logging.error(f'ERROR sending data to {self.endpoint}: {status} :'\
57 |                           ' {response.text}')
58 | 
59 | 
def run(output_data, config_info):
    """Output plugin entry point: send a report to the configured endpoint.

    Arguments:
        output_data (dict): Report to send.
        config_info (dict): Keyword arguments forwarded to HTTPOutputHandler.

    Errors raised while sending are logged, not propagated.
    """
    handler = HTTPOutputHandler(**config_info)
    try:
        handler.send_data(output_data)
    except Exception as err:
        logging.error(err)
66 | 


--------------------------------------------------------------------------------
/gunslinger/backends/plugin_backend.py:
--------------------------------------------------------------------------------
 1 | from pluginbase import PluginBase
 2 | import os
 3 | import sys
 4 | import logging
 5 | 
 6 | class PluginManager():
 7 | 
 8 |     def __init__(self, **kwargs):
 9 |         logging.getLogger(__name__)
10 |         package = kwargs.get('package', 'rule_backend.plugins')
11 |         self._plugin_base = PluginBase(package=package)
12 |         self._plugin_dir = kwargs.get('plugin_dir', '.')
13 |         plugin_path = self.get_path(self._plugin_dir)
14 |         self._source = self._plugin_base.make_plugin_source(
15 |             searchpath=[plugin_path])
16 | 
17 | 
18 |     def get_path(self, directory):
19 |         """Gets path of rule directory relative to working directory.
20 | 
21 |         Arguments:
22 |             directory (str): Path to the directory of rule files
23 | 
24 |         Returns:
25 |             str: A string of the absolute path to the rule directory
26 |         """
27 |         here = os.path.abspath(os.getcwd())
28 |         return os.path.join(here, directory)
29 | 
30 | 
31 |     def run_rules(self, **kwargs):
32 |         """Runs rules via python plugins.
33 | 
34 |         Returns:
35 |             list: List of all rules that returned True
36 |         """
37 |         fired_rules = []
38 |         for plugin_name in self._source.list_plugins():
39 |             logging.info(f'Running rule {plugin_name}')
40 |             rule = self._source.load_plugin(plugin_name)
41 |             try:
42 |                 rule_fired = rule.run(**kwargs)
43 |                 if rule_fired:
44 |                     fired_rules.append(plugin_name)
45 |             except Exception as e:
46 |                 logging.error(f'Cannot run rule {plugin_name} ' \
47 |                               '(possibly formatted incorrectly)')
48 |                 logging.error(e)
49 |         return fired_rules
50 | 
51 | 
52 |     def run_processor(self, processor_name, processor_data, config_info,
53 |                       rule_manager):
54 |         plugin = self._source.load_plugin(processor_name)
55 |         try:
56 |             returned_data = plugin.run(data=processor_data,
57 |                                        config_info=config_info,
58 |                                        rule_manager=rule_manager)
59 |             return returned_data
60 |         except Exception as e:
61 |             logging.error(f'Cannot run processor {processor_name} ' \
62 |                           '(possible misconfigured)')
63 |             logging.error(e)
64 |             return {}
65 | 
66 | 
67 |     def run_output(self, output_name, output_data, config_info):
68 |         plugin = self._source.load_plugin(output_name)
69 |         try:
70 |             plugin.run(output_data, config_info)
71 |         except Exception as e:
72 |             logging.error(f'Cannot run output {output_name} ' \
73 |                           '(possibly misconfigured)')
74 |             logging.error(e)
75 | 


--------------------------------------------------------------------------------
/gunslinger/backends/processors/domain_processor.py:
--------------------------------------------------------------------------------
 1 | import gc
 2 | import logging
 3 | from bs4 import BeautifulSoup
 4 | import requests
 5 | import hashlib
 6 | 
 7 | logger = logging.getLogger(__name__)
 8 | 
 9 | def url_thread(url, timeout=10):
10 |     clean_url = url.replace('<', '').replace('>', '')
11 | 
12 |     if '|' in clean_url:
13 |         clean_url = clean_url[:clean_url.index('|')]
14 | 
15 |     if (not clean_url.startswith('http://') and not
16 |             clean_url.startswith('https://')):
17 |         clean_url = 'http://' + clean_url
18 |     logger.info(f'Getting scripts at {clean_url}')
19 |     r = requests.head(clean_url, allow_redirects=True,
20 |                       timeout=timeout)
21 | 
22 |     if not 'text/html' in r.headers['Content-Type']:
23 |         return []
24 |     r = requests.get(clean_url, timeout=timeout)
25 |     soup = BeautifulSoup(r.content.decode('ISO-8859-1')
26 |                          , "lxml")
27 |     scripts = soup.find_all('script', {'src':True})
28 |     logger.info(f'Found {len(scripts)} at url')
29 |     soup.decompose()
30 |     data = [clean_url]
31 | 
32 |     for script in scripts:
33 |         script_src = script['src']
34 | 
35 |         if script_src[:2] == '//':
36 |             script_src = 'http:' + script_src
37 |         elif (not script_src.startswith('http://') and not
38 |               script_src.startswith('https://')):
39 |             script_src = 'http://' + script_src
40 |         data.append(script_src)
41 |     logger.info(f'Got {len(data)} script(s)')
42 | 
43 |     return data
44 | 
45 | 
def get_js_content(js_dat, rule_manager, timeout=10):
    """Download each URL and run the rules against its content.

    Arguments:
        js_dat (list): URLs to download.
        rule_manager: Object whose ``run_rules(script=...)`` returns the
            names of the rules that fired.
        timeout (int): Timeout in seconds for each download.

    Returns:
        list: One dict per URL with at least one fired rule, holding the
        keys 'url', 'fired_rules' and 'hash' (SHA-256 of the raw content).
        A URL whose download or rule run raises is logged and skipped.
    """
    hits = []

    for script_url in js_dat:
        logger.info(f'Getting script at {script_url}')
        try:
            resp = requests.get(script_url, timeout=timeout)
            logger.info('Running rules')
            body_text = resp.content.decode('ISO-8859-1')
            matched = rule_manager.run_rules(script=body_text)

            if matched:
                digest = hashlib.sha256(resp.content).hexdigest()
                hits.append(dict(url=script_url,
                                 fired_rules=matched,
                                 hash=digest))
        except Exception as err:
            logger.error(err)
            continue
        # Collect after each successful download.
        gc.collect()
        logger.info(f'Got script at {script_url}')

    return hits
70 | 
71 | 
def run(**kwargs):
    """Processor entry point: scan each URL's scripts with the rules.

    Keyword Arguments:
        data (list): URLs to process. Missing or None means no URLs.
        config_info (dict): Processor settings. 'timeout' (seconds) is
            read, defaulting to 10. Missing or None means defaults.
        rule_manager: Object used to run the rules on each script.

    Returns:
        dict: ``{'results': [...]}`` with one entry per matching script,
        each tagged with the 'submitted_url' it came from, or None when
        nothing matched.
    """
    urls = kwargs.get('data') or []
    config_info = kwargs.get('config_info') or {}
    timeout = config_info.get('timeout', 10)
    rule_manager = kwargs.get('rule_manager')
    report_data = {'results':[]}

    for url in urls:
        try:
            js_urls = url_thread(url, timeout)
            found_scripts = get_js_content(js_urls, rule_manager, timeout)
        except Exception as e:
            # An unreachable or malformed URL must not abort the rest of
            # the batch; log it and move on to the next one.
            logger.error(e)
            continue

        for report in found_scripts:
            report['submitted_url'] = url
            report_data['results'].append(report)
    if report_data['results']:
        return report_data
    return None
93 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | build/
 12 | develop-eggs/
 13 | dist/
 14 | downloads/
 15 | eggs/
 16 | .eggs/
 17 | lib/
 18 | lib64/
 19 | parts/
 20 | sdist/
 21 | var/
 22 | wheels/
 23 | pip-wheel-metadata/
 24 | share/python-wheels/
 25 | *.egg-info/
 26 | .installed.cfg
 27 | *.egg
 28 | MANIFEST
 29 | 
 30 | # PyInstaller
 31 | #  Usually these files are written by a python script from a template
 32 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 33 | *.manifest
 34 | *.spec
 35 | 
 36 | # Installer logs
 37 | pip-log.txt
 38 | pip-delete-this-directory.txt
 39 | 
 40 | # Unit test / coverage reports
 41 | htmlcov/
 42 | .tox/
 43 | .nox/
 44 | .coverage
 45 | .coverage.*
 46 | .cache
 47 | nosetests.xml
 48 | coverage.xml
 49 | *.cover
 50 | *.py,cover
 51 | .hypothesis/
 52 | .pytest_cache/
 53 | 
 54 | # Translations
 55 | *.mo
 56 | *.pot
 57 | 
 58 | # Django stuff:
 59 | *.log
 60 | local_settings.py
 61 | db.sqlite3
 62 | db.sqlite3-journal
 63 | 
 64 | # Flask stuff:
 65 | instance/
 66 | .webassets-cache
 67 | 
 68 | # Scrapy stuff:
 69 | .scrapy
 70 | 
 71 | # Sphinx documentation
 72 | docs/_build/
 73 | 
 74 | # PyBuilder
 75 | target/
 76 | 
 77 | # Jupyter Notebook
 78 | .ipynb_checkpoints
 79 | 
 80 | # IPython
 81 | profile_default/
 82 | ipython_config.py
 83 | 
 84 | # pyenv
 85 | .python-version
 86 | 
 87 | # pipenv
 88 | #   According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
 89 | #   However, in case of collaboration, if having platform-specific dependencies or dependencies
 90 | #   having no cross-platform support, pipenv may install dependencies that don't work, or not
 91 | #   install all needed dependencies.
 92 | #Pipfile.lock
 93 | 
 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow
 95 | __pypackages__/
 96 | 
 97 | # Celery stuff
 98 | celerybeat-schedule
 99 | celerybeat.pid
100 | 
101 | # SageMath parsed files
102 | *.sage.py
103 | 
104 | # Environments
105 | .env
106 | .venv
107 | env/
108 | venv/
109 | ENV/
110 | env.bak/
111 | venv.bak/
112 | 
113 | # Spyder project settings
114 | .spyderproject
115 | .spyproject
116 | 
117 | # Rope project settings
118 | .ropeproject
119 | 
120 | # mkdocs documentation
121 | /site
122 | 
123 | # mypy
124 | .mypy_cache/
125 | .dmypy.json
126 | dmypy.json
127 | 
128 | # Pyre type checker
129 | .pyre/
130 | 
131 | # Local .terraform directories
132 | **/.terraform/*
133 | 
134 | # .tfstate files
135 | *.tfstate
136 | *.tfstate.*
137 | 
138 | # Crash log files
139 | crash.log
140 | 
141 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most
142 | # .tfvars files are managed as part of configuration and so should be included in
143 | # version control.
144 | #
145 | *.tfvars
146 | 
147 | # Ignore override files as they are usually used to override resources locally and so
148 | # are not checked in
149 | override.tf
150 | override.tf.json
151 | *_override.tf
152 | *_override.tf.json
153 | 
154 | # Other terraform files
155 | .terraform.lock.hcl
156 | versions.tf
157 | 
158 | # Include override files you do wish to add to version control using negated pattern
159 | #
160 | # !example_override.tf
161 | 
162 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan
163 | # example: *tfplan*
164 | 
165 | # Custom Rules
166 | proprietary_rules/
167 | *.log
168 | gunslinger.yaml
169 | 


--------------------------------------------------------------------------------
/terraform/main.tf:
--------------------------------------------------------------------------------
  1 | provider "aws" {
  2 | 	region = var.aws_region
  3 | }
  4 | provider "digitalocean" {
  5 | 	token = var.digitalocean_token
  6 | }
  7 | 
  8 | resource "aws_iam_user" "sqs_user" {
  9 | 	name = "sqs_reader"
 10 | 	path = "/"
 11 | 	count = var.use_sqs == false ? 0 : 1
 12 | }
 13 | 
 14 | resource "aws_iam_user_policy" "sqs_user_policy" {
 15 | 	name = "sqs_gunslinger_policy"
 16 | 	user = aws_iam_user.sqs_user.0.name
 17 | 	policy = <<EOF
 18 | {
 19 |     "Version": "2012-10-17",
 20 |     "Statement": [
 21 |         {
 22 |             "Sid": "VisualEditor0",
 23 |             "Effect": "Allow",
 24 |             "Action": [
 25 |                 "sqs:DeleteMessage",
 26 |                 "sqs:ReceiveMessage",
 27 |                 "sqs:SendMessage"
 28 |             ],
 29 |             "Resource": "${aws_sqs_queue.message_queue.0.arn}"
 30 |         }
 31 |     ]
 32 | }
 33 | EOF
 34 | 	count = var.use_sqs == false ? 0 : 1
 35 | 	depends_on = [aws_sqs_queue.message_queue]
 36 | }
 37 | 
 38 | resource "aws_iam_access_key" "sqs_user_key" {
 39 |   user = aws_iam_user.sqs_user.0.name
 40 | 	count = var.use_sqs == false ? 0 : 1
 41 | }
 42 | 
 43 | resource "aws_sqs_queue" "message_queue" {
 44 | 	name = "gunslinger_queue.fifo"
 45 |   fifo_queue = true	
 46 |   content_based_deduplication = true
 47 | 	receive_wait_time_seconds = 20
 48 |   count = var.use_sqs == false ? 0 : 1
 49 | }
 50 | 
 51 | output "sqs_id" {
 52 | 	value = aws_sqs_queue.message_queue.*.id
 53 | }
 54 | 
 55 | output "sqs_arn" {
 56 | 	value = aws_sqs_queue.message_queue.*.arn
 57 | }
 58 | 
 59 | 
 60 | 
 61 | resource "digitalocean_ssh_key" "key" {
 62 | 	name = "Gunsinger Key"
 63 | 	public_key = file(var.server_pub_key)
 64 | }
 65 | 
 66 | resource "digitalocean_droplet" "server" {
 67 | 	image = "ubuntu-20-04-x64"
 68 | 	name = "gunslinger"
 69 | 	region = var.server_region
 70 | 	size = "s-1vcpu-1gb"
 71 | 	ssh_keys = [digitalocean_ssh_key.key.fingerprint]
 72 | #	depends_on = [ aws_sqs_queue.message_queue,
 73 | #								 aws_iam_access_key.sqs_user_key]
 74 | 
 75 | 	user_data = templatefile("user-data.sh", {
 76 | 		num_workers = var.num_workers != "" ? format("-t %s ", var.num_workers) : ""})
 77 | 
 78 | 	provisioner "remote-exec" {
 79 | 		inline = ["sudo mkdir -p /opt/gunslinger/gunslinger_rules",
 80 | 							"sudo mkdir ~/.aws"]
 81 | 
 82 | 		connection {
 83 | 			user = "root"
 84 | 			private_key = file(var.server_priv_key)
 85 | 			host = digitalocean_droplet.server.ipv4_address
 86 | 		}
 87 | 	}
 88 | 	
 89 | 	provisioner "file" {
 90 | 		source = "${dirname(path.cwd)}/gunslinger/"
 91 | 		destination = "/opt/gunslinger"
 92 | 		connection {
 93 | 			user = "root"
 94 | 			private_key = file(var.server_priv_key)
 95 | 			host = digitalocean_droplet.server.ipv4_address
 96 | 		}
 97 | 	}
 98 | 
 99 | 	provisioner "file" {
100 | 		content = templatefile("boto_file", {
101 | 			access_key = var.use_sqs == false ? "" : aws_iam_access_key.sqs_user_key.0.id,
102 | 			secret = var.use_sqs == false ? "" : aws_iam_access_key.sqs_user_key.0.secret})
103 | 		destination = "~/.boto"
104 | 		connection {
105 | 			user = "root"
106 | 			private_key = file(var.server_priv_key)
107 | 			host = digitalocean_droplet.server.ipv4_address
108 | 		}
109 | 	}
110 | 
111 | 	provisioner "file" {
112 | 		content = templatefile("aws_config_file", {
113 | 			region = var.aws_region})
114 | 		destination = "~/.aws/config"
115 | 		connection {
116 | 			user = "root"
117 | 			private_key = file(var.server_priv_key)
118 | 			host = digitalocean_droplet.server.ipv4_address
119 | 		}
120 | 	}
121 | 
122 | 	provisioner "file" {
123 | 		content = templatefile("gunslinger.yaml", {
124 | 			sqs_url = var.use_sqs == false ? "" : aws_sqs_queue.message_queue.0.id})
125 | 		destination = "/opt/gunslinger/gunslinger.yaml"
126 | 		connection {
127 | 			user = "root"
128 | 			private_key = file(var.server_priv_key)
129 | 			host = digitalocean_droplet.server.ipv4_address
130 | 		}
131 | 	}
132 | 
133 | 	provisioner "file" {
134 | 		source = substr(var.rule_dir, length(var.rule_dir)-1, 1) == "/" ? var.rule_dir : format("%s/", var.rule_dir)
135 | 		destination = "/opt/gunslinger/gunslinger_rules"
136 | 		connection {
137 | 			user = "root"
138 | 			private_key = file(var.server_priv_key)
139 | 			host = digitalocean_droplet.server.ipv4_address
140 | 		}
141 | 	}
142 | }
143 | 
144 | output "ip" {
145 | 	value = digitalocean_droplet.server.ipv4_address
146 | }
147 | 


--------------------------------------------------------------------------------
/gunslinger/backends/processors/urlscan_processor.py:
--------------------------------------------------------------------------------
  1 | import requests
  2 | import logging
  3 | 
  4 | logger = logging.getLogger(__name__)
  5 | 
  6 | class URLScanProcessor():
  7 | 
  8 |     def __init__(self, config_data, rule_manager):
  9 |         api_key = config_data.get('api_key', '')
 10 |         self.header = {'Content-Type': 'application/json',
 11 |                        'Api-Key': api_key}
 12 |         self.rule_manager = rule_manager
 13 | 
 14 | 
 15 |     def parse_search_results(self, results):
 16 |         """Parses the results of the search to look for magecart.
 17 | 
 18 |         Arguments:
 19 |             results (array): Array of object results from URLScan
 20 |         """
 21 |         report_data = {'results':[]}
 22 | 
 23 |         for result in results:
 24 |             try:
 25 |                 # Contains the URLScan info for URL
 26 |                 url = result.replace('<', '').replace('>', '')
 27 |                 web_requests, submitted_url, urlscan_url = self.get_requests(url)
 28 |                 scripts_found = self.parse_requests(web_requests)
 29 | 
 30 |                 if scripts_found:
 31 |                     for script_data in scripts_found:
 32 |                         report = script_data
 33 |                         report['submitted_url'] = submitted_url
 34 |                         report['urlscan_url'] = urlscan_url
 35 |                         report_data['results'].append(report)
 36 |             except Exception as e:
 37 |                 logger.error(e)
 38 | 
 39 |                 continue
 40 |         if report_data['results']:
 41 |             return report_data
 42 |         return None
 43 | 
 44 | 
 45 |     def get_requests(self, url):
 46 |         """Gets the requests a URL makes when a webpage is loaded
 47 | 
 48 |         Arguments:
 49 |             url (string): Url of the URLScan page for the scanned website
 50 | 
 51 |         Returns:
 52 |             array: Array of request objects from URLScan API
 53 |             dict: Dict containing the original URL submitted to URLScan
 54 |                   and the URLScan report
 55 |         """
 56 |         result_dat = requests.get(url, headers=self.header, timeout=10).json()
 57 | 
 58 |         if not 'data' in result_dat.keys() or not 'task' in result_dat.keys():
 59 |             return ([], '', '')
 60 |         submitted_url = result_dat['task']['url']
 61 |         urlscan_url = result_dat['task']['reportURL']
 62 | 
 63 |         return result_dat['data']['requests'], submitted_url, urlscan_url
 64 | 
 65 | 
 66 |     def get_response(self, response, h):
 67 |         """Gets the data returned from a request made by a webpage.
 68 | 
 69 |         Arguments:
 70 |             response (dict): URLScan response object
 71 |             h (str): Hash of the response, used to get the url for the response
 72 |                 from URLScan
 73 | 
 74 |         Returns:
 75 |             string: The data returned by the request (i.e. scripts, html, etc.)
 76 |         """
 77 |         logger.info(f'Getting hash {h}')
 78 |         script = ''
 79 |         response = response['response']
 80 |         url = f'https://urlscan.io/responses/{h}/' #URLScan response URL
 81 |         script_r = requests.get(url, timeout=10)
 82 | 
 83 |         if script_r.status_code == 200:
 84 |             script = script_r.text
 85 | 
 86 |         return script
 87 | 
 88 | 
 89 |     def parse_requests(self, requests):
 90 |         """Parses the requests made by a webpage to look for Magecart.
 91 | 
 92 |         Arguments:
 93 |             requests (array): Array of objects contianing data on the request
 94 |                 made
 95 |         """
 96 |         scripts_found = []
 97 | 
 98 |         for request in requests:
 99 |             try:
100 |                 response = request['response'] #Get the response for each request
101 |                 h = response['hash']
102 |                 script = self.get_response(response, h)
103 |                 url = response['response']['url']
104 |                 fired_rules = self.rule_manager.run_rules(script=script,
105 |                                                           response_data=response)
106 | 
107 |                 if fired_rules:
108 |                     logger.info(f'Rule fired on {url}')
109 |                     script_data = {'url':url, 'hash':h,
110 |                                    'fired_rules':fired_rules}
111 |                     scripts_found.append(script_data)
112 |             except Exception as e:
113 |                 logger.error(e)
114 | 
115 |                 continue
116 | 
117 |         return scripts_found
118 | 
def run(**kwargs):
    """Plugin entry point: scans a batch of URLScan results with the rules.

    Keyword Arguments:
        config_info (dict, optional): Configuration for this processor;
            missing or ``None`` is treated as an empty dict
        rule_manager (object): Rule manager handed to ``URLScanProcessor``
        data (array, optional): URLScan result URLs to inspect; missing or
            ``None`` is treated as an empty list

    Returns:
        Whatever ``URLScanProcessor.parse_search_results`` returns for the
        given data.
    """
    # An explicit None (e.g. an empty section in the config) must not reach
    # the processor, which calls .get() on the config and iterates the data.
    config_data = kwargs.get('config_info') or {}
    results = kwargs.get('data') or []
    urlscan_processor = URLScanProcessor(config_data,
                                         kwargs.get('rule_manager'))

    return urlscan_processor.parse_search_results(results)
127 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ![](docs/images/logo.png)
 2 | # Gunslinger
 3 | "The man in black fled across the desert, and the gunslinger followed."<br>
 4 | \- Stephen King, The Gunslinger<br><br>
 5 | Gunslinger is a hunting tool that is based around URLScan's Search API. Gunslinger can crawl URLScan for JavaScript files that match a set of user-defined rules and reports the information back to Slack.
 6 | 
 7 | ## Usage
 8 | Gunslinger can be deployed via the Terraform modules in the `terraform` directory. If you have Terraform installed you can deploy the script by running `terraform apply` while in the directory.
 9 | ### Terraform Variables
10 | ```
11 | #Digital Ocean Variables
12 | digitalocean_token: Used to deploy the Gunslinger server to DigitalOcean
13 | server_pub_key: Path to the public SSH key for the server
14 | server_priv_key: Path to the private SSH key for the server (Used to scp scripts once deployed)
15 | server_region: DigitalOcean region to deploy the server to (default: nyc1)
16 | 
17 | #API Keys
18 | slack_token: API Key for the Slack Bot that will be used
19 | urlscan_api_key: API Key to connect to URLScan
20 | 
21 | #Launch Script Variables
22 | rule_dir: Path to directory where custom rules are stored (default: "../gunslinger/rules")
23 | num_workers: Number of gunslinger workers to spin up (default: 5)
24 | queue_channel: Slack Channel to use as a Message Queue (default: mq)
25 | urlscan_query: Query to use for URLScan Search API (default: *)
26 | cron: Cron schedule for the reloader module (note: replace asterisks with underscores; default: _ _ _ _ _ (every minute))
27 | 
28 | #Miscellaneous
29 | aws_region: Region to deploy any AWS resources (default: us-east-1)
30 | use_sqs: Whether or not to use AWS SQS for the Message Queue (default: false)
31 | ```
32 | 
33 | ## Python Modules
34 | ### Reloader Module
 35 | This module will reach out to URLScan's search API on a specified cron schedule, split up the results between the gunslinger workers, and post that info to the Message Queue.
36 | #### Usage:
37 | ```
38 | usage: reloader.py [-h] -u URLSCAN_KEY [-s SLACK_TOKEN] [-c QUEUE_CHANNEL] [-q QUERY] [-cr CRON CRON CRON CRON CRON] [-w NUM_WORKERS] [-a SQS_URL]
39 | 
40 | optional arguments:
41 |   -h, --help            show this help message and exit
42 |   -u URLSCAN_KEY, --urlscan_key URLSCAN_KEY
43 |                         URLScan API key
44 |   -s SLACK_TOKEN, --slack_token SLACK_TOKEN
45 |                         Slack Token
46 |   -c QUEUE_CHANNEL, --queue_channel QUEUE_CHANNEL
 47 |                         Message Queue Channel (Default: mq)
48 |   -q QUERY, --query QUERY
49 |                         URLScan query (Default: *)
50 |   -cr CRON CRON CRON CRON CRON, --cron CRON CRON CRON CRON CRON
51 |                         Cron job for searches to run on (Default: _ _ _ _ _)
52 |   -w NUM_WORKERS, --num_workers NUM_WORKERS
53 |                         Number of gunslinger works to divy tasks(Default: 5)
54 |   -a SQS_URL, --sqs_url SQS_URL
55 |                         AWS SQS Url (optional)
56 | ```
57 | ### Gunslinger Module
58 | This is the main worker module that will analyze jobs from the Reloader module and post results back to Slack. It is driven by a set of user-defined rules to perform its analysis.
59 | #### Usage:
60 | ```
61 | usage: gunslinger.py [-h] -u URLSCAN_KEY -s SLACK_TOKEN [-c QUEUE_CHANNEL] [-d RULE_DIR] [-a SQS_URL]
62 | 
63 | optional arguments:
64 |   -h, --help            show this help message and exit
65 |   -u URLSCAN_KEY, --urlscan_key URLSCAN_KEY
66 |                         URLScan API key
67 |   -s SLACK_TOKEN, --slack_token SLACK_TOKEN
68 |                         Slack Token
69 |   -c QUEUE_CHANNEL, --queue_channel QUEUE_CHANNEL
70 |                         Message Queue Channel (Default: mq)
71 |   -d RULE_DIR, --rule_dir RULE_DIR
72 |                         Directory containing python plugins to be used as rules (Default: ./rules)
73 |   -a SQS_URL, --sqs_url SQS_URL
74 |                         URL of AWS SQS service (optional)
75 | ```
76 | 
77 | ## Rule Creation
 78 | Gunslinger is driven by a set of user-defined Python modules that act as rules. This way the user has free rein over how to handle information. All modules must be contained in one directory (`rules` by default) and must have a function named `run` that will be called when analyzing scripts. The arguments passed to this function will be a string called `script` containing the script that was found by URLScan's API and a JSON object called `response_data` which contains the data returned from URLScan's API (see URLScan's API [documentation](https://urlscan.io/about-api/) for more info).
79 | ### Example:
 80 | This is a very basic example that will catch an older Magecart sample from e4[.]ms/c1.js. You can see that the module contains the function `run` that accepts the arguments via `kwargs`. The function will also only return `True` or `False`.
81 | ```
82 | import re
83 | 
84 | def run(**kwargs):
85 |     reg1 = r'function ant_cockroach'
86 |     reg2 = r'cc_number'
87 |     reg3 = r'payment_checkout[0-9]'
88 |     script = kwargs.get('script', '')
89 |     ant_cockroach = len(re.findall(reg1, script)) > 0
90 |     cc_number = len(re.findall(reg2, script)) > 0
91 |     checkout = len(re.findall(reg3, script)) > 0
92 |     return ant_cockroach and cc_number and checkout
93 | ```
94 | 


--------------------------------------------------------------------------------
/gunslinger/gunslinger.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import sys
  3 | import argparse
  4 | from datetime import datetime as dt, timedelta as td
  5 | import json
  6 | import os
  7 | import logging
  8 | import yaml
  9 | from backends.slack_backend import Slack_MQ
 10 | from backends.sqs_backend import AWS_SQS
 11 | from backends.plugin_backend import PluginManager
 12 | 
 13 | class Gunslinger():
 14 |     """Main class for Gunslinger application.
 15 | 
 16 |     Attributes:
 17 |         header (dict): Header for communication with URLScan API
 18 |         payload (dict): Search parameters for URLScan API
 19 | 
 20 |     Arguments:
 21 |         urlscan_key (string): API key for urlscan
 22 |         slack_token (string): Slack token for channel you want to send data to
 23 |         query (string, optional): Query you want to use against URLScan
 24 |         num_results (int, optional): Number of search results to return
 25 |     """
 26 | 
 27 |     def __init__(self, **kwargs):
 28 |         self.config_info = self.read_config_file(kwargs.get('config_file'))
 29 |         rule_directory = self.config_info.get('rule_dir', '.')
 30 |         self.rule_manager = PluginManager(package='gunslinger.rules',
 31 |                                           plugin_dir=rule_directory)
 32 |         processor_directory = self.config_info.get('processor_dir',
 33 |                                                    './backends/processors')
 34 |         self.proc_manager = PluginManager(package='gunslinger.processors',
 35 |                                           plugin_dir=processor_directory)
 36 |         out_dir = self.config_info.get('output_plugin_dir',
 37 |                                        './backends/outputs')
 38 |         self.out_manager = PluginManager(package='gunslinger.outputs',
 39 |                                          plugin_dir=out_dir)
 40 |         mq_type = self.config_info.get('message_queue', 'slack_mq')
 41 |         queue_data = self.config_info.get('queue_data', {})
 42 | 
 43 |         if mq_type == 'slack_mq':
 44 |             self.message_queue = Slack_MQ(**queue_data)
 45 |         elif mq_type == 'aws_sqs':
 46 |             self.message_queue = AWS_SQS(**queue_data)
 47 | 
 48 | 
 49 |     def read_config_file(self, config_file):
 50 |         here = os.path.abspath(os.getcwd())
 51 |         config_path = os.path.join(here, config_file)
 52 |         try:
 53 |             with open(config_path) as f:
 54 |                 config_data = yaml.load(f, Loader=yaml.FullLoader)
 55 | 
 56 |                 return config_data
 57 |         except FileNotFoundError:
 58 |             logging.critical("Config file not found")
 59 |             sys.exit()
 60 | 
 61 | 
 62 |     def report(self, report_data):
 63 |         """Reports on urls that rules fired on.
 64 | 
 65 |         Arguments:
 66 |             report_data (dict): dictionary of data to report on
 67 |         """
 68 | 
 69 |         for output in self.config_info['outputs']:
 70 |             output_name = output['name']
 71 |             self.out_manager.run_output(output_name,
 72 |                                         report_data,
 73 |                                         output)
 74 |         del report_data
 75 | 
 76 | 
 77 |     def parse_message(self, data):
 78 |         processor_name = data.get('processor', '')
 79 | 
 80 |         if not processor_name:
 81 |             return
 82 |         logging.info(f'Loading processor {processor_name}')
 83 |         processor_data = data.get('data', {})
 84 |         config_info = self.config_info.get(processor_name, {})
 85 |         returned_data = self.proc_manager.run_processor(
 86 |             processor_name, processor_data,
 87 |             config_info, self.rule_manager)
 88 | 
 89 |         if returned_data:
 90 |             self.report(returned_data)
 91 | 
 92 | 
 93 |     def run(self):
 94 |         """Starts the application."""
 95 |         logging.info('“The man in black fled across the desert, and the ' \
 96 |               'gunslinger followed.”')
 97 |         logging.info('\t― Stephen King, The Gunslinger')
 98 |         prev_time = 0
 99 |         latest = dt.now().timestamp()
100 | 
101 |         while True:
102 |             data, prev_time = self.message_queue.get_next_message(
103 |                 oldest=prev_time,
104 |                 latest=latest)
105 |             latest = (dt.fromtimestamp(float(prev_time)) + td(hours=1)).timestamp()
106 | 
107 |             if dt.fromtimestamp(latest) > dt.now():
108 |                 latest = dt.now().timestamp()
109 |             elif prev_time == 0:
110 |                 latest = dt.now().timestamp()
111 |             try:
112 |                 json_data = json.loads(data)
113 |                 self.parse_message(json_data)
114 |             except Exception as e:
115 |                 logging.error(e)
116 |                 logging.info('Sleeping')
117 |                 time.sleep(self.config_info['queue_data'].get('rate_limit', 0))
118 | 
119 |                 continue
120 | 
121 | 
if __name__ == '__main__':
    # Another process started from the same directory may create 'logs'
    # between an exists() check and mkdir(); exist_ok makes this race-free.
    os.makedirs('logs', exist_ok=True)
    pid = os.getpid()
    # One log file per process, keyed by PID.
    logging.basicConfig(filename=f'logs/gunslinger_{pid}.log',
                        level=logging.INFO,
                        format='%(asctime)s:%(levelname)s:%(name)s:%(message)s')
    parser = argparse.ArgumentParser()
    parser.add_argument('-c', '--config-file',
                        help='Path to config file (default: gunslinger.yaml)',
                        default='gunslinger.yaml')
    args = parser.parse_args()
    gunslinger = Gunslinger(**vars(args))
    gunslinger.run()
137 | 


--------------------------------------------------------------------------------
/gunslinger/inputs/reloader.py:
--------------------------------------------------------------------------------
  1 | #!/bin/python3
  2 | 
  3 | import sys
  4 | import argparse
  5 | from datetime import datetime as dt
  6 | import os
  7 | import math
  8 | import json
  9 | import yaml
 10 | import logging
 11 | 
 12 | import requests
 13 | from apscheduler.schedulers.blocking import BlockingScheduler
 14 | from apscheduler.triggers.cron import CronTrigger
 15 | 
 16 | BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
 17 | sys.path.append(BASE_PATH)
 18 | 
 19 | from backends.slack_backend import Slack_MQ
 20 | from backends.sqs_backend import AWS_SQS
 21 | 
 22 | class Reloader():
 23 | 
 24 |     def __init__(self, **kwargs):
 25 |         self.config_info = self.read_config_file(kwargs.get('config_file'))
 26 |         data = self.config_info.get('inputs',
 27 |                                     {'urlscan_input': {}})['urlscan_input']
 28 |         # UrlScan API info
 29 |         self._query = data.get('query', '*')
 30 |         api_key = data.get('urlscan_key', '')
 31 |         self.header = {'Content-Type': 'application/json',
 32 |                        'Api-Key': api_key}
 33 |         self.payload = {'size':10000,
 34 |                         'sort':'date'}
 35 |         self.prev_time = dt.utcnow()
 36 |         self.cron = data.get('cron', '* * * * *')
 37 |         self.num_workers = data.get('num_workers', 5)
 38 |         queue_type = self.config_info.get('message_queue', '')
 39 |         queue_data = self.config_info.get('queue_data', {})
 40 | 
 41 |         if queue_type == 'slack_mq':
 42 |             self.message_queue = Slack_MQ(**queue_data)
 43 |         elif queue_type == 'aws_sqs':
 44 |             self.message_queue = AWS_SQS(**queue_data)
 45 |         else:
 46 |             logging.critical('Error: No message queue specified!')
 47 |             sys.exit()
 48 | 
 49 | 
 50 |     def read_config_file(self, config_file):
 51 |         here = os.path.abspath(os.getcwd())
 52 |         config_path = os.path.join(here, config_file)
 53 |         try:
 54 |             with open(config_path) as f:
 55 |                 config_data = yaml.load(f, Loader=yaml.FullLoader)
 56 | 
 57 |                 return config_data
 58 |         except FileNotFoundError:
 59 |             logging.critical('config file not found')
 60 |             exit()
 61 | 
 62 | 
 63 |     def get_results(self, prev_time):
 64 |         """Gets results of search from URLScan
 65 | 
 66 |         Returns:
 67 |             array: Array of objects containing search results
 68 |         """
 69 |         try:
 70 |             past_time = prev_time.strftime(r'%Y-%m-%dT%H\:%M\:%S.%fZ')
 71 |             self.payload['q'] = f'({self._query}) AND date:>{past_time}'
 72 |             search_results = requests.get('https://urlscan.io/api/v1/search/',
 73 |                                           headers=self.header,
 74 |                                           params=self.payload)
 75 |             search_dat = search_results.json()
 76 |             results = search_dat.get('results', [])
 77 | 
 78 |             return results
 79 |         except Exception as e:
 80 |             logging.error(e)
 81 | 
 82 |             return []
 83 | 
 84 | 
 85 |     def parse_search_results(self, results):
 86 |         """Sends a list of results to Slack MessageQueue for processing.
 87 | 
 88 |         Arguments:
 89 |             results (array): rray of object results from URLScan
 90 |         """
 91 |         result_urls = [result.get('result') for result in results]
 92 |         div = math.ceil(len(result_urls) / self.num_workers)
 93 | 
 94 |         for i in range(self.num_workers):
 95 |             result_data = result_urls[i*div:(i+1)*div]
 96 | 
 97 |             if not result_data:
 98 |                 continue
 99 |             processor_data = {'processor':'urlscan_processor',
100 |                               'data': result_urls[i*div:(i+1)*div]}
101 |             text_data = json.dumps(processor_data)
102 | 
103 |             if text_data != "":
104 |                 msg = text_data
105 |                 self.message_queue.post_message(msg)
106 | 
107 | 
108 |     def search_job(self):
109 |         """Job that runs to fetch the next set of URLScan results."""
110 |         logging.info('Getting results')
111 |         search_results = self.get_results(self.prev_time)
112 | 
113 |         if len(search_results) == 0:
114 |             return
115 |         self.prev_time = dt.strptime(search_results[0]['task']['time'],
116 |                                      '%Y-%m-%dT%H:%M:%S.%fZ')
117 |         self.parse_search_results(search_results)
118 | 
119 | 
120 |     def run(self):
121 |         """Starts the application."""
122 |         msg = 'The man in black fled across the desert, and the ' \
123 |               'gunslinger followed\n\t- Stephen King, The Gunslinger'
124 |         self.message_queue.post_message(msg, reaction='gun')
125 |         scheduler = BlockingScheduler()
126 |         scheduler.add_job(self.search_job, CronTrigger.from_crontab(self.cron))
127 | 
128 |         while True:
129 |             scheduler.start()
130 | 
131 | 
if __name__ == '__main__':
    # Another process started from the same directory may create 'logs'
    # between an exists() check and mkdir(); exist_ok makes this race-free.
    os.makedirs('logs', exist_ok=True)
    PID = os.getpid()
    # One log file per process, keyed by PID.
    logging.basicConfig(filename=f'logs/reloader_{PID}.log',
                        level=logging.DEBUG,
                        format='%(asctime)s:%(levelname)s:%(name)s:%(message)s')
    PARSER = argparse.ArgumentParser()
    PARSER.add_argument('-c', '--config-file',
                        help='Path to config file (default: '\
                        'gunslinger.yaml)',
                        default='gunslinger.yaml')
    ARGS = PARSER.parse_args()
    RELOADER = Reloader(**vars(ARGS))
    RELOADER.run()
148 | 


--------------------------------------------------------------------------------
/gunslinger/backends/slack_backend.py:
--------------------------------------------------------------------------------
  1 | import time
  2 | import slack
  3 | import logging
  4 | 
  5 | class Slack_MQ():
  6 | 
  7 |     def __init__(self, **kwargs):
  8 |         logging.getLogger(__name__)
  9 |         queue_channel = kwargs.get('channel', 'mq')
 10 |         slack_token = kwargs.get('slack_token', '')
 11 |         self.client = slack.WebClient(token=slack_token)
 12 |         logging.info(f'Channel is {queue_channel}')
 13 |         self.channel = self.get_channel(queue_channel)
 14 | 
 15 | 
 16 |     def get_channel(self, channel):
 17 |         """Gets ID of Slack channel.
 18 | 
 19 |         Arguments:
 20 |             channel (str): channel to get ID of
 21 | 
 22 |         Returns:
 23 |             str: Channel ID
 24 |         """
 25 |         logging.info(f'Getting channel {channel}')
 26 |         channels = self.client.conversations_list()
 27 |         for slack_channel in channels['channels']:
 28 |             if slack_channel['name'] == channel:
 29 |                 return slack_channel['id']
 30 |         raise Exception('Channel does not exist')
 31 | 
 32 | 
 33 |     def post_message(self, text, **kwargs):
 34 |         """Posts message to Slack
 35 | 
 36 |         Arguments:
 37 |             text (str): message to send
 38 |             channel (str, optional): channel to send the message to
 39 |             reaction (str, optional): Slack reaction code to add to message
 40 | 
 41 |         Returns:
 42 |             (dict): Message response object from Slack API
 43 |         """
 44 |         channel = kwargs.get('channel', self.channel)
 45 |         reaction = kwargs.get('reaction', '')
 46 |         message_response = self.client.chat_postMessage(channel=channel,
 47 |                                                         text=text)
 48 |         if reaction:
 49 |             self.react_message(message_response['ts'],
 50 |                                reaction,
 51 |                                channel)
 52 |         return message_response
 53 | 
 54 | 
 55 |     def react_message(self, ts, reaction, channel=''):
 56 |         """Reacts to a Slack message
 57 | 
 58 |         Arguments:
 59 |             ts (str): timestamp of message to react to
 60 |             reaction (str): Slack reaction code
 61 |             channel (str, optional): channel that message is in
 62 | 
 63 |         Returns:
 64 |             (dict): Reaction response object from Slack API
 65 |         """
 66 |         if channel == '':
 67 |             channel = self.channel
 68 |         reaction_response = self.client.reactions_add(channel=channel,
 69 |                                                       name=reaction,
 70 |                                                       timestamp=ts)
 71 |         return reaction_response
 72 | 
 73 | 
 74 |     def get_next_message(self, **kwargs):
 75 |         """Gets next message in queue and reacts to it to mark it as taken
 76 | 
 77 |         Arguments:
 78 |             oldest (str, optional): timestamp of oldest message to check
 79 |             latest (str, optional): timestamp of latest message to check
 80 |             cursor (str, optional): cursor code used for pagination
 81 | 
 82 |         Returns:
 83 |             (str, str): message text and timestamp of message
 84 |         """
 85 |         oldest = kwargs.get('oldest', 0)
 86 |         latest = kwargs.get('latest', '')
 87 |         cursor = kwargs.get('cursor', '')
 88 |         try:
 89 |             r = self.client.conversations_history(channel=self.channel,
 90 |                                                   limit=999,
 91 |                                                   oldest=str(oldest),
 92 |                                                   latest=str(latest),
 93 |                                                   inclusive=1,
 94 |                                                   cursor=cursor)
 95 |             data = r.data
 96 |             messages = data['messages']
 97 |             i = 0
 98 |             for i in range(len(messages)-1):
 99 |                 m = messages[i]
100 |                 if 'reactions' in messages[i+1] and m['text'][0] == '{' \
101 |                    and not 'reactions' in m.keys():
102 |                     ts = m['ts']
103 |                     self.client.reactions_add(channel=self.channel,
104 |                                               name='+1',
105 |                                               timestamp=ts)
106 |                     oldest = ts
107 |                     dat = m['text'].strip()
108 |                     return dat, oldest
109 |                 if 'reactions' in m.keys():
110 |                     return [], 0
111 |             if 'reactions' in messages[i] and \
112 |                messages[i]['text'][0] == '{':
113 |                 ts = messages[i]['ts']
114 |                 self.client.reactions_add(channel=self.channel,
115 |                                           name='+1',
116 |                                           timestamp=ts)
117 |                 oldest = ts
118 |                 dat = m['text'].strip()
119 |                 return dat, oldest
120 |             if 'response_metadata' in data.keys() and \
121 |                'next_cursor' in data['response_metadata'].keys():
122 |                 logging.info('Getting next cursor')
123 |                 cursor = data['response_metadata']['next_cursor']
124 |                 return self.get_next_message(oldest=oldest,
125 |                                              latest=latest,
126 |                                              cursor=cursor)
127 |             return [], latest
128 |         except Exception as e:
129 |             logging.error(e)
130 |             if 'response' in dir(e):
131 |                 r = e.response
132 |                 logging.error(r)
133 |                 time.sleep(60)
134 |                 if r['error'] == 'ratelimited':
135 |                     return self.get_next_message(oldest=oldest,
136 |                                                  latest=latest,
137 |                                                  cursor=cursor)
138 |                 return [], 0
139 |             return [], oldest
140 | 


--------------------------------------------------------------------------------