├── .github └── FUNDING.yml ├── terraform ├── aws_config_file ├── boto_file ├── vars.tf ├── user-data.sh ├── example.yaml └── main.tf ├── docs └── images │ └── logo.png ├── gunslinger ├── rules │ └── example.py ├── launch.sh ├── requirements.txt ├── backends │ ├── outputs │ │ ├── slack_output.py │ │ └── http_output.py │ ├── sqs_backend.py │ ├── plugin_backend.py │ ├── processors │ │ ├── domain_processor.py │ │ └── urlscan_processor.py │ └── slack_backend.py ├── gunslinger.py └── inputs │ └── reloader.py ├── LICENSE ├── .gitignore └── README.md /.github/FUNDING.yml: -------------------------------------------------------------------------------- 1 | ko_fi: JacobPimental 2 | -------------------------------------------------------------------------------- /terraform/aws_config_file: -------------------------------------------------------------------------------- 1 | [default] 2 | region = ${region} 3 | output = json 4 | -------------------------------------------------------------------------------- /docs/images/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/JacobPimental/gunslinger/HEAD/docs/images/logo.png -------------------------------------------------------------------------------- /terraform/boto_file: -------------------------------------------------------------------------------- 1 | [Credentials] 2 | aws_access_key_id = ${access_key} 3 | aws_secret_access_key = ${secret} 4 | -------------------------------------------------------------------------------- /gunslinger/rules/example.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | def run(**kwargs): 4 | reg1 = r'function ant_cockroach' 5 | reg2 = r'cc_number' 6 | reg3 = r'payment_checkout[0-9]' 7 | script = kwargs.get('script', '') 8 | ant_cockroach = len(re.findall(reg1, script)) > 0 9 | cc_number = len(re.findall(reg2, script)) > 0 10 | checkout = len(re.findall(reg3, script)) > 0 11 | return ant_cockroach and cc_number and checkout 12 | -------------------------------------------------------------------------------- /terraform/vars.tf: -------------------------------------------------------------------------------- 1 | #DigitalOcean Variables 2 | variable "digitalocean_token" {} 3 | variable "server_pub_key" {} 4 | variable "server_priv_key" {} 5 | variable "server_region" { default="nyc1" } 6 | 7 | # Whether or not to use Amazon SQS Message Queue 8 | variable "use_sqs" { 9 | type=bool 10 | default=false 11 | } 12 | 13 | # Launch Script variables 14 | variable "rule_dir" { default="../gunslinger/rules/" } 15 | variable "num_workers" { default="" } 16 | 17 | # Miscellaneous 18 | variable "aws_region" { default="us-east-1" } 19 | -------------------------------------------------------------------------------- /terraform/user-data.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | sudo apt update 3 | sudo apt upgrade --yes --force-yes -o Dpkg::Options::="--force-confnew" 4 | sudo apt install python3-pip --yes --force-yes 5 | cd /opt/ 6 | while [[ ! -d gunslinger ]] 7 | do 8 | : 9 | done 10 | cd gunslinger 11 | while [[ ! -d gunslinger_rules ]] 12 | do 13 | : 14 | done 15 | while [[ -z $(ls -A gunslinger_rules) ]] 16 | do 17 | : 18 | done 19 | sudo pip3 install -r requirements.txt 20 | sudo chmod +x launch.sh 21 | echo "Running ./launch.sh ${num_workers}" 22 | ./launch.sh ${num_workers} 23 | -------------------------------------------------------------------------------- /gunslinger/launch.sh: -------------------------------------------------------------------------------- 1 | NUM_WORKERS=5 2 | CONFIG_FILE="-c gunslinger.yaml" 3 | 4 | while [[ $# -gt 0 ]] 5 | do 6 | case "$1" in 7 | -c|--config-file) 8 | QUEUE_CHANNEL="-c $2 " 9 | shift 10 | shift 11 | ;; 12 | 13 | -t|--num_workers) 14 | NUM_WORKERS="$2" 15 | shift 16 | shift 17 | ;; 18 | esac 19 | done 20 | 21 | mkdir logs 22 | 23 | chmod +x inputs/* 24 | 25 | for f in inputs/* 26 | do 27 | cmd="./$f $CONFIG_FILE" 28 | bn=$(basename $f) 29 | nohup $cmd & 30 | done 31 | sleep 60 32 | gunslinger_cmd="python3 gunslinger.py $CONFIG_FILE" 33 | echo $gunslinger_cmd 34 | for i in $(seq 1 $NUM_WORKERS) 35 | do 36 | nohup $gunslinger_cmd & 37 | if [ -z "$SQS_URL" ] 38 | then 39 | sleep 60 40 | fi 41 | done 42 | 43 | -------------------------------------------------------------------------------- /gunslinger/requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp==3.7.4 2 | appdirs==1.4.4 3 | APScheduler==3.6.3 4 | async-timeout==3.0.1 5 | attrs==19.3.0 6 | beautifulsoup4==4.9.1 7 | boto3==1.12.31 8 | botocore==1.15.31 9 | bs4==0.0.1 10 | certifi==2019.11.28 11 | chardet==3.0.4 12 | cssselect==1.1.0 13 | docutils==0.15.2 14 | fake-useragent==0.1.11 15 | idna==2.9 16 | jmespath==0.9.5 17 | lxml==4.6.3 18 | multidict==4.7.5 19 | parse==1.15.0 20 | pluginbase==1.0.0 21 | pyee==7.0.2 22 | pyppeteer==0.2.2 23 | pyquery==1.4.1 24 | python-dateutil==2.8.1 25 | pytz==2019.3 26 | PyYAML==5.4 27 | requests==2.23.0 28 | s3transfer==0.3.3 29 | six==1.14.0 30 | slackclient==2.5.0 31 | soupsieve==2.0.1 32 | tqdm==4.46.0 33 | tzlocal==2.0.0 34 | urllib3==1.26.5 35 | w3lib==1.22.0 36 | websockets==9.1 37 | yarl==1.4.2 38 | -------------------------------------------------------------------------------- /terraform/example.yaml: -------------------------------------------------------------------------------- 1 | processors: 2 | urlscan_processor: 3 | api_key: "" 4 | domain_processor: 5 | timeout: 10 # timeout value for requests 6 | outputs: 7 | - name: "slack_output" 8 | slack_token: "" 9 | channel: "" 10 | queue_data: 11 | slack_token: "" 12 | channel: "" 13 | rate_limit: 15 # Amount in seconds to wait when hitting rate limit 14 | inputs: 15 | urlscan_input: 16 | query: "" 17 | urlscan_key: "" 18 | cron: "" 19 | num_workers: 5 # number of gunslinger agents that will pull results from MQ 20 | rule_dir: '' 21 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 Jacob Pimental 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /gunslinger/backends/outputs/slack_output.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import slack 3 | import yaml 4 | 5 | class SlackHandler(): 6 | 7 | def __init__(self, **kwargs): 8 | logging.getLogger(__name__) 9 | channel = kwargs.get('channel', 'mq') 10 | slack_token = kwargs.get('slack_token', '') 11 | self.client = slack.WebClient(token=slack_token) 12 | logging.info(f'Channel is {channel}') 13 | self.channel = self.get_channel(channel) 14 | 15 | 16 | def get_channel(self, channel): 17 | """Gets ID of Slack channel. 18 | 19 | Arguments: 20 | channel (str): channel to get ID of 21 | 22 | Returns: 23 | str: Channel ID 24 | """ 25 | logging.info(f'Getting channel {channel}') 26 | channels = self.client.conversations_list() 27 | for slack_channel in channels['channels']: 28 | if slack_channel['name'] == channel: 29 | return slack_channel['id'] 30 | raise Exception('Channel does not exist') 31 | 32 | 33 | def post_message(self, text, **kwargs): 34 | """Posts message to Slack 35 | 36 | Arguments: 37 | text (str): message to send 38 | channel (str, optional): channel to send the message to 39 | reaction (str, optional): Slack reaction code to add to message 40 | 41 | Returns: 42 | (dict): Message response object from Slack API 43 | """ 44 | channel = kwargs.get('channel', self.channel) 45 | reaction = kwargs.get('reaction', '') 46 | message_response = self.client.chat_postMessage(channel=channel, 47 | text=text) 48 | return message_response 49 | 50 | 51 | def run(output_data, config_data): 52 | slack_handler = SlackHandler(**config_data) 53 | try: 54 | output_str = 'Hit!:gun:\n'+yaml.dump(output_data) 55 | slack_handler.post_message(output_str) 56 | except Exception as e: 57 | logging.error(e) 58 | -------------------------------------------------------------------------------- /gunslinger/backends/sqs_backend.py: -------------------------------------------------------------------------------- 1 | import boto3 2 | import logging 3 | 4 | class AWS_SQS(): 5 | 6 | def __init__(self, **kwargs): 7 | logging.getLogger(__name__) 8 | self.sqs = boto3.client('sqs') 9 | self.url = kwargs.get('url', '') 10 | 11 | 12 | def post_message(self, text, **kwargs): 13 | """Posts message to SQS. 14 | 15 | Arguments: 16 | text (str): Text to send to SQS 17 | 18 | Returns: 19 | dict: Response object from SQS 20 | """ 21 | while True: 22 | try: 23 | response = self.sqs.send_message(QueueUrl=self.url, 24 | MessageBody=text, 25 | MessageGroupId='gunslinger_group') 26 | break 27 | except Exception as e: 28 | logging.error(e) 29 | continue 30 | return response 31 | 32 | 33 | def get_next_message(self, **kwargs): 34 | """Gets next message from the queue and deletes it. 35 | 36 | Returns: 37 | list: list of results from SQS 38 | int: numeric 0 to comply with Gunslinger logic 39 | """ 40 | while True: 41 | try: 42 | response = self.sqs.receive_message(QueueUrl=self.url, 43 | MaxNumberOfMessages=1) 44 | break 45 | except Exception as e: 46 | logging.error(e) 47 | continue 48 | 49 | messages = response.get('Messages', []) 50 | if len(messages) == 0: 51 | return [], 0 52 | message = messages[0] 53 | self.sqs.delete_message(QueueUrl=self.url, 54 | ReceiptHandle=message['ReceiptHandle']) 55 | message_body = message['Body'] 56 | if 'gunslinger' in message_body: 57 | return [], 0 58 | dat = message_body.strip().split('\n')[1:] 59 | return dat, 0 60 | -------------------------------------------------------------------------------- /gunslinger/backends/outputs/http_output.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import logging 3 | import json 4 | 5 | class HTTPOutputHandler(): 6 | 7 | def __init__(self, **kwargs): 8 | logging.getLogger(__name__) 9 | try: 10 | self.headers = kwargs.get('headers', {}) 11 | self.headers['content-type'] = 'application/json' 12 | self.fields = kwargs.get('fields', []) 13 | self.method = kwargs.get('method', 'POST') 14 | self.endpoint = kwargs['url'] 15 | except Exception as exc: 16 | logging.critical(f'HTTP Output configured incorrectly: {exc}') 17 | 18 | 19 | def get_field(self, data, field): 20 | if isinstance(data, dict): 21 | if field in data.keys(): 22 | yield data[field] 23 | else: 24 | for key in data.keys(): 25 | for val in self.get_field(data[key], field): 26 | yield val 27 | elif isinstance(data, list): 28 | for dat in data: 29 | for val in self.get_field(dat, field): 30 | yield val 31 | 32 | 33 | def create_data(self, data): 34 | if not 'results' in data: 35 | return 36 | num_results = len(data['results']) 37 | report_data = {'results':[{} for _ in range(num_results)]} 38 | for field in self.fields: 39 | field_data = list(self.get_field(data, field)) 40 | print(field_data) 41 | if len(field_data) < num_results: 42 | field_data += [''] * (num_results - len(field_data)) 43 | for i in range(num_results): 44 | print(field_data[i]) 45 | report_data['results'][i][field] = field_data[i] 46 | return report_data 47 | 48 | 49 | def send_data(self, data): 50 | report_data = self.create_data(data) 51 | response = requests.request(self.method, self.endpoint, 52 | data=json.dumps(report_data), 53 | headers=self.headers) 54 | if not response.ok: 55 | status = response.status_code 56 | logging.error(f'ERROR sending data to {self.endpoint}: {status} :'\ 57 | ' {response.text}') 58 | 59 | 60 | def run(output_data, config_info): 61 | http_handler = HTTPOutputHandler(**config_info) 62 | try: 63 | http_handler.send_data(output_data) 64 | except Exception as exc: 65 | logging.error(exc) 66 | -------------------------------------------------------------------------------- /gunslinger/backends/plugin_backend.py: -------------------------------------------------------------------------------- 1 | from pluginbase import PluginBase 2 | import os 3 | import sys 4 | import logging 5 | 6 | class PluginManager(): 7 | 8 | def __init__(self, **kwargs): 9 | logging.getLogger(__name__) 10 | package = kwargs.get('package', 'rule_backend.plugins') 11 | self._plugin_base = PluginBase(package=package) 12 | self._plugin_dir = kwargs.get('plugin_dir', '.') 13 | plugin_path = self.get_path(self._plugin_dir) 14 | self._source = self._plugin_base.make_plugin_source( 15 | searchpath=[plugin_path]) 16 | 17 | 18 | def get_path(self, directory): 19 | """Gets path of rule directory relative to working directory. 20 | 21 | Arguments: 22 | directory (str): Path to the directory of rule files 23 | 24 | Returns: 25 | str: A string of the absolute path to the rule directory 26 | """ 27 | here = os.path.abspath(os.getcwd()) 28 | return os.path.join(here, directory) 29 | 30 | 31 | def run_rules(self, **kwargs): 32 | """Runs rules via python plugins. 33 | 34 | Returns: 35 | list: List of all rules that returned True 36 | """ 37 | fired_rules = [] 38 | for plugin_name in self._source.list_plugins(): 39 | logging.info(f'Running rule {plugin_name}') 40 | rule = self._source.load_plugin(plugin_name) 41 | try: 42 | rule_fired = rule.run(**kwargs) 43 | if rule_fired: 44 | fired_rules.append(plugin_name) 45 | except Exception as e: 46 | logging.error(f'Cannot run rule {plugin_name} ' \ 47 | '(possibly formatted incorrectly)') 48 | logging.error(e) 49 | return fired_rules 50 | 51 | 52 | def run_processor(self, processor_name, processor_data, config_info, 53 | rule_manager): 54 | plugin = self._source.load_plugin(processor_name) 55 | try: 56 | returned_data = plugin.run(data=processor_data, 57 | config_info=config_info, 58 | rule_manager=rule_manager) 59 | return returned_data 60 | except Exception as e: 61 | logging.error(f'Cannot run processor {processor_name} ' \ 62 | '(possible misconfigured)') 63 | logging.error(e) 64 | return {} 65 | 66 | 67 | def run_output(self, output_name, output_data, config_info): 68 | plugin = self._source.load_plugin(output_name) 69 | try: 70 | plugin.run(output_data, config_info) 71 | except Exception as e: 72 | logging.error(f'Cannot run output {output_name} ' \ 73 | '(possibly misconfigured)') 74 | logging.error(e) 75 | -------------------------------------------------------------------------------- /gunslinger/backends/processors/domain_processor.py: -------------------------------------------------------------------------------- 1 | import gc 2 | import logging 3 | from bs4 import BeautifulSoup 4 | import requests 5 | import hashlib 6 | 7 | logger = logging.getLogger(__name__) 8 | 9 | def url_thread(url, timeout=10): 10 | clean_url = url.replace('<', '').replace('>', '') 11 | 12 | if '|' in clean_url: 13 | clean_url = clean_url[:clean_url.index('|')] 14 | 15 | if (not clean_url.startswith('http://') and not 16 | clean_url.startswith('https://')): 17 | clean_url = 'http://' + clean_url 18 | logger.info(f'Getting scripts at {clean_url}') 19 | r = requests.head(clean_url, allow_redirects=True, 20 | timeout=timeout) 21 | 22 | if not 'text/html' in r.headers['Content-Type']: 23 | return [] 24 | r = requests.get(clean_url, timeout=timeout) 25 | soup = BeautifulSoup(r.content.decode('ISO-8859-1') 26 | , "lxml") 27 | scripts = soup.find_all('script', {'src':True}) 28 | logger.info(f'Found {len(scripts)} at url') 29 | soup.decompose() 30 | data = [clean_url] 31 | 32 | for script in scripts: 33 | script_src = script['src'] 34 | 35 | if script_src[:2] == '//': 36 | script_src = 'http:' + script_src 37 | elif (not script_src.startswith('http://') and not 38 | script_src.startswith('https://')): 39 | script_src = 'http://' + script_src 40 | data.append(script_src) 41 | logger.info(f'Got {len(data)} script(s)') 42 | 43 | return data 44 | 45 | 46 | def get_js_content(js_dat, rule_manager, timeout=10): 47 | scripts_found = [] 48 | 49 | for url in js_dat: 50 | logger.info(f'Getting script at {url}') 51 | try: 52 | r = requests.get(url, timeout=timeout) 53 | logger.info('Running rules') 54 | fired_rules = rule_manager.run_rules( 55 | script=r.content.decode('ISO-8859-1')) 56 | 57 | if fired_rules: 58 | sha256_hash = hashlib.sha256(r.content).hexdigest() 59 | scripts_found.append({'url':url, 60 | 'fired_rules':fired_rules, 61 | 'hash':sha256_hash}) 62 | except Exception as e: 63 | logger.error(e) 64 | 65 | continue 66 | gc.collect() 67 | logger.info(f'Got script at {url}') 68 | 69 | return scripts_found 70 | 71 | 72 | def run(**kwargs): 73 | urls = kwargs.get('data', []) 74 | timeout = kwargs.get('config_info', 75 | {'timeout':10}).get('timeout', 10) 76 | rule_manager = kwargs.get('rule_manager') 77 | report_data = {'results':[]} 78 | 79 | for url in urls: 80 | js_urls = url_thread(url, timeout) 81 | found_scripts = get_js_content(js_urls, rule_manager, timeout) 82 | 83 | if found_scripts: 84 | for script_data in found_scripts: 85 | report = script_data 86 | report['submitted_url'] = url 87 | report_data['results'].append(report) 88 | else: 89 | del found_scripts 90 | if report_data['results']: 91 | return report_data 92 | return None 93 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | 131 | # Local .terraform directories 132 | **/.terraform/* 133 | 134 | # .tfstate files 135 | *.tfstate 136 | *.tfstate.* 137 | 138 | # Crash log files 139 | crash.log 140 | 141 | # Ignore any .tfvars files that are generated automatically for each Terraform run. Most 142 | # .tfvars files are managed as part of configuration and so should be included in 143 | # version control. 144 | # 145 | *.tfvars 146 | 147 | # Ignore override files as they are usually used to override resources locally and so 148 | # are not checked in 149 | override.tf 150 | override.tf.json 151 | *_override.tf 152 | *_override.tf.json 153 | 154 | # Other terraform files 155 | .terraform.lock.hcl 156 | versions.tf 157 | 158 | # Include override files you do wish to add to version control using negated pattern 159 | # 160 | # !example_override.tf 161 | 162 | # Include tfplan files to ignore the plan output of command: terraform plan -out=tfplan 163 | # example: *tfplan* 164 | 165 | # Custom Rules 166 | proprietary_rules/ 167 | *.log 168 | gunslinger.yaml 169 | -------------------------------------------------------------------------------- /terraform/main.tf: -------------------------------------------------------------------------------- 1 | provider "aws" { 2 | region = var.aws_region 3 | } 4 | provider "digitalocean" { 5 | token = var.digitalocean_token 6 | } 7 | 8 | resource "aws_iam_user" "sqs_user" { 9 | name = "sqs_reader" 10 | path = "/" 11 | count = var.use_sqs == false ? 0 : 1 12 | } 13 | 14 | resource "aws_iam_user_policy" "sqs_user_policy" { 15 | name = "sqs_gunslinger_policy" 16 | user = aws_iam_user.sqs_user.0.name 17 | policy = <', '') 27 | web_requests, submitted_url, urlscan_url = self.get_requests(url) 28 | scripts_found = self.parse_requests(web_requests) 29 | 30 | if scripts_found: 31 | for script_data in scripts_found: 32 | report = script_data 33 | report['submitted_url'] = submitted_url 34 | report['urlscan_url'] = urlscan_url 35 | report_data['results'].append(report) 36 | except Exception as e: 37 | logger.error(e) 38 | 39 | continue 40 | if report_data['results']: 41 | return report_data 42 | return None 43 | 44 | 45 | def get_requests(self, url): 46 | """Gets the requests a URL makes when a webpage is loaded 47 | 48 | Arguments: 49 | url (string): Url of the URLScan page for the scanned website 50 | 51 | Returns: 52 | array: Array of request objects from URLScan API 53 | dict: Dict containing the original URL submitted to URLScan 54 | and the URLScan report 55 | """ 56 | result_dat = requests.get(url, headers=self.header, timeout=10).json() 57 | 58 | if not 'data' in result_dat.keys() or not 'task' in result_dat.keys(): 59 | return ([], '', '') 60 | submitted_url = result_dat['task']['url'] 61 | urlscan_url = result_dat['task']['reportURL'] 62 | 63 | return result_dat['data']['requests'], submitted_url, urlscan_url 64 | 65 | 66 | def get_response(self, response, h): 67 | """Gets the data returned from a request made by a webpage. 68 | 69 | Arguments: 70 | response (dict): URLScan response object 71 | h (str): Hash of the response, used to get the url for the response 72 | from URLScan 73 | 74 | Returns: 75 | string: The data returned by the request (i.e. scripts, html, etc.) 76 | """ 77 | logger.info(f'Getting hash {h}') 78 | script = '' 79 | response = response['response'] 80 | url = f'https://urlscan.io/responses/{h}/' #URLScan response URL 81 | script_r = requests.get(url, timeout=10) 82 | 83 | if script_r.status_code == 200: 84 | script = script_r.text 85 | 86 | return script 87 | 88 | 89 | def parse_requests(self, requests): 90 | """Parses the requests made by a webpage to look for Magecart. 91 | 92 | Arguments: 93 | requests (array): Array of objects contianing data on the request 94 | made 95 | """ 96 | scripts_found = [] 97 | 98 | for request in requests: 99 | try: 100 | response = request['response'] #Get the response for each request 101 | h = response['hash'] 102 | script = self.get_response(response, h) 103 | url = response['response']['url'] 104 | fired_rules = self.rule_manager.run_rules(script=script, 105 | response_data=response) 106 | 107 | if fired_rules: 108 | logger.info(f'Rule fired on {url}') 109 | script_data = {'url':url, 'hash':h, 110 | 'fired_rules':fired_rules} 111 | scripts_found.append(script_data) 112 | except Exception as e: 113 | logger.error(e) 114 | 115 | continue 116 | 117 | return scripts_found 118 | 119 | def run(**kwargs): 120 | config_data = kwargs.get('config_info') 121 | rule_manager = kwargs.get('rule_manager') 122 | urlscan_processor = URLScanProcessor(config_data, rule_manager) 123 | rule_data = urlscan_processor.parse_search_results(kwargs.get('data', 124 | [])) 125 | 126 | return rule_data 127 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![](docs/images/logo.png) 2 | # Gunslinger 3 | "The man in black fled across the desert, and the gunslinger followed."
4 | \- Stephen King, The Gunslinger

5 | Gunslinger is a hunting tool that is based around URLScan's Search API. Gunslinger can crawl URLScan for JavaScript files that match a set of user-defined rules and reports the information back to Slack. 6 | 7 | ## Usage 8 | Gunslinger can be deployed via the Terraform modules in the `terraform` directory. If you have Terraform installed you can deploy the script by running `terraform apply` while in the directory. 9 | ### Terraform Variables 10 | ``` 11 | #Digital Ocean Variables 12 | digitalocean_token: Used to deploy the Gunslinger server to DigitalOcean 13 | server_pub_key: Path to the public SSH key for the server 14 | server_priv_key: Path to the private SSH key for the server (Used to scp scripts once deployed) 15 | server_region: DigitalOcean region to deploy the server to (default: nyc1) 16 | 17 | #API Keys 18 | slack_token: API Key for the Slack Bot that will be used 19 | urlscan_api_key: API Key to connect to URLScan 20 | 21 | #Launch Script Variables 22 | rule_dir: Path to directory where custom rules are stored (default: "../gunslinger/rules") 23 | num_workers: Number of gunslinger workers to spin up (default: 5) 24 | queue_channel: Slack Channel to use as a Message Queue (default: mq) 25 | urlscan_query: Query to use for URLScan Search API (default: *) 26 | cron: Cron schedule for the reloader module (note: replace asterisks with underscores; default: _ _ _ _ _ (every minute)) 27 | 28 | #Miscellaneous 29 | aws_region: Region to deploy any AWS resources (default: us-east-1) 30 | use_sqs: Whether or not to use AWS SQS for the Message Queue (default: false) 31 | ``` 32 | 33 | ## Python Modules 34 | ### Reloader Module 35 | This module will reach out to URLScan's search API on a specified cron schedule, split up the results between the number of gunslinger worker, and post that info the the Message Queue. 36 | #### Usage: 37 | ``` 38 | usage: reloader.py [-h] -u URLSCAN_KEY [-s SLACK_TOKEN] [-c QUEUE_CHANNEL] [-q QUERY] [-cr CRON CRON CRON CRON CRON] [-w NUM_WORKERS] [-a SQS_URL] 39 | 40 | optional arguments: 41 | -h, --help show this help message and exit 42 | -u URLSCAN_KEY, --urlscan_key URLSCAN_KEY 43 | URLScan API key 44 | -s SLACK_TOKEN, --slack_token SLACK_TOKEN 45 | Slack Token 46 | -c QUEUE_CHANNEL, --queue_channel QUEUE_CHANNEL 47 | Message Queue Channel (Default: 5) 48 | -q QUERY, --query QUERY 49 | URLScan query (Default: *) 50 | -cr CRON CRON CRON CRON CRON, --cron CRON CRON CRON CRON CRON 51 | Cron job for searches to run on (Default: _ _ _ _ _) 52 | -w NUM_WORKERS, --num_workers NUM_WORKERS 53 | Number of gunslinger works to divy tasks(Default: 5) 54 | -a SQS_URL, --sqs_url SQS_URL 55 | AWS SQS Url (optional) 56 | ``` 57 | ### Gunslinger Module 58 | This is the main worker module that will analyze jobs from the Reloader module and post results back to Slack. It is driven by a set of user-defined rules to perform its analysis. 59 | #### Usage: 60 | ``` 61 | usage: gunslinger.py [-h] -u URLSCAN_KEY -s SLACK_TOKEN [-c QUEUE_CHANNEL] [-d RULE_DIR] [-a SQS_URL] 62 | 63 | optional arguments: 64 | -h, --help show this help message and exit 65 | -u URLSCAN_KEY, --urlscan_key URLSCAN_KEY 66 | URLScan API key 67 | -s SLACK_TOKEN, --slack_token SLACK_TOKEN 68 | Slack Token 69 | -c QUEUE_CHANNEL, --queue_channel QUEUE_CHANNEL 70 | Message Queue Channel (Default: mq) 71 | -d RULE_DIR, --rule_dir RULE_DIR 72 | Directory containing python plugins to be used as rules (Default: ./rules) 73 | -a SQS_URL, --sqs_url SQS_URL 74 | URL of AWS SQS service (optional) 75 | ``` 76 | 77 | ## Rule Creation 78 | Gunslinger is driven y a set of user-defined Python modules that act as rules. This way the user has free reign over how to handle information. All modules must be contained in one directory (`rules` by default) and must have a function named `run` that will be called when analyzing scripts. The arguments passed to this function will be a string called `script` containing the script that was found by URLScan's API and a JSON object called `response_data` which contains the data returned from URLScan's API (see URLScan's API [documentation](https://urlscan.io/about-api/) for more info). 79 | ### Example: 80 | This is a very basic example that will catch an older Magecart sample from e4[.]ms/c1.js. You can see that the module contains the function`run` that accepts the arguments via `kwargs`. The function will also only return `True` or `False`. 81 | ``` 82 | import re 83 | 84 | def run(**kwargs): 85 | reg1 = r'function ant_cockroach' 86 | reg2 = r'cc_number' 87 | reg3 = r'payment_checkout[0-9]' 88 | script = kwargs.get('script', '') 89 | ant_cockroach = len(re.findall(reg1, script)) > 0 90 | cc_number = len(re.findall(reg2, script)) > 0 91 | checkout = len(re.findall(reg3, script)) > 0 92 | return ant_cockroach and cc_number and checkout 93 | ``` 94 | -------------------------------------------------------------------------------- /gunslinger/gunslinger.py: -------------------------------------------------------------------------------- 1 | import time 2 | import sys 3 | import argparse 4 | from datetime import datetime as dt, timedelta as td 5 | import json 6 | import os 7 | import logging 8 | import yaml 9 | from backends.slack_backend import Slack_MQ 10 | from backends.sqs_backend import AWS_SQS 11 | from backends.plugin_backend import PluginManager 12 | 13 | class Gunslinger(): 14 | """Main class for Gunslinger application. 15 | 16 | Attributes: 17 | header (dict): Header for communication with URLScan API 18 | payload (dict): Search parameters for URLScan API 19 | 20 | Arguments: 21 | urlscan_key (string): API key for urlscan 22 | slack_token (string): Slack token for channel you want to send data to 23 | query (string, optional): Query you want to use against URLScan 24 | num_results (int, optional): Number of search results to return 25 | """ 26 | 27 | def __init__(self, **kwargs): 28 | self.config_info = self.read_config_file(kwargs.get('config_file')) 29 | rule_directory = self.config_info.get('rule_dir', '.') 30 | self.rule_manager = PluginManager(package='gunslinger.rules', 31 | plugin_dir=rule_directory) 32 | processor_directory = self.config_info.get('processor_dir', 33 | './backends/processors') 34 | self.proc_manager = PluginManager(package='gunslinger.processors', 35 | plugin_dir=processor_directory) 36 | out_dir = self.config_info.get('output_plugin_dir', 37 | './backends/outputs') 38 | self.out_manager = PluginManager(package='gunslinger.outputs', 39 | plugin_dir=out_dir) 40 | mq_type = self.config_info.get('message_queue', 'slack_mq') 41 | queue_data = self.config_info.get('queue_data', {}) 42 | 43 | if mq_type == 'slack_mq': 44 | self.message_queue = Slack_MQ(**queue_data) 45 | elif mq_type == 'aws_sqs': 46 | self.message_queue = AWS_SQS(**queue_data) 47 | 48 | 49 | def read_config_file(self, config_file): 50 | here = os.path.abspath(os.getcwd()) 51 | config_path = os.path.join(here, config_file) 52 | try: 53 | with open(config_path) as f: 54 | config_data = yaml.load(f, Loader=yaml.FullLoader) 55 | 56 | return config_data 57 | except FileNotFoundError: 58 | logging.critical("Config file not found") 59 | sys.exit() 60 | 61 | 62 | def report(self, report_data): 63 | """Reports on urls that rules fired on. 64 | 65 | Arguments: 66 | report_data (dict): dictionary of data to report on 67 | """ 68 | 69 | for output in self.config_info['outputs']: 70 | output_name = output['name'] 71 | self.out_manager.run_output(output_name, 72 | report_data, 73 | output) 74 | del report_data 75 | 76 | 77 | def parse_message(self, data): 78 | processor_name = data.get('processor', '') 79 | 80 | if not processor_name: 81 | return 82 | logging.info(f'Loading processor {processor_name}') 83 | processor_data = data.get('data', {}) 84 | config_info = self.config_info.get(processor_name, {}) 85 | returned_data = self.proc_manager.run_processor( 86 | processor_name, processor_data, 87 | config_info, self.rule_manager) 88 | 89 | if returned_data: 90 | self.report(returned_data) 91 | 92 | 93 | def run(self): 94 | """Starts the application.""" 95 | logging.info('“The man in black fled across the desert, and the ' \ 96 | 'gunslinger followed.”') 97 | logging.info('\t― Stephen King, The Gunslinger') 98 | prev_time = 0 99 | latest = dt.now().timestamp() 100 | 101 | while True: 102 | data, prev_time = self.message_queue.get_next_message( 103 | oldest=prev_time, 104 | latest=latest) 105 | latest = (dt.fromtimestamp(float(prev_time)) + td(hours=1)).timestamp() 106 | 107 | if dt.fromtimestamp(latest) > dt.now(): 108 | latest = dt.now().timestamp() 109 | elif prev_time == 0: 110 | latest = dt.now().timestamp() 111 | try: 112 | json_data = json.loads(data) 113 | self.parse_message(json_data) 114 | except Exception as e: 115 | logging.error(e) 116 | logging.info('Sleeping') 117 | time.sleep(self.config_info['queue_data'].get('rate_limit', 0)) 118 | 119 | continue 120 | 121 | 122 | if __name__ == '__main__': 123 | if not os.path.exists('logs'): 124 | os.mkdir('logs') 125 | pid = os.getpid() 126 | logging.getLogger(__name__) 127 | logging.basicConfig(filename=f'logs/gunslinger_{pid}.log', 128 | level=logging.INFO, 129 | format='%(asctime)s:%(levelname)s:%(name)s:%(message)s') 130 | parser = argparse.ArgumentParser() 131 | parser.add_argument('-c', '--config-file', 132 | help='Path to config file (default: gunslinger.yaml)', 133 | default='gunslinger.yaml') 134 | args = parser.parse_args() 135 | gunslinger = Gunslinger(**vars(args)) 136 | gunslinger.run() 137 | -------------------------------------------------------------------------------- /gunslinger/inputs/reloader.py: -------------------------------------------------------------------------------- 1 | #!/bin/python3 2 | 3 | import sys 4 | import argparse 5 | from datetime import datetime as dt 6 | import os 7 | import math 8 | import json 9 | import yaml 10 | import logging 11 | 12 | import requests 13 | from apscheduler.schedulers.blocking import BlockingScheduler 14 | from apscheduler.triggers.cron import CronTrigger 15 | 16 | BASE_PATH = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 17 | sys.path.append(BASE_PATH) 18 | 19 | from backends.slack_backend import Slack_MQ 20 | from backends.sqs_backend import AWS_SQS 21 | 22 | class Reloader(): 23 | 24 | def __init__(self, **kwargs): 25 | self.config_info = self.read_config_file(kwargs.get('config_file')) 26 | data = self.config_info.get('inputs', 27 | {'urlscan_input': {}})['urlscan_input'] 28 | # UrlScan API info 29 | self._query = data.get('query', '*') 30 | api_key = data.get('urlscan_key', '') 31 | self.header = {'Content-Type': 'application/json', 32 | 'Api-Key': api_key} 33 | self.payload = {'size':10000, 34 | 'sort':'date'} 35 | self.prev_time = dt.utcnow() 36 | self.cron = data.get('cron', '* * * * *') 37 | self.num_workers = data.get('num_workers', 5) 38 | queue_type = self.config_info.get('message_queue', '') 39 | queue_data = self.config_info.get('queue_data', {}) 40 | 41 | if queue_type == 'slack_mq': 42 | self.message_queue = Slack_MQ(**queue_data) 43 | elif queue_type == 'aws_sqs': 44 | self.message_queue = AWS_SQS(**queue_data) 45 | else: 46 | logging.critical('Error: No message queue specified!') 47 | sys.exit() 48 | 49 | 50 | def read_config_file(self, config_file): 51 | here = os.path.abspath(os.getcwd()) 52 | config_path = os.path.join(here, config_file) 53 | try: 54 | with open(config_path) as f: 55 | config_data = yaml.load(f, Loader=yaml.FullLoader) 56 | 57 | return config_data 58 | except FileNotFoundError: 59 | logging.critical('config file not found') 60 | exit() 61 | 62 | 63 | def get_results(self, prev_time): 64 | """Gets results of search from URLScan 65 | 66 | Returns: 67 | array: Array of objects containing search results 68 | """ 69 | try: 70 | past_time = prev_time.strftime(r'%Y-%m-%dT%H\:%M\:%S.%fZ') 71 | self.payload['q'] = f'({self._query}) AND date:>{past_time}' 72 | search_results = requests.get('https://urlscan.io/api/v1/search/', 73 | headers=self.header, 74 | params=self.payload) 75 | search_dat = search_results.json() 76 | results = search_dat.get('results', []) 77 | 78 | return results 79 | except Exception as e: 80 | logging.error(e) 81 | 82 | return [] 83 | 84 | 85 | def parse_search_results(self, results): 86 | """Sends a list of results to Slack MessageQueue for processing. 87 | 88 | Arguments: 89 | results (array): rray of object results from URLScan 90 | """ 91 | result_urls = [result.get('result') for result in results] 92 | div = math.ceil(len(result_urls) / self.num_workers) 93 | 94 | for i in range(self.num_workers): 95 | result_data = result_urls[i*div:(i+1)*div] 96 | 97 | if not result_data: 98 | continue 99 | processor_data = {'processor':'urlscan_processor', 100 | 'data': result_urls[i*div:(i+1)*div]} 101 | text_data = json.dumps(processor_data) 102 | 103 | if text_data != "": 104 | msg = text_data 105 | self.message_queue.post_message(msg) 106 | 107 | 108 | def search_job(self): 109 | """Job that runs to fetch the next set of URLScan results.""" 110 | logging.info('Getting results') 111 | search_results = self.get_results(self.prev_time) 112 | 113 | if len(search_results) == 0: 114 | return 115 | self.prev_time = dt.strptime(search_results[0]['task']['time'], 116 | '%Y-%m-%dT%H:%M:%S.%fZ') 117 | self.parse_search_results(search_results) 118 | 119 | 120 | def run(self): 121 | """Starts the application.""" 122 | msg = 'The man in black fled across the desert, and the ' \ 123 | 'gunslinger followed\n\t- Stephen King, The Gunslinger' 124 | self.message_queue.post_message(msg, reaction='gun') 125 | scheduler = BlockingScheduler() 126 | scheduler.add_job(self.search_job, CronTrigger.from_crontab(self.cron)) 127 | 128 | while True: 129 | scheduler.start() 130 | 131 | 132 | if __name__ == '__main__': 133 | if not os.path.exists('logs'): 134 | os.mkdir('logs') 135 | PID = os.getpid() 136 | logging.getLogger(__name__) 137 | logging.basicConfig(filename=f'logs/reloader_{PID}.log', 138 | level=logging.DEBUG, 139 | format='%(asctime)s:%(levelname)s:%(name)s:%(message)s') 140 | PARSER = argparse.ArgumentParser() 141 | PARSER.add_argument('-c', '--config-file', 142 | help='Path to config file (default: '\ 143 | 'gunslinger.yaml)', 144 | default='gunslinger.yaml') 145 | ARGS = PARSER.parse_args() 146 | RELOADER = Reloader(**vars(ARGS)) 147 | RELOADER.run() 148 | -------------------------------------------------------------------------------- /gunslinger/backends/slack_backend.py: -------------------------------------------------------------------------------- 1 | import time 2 | import slack 3 | import logging 4 | 5 | class Slack_MQ(): 6 | 7 | def __init__(self, **kwargs): 8 | logging.getLogger(__name__) 9 | queue_channel = kwargs.get('channel', 'mq') 10 | slack_token = kwargs.get('slack_token', '') 11 | self.client = slack.WebClient(token=slack_token) 12 | logging.info(f'Channel is {queue_channel}') 13 | self.channel = self.get_channel(queue_channel) 14 | 15 | 16 | def get_channel(self, channel): 17 | """Gets ID of Slack channel. 18 | 19 | Arguments: 20 | channel (str): channel to get ID of 21 | 22 | Returns: 23 | str: Channel ID 24 | """ 25 | logging.info(f'Getting channel {channel}') 26 | channels = self.client.conversations_list() 27 | for slack_channel in channels['channels']: 28 | if slack_channel['name'] == channel: 29 | return slack_channel['id'] 30 | raise Exception('Channel does not exist') 31 | 32 | 33 | def post_message(self, text, **kwargs): 34 | """Posts message to Slack 35 | 36 | Arguments: 37 | text (str): message to send 38 | channel (str, optional): channel to send the message to 39 | reaction (str, optional): Slack reaction code to add to message 40 | 41 | Returns: 42 | (dict): Message response object from Slack API 43 | """ 44 | channel = kwargs.get('channel', self.channel) 45 | reaction = kwargs.get('reaction', '') 46 | message_response = self.client.chat_postMessage(channel=channel, 47 | text=text) 48 | if reaction: 49 | self.react_message(message_response['ts'], 50 | reaction, 51 | channel) 52 | return message_response 53 | 54 | 55 | def react_message(self, ts, reaction, channel=''): 56 | """Reacts to a Slack message 57 | 58 | Arguments: 59 | ts (str): timestamp of message to react to 60 | reaction (str): Slack reaction code 61 | channel (str, optional): channel that message is in 62 | 63 | Returns: 64 | (dict): Reaction response object from Slack API 65 | """ 66 | if channel == '': 67 | channel = self.channel 68 | reaction_response = self.client.reactions_add(channel=channel, 69 | name=reaction, 70 | timestamp=ts) 71 | return reaction_response 72 | 73 | 74 | def get_next_message(self, **kwargs): 75 | """Gets next message in queue and reacts to it to mark it as taken 76 | 77 | Arguments: 78 | oldest (str, optional): timestamp of oldest message to check 79 | latest (str, optional): timestamp of latest message to check 80 | cursor (str, optional): cursor code used for pagination 81 | 82 | Returns: 83 | (str, str): message text and timestamp of message 84 | """ 85 | oldest = kwargs.get('oldest', 0) 86 | latest = kwargs.get('latest', '') 87 | cursor = kwargs.get('cursor', '') 88 | try: 89 | r = self.client.conversations_history(channel=self.channel, 90 | limit=999, 91 | oldest=str(oldest), 92 | latest=str(latest), 93 | inclusive=1, 94 | cursor=cursor) 95 | data = r.data 96 | messages = data['messages'] 97 | i = 0 98 | for i in range(len(messages)-1): 99 | m = messages[i] 100 | if 'reactions' in messages[i+1] and m['text'][0] == '{' \ 101 | and not 'reactions' in m.keys(): 102 | ts = m['ts'] 103 | self.client.reactions_add(channel=self.channel, 104 | name='+1', 105 | timestamp=ts) 106 | oldest = ts 107 | dat = m['text'].strip() 108 | return dat, oldest 109 | if 'reactions' in m.keys(): 110 | return [], 0 111 | if 'reactions' in messages[i] and \ 112 | messages[i]['text'][0] == '{': 113 | ts = messages[i]['ts'] 114 | self.client.reactions_add(channel=self.channel, 115 | name='+1', 116 | timestamp=ts) 117 | oldest = ts 118 | dat = m['text'].strip() 119 | return dat, oldest 120 | if 'response_metadata' in data.keys() and \ 121 | 'next_cursor' in data['response_metadata'].keys(): 122 | logging.info('Getting next cursor') 123 | cursor = data['response_metadata']['next_cursor'] 124 | return self.get_next_message(oldest=oldest, 125 | latest=latest, 126 | cursor=cursor) 127 | return [], latest 128 | except Exception as e: 129 | logging.error(e) 130 | if 'response' in dir(e): 131 | r = e.response 132 | logging.error(r) 133 | time.sleep(60) 134 | if r['error'] == 'ratelimited': 135 | return self.get_next_message(oldest=oldest, 136 | latest=latest, 137 | cursor=cursor) 138 | return [], 0 139 | return [], oldest 140 | --------------------------------------------------------------------------------