├── __init__.py
├── pastehunter
    ├── __init__.py
    ├── inputs
    │   ├── __init__.py
    │   ├── base_input.py
    │   ├── dumpz.py
    │   ├── pastebin.py
    │   ├── gists.py
    │   ├── stackexchange.py
    │   ├── slexy.py
    │   ├── ixio.py
    │   └── github.py
    ├── outputs
    │   ├── __init__.py
    │   ├── syslog_output.py
    │   ├── json_output.py
    │   ├── csv_output.py
    │   ├── http_output.py
    │   ├── splunk_output.py
    │   ├── slack_output.py
    │   ├── elastic_output.py
    │   ├── twilio_output.py
    │   └── smtp_output.py
    ├── sandboxes
    │   ├── __init__.py
    │   ├── viper.py
    │   └── cuckoo.py
    ├── postprocess
    │   ├── __init__.py
    │   ├── post_entropy.py
    │   ├── post_email.py
    │   ├── post_compress.py
    │   └── post_b64.py
    ├── YaraRules
    │   ├── email_filter.yar
    │   ├── general.yar
    │   ├── blacklist.yar
    │   ├── certificates.yar
    │   ├── aws.yar
    │   ├── password_leak.yar
    │   ├── hak5.yar
    │   ├── powershell.yar
    │   ├── core_keywords.yar
    │   ├── database.yar
    │   ├── test_rules.yar
    │   ├── api_keys.yar
    │   ├── CryptoExchangeApi.yar
    │   └── base64.yar
    └── common.py
├── pytest.ini
├── MANIFEST.in
├── requirements.txt
├── test
    ├── test_base62.py
    ├── test_email_filter.py
    ├── test_paste_objects.py
    └── test_ix.py
├── docs
    ├── Makefile
    ├── index.rst
    ├── sandboxes.rst
    ├── migrating.rst
    ├── postprocess.rst
    ├── outputs.rst
    ├── inputs.rst
    ├── installation.rst
    └── conf.py
├── setup.py
├── Dockerfile
├── README.md
├── docker-compose.yml
├── .travis.yml
├── CHANGELOG.md
├── .gitignore
├── settings.json.sample
├── pastehunter-cli
└── LICENSE


/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pastehunter/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pastehunter/inputs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pastehunter/outputs/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pastehunter/sandboxes/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pastehunter/postprocess/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/pytest.ini:
--------------------------------------------------------------------------------
1 | [pytest]
2 | addopts = test/


--------------------------------------------------------------------------------
/MANIFEST.in:
--------------------------------------------------------------------------------
1 | recursive-include pastehunter/YaraRules *.yar


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | requests>=2.20.0
2 | elasticsearch>=5.0.0,<6.0.0
3 | splunk-sdk
4 | yara-python


--------------------------------------------------------------------------------
/test/test_base62.py:
--------------------------------------------------------------------------------
 1 | from pastehunter.common import base62_decode, base62_encode
 2 | 
 3 | 
 4 | def test_b62_encode():
 5 |     assert base62_encode(622708) == '2BZG'
 6 |     assert base62_encode(622707) == '2BZF'
 7 | 
 8 | 
 9 | def test_b62_decode():
10 |     assert base62_decode('1') == 1
11 |     assert base62_decode('a') == 10
12 |     assert base62_decode('2BZF') == 622707
13 |     assert base62_decode('2BZG') == 622708


--------------------------------------------------------------------------------
/pastehunter/YaraRules/email_filter.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     These rules attempt to find email leaks
 3 | */
 4 | 
 5 | rule email_filter
 6 | {
 7 |     meta:
 8 |         author = "@kovacsbalu"
 9 |         info = "Better email pattern"
10 |         reference = "https://github.com/securenetworx/PasteHunter/tree/fix-email-filter"
11 | 
12 |     strings:
13 | 	    $email_add = /\b[\w-]+(\.[\w-]+)*@[\w-]+(\.[\w-]+)*\.[a-zA-Z-]+[\w-]\b/
14 |     condition:
15 |         #email_add > 20
16 | 
17 | }
18 | 
19 | 
20 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/general.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     Just some General Rules. Dont need a file per rule.
 3 | */
 4 | 
 5 | rule php_obfuscation
 6 | {
 7 |     meta:
 8 |         author = "@KevTheHermit"
 9 |         info = "Part of PasteHunter"
10 |         reference = "https://github.com/kevthehermit/PasteHunter"
11 | 
12 |     strings:
13 |         $a = "eval(" nocase
14 |         $b = "gzinflate(" nocase
15 |         $c = "base64_decode("
16 |         $d = "\\142\\x61\\163\\145\\x36\\x34\\137\\144\\x65\\x63\\x6f\\x64\\x65"
17 |         $e = "str_rot13("
18 | 
19 |     condition:
20 |         2 of them
21 | }


--------------------------------------------------------------------------------
/pastehunter/postprocess/post_entropy.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | import math
 3 | from collections import Counter
 4 | 
 5 | def shannon_entropy(s):
 6 |     # https://rosettacode.org/wiki/Entropy#Python
 7 |     s = str(s)
 8 |     p, lns = Counter(s), float(len(s))
 9 |     return -sum(count / lns * math.log(count / lns, 2) for count in p.values())
10 | 
11 | 
12 | def run(results, raw_paste_data, paste_object):
13 |     # Calculate the Shannon Entropy for the raw paste
14 |     paste_object["Shannon Entropy"] = shannon_entropy(raw_paste_data)
15 |     # Send the updated json back
16 |     return paste_object
17 | 


--------------------------------------------------------------------------------
/docs/Makefile:
--------------------------------------------------------------------------------
 1 | # Minimal makefile for Sphinx documentation
 2 | #
 3 | 
 4 | # You can set these variables from the command line.
 5 | SPHINXOPTS    =
 6 | SPHINXBUILD   = sphinx-build
 7 | SOURCEDIR     = .
 8 | BUILDDIR      = _build
 9 | 
10 | # Put it first so that "make" without argument is like "make help".
11 | help:
12 | 	@$(SPHINXBUILD) -M help "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)
13 | 
14 | .PHONY: help Makefile
15 | 
16 | # Catch-all target: route all unknown targets to Sphinx using the new
17 | # "make mode" option.  $(O) is meant as a shortcut for $(SPHINXOPTS).
18 | %: Makefile
19 | 	@$(SPHINXBUILD) -M $@ "$(SOURCEDIR)" "$(BUILDDIR)" $(SPHINXOPTS) $(O)


--------------------------------------------------------------------------------
/test/test_email_filter.py:
--------------------------------------------------------------------------------
 1 | from pastehunter.postprocess.post_email import run as run_email
 2 | 
 3 | 
 4 | def test_email_filter_post():
 5 |     email_dummy = """
 6 |     garbage text sdgusdfjhuhjgnujidhj
 7 |     jim@gmail.com:abc123
 8 |     sally@yahoo.com:cba321
 9 |     jim@gmail.com:abc123
10 |     sally@yahoo.com:cba321
11 |     jim@gmail.com:abc123
12 |     sally@yahoo.com:cba321
13 |     jim@gmail.com:abc123
14 |     sally@yahoo.com:cba321
15 |     jim@gmail.com:abc123
16 |     sally@yahoo.com:cba321
17 |     garbage text sdgusdfjhuhjgnujidhj
18 |     """
19 |     results = run_email(None, email_dummy, {})
20 |     assert results["total_emails"] == 10
21 |     assert results['unique_emails'] == 2
22 |     assert results['unique_domains'] == 2


--------------------------------------------------------------------------------
/pastehunter/sandboxes/viper.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import logging
 3 | import requests
 4 | from pastehunter.common import parse_config
 5 | conf = parse_config()
 6 | 
 7 | logger = logging.getLogger('pastehunter')
 8 | 
 9 | def upload_file(raw_file, paste_object):
10 |     viper_ip = conf["sandboxes"]["viper"]["api_host"]
11 |     viper_port = conf["sandboxes"]["viper"]["api_port"]
12 |     viper_host = 'http://{0}:{1}'.format(viper_ip, viper_port)
13 | 
14 |     submit_file_url = '{0}/tasks/create/file'.format(viper_host)
15 |     files = {'file': ('{0}.exe'.format(paste_object["pasteid"]), io.BytesIO(raw_file))}
16 |     submit_file = requests.post(submit_file_url, files=files).json()
17 | 
18 |     # Send any updated json back
19 |     return paste_object
20 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/blacklist.yar:
--------------------------------------------------------------------------------
 1 | rule blacklist
 2 | {
 3 |     meta:
 4 |         author = "@KevTheHermit"
 5 |         info = "Part of PasteHunter"
 6 |         reference = "https://github.com/kevthehermit/PasteHunter"
 7 | 
 8 |     strings:
 9 |         $a = "#EXTINF:" nocase // IPTV stream Lists.
10 |         $b = "--app-name=LeagueClient" nocase // League of Legends Debug Log
11 |         $c = "common.application_name: LeagueClient" // League of Legends Debug Log
12 |         $d = /java\.(util|lang|io)/ // Minecraft and java errors
13 |         $e = "Traceback (most recent call last)"
14 |         $f = /define\(.*?\)|require_once\(.*?\)/
15 |         $g = "Technic Launcher is starting" // Minecraft mod dumps
16 |         $h = "OTL logfile created on" // 
17 |     condition:
18 |         any of them
19 | 
20 | }


--------------------------------------------------------------------------------
/docs/index.rst:
--------------------------------------------------------------------------------
 1 | .. PasteHunter documentation master file, created by
 2 |    sphinx-quickstart on Sat Feb  9 22:50:02 2019.
 3 |    You can adapt this file completely to your liking, but it should at least
 4 |    contain the root `toctree` directive.
 5 | 
 6 | Welcome to PasteHunter's documentation!
 7 | =======================================
 8 | 
 9 | 
10 | PasteHunter is a python3 application that is designed to query a collection of sites
11 | that host publicly pasted data. For all the pastes it finds it scans the raw contents
12 | against a series of yara rules looking for information that can be used by an organisation or a researcher.
13 | 
14 | 
15 | 
16 | 
17 | .. toctree::
18 |    :maxdepth: 2
19 |    :caption: Contents:
20 |    
21 |    installation
22 |    inputs
23 |    outputs
24 |    postprocess
25 |    sandboxes
26 | 
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/certificates.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This rule will look for common encoded certificates and secrets
 3 | */
 4 | 
 5 | rule certificates
 6 | {
 7 |     meta:
 8 |         author = "@KevTheHermit"
 9 |         info = "Part of PasteHunter"
10 |         reference = "https://github.com/kevthehermit/PasteHunter"
11 | 
12 |     strings:
13 |         $ssh_priv = "BEGIN RSA PRIVATE KEY" wide ascii nocase
14 |         $openssh_priv = "BEGIN OPENSSH PRIVATE KEY" wide ascii nocase
15 |         $dsa_priv = "BEGIN DSA PRIVATE KEY" wide ascii nocase
16 |         $ec_priv = "BEGIN EC PRIVATE KEY" wide ascii nocase
17 |         $pgp_priv = "BEGIN PGP PRIVATE KEY" wide ascii nocase
18 |         $pem_cert = "BEGIN CERTIFICATE" wide ascii nocase
19 |         $pkcs7 = "BEGIN PKCS7"
20 | 
21 |     condition:
22 |         any of them
23 | 
24 | }
25 | 
26 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/aws.yar:
--------------------------------------------------------------------------------
 1 | rule aws_cli
 2 | {
 3 |     meta:
 4 |         author = "@KevTheHermit"
 5 |         info = "Part of PasteHunter"
 6 |         reference = "https://github.com/kevthehermit/PasteHunter"
 7 | 
 8 |     strings:
 9 |         $a1 = "aws s3 " ascii
10 |         $a2 = "aws ec2 " ascii
11 |         $a3 = "aws ecr " ascii
12 |         $a4 = "aws cognito-identity" ascii
13 |         $a5 = "aws iam "ascii
14 |         $a6 = "aws waf " ascii
15 | 
16 |     condition:
17 |         any of them
18 | 
19 | }
20 | 
21 | rule sw_bucket
22 | {
23 |     meta:
24 |         author = "@KevTheHermit"
25 |         info = "Part of PasteHunter"
26 |         reference = "https://github.com/kevthehermit/PasteHunter"
27 | 
28 |     strings:
29 |         $a1 = "s3.amazonaws.com" ascii
30 | 
31 |     condition:
32 |         any of them
33 | 
34 | 
35 | 
36 | }
37 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | #!/usr/bin/env python
 2 | from setuptools import setup, find_packages
 3 | 
 4 | with open("README.md", "r") as fh:
 5 |     long_description = fh.read()
 6 | 
 7 | setup(
 8 |     name='pastehunter',
 9 |     version='1.4.2',
10 |     author='@kevthehermit @Plazmaz',
11 |     author_email='info@pastehunter.com',
12 |     description="Pastehunter",
13 |     long_description=long_description,
14 |     long_description_content_type="text/markdown",
15 |     url='https://pastehunter.com',
16 |     license='GNU V3',
17 |     zip_safe=False,
18 |     packages=find_packages(),
19 |     include_package_data=True,
20 |     install_requires=[
21 |         'yara-python',
22 |         'requests',
23 |         'elasticsearch',
24 |         'splunk-sdk'
25 |     ],
26 |     scripts=['pastehunter-cli'],
27 |     package_data={'': ['*.yar', 'README.md, LICENSE']}
28 | )


--------------------------------------------------------------------------------
/docs/sandboxes.rst:
--------------------------------------------------------------------------------
 1 | Sandboxes
 2 | =========
 3 | 
 4 | There are a few sandboxes that can be configured and used in various post process steps.
 5 | 
 6 | There are a few generic options for each sandbox.
 7 | 
 8 | - **enabled**: This turns the sandbox on and off. 
 9 | - **module**: This is used internally by pastehunter.
10 | 
11 | Cuckoo
12 | ------
13 | 
14 | If the samples match a binary file format you can optionally send the file for analysis by a Cuckoo Sandbox.
15 | 
16 | - **api_host**: IP or hostname for a Cuckoo API endpoint. 
17 | - **api_port**: Port number for a Cuckoo API endpoint.
18 | 
19 | Viper
20 | -----
21 | 
22 | If the samples match a binary file format you can optionally send the file to a Viper instance for further analysis.
23 | 
24 | - **api_host**: IP or hostname for a Viper API endpoint. 
25 | - **api_port**: Port number for a Viper API endpoint.
26 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/password_leak.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     These rules attempt to find password leaks / dumps
 3 | */
 4 | 
 5 | rule password_list
 6 | {
 7 |     meta:
 8 |         author = "@KevTheHermit and @Plazmaz"
 9 |         info = "Part of PasteHunter"
10 |         reference = "https://github.com/kevthehermit/PasteHunter"
11 | 
12 |     strings:
13 |         // Email validation---------------------------------------------------V
14 |         // Optional quotes -----------------------------------------------------v
15 |         // Separator char (: or |) ---------------------------------------------------v
16 |         // Continue until word boundary or space ------------------------------------------v
17 |         $data_format = /\b[\w-]+(\.[\w-]+)*@[\w-]+(\.[\w-]+)*\.[a-zA-Z-]+[\w-]["|']?(:|\|)\S+\b/
18 | 
19 |     condition:
20 |         #data_format > 10
21 | 
22 | }


--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
 1 | FROM python:3
 2 | 
 3 | RUN apt-get update && \
 4 | apt-get -y --no-install-recommends install automake libtool make gcc git python3-pip && \
 5 | rm -rf /var/lib/apt/lists/* && \
 6 | pip3 --no-cache-dir install yara-python && \
 7 | wget https://github.com/VirusTotal/yara/archive/v3.8.1.tar.gz -O yara.tar.gz && \
 8 | tar -zxf yara.tar.gz && \
 9 | rm yara.tar.gz
10 | 
11 | RUN cd yara-3.8.1 && \
12 | ./bootstrap.sh && \
13 | ./configure && \
14 | make && \
15 | make install
16 | 
17 | WORKDIR /usr/src/wait-for-it
18 | RUN git clone --depth 1 https://github.com/vishnubob/wait-for-it . && \
19 | chmod +x /usr/src/wait-for-it/wait-for-it.sh
20 | 
21 | WORKDIR /usr/src/pastehunter
22 | 
23 | COPY . ./
24 | RUN pip3 --no-cache-dir install -r requirements.txt
25 | 
26 | CMD ["/usr/src/wait-for-it/wait-for-it.sh","-t", "0","172.16.10.10:9200","--", "python3", "pastehunter-cli"]
27 | 
28 | 


--------------------------------------------------------------------------------
/pastehunter/outputs/syslog_output.py:
--------------------------------------------------------------------------------
 1 | import socket
 2 | from pastehunter.common import parse_config
 3 | 
 4 | config = parse_config()
 5 | 
 6 | class SyslogOutput():
 7 | 
 8 |     def store_paste(self, paste_data):
 9 |         host = config['outputs']['syslog_output']['host']
10 |         port = config['outputs']['syslog_output']['port']
11 | 
12 |         syslog_line = '"{0}" "{1}" "{2}" "{3}" "{4}"'.format(paste_data['@timestamp'],
13 |                                                 paste_data['pasteid'],
14 |                                                 paste_data['YaraRule'],
15 |                                                 paste_data['scrape_url'],
16 |                                                 paste_data['pastesite'])
17 |         syslog = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
18 |         syslog.connect((host, port))
19 |         syslog.send(syslog_line.encode('utf-8'))
20 |         syslog.close()
21 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/hak5.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This rule will look for ducky / bunny code
 3 | */
 4 | 
 5 | rule ducky_code
 6 | {
 7 |     meta:
 8 |         author = "@KevTheHermit"
 9 |         info = "Part of PasteHunter"
10 |         reference = "https://github.com/kevthehermit/PasteHunter"
11 | 
12 |     strings:
13 |         $a1 = "DELAY"
14 |         $a2 = "GUI r"
15 |         $a3 = "STRING"
16 |         $a4 = "ENTER"
17 |         $a5 = "DEFAULTDELAY"
18 |         $a6 = "WINDOWS"
19 |         $a7 = "SHIFT"
20 |     condition:
21 |         4 of them
22 | }
23 | 
24 | rule bunny_code
25 | {
26 |     meta:
27 |         author = "@KevTheHermit"
28 |         info = "Part of PasteHunter"
29 |         reference = "https://github.com/kevthehermit/PasteHunter"
30 | 
31 |     strings:
32 |         $a1 = "ATTACKMODE"
33 |         $a2 = "QUACK"
34 |         $a3 = "ECM_ETHERNET"
35 |         $a4 = "RNDIS_ETHERNET"
36 |         $a5 = "LED"
37 |         $a6 = "GET SWITCH_POSITION"
38 |         $a7 = "REQUIRETOOL"
39 |     condition:
40 |         4 of them
41 | }


--------------------------------------------------------------------------------
/pastehunter/postprocess/post_email.py:
--------------------------------------------------------------------------------
 1 | import re
 2 | 
 3 | 
 4 | def run(results, raw_paste_data, paste_object):
 5 |     # Use the rule name to determine what postprocess to do
 6 | 
 7 |     # Get total unique emails.
 8 | 
 9 |     all_emails = re.findall(r'\b([\w-]+(?:\.[\w-]+)*@[\w-]+(?:\.[\w-]+)*\.[a-zA-Z-]+[\w-])\b', raw_paste_data)
10 |     domain_list = []
11 |     for email_address in all_emails:
12 |         email_domain = email_address.split("@")
13 |         domain_list.append(email_domain[-1])
14 | 
15 |     unique_emails = set(all_emails)
16 |     unique_domains = set(domain_list)
17 |     # We can filter some of the false positives from the yara match here
18 | 
19 |     if len(unique_emails) < 10:
20 |         paste_object["results"] = []
21 | 
22 |     # Get unique domain count
23 |     # Update the json
24 |     paste_object["total_emails"] = len(all_emails)
25 |     paste_object["unique_emails"] = len(unique_emails)
26 |     paste_object["unique_domains"] = len(unique_domains)
27 | 
28 |     # Send the updated json back
29 |     return paste_object
30 | 


--------------------------------------------------------------------------------
/docs/migrating.rst:
--------------------------------------------------------------------------------
 1 | 
 2 | Migrating From a <1.2.1 Config
 3 | ==================================
 4 | 1.2.1 introduces some breaking configuration changes due to various project structure changes. Most notably,
 5 | all module names will need to be prefixed with "pastehunter.". So for example, this part of the configuration::
 6 | 
 7 |     "gists": {
 8 |       "enabled": true,
 9 |       "module": "inputs.gists",
10 |       "api_token": "",
11 | 
12 | will need to change to this::
13 | 
14 |     "gists": {
15 |       "enabled": true,
16 |       "module": "pastehunter.inputs.gists",
17 |       "api_token": "",
18 | 
19 | 
20 | This applies to inputs, outputs, sandboxes, and post modules. There is one more change required to migrate your configuration.
21 | You will need to change your yara configuration to look something like this::
22 | 
23 |   "yara": {
24 |     "default_rules": true,
25 |     "custom_rules": "none",
26 |     "exclude_rules": [],
27 |     "blacklist": true,
28 |     "test_rules": false
29 |   }
30 | 
31 | If you have created any custom rules, change "none" to reflect the path to your custom rules. Finally, move your ``settings.json`` file to ``~/.config/pastehunter.json``.


--------------------------------------------------------------------------------
/pastehunter/outputs/json_output.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import logging
 3 | import os
 4 | 
 5 | from pastehunter.common import parse_config
 6 | 
 7 | logger = logging.getLogger('pastehunter')
 8 | 
 9 | config = parse_config()
10 | 
11 | class JsonOutput():
12 |     def __init__(self):
13 |         base_path = config['outputs']['json_output']['output_path']
14 |         self.json_path = base_path
15 |         if not os.path.exists(base_path):
16 |             try:
17 |                 os.makedirs(base_path)
18 |                 self.test = True
19 |             except OSError as e:
20 |                 logger.error("Unable to create Json Path: {0}".format(e))
21 |                 self.test = False
22 |         else:
23 |             self.test = True
24 | 
25 |     def store_paste(self, paste_data):
26 |         if not config['outputs']['json_output']['store_raw']:
27 |             del paste_data['raw_paste']
28 | 
29 |         if self.test:
30 |             json_file = os.path.join(self.json_path, str(paste_data['pasteid']))
31 |             with open(json_file, 'w') as out:
32 |                 out.write(json.dumps(paste_data, indent=4))
33 |         else:
34 |             logger.error("JsonOutput Error")
35 | 


--------------------------------------------------------------------------------
/pastehunter/postprocess/post_compress.py:
--------------------------------------------------------------------------------
 1 | import lzma
 2 | import base64
 3 | import logging
 4 | from pastehunter.common import parse_config
 5 | logger = logging.getLogger('pastehunter')
 6 | config = parse_config()
 7 | 
 8 | def run(results, raw_paste_data, paste_object):
 9 |     if config['outputs']['json_output']['store_raw']:
10 |         original = raw_paste_data
11 |         orig_size = len(original.encode())
12 |         logger.debug("Compressing paste... Pre-compression size: {}", orig_size)
13 |         compressed = base64.b64encode(lzma.compress(raw_paste_data.encode()))
14 |         compressed_size = len(compressed)
15 |         logger.debug("Compressing paste... Post-compression size: {}", compressed_size)
16 | 
17 |         # In some cases compressed blobs may be larger
18 |         # if not much data is compressed
19 |         if orig_size > compressed_size:
20 |             paste_object['raw_paste'] = compressed.decode('utf-8')
21 |             logger.debug("Compressed data smaller than original blob. Keeping compressed.")
22 |         else:
23 |             logger.debug("Original smaller than compressed blob. Keeping original.")
24 | 
25 |     # Regardless of modification, return the paste object
26 |     return paste_object
27 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/powershell.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This rule will look for common powershell elements
 3 | */
 4 | 
 5 | rule powershell
 6 | {
 7 |     meta:
 8 |         author = "@KevTheHermit"
 9 |         info = "Part of PasteHunter"
10 |         reference = "https://github.com/kevthehermit/PasteHunter"
11 | 
12 |     strings:
13 |         $a1 = "powershell" fullword wide ascii nocase
14 |         $a2 = "IEX" fullword wide ascii nocase
15 |         $a3 = "new-object" fullword wide ascii nocase
16 |         $a4 = "webclient" fullword wide ascii nocase
17 |         $a5 = "downloadstring" fullword wide ascii nocase
18 |         $a6 = "-WindowStyle Hidden" fullword wide ascii nocase
19 |         $a7 = "invoke" fullword wide ascii nocase
20 |         $a8 = "bitsadmin" fullword wide ascii nocase
21 |         $a9 = "certutil -decode" fullword wide ascii nocase
22 |         $a10 = "hidden" fullword wide ascii nocase
23 |         $a11 = "nop" fullword wide ascii nocase
24 |         $a12 = "Invoke-" fullword wide ascii nocase
25 |         $a13 = "FromBase64String(" fullword wide ascii nocase
26 | 
27 | 
28 | 
29 |         $not1 = "chocolatey" nocase
30 |         $not2 = "XmlConfiguration is now operational" nocase
31 |     condition:
32 |         4 of ($a*) and not any of ($not*)
33 | 
34 | }
35 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/core_keywords.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This rule will match any of the keywords in the list
 3 | */
 4 | 
 5 | rule core_keywords
 6 | {
 7 |     meta:
 8 |         author = "@KevTheHermit"
 9 |         info = "Part of PasteHunter"
10 |         reference = "https://github.com/kevthehermit/PasteHunter"
11 | 
12 |     strings:
13 |         $tango_down = "TANGO DOWN" wide ascii nocase
14 |         $antisec = "antisec" wide ascii nocase
15 |         $hacked = "hacked by" wide ascii nocase
16 |         $onion_url = /.\.onion/ // was /.*.\.onion/; the leading .* only slowed scanning
17 |         $nmap_scan = "Nmap scan report for" wide ascii nocase
18 |         $enabled_sec = "enable secret" wide ascii nocase
19 |         $enable_pass = "enable password" wide ascii nocase
20 |     condition:
21 |         any of them
22 | 
23 | }
24 | 
25 | rule dox
26 | {
27 |     meta:
28 |         author = "@KevTheHermit"
29 |         info = "Part of PasteHunter"
30 |         reference = "https://github.com/kevthehermit/PasteHunter"
31 | 
32 |     strings:
33 |         $dox = "DOX" wide ascii nocase fullword
34 |         $keyword1 = "name" wide ascii nocase
35 |         $keyword2 = "dob" wide ascii nocase
36 |         $keyword3 = "age" wide ascii nocase
37 |         $keyword4 = "password" wide ascii nocase
38 |         $keyword5 = "email" wide ascii nocase
39 |     condition:
40 |         $dox and 3 of ($keyword*)
41 | 
42 | }
43 | 


--------------------------------------------------------------------------------
/pastehunter/sandboxes/cuckoo.py:
--------------------------------------------------------------------------------
 1 | import io
 2 | import logging
 3 | import requests
 4 | from pastehunter.common import parse_config
 5 | conf = parse_config()
 6 | 
 7 | logger = logging.getLogger('pastehunter')
 8 | 
 9 | def upload_file(raw_file, paste_object):
10 |     try:
11 |         task_id = send_to_cuckoo(raw_file, paste_object["pasteid"])
12 |         paste_object["Cuckoo Task ID"] = task_id
13 |         logger.info("exe submitted to Cuckoo with task id {0}".format(task_id))
14 |     except Exception as e:
15 |         logger.error("Unabled to submit sample to cuckoo")
16 | 
17 |     # Send any updated json back
18 |     return paste_object
19 | 
20 | def send_to_cuckoo(raw_exe, pasteid):
21 |     cuckoo_ip = conf["sandboxes"]["cuckoo"]["api_host"]
22 |     cuckoo_port = conf["sandboxes"]["cuckoo"]["api_port"]
23 |     cuckoo_host = 'http://{0}:{1}'.format(cuckoo_ip, cuckoo_port)
24 |     submit_file_url = '{0}/tasks/create/file'.format(cuckoo_host)
25 |     files = {'file': ('{0}.exe'.format(pasteid), io.BytesIO(raw_exe))}
26 |     submit_file = requests.post(submit_file_url, files=files).json()
27 |     task_id = None
28 |     try:
29 |         task_id = submit_file['task_id']
30 |     except KeyError:
31 |         try:
32 |             task_id = submit_file['task_ids'][0]
33 |         except KeyError:
34 |             logger.error(submit_file)
35 | 
36 |     return task_id
37 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/database.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     This rule will look for Database elements
 3 | */
 4 | 
 5 | rule db_connection
 6 | {
 7 |     meta:
 8 |         author = "@KevTheHermit"
 9 |         info = "Part of PasteHunter"
10 |         reference = "https://github.com/kevthehermit/PasteHunter"
11 | 
12 |     strings:
13 |         $a = /\b(mongodb|http|https|ftp|mysql|postgresql|oracle):\/\/(\S*):(\S*)@(\S*)\b/
14 |         $n1 = "#EXTINF"
15 |         $n2 = "m3u8"
16 | 
17 |     condition:
18 |         $a and not any of ($n*)
19 | }
20 | 
21 | rule db_structure
22 | {
23 |     meta:
24 |         author = "@KevTheHermit"
25 |         info = "Part of PasteHunter"
26 |         reference = "https://github.com/kevthehermit/PasteHunter"
27 | 
28 |     strings:
29 |         $a = "CREATE TABLE" nocase
30 |         $b = "INSERT INTO" nocase
31 |         $c = "VALUES" nocase
32 |         $d = "ENGINE" nocase
33 |         $e = "CHARSET" nocase
34 |         $f = "NOT NULL" nocase
35 |         $g = "varchar" nocase
36 |         $h = "PRIMARY KEY"
37 | 
38 |     condition:
39 |         5 of them
40 | }
41 | 
42 | rule db_create_user
43 | {
44 |     meta:
45 |         author = "@KevTheHermit"
46 |         info = "Part of PasteHunter"
47 |         reference = "https://github.com/kevthehermit/PasteHunter"
48 | 
49 |     strings:
50 |         $a = "GRANT ALL PRIVILEGES" nocase
51 |         $b = "IDENTIFIED BY" nocase
52 |         $c = "GRANT SELECT" nocase
53 |         $d = "CREATE USER" nocase
54 | 
55 |     condition:
56 |         2 of them
57 | }


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # PasteHunter
 2 | PasteHunter is a python3 application that is designed to query a collection of sites that host publicly pasted data. 
 3 | For all the pastes it finds it scans the raw contents against a series of Yara rules looking for information that can be used 
 4 | by an organisation or a researcher.
 5 | 
 6 | 
 7 | ## Setup 
 8 | For setup instructions please see the official documentation https://pastehunter.readthedocs.io/en/latest/installation.html
 9 | 
10 | [![PyPI version](https://badge.fury.io/py/pastehunter.svg)](https://badge.fury.io/py/pastehunter)
11 | 
12 | [![Build Status](https://travis-ci.org/kevthehermit/PasteHunter.svg?branch=master)](https://travis-ci.org/kevthehermit/PasteHunter)
13 | 
14 | 
15 | ## Supported Inputs
16 | Pastehunter currently has support for the following sites:
17 |  - pastebin.com
18 |  - gist.github.com # Gists
19 |  - github.com # Public commit activity feed
20 |  - slexy.org
21 |  - stackexchange # There are about 176! 
22 | 
23 | ## Supported Outputs
24 | Pastehunter supports several output modules:
25 |  - dump to ElasticSearch DB (default).
26 |  - Email alerts (SMTP).
27 |  - Slack Channel notifications.
28 |  - Dump to JSON file.
29 |  - Dump to CSV file.
30 |  - Send to syslog.
31 |  - POST to URL
32 | 
33 | ## Supported Sandboxes
34 | Pastehunter supports several sandboxes that decoded data can be sent to:
35 |  - Cuckoo
36 |  - Viper
37 | 
38 | For examples of data discovered using pastehunter check out my posts https://techanarchy.net/blog/hunting-pastebin-with-pastehunter and https://techanarchy.net/blog/pastehunter-the-results
39 | 


--------------------------------------------------------------------------------
/pastehunter/outputs/csv_output.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import os
 3 | import datetime
 4 | from pastehunter.common import parse_config
 5 | 
 6 | logger = logging.getLogger('pastehunter')
 7 | config = parse_config()
 8 | 
 9 | 
10 | class CSVOutput(object):
11 |     def __init__(self):
12 |         base_path = config['outputs']['csv_output']['output_path']
13 |         # Get todays CSV
14 |         dtg = datetime.date.today().strftime("%Y-%m-%d")
15 |         csv_name = 'pastes_{0}.csv'.format(dtg)
16 |         self.csv_path = os.path.join(base_path, csv_name)
17 | 
18 |         if not os.path.exists(base_path):
19 |             try:
20 |                 os.makedirs(base_path)
21 |                 self.test = True
22 |             except OSError as e:
23 |                 logger.error("Unable to create CSV Path: {}".format(e))
24 |                 self.test = False
25 |         else:
26 |             self.test = True
27 | 
28 |     def store_paste(self, paste_data):
29 |         if self.test:
30 |             # date, _id, YaraRule, raw_url
31 |             csv_line = '{0},{1},{2},{3},{4}'.format(paste_data['@timestamp'],
32 |                                                 paste_data['pasteid'],
33 |                                                 paste_data['YaraRule'],
34 |                                                 paste_data['scrape_url'],
35 |                                                 paste_data['pastesite'])
36 |             with open(self.csv_path, 'a') as out:
37 |                 out.write('{0}\n'.format(csv_line))
38 |         else:
39 |             logging.error("CSV Output Error. Output path '{}' was never created.".format(self.csv_path))
40 | 


--------------------------------------------------------------------------------
/pastehunter/common.py:
--------------------------------------------------------------------------------
 1 | import json
 2 | import logging
 3 | import os.path
 4 | 
 5 | logger = logging.getLogger('pastehunter')
 6 | home = os.path.expanduser("~")
 7 | 
 8 | BASE62_CHARS = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ'
 9 | BASE_LOOKUP = dict((c, i) for i, c in enumerate(BASE62_CHARS))
10 | BASE_LEN = len(BASE62_CHARS)
11 | 
12 | # Parse the config file in to a dict
13 | def parse_config():
14 |     conf = None
15 |     settings_file = os.path.join(home, ".config", "pastehunter.json")
16 | 
17 |     if os.path.exists(settings_file):
18 |         conf_file = settings_file
19 |     else:
20 |         #ToDo: Copy base settings to the settings file
21 |         conf_file = None
22 | 
23 |     if conf_file:
24 |         try:
25 |             with open(conf_file, 'r') as read_conf:
26 |                 conf = json.load(read_conf)
27 |         except Exception as e:
28 |             logger.error("Unable to parse config file: {0}".format(e))
29 |     else:
30 |         logger.error("Unable to read config file '~/.config/pastehunter.json'")
31 | 
32 |     return conf
33 | 
34 | 
35 | # Most of this was pulled from https://stackoverflow.com/a/2549514
36 | def base62_decode(input: str) -> int:
37 |     length = len(BASE_LOOKUP)
38 |     ret = 0
39 |     for i, c in enumerate(input[::-1]):
40 |         ret += (length ** i) * BASE_LOOKUP[c]
41 | 
42 |     return ret
43 | 
44 | 
45 | def base62_encode(integer) -> str:
46 |     if integer == 0:
47 |         return BASE62_CHARS[0]
48 | 
49 |     ret = ''
50 |     while integer != 0:
51 |         ret = BASE62_CHARS[integer % BASE_LEN] + ret
52 |         integer //= BASE_LEN
53 | 
54 |     return ret
55 | 


--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
 1 | version: "2"
 2 | services:
 3 |   pastehunter:
 4 |     build: .
 5 |     container_name: pastehunter
 6 |     volumes:
 7 |       - ./logs:/usr/src/pastehunter/logs
 8 |       - ~/.config/pastehunter.json:/root/.config/pastehunter.json
 9 |     networks:
10 |       esnet:
11 |         ipv4_address: 172.16.10.11
12 |     depends_on:
13 |       - "elasticsearch"
14 |       - "kibana"
15 |   elasticsearch:
16 |     image: docker.elastic.co/elasticsearch/elasticsearch:6.4.2
17 |     container_name: elasticsearch
18 |     environment:
19 |       - cluster.name=docker-cluster
20 |       - bootstrap.memory_lock=true
21 |       - "ES_JAVA_OPTS=-Xms512m -Xmx512m"
22 |       - xpack.security.enabled=false
23 |       - xpack.monitoring.enabled=false
24 |       - xpack.ml.enabled=false
25 |       - xpack.graph.enabled=false
26 |       - xpack.watcher.enabled=false
27 |       - discovery.zen.minimum_master_nodes=1
28 |       - discovery.type=single-node
29 |     ulimits:
30 |       memlock:
31 |         soft: -1
32 |         hard: -1
33 |     mem_limit: 1g
34 |     volumes:
35 |       - esdata1:/usr/share/elasticsearch/data
36 |     ports:
37 |       - "127.0.0.1:9200:9200"
38 |     networks:
39 |       esnet:
40 |         ipv4_address: 172.16.10.10
41 | 
42 |   kibana:
43 |     image: docker.elastic.co/kibana/kibana:6.4.2
44 |     container_name: kibana
45 |     ports:
46 |       - "127.0.0.1:5601:5601"
47 |     networks:
48 |       esnet:
49 |         ipv4_address: 172.16.10.12
50 |     depends_on:
51 |       - "elasticsearch"
52 | 
53 | volumes:
54 |   esdata1:
55 | 
56 | networks:
57 |   esnet:
58 |     driver: bridge
59 |     ipam:
60 |       config:
61 |       - subnet: 172.16.10.0/24
62 |       
63 | 


--------------------------------------------------------------------------------
/pastehunter/inputs/base_input.py:
--------------------------------------------------------------------------------
 1 | from abc import ABC, abstractmethod
 2 | from typing import Any, Optional, Dict, List, Union
 3 | 
 4 | import requests
 5 | 
 6 | 
 7 | class BasePasteSite(ABC):
 8 |     def make_request(self, url: str, timeout: Optional[int] = 10, headers: Optional[Dict[str, Any]] = None):
 9 |         """
10 |         Make a request and return the results
11 |         :param url: The url to request
12 |         :param timeout: The timeout for the request
13 |         :param headers: The headers dict
14 |         :return:
15 |         """
16 |         req = requests.get(url, headers=headers, timeout=timeout)
17 |         return req
18 | 
19 |     @abstractmethod
20 |     def remap_raw_item(self, raw_item: [str, Dict]) -> Dict[str, Any]:
21 |         """
22 |         Takes a raw item and remaps it to a normalize paste dict
23 |         :param raw_item:
24 |         :return: The paste dict
25 |         """
26 |         pass
27 | 
28 |     @abstractmethod
29 |     def get_paste_for_id(self, paste_id: Any) -> str:
30 |         """
31 |         Returns a paste for the given paste_id
32 |         :param paste_id: The paste to retrieve
33 |         :return: A raw paste object
34 |         """
35 |         pass
36 | 
37 |     @abstractmethod
38 |     def get_paste_id(self, paste_obj: Dict[str, Any]) -> Union[str, int]:
39 |         """
40 |         Returns an id for the given paste object
41 |         :param paste_obj: The raw paste dict
42 |         :return: The paste id
43 |         passd (str or int)
44 |         """
45 | 
46 |     @abstractmethod
47 |     def get_recent_items(self, input_history: List[str]):
48 |         """
49 |         Gets recent items
50 |         :return: a list of recent items
51 |         """
52 |         pass
53 | 


--------------------------------------------------------------------------------
/pastehunter/outputs/http_output.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | 
 3 | import requests
 4 | 
 5 | from pastehunter.common import parse_config
 6 | 
 7 | logger = logging.getLogger('pastehunter')
 8 | 
 9 | config = parse_config()
10 | 
11 | class HttpOutput():
12 |     def __init__(self):
13 |         self.valid = True
14 |         self.endpoint_url = config['outputs']['http_output']['endpoint_url']
15 |         self.http_headers = config['outputs']['http_output']['headers']
16 |         self.http_auth = config['outputs']['http_output']['http_auth']
17 |         self.http_user = config['outputs']['http_output']['http_user']
18 |         self.http_password = config['outputs']['http_output']['http_password']
19 |         self.ignore_fields = config['outputs']['http_output']['ignore_fields']
20 |         self.timezone = config['outputs']['http_output']['timezone']
21 | 
22 |         if self.endpoint_url == '':
23 |             logging.error("HTTP endpoint not configured")
24 |             self.valid = False
25 | 
26 |     def store_paste(self, paste_data):
27 |         if self.valid:
28 | 
29 |             json_data = paste_data
30 | 
31 |             for field in self.ignore_fields:
32 |                 del json_data[field]
33 | 
34 |             json_data['@timestamp'] += self.timezone
35 | 
36 |             if self.http_auth:
37 |                 req = requests.post(self.endpoint_url, headers=self.http_headers, json=json_data, auth=(self.http_user, self.http_password))
38 |             else:
39 |                 req = requests.post(self.endpoint_url, headers=self.http_headers, data=json_data)
40 | 
41 |             if req.status_code == 200 or req.status_code == 201:
42 |                 logger.debug("Paste sent to HTTP endpoint")
43 |             else:
44 |                 logger.error("Failed to post to HTTP endpoint {0}".format(req.status_code))
45 | 


--------------------------------------------------------------------------------
/.travis.yml:
--------------------------------------------------------------------------------
 1 | language: python
 2 | dist: bionic
 3 | group: edge
 4 | cache:
 5 |   pip: true
 6 | env:
 7 |   - PYTHONPATH=pastehunter
 8 | python:
 9 | - 3.6
10 | - 3.6-dev
11 | before_install:
12 | - sudo apt-get update -qq
13 | - sudo apt-get install automake libtool make gcc libmagic-dev -yqq python3-pip unzip
14 | - wget https://github.com/VirusTotal/yara/archive/v3.10.0.tar.gz
15 | - tar -xzvf v3.10.0.tar.gz
16 | - cd yara-3.10.0/ && ./bootstrap.sh && ./configure --enable-dotnet --enable-magic
17 |   && make && sudo make install && cd ../
18 | - git clone --recursive https://github.com/VirusTotal/yara-python
19 | - pip3 install pytest codecov pytest-cov
20 | - cd yara-python
21 | - python setup.py build --enable-magic --enable-dotnet
22 | - python setup.py install && cd ../ && rm -rf yara-python && rm -rf yara-3.10.0/
23 | install:
24 | - pip install -r requirements.txt
25 | - pip install pytest
26 | - pip install -e .
27 | script:
28 | - pastehunter-cli
29 | - python -m pytest
30 | after_success:
31 | - python setup.py sdist
32 | deploy:
33 |   provider: pypi
34 |   user: __token__
35 |   password:
36 |     secure: ZYILSwAsPcCWa4Ccslu2F+HVw02Rafdf4HqnQla3uCCTlEQQ+cFyuTKxQB46xytgblFQv/99oxq3SwVTUX4C6cIa8D+zHm/6lR4Tu+YPthYZX9IashF/AMKkyKks8bxbB0x/3t7hBX+7w++OcC1wwCXUyX7btsiOBa28k1NZCsB26NgdpBn02wF/GwqDhkxKkW9Bi7KDjb58GdiyhgVXxOOaOYbRyKiNZqUKQx504zmc0aGSPYCs0gSPwoA0T3FUet4IBcjjTP9DsjjkyQ7K6iMWYNGsAP91HnZe5J4sZYqwrGs++vndJVa/bYpiyMCjUrG4c6okdS0zpSmfbrqJay12wH5qroqqLxwuLtrXcHK+ChlyvhsGHMN51rqX811zdt/IzDwi+hXz84e8Y8/YgUTx7j0/HPEdrHjIIbMoIEd9Wy42+TcRCHJOULjsg7Kc7KLd1ILvxxyV+REnkfaazeqmgSNlqFxM2A65dkq3xNt9CDtYQlX/IhTDBy2/qY3m60uOh92ptd5f5eHF28W89APnkRAHD2JSEVRym1fHNrvPl1NCJT8NavbdYup/dH8hQadMx72X022lmyFASHN92G78O3uA0fZ8B/hzCpVQ4KTTIT4/LqkAXuWlfW4z9wC62V2ZdL6E76lqbMPokeXfH8Tf+chAaw/XHr7Wk6bWkOQ=
37 |   on:
38 |     branch: master
39 |   skip_existing: true
40 | 


--------------------------------------------------------------------------------
/pastehunter/outputs/splunk_output.py:
--------------------------------------------------------------------------------
 1 | from pastehunter.common import parse_config
 2 | import json
 3 | import logging
 4 | import splunklib.client as client
 5 | 
 6 | logger = logging.getLogger('pastehunter')
 7 | config = parse_config()
 8 | 
 9 | class SplunkOutput():
10 |     def __init__(self):
11 |         # Set up the database connection
12 |         splunk_host = config['outputs']['splunk_output']['splunk_host']
13 |         splunk_port = config['outputs']['splunk_output']['splunk_port']
14 |         splunk_user = config['outputs']['splunk_output']['splunk_user']
15 |         splunk_pass = config['outputs']['splunk_output']['splunk_pass']
16 |         self.splunk_index = config['outputs']['splunk_output']['splunk_index']
17 | 
18 |         try:
19 |             self.service = client.connect(
20 |                 host=splunk_host,
21 |                 port=splunk_port,
22 |                 username=splunk_user,
23 |                 password=splunk_pass,
24 |                 autologin=True)
25 | 
26 |             self.index = self.service.indexes[self.splunk_index]
27 |         except Exception as e:
28 |             logger.error(e)
29 |             raise Exception('Unable to connect or missing index') from None
30 | 
31 |     def store_paste(self, paste_data):
32 |         # Make a copy so we don't affect any other output modules
33 |         local_data = dict(paste_data)
34 |         if not config['outputs']['splunk_output']['store_raw']:
35 |             del local_data['raw_paste']
36 | 
37 |         try:
38 |             # The edit_tcp capability is required to access this API
39 |             sourcetype = config['outputs']['splunk_output']['splunk_sourcetype']
40 |             self.index.submit(json.dumps(local_data), sourcetype=sourcetype)
41 |         except Exception as e:
42 |             logger.exception('Error submitting paste_data to splunk', e)
43 | 


--------------------------------------------------------------------------------
/pastehunter/inputs/dumpz.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import logging
 3 | 
 4 | logger = logging.getLogger('pastehunter')
 5 | 
 6 | def recent_pastes(conf, input_history):
 7 |     # populate vars from config
 8 |     paste_limit = conf['inputs']['dumpz']['paste_limit']
 9 |     api_scrape = conf['inputs']['dumpz']['api_scrape']
10 |     history = []
11 |     paste_list = []
12 |     try:
13 |         # Create the API uri
14 |         scrape_uri = '{0}?limit={1}'.format(api_scrape, paste_limit)
15 |         # Get some pastes and convert to json
16 |         # Get last 'paste_limit' pastes
17 |         paste_list_request = requests.get(scrape_uri)
18 |         paste_list_json = paste_list_request.json()
19 | 
20 |         for paste in paste_list_json['dumps']:
21 |             # Track paste ids to prevent dupes
22 |             history.append(paste['id'])
23 |             if paste['id'] in input_history:
24 |                 continue
25 | 
26 |             # We don't want password protected pastes
27 |             if paste['pwd'] == 1:
28 |                 continue
29 | 
30 |             # Create a new paste dict for us to normalize
31 |             paste_data = paste
32 |             paste_data['confname'] = 'dumpz'
33 |             paste_data['pasteid'] = paste['id']
34 |             paste_data['pastesite'] = 'dumpz.org'
35 | 
36 |             #paste_data['scrape_url'] = '{0}{1}'.format(conf['dumpz']['api_raw'], paste['id'])
37 | 
38 |             paste_data['scrape_url'] = 'https://dumpz.org/{0}/text/'.format(paste['id'])
39 | 
40 |             # Add a date field that kibana will map
41 |             paste_data['@timestamp'] = paste_data['date']
42 |             paste_list.append(paste_data)
43 |         return paste_list, history
44 | 
45 |     except Exception as e:
46 |         logger.error("Unable to parse paste results: {0}".format(e))
47 |         return paste_list, history


--------------------------------------------------------------------------------
/CHANGELOG.md:
--------------------------------------------------------------------------------
 1 | # Changelog
 2 | All notable changes to this project will be documented in this file.
 3 | 
 4 | The format is based on [Keep a Changelog](https://keepachangelog.com/en/1.0.0/),
 5 | and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0.html).
 6 | 
 7 | ## [1.4.2] - 2020-12-02
 8 | ### Changed
 9 | - Fixed ix.io import
10 | - Made slexy's timeout configurable (#121)
11 | 
12 | ## [1.4.1] - 2020-11-25
13 | ### Changed
14 | - Fixed typo in 1.4.0 (Thanks @cham423!)
15 | 
16 | ## [1.4.0] - 2020-11-22
17 | ### Changed
18 | - Added some error state checks and retry logic to pastebin scraping (#116)
19 | - Refactored paste inputs to use a base class
20 | 
21 | ### Added
22 | - Support for ix.io (#95)
23 | - Additional unit tests (pytest still has some issues with import paths on travis)
24 | 
25 | 
26 | ## [1.3.2] - 2020-02-15
27 | ### Changed
28 | - Minor patch fixing error in email yara regexp
29 | 
30 | ## [1.3.1] - 2020-02-15
31 | ### Changed
32 | - Tweaked base64.yar to ignore data uris that contain numbers
33 | - Improved error handling around missing pastes (404s)
34 | - Fixed slexy timeout/rapid requests
35 | - Began ignoring CSS (.css), SASS (.scss), and Unreal asset (.uasset) files by default for GitHub
36 | - Fixed github filename blacklist being ignored
37 | - GitHub now uses file blob hashes instead of commit ids for paste_id. This is to prevent collision for commits with multiple matching files
38 | - Reduced false positives returned from password_list rule
39 | - Removed email_list rule (superseded by email_filter)
40 | 
41 | ### Added
42 | - HTTP Output (#104)
43 | 
44 | ## [1.2.1] - 2019-12-29
45 | ### Changed
46 | - move config file to ~/.config
47 | - move custom yara rules
48 | - refactor yara rules location
49 | 
50 | ## [1.2.0] - 2019-12-28
51 | ### Added
52 | - Changelog
53 | - travis CI
54 | - PyPi Installation
55 | 
56 | ### Changed
57 | - FilePaths to enable pip
58 | 


--------------------------------------------------------------------------------
/docs/postprocess.rst:
--------------------------------------------------------------------------------
 1 | PostProcess
 2 | ===========
 3 | 
 4 | There are a handful of post process modules that can run additional checks on the raw paste data. 
 5 | 
 6 | There are a few generic options for each postprocess module.
 7 | 
 8 | - **enabled**: This turns the module on and off. 
 9 | - **module**: This is used internally by pastehunter.
10 | 
11 | 
12 | Email
13 | -----
14 | This postprocess module extracts additional information from data that includes email addresses. It will extract counts for:
15 | 
16 | - Total Emails
17 | - Unique Email addresses
18 | - Unique Email domains
19 | 
20 | These 3 values are then added to the meta data for storage. 
21 | 
22 | - **rule_list**: List of rules that will trigger the postprocess module. 
23 | 
24 | Base64
25 | ------
26 | 
27 | This postprocess will attempt to decode base64 data and then apply further processing on the new file data. At the moment this module only operates
28 | when the full paste is a base64 blob, i.e. it will not extract base64 code that is embedded in other data. 
29 | 
30 | - **rule_list**: List of rules that will trigger the postprocess module. 
31 | 
32 | See the `Sandboxes documentation <sandboxes.rst>`_ for information on how to configure the sandboxes used for scanning decoded base64 data.
33 | 
34 | 
35 | Entropy
36 | -------
37 | 
38 | This postprocess module calculates Shannon entropy on the raw paste data. This can be used to help identify binary and encoded or encrypted data. 
39 | 
40 | - **rule_list**: List of rules that will trigger the postprocess module. 
41 | 
42 | Compress
43 | --------
44 | Compresses the data using LZMA (lossless compression) if it will reduce the size. Small pastes or pastes that don't benefit from compression will not be affected by this module. 
45 | Its outputs can be decompressed by base64-decoding, then using the `xz command <https://www.systutorials.com/docs/linux/man/1-xz/>`_.
46 | 
47 | - **rule_list**: List of rules that will trigger the postprocess module. 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | # Byte-compiled / optimized / DLL files
  2 | __pycache__/
  3 | *.py[cod]
  4 | *$py.class
  5 | 
  6 | # C extensions
  7 | *.so
  8 | 
  9 | # Distribution / packaging
 10 | .Python
 11 | env/
 12 | build/
 13 | develop-eggs/
 14 | dist/
 15 | downloads/
 16 | eggs/
 17 | .eggs/
 18 | lib/
 19 | lib64/
 20 | parts/
 21 | sdist/
 22 | var/
 23 | wheels/
 24 | *.egg-info/
 25 | .installed.cfg
 26 | *.egg
 27 | 
 28 | # PyInstaller
 29 | #  Usually these files are written by a python script from a template
 30 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
 31 | *.manifest
 32 | *.spec
 33 | 
 34 | # Installer logs
 35 | pip-log.txt
 36 | pip-delete-this-directory.txt
 37 | 
 38 | # Unit test / coverage reports
 39 | htmlcov/
 40 | .tox/
 41 | .coverage
 42 | .coverage.*
 43 | .cache
 44 | nosetests.xml
 45 | coverage.xml
 46 | *.cover
 47 | .hypothesis/
 48 | 
 49 | # Translations
 50 | *.mo
 51 | *.pot
 52 | 
 53 | # Django stuff:
 54 | *.log
 55 | local_settings.py
 56 | 
 57 | # Flask stuff:
 58 | instance/
 59 | .webassets-cache
 60 | 
 61 | # Scrapy stuff:
 62 | .scrapy
 63 | 
 64 | # Sphinx documentation
 65 | docs/_build/
 66 | 
 67 | # PyBuilder
 68 | target/
 69 | 
 70 | # Jupyter Notebook
 71 | .ipynb_checkpoints
 72 | 
 73 | # pyenv
 74 | .python-version
 75 | 
 76 | # celery beat schedule file
 77 | celerybeat-schedule
 78 | 
 79 | # SageMath parsed files
 80 | *.sage.py
 81 | 
 82 | # dotenv
 83 | .env
 84 | 
 85 | # virtualenv
 86 | .venv
 87 | venv/
 88 | ENV/
 89 | 
 90 | # Spyder project settings
 91 | .spyderproject
 92 | .spyproject
 93 | 
 94 | # Rope project settings
 95 | .ropeproject
 96 | 
 97 | # mkdocs documentation
 98 | /site
 99 | 
100 | # mypy
101 | .mypy_cache/
102 | /settings.conf
103 | /YaraRules/custom_keywords.yar
104 | /paste_history.tmp
105 | /settings.json
106 | /.idea
107 | /postprocess/tester.py
108 | .vscode/
109 | logs/
110 | 
111 | .c9
112 | pastehunter/YaraRules/custom_keywords.yar
113 | 


--------------------------------------------------------------------------------
/pastehunter/outputs/slack_output.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | import requests
 3 | from pastehunter.common import parse_config
 4 | 
 5 | logger = logging.getLogger('pastehunter')
 6 | 
 7 | config = parse_config()
 8 | 
 9 | 
10 | class SlackOutput():
11 |     def __init__(self):
12 |         self.valid = True
13 |         self.webhook_url = config['outputs']['slack_output']['webhook_url']
14 |         self.accepted_rules = config['outputs']['slack_output']['rule_list']
15 | 
16 |         if self.webhook_url == '':
17 |             logging.error("Slack Webhook not configured")
18 |             self.valid = False
19 |         if self.webhook_url == '':
20 |             logging.error("No Rules configured to alert")
21 | 
22 |     def store_paste(self, paste_data):
23 |         if self.valid:
24 |             send = ('all' in self.accepted_rules)
25 | 
26 |             for rule in self.accepted_rules:
27 |                 if rule in paste_data['YaraRule']:
28 |                     send = True
29 | 
30 |             if send:
31 |                 json_data = {
32 |                     "text": "Pastehunter alert!",
33 |                     "attachments": [
34 |                         {
35 |                             "fallback": "Plan a vacation",
36 |                             "author_name": "PasteHunter",
37 |                             "title": "Paste ID {0}".format(paste_data['pasteid']),
38 |                             "text": "Yara Rule {0} Found on {1}\n\r{2}".format(paste_data['YaraRule'], paste_data['pastesite'], paste_data['scrape_url'])
39 |                         }
40 |                     ]
41 |                 }
42 | 
43 |                 req = requests.post(self.webhook_url, json=json_data)
44 |                 if req.status_code == 200 and req.text == 'ok':
45 |                     logger.debug("Paste sent to slack")
46 |                 else:
47 |                     logger.error(
48 |                         "Failed to post to slack Status Code {0}".format(req.status_code))
49 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/test_rules.yar:
--------------------------------------------------------------------------------
 1 | /*
 2 |     These are test rules
 3 | */
 4 | 
 5 | rule test_hex_MZ  // flags data that starts with the hex text "4d5a"
 6 | {
 7 |     meta:
 8 |         author = "kevthehermit"
 9 |         info = "Part of PasteHunter"
10 |         reference = "https://github.com/kevthehermit/PasteHunter"
11 | 
12 |     strings:
13 |         $mz_hex  = "4d5a" nocase wide ascii  // 0x4D 0x5A are the ASCII codes of "MZ", written as hex text
14 | 
15 |     condition:
16 |         $mz_hex at 0  // the string must sit at offset 0 of the scanned data
17 | 
18 | }
19 | 
20 | rule test_vbscript  // flags data containing several VBScript/VBA-style keywords
21 | {
22 |     meta:
23 |         author = "kevthehermit"
24 |         info = "Part of PasteHunter"
25 |         reference = "https://github.com/kevthehermit/PasteHunter"
26 | 
27 |     strings:  // every keyword: case-insensitive, ascii or wide, whole word only
28 |         $a = "Function" nocase wide ascii fullword
29 |         $b = "CreateObject" nocase wide ascii fullword
30 |         $c  = "Wscript" nocase wide ascii fullword
31 |         $d = "As Long" nocase wide ascii fullword
32 |         $e = "run" nocase wide ascii fullword
33 |         $f = "for each" nocase wide ascii fullword
34 |         $g = "end function" nocase wide ascii fullword
35 |         $h = "NtAllocateVirtualMemory" nocase wide ascii fullword
36 |         $i = "NtWriteVirtualMemory" nocase wide ascii fullword
37 | 
38 | 
39 |     condition:
40 |         5 of them  // any five of the nine keywords
41 | }
42 | 
43 | rule test_autoit  // flags data containing several AutoIt-style keywords
44 | {
45 |     meta:
46 |         author = "kevthehermit"
47 |         info = "Part of PasteHunter"
48 |         reference = "https://github.com/kevthehermit/PasteHunter"
49 | 
50 |     strings:  // every keyword: case-insensitive, ascii or wide, whole word only
51 |         $tray = "NoTrayIcon" nocase wide ascii fullword
52 |         $a = "iniread" nocase wide ascii fullword
53 |         $b = "fileinstall" nocase wide ascii fullword
54 |         $c  = "EndFunc" nocase wide ascii fullword
55 |         $d = "FileRead" nocase wide ascii fullword
56 |         $e = "DllStructSetData" nocase wide ascii fullword
57 |         $f = "Global Const" nocase wide ascii fullword
58 |         $g = "Run(@AutoItExe" nocase wide ascii fullword
59 |         $h = "StringReplace" nocase wide ascii fullword
60 |         $i = "filewrite" nocase wide ascii fullword
61 | 
62 | 
63 | 
64 |     condition:
65 |         ($tray and 3 of them) or (5 of them)  // NOTE(review): "them" includes $tray, so the first branch needs $tray plus two others - confirm intent
66 | }


--------------------------------------------------------------------------------
/pastehunter/outputs/elastic_output.py:
--------------------------------------------------------------------------------
 1 | from elasticsearch import Elasticsearch
 2 | from pastehunter.common import parse_config
 3 | from datetime import datetime
 4 | import logging
 5 | 
 6 | logger = logging.getLogger('pastehunter')
 7 | config = parse_config()
 8 | 
 9 | 
10 | class ElasticOutput():
11 |     def __init__(self):
12 |         # Set up the database connection
13 |         es_host = config['outputs']['elastic_output']['elastic_host']
14 |         es_port = config['outputs']['elastic_output']['elastic_port']
15 |         es_user = config['outputs']['elastic_output']['elastic_user']
16 |         es_pass = config['outputs']['elastic_output']['elastic_pass']
17 |         self.es_index = config['outputs']['elastic_output']['elastic_index']
18 |         self.weekly = config['outputs']['elastic_output']['weekly_index']
19 |         es_ssl = config['outputs']['elastic_output']['elastic_ssl']
20 |         self.test = False
21 |         try:
22 |             self.es = Elasticsearch(es_host, port=es_port, http_auth=(es_user, es_pass), use_ssl=es_ssl)
23 |             self.test = True
24 |         except Exception as e:
25 |             logger.error(e)
26 |             raise Exception('Unable to Connect') from None
27 | 
28 |     def store_paste(self, paste_data):
29 |         if self.test:
30 |             index_name = self.es_index
31 |             if self.weekly:
32 |                 year_number = datetime.date(datetime.now()).isocalendar()[0]
33 |                 week_number = datetime.date(datetime.now()).isocalendar()[1]
34 |                 index_name = '{0}-{1}-{2}'.format(index_name, year_number, week_number)
35 |             # ToDo: With multiple paste sites a pasteid collision is more likly!
36 |             try:
37 |                 pasteid = str(paste_data['pasteid'])
38 |                 self.es.index(index=index_name, doc_type='paste', id=pasteid, body=paste_data)
39 |                 logger.debug("Stored {0} Paste {1}, Matched Rule {2}".format(paste_data['pastesite'],
40 |                                                                              paste_data['pasteid'],
41 |                                                                              paste_data['YaraRule']
42 |                                                                              )
43 |                              )
44 |             except Exception as e:
45 |                 logger.error(e)
46 |         else:
47 |             logger.error("Elastic Search Enabled, not configured!")
48 | 


--------------------------------------------------------------------------------
/test/test_paste_objects.py:
--------------------------------------------------------------------------------
 1 | from inputs.pastebin import PastebinPasteSite
 2 | from inputs.slexy import SlexyPasteSite
 3 | 
 4 | pids_found = []
 5 | 
 6 | 
 7 | def mock_get_paste_for_pid(pid):
 8 |     pids_found.append(pid)
 9 |     return "pid_is_" + pid
10 | 
11 | 
12 | class FakeRequestJson(object):
13 |     def __init__(self, ret):
14 |         self.ret = ret
15 | 
16 |     def json(self):
17 |         return self.ret
18 | 
19 | 
def test_slexy_site():
    """Drive a SlexyPasteSite whose three hooks are replaced by local fakes."""
    fake_pids = [0, 1, 2, 3, 4]
    site = SlexyPasteSite({})

    # Replace every network-facing hook on the instance with a deterministic fake.
    site.get_recent_items = lambda: fake_pids
    site.get_paste_for_id = lambda pid: mock_get_paste_for_pid(str(pid))
    site.remap_raw_item = lambda raw_data, pid: {"pid": 123}

    listed = site.get_recent_items()
    assert listed == fake_pids

    for pid in listed:
        raw_paste = site.get_paste_for_id(pid)
        remapped = site.remap_raw_item(raw_paste, pid)
        assert raw_paste == 'pid_is_' + str(pid)
        assert remapped == {"pid": 123}
33 | 
34 | 
def test_pastebin_site_remap():
    """remap_raw_item keeps the raw keys and adds the normalised paste fields."""
    pastebin_settings = {
        'paste_limit': 100,
        'api_scrape': 'https://scrape.pastebin.com/api_scraping.php'
    }
    site = PastebinPasteSite({'inputs': {'pastebin': pastebin_settings}})

    raw_item = {'key': 'a', 'test': 'b', 'date': '1582595793'}
    remapped = site.remap_raw_item(raw_item)

    # 1582595793 seconds since the epoch is 2020-02-25T01:56:33 in UTC.
    expected = {
        'key': 'a',
        'test': 'b',
        'date': '1582595793',
        'filename': 'a',
        'confname': 'pastebin',
        'pasteid': 'a',
        'pastesite': 'pastebin.com',
        '@timestamp': '2020-02-25T01:56:33',
    }
    assert remapped == expected
53 | 
54 | 
def test_pastebin_site():
    """get_recent_items returns the scraped pastes plus the list of their ids."""
    pastebin_settings = {
        'paste_limit': 100,
        'api_scrape': 'https://scrape.pastebin.com/api_scraping.php'
    }
    site = PastebinPasteSite({'inputs': {'pastebin': pastebin_settings}})

    def fake_make_request(url):
        # Build a fresh canned listing on every call, exactly two entries.
        return FakeRequestJson([
            {'key': 'ab', 'date': '1582595793'},
            {'key': 'bc', 'date': '1582595793'},
        ])

    site.make_request = fake_make_request

    pastes, paste_ids = site.get_recent_items([])
    assert paste_ids == ['ab', 'bc']
    first, second = pastes[0], pastes[1]
    assert first.get('key') == 'ab'
    assert second.get('key') == 'bc'
79 | 


--------------------------------------------------------------------------------
/pastehunter/postprocess/post_b64.py:
--------------------------------------------------------------------------------
 1 | import hashlib
 2 | import importlib
 3 | import gzip
 4 | import logging
 5 | from base64 import b64decode
 6 | # This gets the raw paste and the paste_data json object
 7 | from pastehunter.common import parse_config
 8 | conf = parse_config()
 9 | 
10 | logger = logging.getLogger('pastehunter')
11 | 
def run(results, raw_paste_data, paste_object):
    """Enrich ``paste_object`` for pastes that matched a base64 YARA rule.

    The entire paste is treated as one base64 stream.

    * ``b64_gzip``: the stream is base64-decoded and gunzipped; the result is
      stored as text under ``decompressed_stream``.
    * ``b64_exe``: the stream is base64-decoded; its size, MD5 and SHA256 are
      stored, a VirusTotal URL is derived from the MD5, and the bytes are
      passed to every enabled sandbox module from ``conf["sandboxes"]``.

    :param results: iterable of matched rule names.
    :param raw_paste_data: the raw paste text (expected to be base64).
    :param paste_object: the paste metadata dict; updated in place.
    :return: the (possibly replaced by a sandbox module) paste object.
    """
    # ToDo: Lets look at multiple base64 streams;
    # for now only accept if the entire paste is base64.
    #
    # Candidate expressions for extracting individual streams later on:
    #   The base64 re can hang on occasion with this one
    #   b64_re = '(?:[A-Za-z0-9+/]{4}){2,}(?:[A-Za-z0-9+/]{2}[AEIMQUYcgkosw048]=|[A-Za-z0-9+/][AQgw]==)'
    #   This one has a few empty results to catch but doesn't kill pastehunter
    #   b64_re = '(?:[A-Za-z0-9+/]{4}){3,}(?:[A-Za-z0-9+/]{2}==|[A-Za-z0-9+/]{3}=)?'

    # Nothing to decode; the check does not depend on the rule, so do it once.
    if not raw_paste_data:
        return paste_object

    for rule in results:
        if rule == 'b64_gzip':
            # Lets try to decode and decompress the stream.
            try:
                uncompressed = gzip.decompress(b64decode(raw_paste_data))
                # gzip.decompress returns bytes: decode (not encode) to get a
                # string. Undecodable bytes are replaced so binary payloads
                # still produce a value instead of an error.
                paste_object["decompressed_stream"] = uncompressed.decode('utf-8', errors='replace')
            except Exception as e:
                logger.error("Unable to decompress gzip stream: {0}".format(e))

        if rule == 'b64_exe':
            try:
                raw_exe = b64decode(raw_paste_data)
                paste_object["exe_size"] = len(raw_exe)
                paste_object["exe_md5"] = hashlib.md5(raw_exe).hexdigest()
                paste_object["exe_sha256"] = hashlib.sha256(raw_exe).hexdigest()

                # We are guessing that the sample has been submitted, and crafting a URL
                paste_object["VT"] = 'https://www.virustotal.com/#/file/{0}'.format(paste_object["exe_md5"])

                # If sandbox modules are enabled then submit the file
                for sandbox_values in conf["sandboxes"].values():
                    if sandbox_values["enabled"]:
                        logger.info("Uploading file {0} using {1}".format(paste_object["pasteid"],
                                                                          sandbox_values["module"]))
                        sandbox_module = importlib.import_module(sandbox_values["module"])
                        paste_object = sandbox_module.upload_file(raw_exe, paste_object)

            except Exception as e:
                logger.error("Unable to decode exe file: {0}".format(e))

    # Send the updated json back
    return paste_object
72 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/api_keys.yar:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This rule will look for standard API Keys.
  3 | */
  4 | 
  5 | rule generic_api
  6 | {
  7 |     meta:
  8 |         author = "@KevTheHermit"
  9 |         info = "Part of PasteHunter"
 10 |         reference = "https://github.com/kevthehermit/PasteHunter"
 11 | 
 12 |     strings:
 13 |         $a1 = "apikey" nocase
 14 |         $a2 = "api_key" nocase
 15 |         $hash32 = /\b[a-fA-F\d]{32}\b/
 16 |         $hash64 = /\b[a-fA-F\d]{64}\b/
 17 |         $n1 = "#EXTINF"
 18 |         $n2 = "m3u8"
 19 |         $n3 = "Chocolatey is running"
 20 | 
 21 |     condition:
 22 |         (any of ($a*)) and (any of ($hash*)) and (not any of ($n*))
 23 | 
 24 | }
 25 | 
 26 | rule twitter_api
 27 | {
 28 |     meta:
 29 |         author = "@KevTheHermit"
 30 |         info = "Part of PasteHunter"
 31 |         reference = "https://github.com/kevthehermit/PasteHunter"
 32 | 
 33 |     strings:
 34 |         $a = "consumer_key" nocase
 35 |         $b = "consumer_secret" nocase
 36 |         $c = "access_token" nocase
 37 |     condition:
 38 |         all of them
 39 | 
 40 | }
 41 | 
 42 | rule google_api
 43 | {
 44 |     meta:
 45 |         author = "@KevTheHermit"
 46 |         info = "Part of PasteHunter"
 47 |         reference = "https://github.com/kevthehermit/PasteHunter"
 48 | 
 49 |     strings:
 50 |         $a = /\bAIza.{35}\b/
 51 |     condition:
 52 |         any of them
 53 | }
 54 | 
 55 | rule slack_api
 56 | {
 57 |     meta:
 58 |         author = "@ntddk"
 59 |         info = "Part of PasteHunter"
 60 |         reference = "https://github.com/kevthehermit/PasteHunter"
 61 | 
 62 |     strings:
 63 |         $a = /(xox(p|b|o|a)-[0-9]{9,12}-[0-9]{9,12}-[0-9]{9,12}-[a-z0-9]{32})/
 64 |         $b = "hooks.slack.com" nocase
 65 |     condition:
 66 |         any of them
 67 | }
 68 | 
 69 | rule github_api
 70 | {
 71 |     meta:
 72 |         author = "@ntddk"
 73 |         info = "Part of PasteHunter"
 74 |         reference = "https://github.com/kevthehermit/PasteHunter"
 75 | 
 76 |     strings:
 77 |         $a = /[g|G][i|I][t|T][h|H][u|U][b|B].*[[\'|"]0-9a-zA-Z]{35,40}[\'|"]/
 78 |     condition:
 79 |         any of them
 80 | }
 81 | 
 82 | rule aws_api
 83 | {
 84 |     meta:
 85 |         author = "@ntddk"
 86 |         info = "Part of PasteHunter"
 87 |         reference = "https://github.com/kevthehermit/PasteHunter"
 88 | 
 89 |     strings:
 90 |         $a = /AKIA[0-9A-Z]{16}/
 91 |     condition:
 92 |         any of them
 93 | }
 94 | 
 95 | rule heroku_api
 96 | {
 97 |     meta:
 98 |         author = "@ntddk"
 99 |         info = "Part of PasteHunter"
100 |         reference = "https://github.com/kevthehermit/PasteHunter"
101 | 
102 |     strings:
103 |         $a = /[h|H][e|E][r|R][o|O][k|K][u|U].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}/
104 |     condition:
105 |         any of them
106 | }
107 | 
rule discord_api
{
    meta:
        author = "@ntddk"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        // Config key naming a bot token
        $a = "DiscordRelay.BotToken" nocase
        // Webhook endpoint URL fragment
        $b = "discordapp.com/api/webhooks" nocase
    condition:
        $a or $b
}
121 | 


--------------------------------------------------------------------------------
/pastehunter/inputs/pastebin.py:
--------------------------------------------------------------------------------
 1 | from typing import Any, Dict, Union, Optional
 2 | 
 3 | import requests
 4 | import logging
 5 | from datetime import datetime
 6 | 
 7 | from pastehunter.inputs.base_input import BasePasteSite
 8 | 
 9 | logger = logging.getLogger('pastehunter')
10 | 
11 | 
class PastebinPasteSite(BasePasteSite):
    """Paste-site adapter for the pastebin.com scraping API."""

    def __init__(self, conf):
        # Full configuration dict; settings are read from conf['inputs']['pastebin'].
        self.conf = conf

    def remap_raw_item(self, raw_item: Dict) -> Dict[str, Any]:
        """Add the normalised paste fields to ``raw_item`` and return it.

        The dict passed in is updated in place; no copy is made.
        """
        paste_key = self.get_paste_id(raw_item)
        raw_item.update({
            'filename': paste_key,
            'confname': 'pastebin',
            'pasteid': paste_key,
            'pastesite': 'pastebin.com',
        })
        # Add a date field that kibana will map
        epoch_seconds = float(raw_item['date'])
        raw_item['@timestamp'] = datetime.utcfromtimestamp(epoch_seconds).isoformat()
        return raw_item

    def make_request(self, url: str, timeout: Optional[int] = 10, headers: Optional[Dict[str, Any]] = None):
        """Issue the request via the base class; return ``None`` if access is refused.

        Access counts as refused when the response text contains
        'DOES NOT HAVE ACCESS'.
        """
        response = super().make_request(url, timeout, headers)

        access_refused = 'DOES NOT HAVE ACCESS' in response.text
        if not access_refused:
            return response

        logger.error("Your IP is not whitelisted visits 'https://pastebin.com/doc_scraping_api'")
        return None

    def get_paste_for_id(self, paste_id: Any) -> str:
        """Not implemented for pastebin; currently returns ``None``."""
        return None

    def get_paste_id(self, paste_obj: Dict[str, Any]) -> Union[str, int]:
        """Return the value stored under the ``'key'`` field of ``paste_obj``."""
        return paste_obj['key']

    def get_recent_items(self, input_history):
        """Fetch the recent paste listing and return ``(paste_list, history)``.

        ``history`` holds the id of every listed paste; ``paste_list`` holds
        the remapped entries whose id is not in ``input_history``. A falsy
        response yields two empty lists. Any exception is logged and whatever
        was collected so far is returned.
        """
        pastebin_conf = self.conf['inputs']['pastebin']
        paste_limit = pastebin_conf['paste_limit']
        api_scrape = pastebin_conf['api_scrape']

        seen_ids = []
        new_pastes = []
        try:
            # Ask the scrape endpoint for the last 'paste_limit' pastes
            scrape_uri = '{0}?limit={1}'.format(api_scrape, paste_limit)
            response = self.make_request(scrape_uri)

            # IP not whitelisted
            if not response:
                return [], []

            for entry in response.json():
                entry_id = self.get_paste_id(entry)
                # Track paste ids to prevent dupes
                seen_ids.append(entry_id)
                if entry_id not in input_history:
                    new_pastes.append(self.remap_raw_item(entry))

        except Exception as e:
            logger.error("Unable to parse paste results: {0}".format(e))

        return new_pastes, seen_ids
80 | 
81 | 
def recent_pastes(conf, input_history):
    """Module entry point: list recent pastebin items for the given config.

    Builds a ``PastebinPasteSite`` from ``conf`` and returns the result of its
    ``get_recent_items(input_history)`` call unchanged.
    """
    return PastebinPasteSite(conf).get_recent_items(input_history)
86 | 
87 | 
88 | 
89 | 


--------------------------------------------------------------------------------
/pastehunter/outputs/twilio_output.py:
--------------------------------------------------------------------------------
 1 | import logging
 2 | from twilio.rest import Client
 3 | from pastehunter.common import parse_config
 4 | 
 5 | logger = logging.getLogger('pastehunter')
 6 | config = parse_config()
 7 | 
 8 | class TwilioOutput(object):
 9 |     def __init__(self):
10 |         self.account_sid = config['outputs']['twilio_output']['account_sid']
11 |         self.auth_token = config['outputs']['twilio_output']['auth_token']
12 |         self.twilio_sender = config['outputs']['twilio_output']['twilio_sender']
13 |         self.recipient_list = config['outputs']['twilio_output']['recipient_list']
14 |         self.accepted_rules = config['outputs']['twilio_output']['rule_list']
15 |         self.message_type = 'sms' # Whatsapp is still in beta on twilio.
16 |         try:
17 |             self.client = Client(self.account_sid, self.auth_token)
18 |             self.test = True
19 |         except Exception as e:
20 |             logging.error("Unable to create twilio Client: {0}".format(e))
21 |             self.test = False
22 | 
23 | 
24 |     def store_paste(self, paste_data):
25 |         if self.test:
26 | 
27 | 
28 |             send = ('all' in self.accepted_rules)
29 | 
30 |             for rule in self.accepted_rules:
31 |                 if rule in paste_data['YaraRule']:
32 |                     send = True
33 | 
34 |             if send:
35 |                 message_body = "Yara Rule {0} Found on {1}\n\r{2}".format(
36 |                     paste_data['YaraRule'], 
37 |                     paste_data['pastesite'], 
38 |                     paste_data['scrape_url']
39 |                     )
40 | 
41 |                 logger.debug("Sending Twilio Message")
42 |                 if self.message_type == 'sms':
43 |                     for recipient in self.recipient_list:
44 |                         try:
45 |                             message = self.client.messages.create( 
46 |                                                         from_=self.twilio_sender,  
47 |                                                         body=message_body,      
48 |                                                         to=recipient 
49 |                                                     )
50 |                             logging.debug("Sent twilio message with ID: {0}".format(message.sid))
51 |                         except Exception as e:
52 |                             logging.error(e)
53 | 
54 |                 elif self.message_type == 'whatsapp':
55 |                     for recipient in self.recipient_list:
56 |                         try:
57 |                             message = self.client.messages.create( 
58 |                                                         from_='whatsapp:{0}'.format(self.twilio_sender),  
59 |                                                         body=message_body,      
60 |                                                         to='whatsapp:{0}'.format(recipient) 
61 |                                                     )
62 |                             logging.debug("Sent twilio message with ID: {0}".format(message.sid))
63 |                         except Exception as e:
64 |                             logging.error(e)
65 |                 else:
66 |                     logging.error("No Valid twilio message type found")
67 | 


--------------------------------------------------------------------------------
/pastehunter/inputs/gists.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import math
 3 | import logging
 4 | from datetime import datetime
 5 | 
 6 | # Set some logging options
 7 | logger = logging.getLogger('pastehunter')
 8 | logging.getLogger('requests').setLevel(logging.ERROR)
 9 | 
10 | api_uri = 'https://api.github.com/gists/public'
11 | api_version = 'application/vnd.github.v3+json'  # Set Accept header to force api v3
12 | 
13 | # Some people use gists to store large blobs of data every 17 minutes. This just slows down the kibana UI
14 | 
15 | 
16 | 
def recent_pastes(conf, input_history):
    """Fetch recent public gists and return ``(paste_list, history)``.

    Pages of 100 gists are requested until ``api_limit`` entries are covered.
    ``history`` receives the id of every gist seen; ``paste_list`` receives
    one entry per file of each gist that is not already in ``input_history``
    and is not excluded by the user or file blacklist.

    :param conf: configuration dict; settings are read from
        ``conf['inputs']['gists']``.
    :param input_history: gist ids already processed on a previous run.
    :return: tuple ``(paste_list, history)``. On any exception the error is
        logged and whatever was collected so far is returned.
    """
    gists_conf = conf['inputs']['gists']
    oauth_token = gists_conf['api_token']
    gist_limit = gists_conf['api_limit']
    gist_file_blacklist = gists_conf['file_blacklist']
    gist_user_blacklist = gists_conf['user_blacklist']

    headers = {'user-agent': 'PasteHunter',
               'Accept': api_version,
               'Authorization': 'token {0}'.format(oauth_token)}

    # calculate number of pages
    page_count = int(math.ceil(gist_limit / 100))

    result_pages = []
    history = []
    paste_list = []

    try:
        # Get the required amount of entries via pagination
        for page_num in range(1, page_count + 1):
            url = '{0}?page={1}&per_page=100'.format(api_uri, page_num)
            logger.debug("Fetching page: {0}".format(page_num))
            req = requests.get(url, headers=headers)

            # Report the rate limit when the headers are present; a response
            # without them must not abort the whole fetch.
            reset_epoch = req.headers.get('X-RateLimit-Reset')
            if reset_epoch is not None:
                reset_date = datetime.utcfromtimestamp(float(reset_epoch)).isoformat()
                logger.info("Remaining Limit: {0}. Resets at {1}".format(
                    req.headers.get('X-RateLimit-Remaining'), reset_date))

            if req.status_code == 200:
                result_pages.append(req.json())
            elif req.status_code == 401:
                logger.error("Auth Failed")
            elif req.status_code == 403:
                logger.error("Login Attempts Exceeded")

        # Parse results
        for page in result_pages:
            for gist_meta in page:
                # Track paste ids to prevent dupes
                history.append(gist_meta['id'])
                if gist_meta['id'] in input_history:
                    continue

                # The account name lives under owner.login. The blacklist check
                # previously read gist_meta['user'] while the log line read
                # owner.login, so a blacklisted login was never matched.
                owner = gist_meta.get('owner') or {}
                gist_user = owner.get('login')
                if gist_user in gist_user_blacklist:
                    logger.info("Blacklisting Gist from user: {0}".format(gist_user))
                    continue

                for file_name, file_meta in gist_meta['files'].items():

                    if file_name in gist_file_blacklist:
                        logger.info("Blacklisting Paste {0}".format(file_name))
                        continue

                    # The per-file metadata dict is reused as the paste record.
                    gist_data = file_meta
                    gist_data['confname'] = 'gists'
                    gist_data['@timestamp'] = gist_meta['created_at']
                    gist_data['pasteid'] = gist_meta['id']
                    gist_data['user'] = gist_user
                    gist_data['pastesite'] = 'gist.github.com'
                    # Move raw_url to scrape_url to keep the record a bit cleaner
                    gist_data['scrape_url'] = gist_data.pop('raw_url')
                    paste_list.append(gist_data)

        # Return results and history
        return paste_list, history
    except Exception as e:
        logger.error("Unable to parse paste results: {0}".format(e))
        return paste_list, history
90 | 


--------------------------------------------------------------------------------
/pastehunter/inputs/stackexchange.py:
--------------------------------------------------------------------------------
 1 | import requests
 2 | import math
 3 | import logging
 4 | from datetime import datetime
 5 | 
 6 | # Set some logging options
 7 | logger = logging.getLogger('pastehunter')
 8 | logging.getLogger('requests').setLevel(logging.ERROR)
 9 | 
10 | # Test API Key from the docs - U4DMV*8nvpm3EOpvf69Rxw((
11 | # https://api.stackexchange.com/2.2/questions?key=U4DMV*8nvpm3EOpvf69Rxw((&site=stackoverflow&page=1&pagesize=100&order=desc&sort=creation&filter=default
12 | 
13 | 
14 | 
def recent_pastes(conf, input_history):
    """Fetch the newest questions from each configured Stack Exchange site.

    :param conf: configuration dict; settings are read from
        ``conf['inputs']['stackexchange']`` (``api_key``, ``api_scrape``,
        ``site_list``, ``store_filter``).
    :param input_history: question ids already processed on a previous run.
    :return: tuple ``(paste_list, history)``. ``history`` holds every question
        id seen, ``paste_list`` the normalised questions not in
        ``input_history``. Returns ``([], [])`` when no API key is configured
        or the API reports an error. On any other exception the error is
        logged and whatever was collected so far is returned.
    """
    se_conf = conf['inputs']['stackexchange']
    api_key = se_conf['api_key']
    api_scrape = se_conf['api_scrape']
    site_list = se_conf['site_list']
    store_filter = se_conf['store_filter']
    pagesize = 100  # Default = 30
    headers = {'user-agent': 'PasteHunter'}

    if api_key == '':
        logger.error("No API Key configured for StackExchange Access")
        return [], []

    history = []
    paste_list = []
    # Stay None when site_list is empty so the quota log can be skipped.
    quota_max = None
    quota_remaining = None

    try:
        # For each of the stack sites we want to query
        for site in site_list:
            logger.info("Query Stack Exchange site: {0}".format(site))

            # Create the API uri
            scrape_uri = ('{0}?key={1}&site={2}&page=1&pagesize={3}'
                          '&order=desc&sort=creation&filter={4}').format(
                              api_scrape, api_key, site, pagesize, store_filter)
            # Get the newest questions and convert to json
            paste_list_request = requests.get(scrape_uri, headers=headers)

            # ToDo: Add an API rate test in here.
            paste_list_json = paste_list_request.json()

            if "error_id" in paste_list_json:
                logger.error("StackExchange API Error: {0}".format(paste_list_json['error_message']))
                return [], []

            for question in paste_list_json['items']:
                # Track question ids to prevent dupes
                history.append(question['question_id'])
                if question['question_id'] in input_history:
                    continue

                # The question dict itself is normalised and stored.
                question_data = question
                question_data['filename'] = ''
                question_data['confname'] = "stackexchange"
                # Force type to string else it breaks ES Index mappings
                question_data['pasteid'] = str(question['question_id'])
                question_data['pastesite'] = site
                # Set the raw uri to avoid breaking other things. Defaults to empty if not found
                question_data['scrape_url'] = question.get('link', '')
                # Get the author and then trim the data we store. A question
                # without owner details must not abort the whole run.
                owner = question_data.pop('owner', None) or {}
                question_data['username'] = owner.get('display_name', '')
                # Add a date field that kibana will map
                date = datetime.utcfromtimestamp(float(question_data['creation_date'])).isoformat()
                question_data['@timestamp'] = date
                paste_list.append(question_data)

            # Record API Quota on last call to save some logging.
            quota_max = paste_list_json.get('quota_max')
            quota_remaining = paste_list_json.get('quota_remaining')

        if quota_max is not None and quota_remaining is not None:
            # The API reports what is left; "used" is the difference.
            logger.info("Used {0} of {1} of StackExchange api quota".format(quota_max - quota_remaining,
                                                                            quota_max))
        # Return the pastes and update history
        return paste_list, history

    except Exception as e:
        logger.error("Unable to parse question results: {0}".format(e))
        return paste_list, history


--------------------------------------------------------------------------------
/docs/outputs.rst:
--------------------------------------------------------------------------------
 1 | Outputs
 2 | =======
 3 | 
 4 | This page details all the configuration options for the output modules.
 5 | There are a few generic options for each output.
 6 | 
 7 | - **enabled**: This turns the output on and off.
 8 | - **module**: This is used internally by pastehunter.
 9 | - **classname**: This is used internally by pastehunter.
10 | 
11 | Elasticsearch
12 | -------------
13 | Elasticsearch was the default output. It stores all pastes, and Kibana can be used as a graphical frontend to view the results.
14 | 
15 | - **elastic_index**: The name of the index.
16 | - **weekly_index**: Use a numbered index for each week of the year instead of a single index.
17 | - **elastic_host**: Hostname or IP of the elasticsearch.
18 | - **elastic_port**: Port number for elasticsearch default is 9200
19 | - **elastic_user**: Username if using xpack / shield or basic auth.
20 | - **elastic_pass**: Password if using xpack / shield or basic auth.
21 | - **elastic_ssl**: True or false if Elasticsearch is served over SSL.
22 | 
23 | Splunk
24 | -------------
25 | Splunk output is similar to Elasticsearch. All the data is put into Splunk and then Splunk can be used for graphical frontend and querying.
26 | 
27 | - **splunk_host**: Hostname or IP of your Splunk instance.
28 | - **splunk_port**: The Splunk management port. (Usually port 8089)
29 | - **splunk_user**: Username of your Splunk user.
30 | - **splunk_pass**: Password for your Splunk user.
31 | - **splunk_index**: The name of the Splunk index to store the data in.
32 | - **store_raw**: Include the raw paste in the data sent to Splunk.
33 | 
34 | JSON
35 | ----
36 | 
37 | This output module will store each paste in a json file on disk. The name of the file is the pasteid. 
38 | 
39 | - **output_path**: Path on disk to store output files. 
40 | - **store_raw**: Include the raw paste in the json file. False just stores metadata.
41 | - **encode_raw**: Ignored, Reserved for future usage.
42 | 
43 | CSV
44 | ---
45 | 
46 | The CSV output will append lines to a CSV that contains basic metadata from all paste sources. The raw paste is not included.
47 | 
48 | - **output_path**: Path on disk to store output files. 
49 | 
50 | Stored elements are
51 | 
52 | - Timestamp
53 | - Pasteid
54 | - Yara Rules
55 | - Scrape URL
56 | - Pastesite
57 | 
58 | Syslog
59 | ------
60 | Using the same format as the CSV output this writes paste metadata to a syslog server. The raw paste is not included. 
61 | 
62 | - **host**: IP or hostname of the syslog server.
63 | - **port**: Port number of the syslog server.
64 | 
65 | SMTP
66 | ----
67 | 
68 | This output will send an email to specific email addresses depending on the YaraRules that are matched. You need to set up an SMTP server. 
69 | 
70 | - **smtp_host**: hostname for the SMTP server.
71 | - **smtp_port**: Port number for the SMTP Server.
72 | - **smtp_security**: One of ``tls``, ``starttls``, ``none``.
73 | - **smtp_user**: Username for SMTP Authentication.
74 | - **smtp_pass**: Password for SMTP Authentication.
75 | - **recipients**: Json array of recipients and rules.
76 |   - **address**: Email address to send alerts to.
77 |   - **rule_list**: A list of rules to alert on. Any of the rules in this list will trigger an email.
78 |   - **mandatory_rule_list**: List of rules that *MUST* be present to trigger an email alert. 
79 | 
80 | 
81 | Slack
82 | -----
83 | 
84 | This output will send a Notification to a slack web hook. You need to configure the URL and the channel in Slack.
85 | Head over to https://api.slack.com/apps?new_app=1
86 | 
87 | Create a new Slack App with a Name and the workspace that you want to send alerts to. 
88 | Once created under Add Features and Functionality select Incoming Webhooks and toggle the Active button to on.
89 | At the bottom of the page select *Add New Webhook to Workspace* This will show another page where you select the Channel that will receive the notifications. 
90 | Once it has authorized the app you will see a new Webhook URL. This is the URL that needs to be added to the pastehunter config. 
91 | 
92 | - **webhook_url**: Generated when creating a Slack App as described above. 
93 | - **rule_list**: List of rules that will generate an alert. 
94 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/CryptoExchangeApi.yar:
--------------------------------------------------------------------------------
 1 | rule CryptoExchangeApi
 2 | {
 3 |     meta:
 4 |         description = "Contains Crypro Exchange API URL"
 5 |         author = "Jason Schorr (0xBanana)"
 6 |         source = "https://github.com/cryptodefense/PasteHunter-Yara/blob/master/CryptoExchangeApi.yar"
 7 |     strings:
 8 |     	$a = "api.binance.com" nocase wide ascii
 9 | 		$a0 = "1btcxe.com/api" nocase wide ascii
10 | 		$a1 = "acx.io/api" nocase wide ascii
11 | 		$a2 = "anxpro.com/api" nocase wide ascii
12 | 		$a3 = "anybits.com/api" nocase wide ascii
13 | 		$a4 = "www.bcex.top" nocase wide ascii
14 | 		$a5 = "api.bibox.com" nocase wide ascii
15 | 		$a6 = "bit2c.co.il" nocase wide ascii
16 | 		$a7 = "api.bitfinex.com" nocase wide ascii
17 | 		$a8 = "api.bitfinex.com" nocase wide ascii
18 | 		$a9 = "api.bitflyer.jp" nocase wide ascii
19 | 		$aa = "api.bitforex.com" nocase wide ascii
20 | 		$ab = "bitibu.com" nocase wide ascii
21 | 		$ac = "bitlish.com/api" nocase wide ascii
22 | 		$ad = "www.bitmex.com" nocase wide ascii
23 | 		$ae = "bitsane.com/api" nocase wide ascii
24 | 		$af = "api.bitso.com" nocase wide ascii
25 | 		$ag = "www.bitstamp.net/api" nocase wide ascii
26 | 		$ah = "www.bitstamp.net/api" nocase wide ascii
27 | 		$ai = "api.bl3p.eu" nocase wide ascii
28 | 		$aj = "braziliex.com/api/v1" nocase wide ascii
29 | 		$ak = "btc-alpha.com/api" nocase wide ascii
30 | 		$al = "www.btcbox.co.jp/api" nocase wide ascii
31 | 		$am = "www.btcexchange.ph/api" nocase wide ascii
32 | 		$an = "btc-trade.com.ua/api" nocase wide ascii
33 | 		$ao = "www.btcturk.com/api" nocase wide ascii
34 | 		$ap = "www.buda.com/api" nocase wide ascii
35 | 		$aq = "bx.in.th/api" nocase wide ascii
36 | 		$ar = "cex.io/api" nocase wide ascii
37 | 		$as = "api.cobinhood.com" nocase wide ascii
38 | 		$at = "api.coinbase.com" nocase wide ascii
39 | 		$au = "api.prime.coinbase.com" nocase wide ascii
40 | 		$av = "api.pro.coinbase.com" nocase wide ascii
41 | 		$aw = "coincheck.com/api" nocase wide ascii
42 | 		$ax = "www.coinexchange.io/api/v1" nocase wide ascii
43 | 		$ay = "coinfalcon.com" nocase wide ascii
44 | 		$az = "webapi.coinfloor.co.uk:8090/bist" nocase wide ascii
45 | 		$aa1 = "coinmate.io/api" nocase wide ascii
46 | 		$aa2 = "api.coinone.co.kr" nocase wide ascii
47 | 		$aa3 = "api.crex24.com" nocase wide ascii
48 | 		$aa4 = "api.cryptonbtc.com" nocase wide ascii
49 | 		$aa5 = "www.deribit.com" nocase wide ascii
50 | 		$aa6 = "api.ethfinex.com" nocase wide ascii
51 | 		$aa7 = "api.fcoin.com" nocase wide ascii
52 | 		$aa8 = "api.flowbtc.com:8405/ajax" nocase wide ascii
53 | 		$aa9 = "www.fybse.se/api/SEK" nocase wide ascii
54 | 		$aa0 = "www.fybsg.com/api/SGD" nocase wide ascii
55 | 		$aab = "api.gatecoin.com" nocase wide ascii
56 | 		$aac = "api.gdax.com" nocase wide ascii
57 | 		$aad = "api.gemini.com" nocase wide ascii
58 | 		$aae = "getbtc.org/api" nocase wide ascii
59 | 		$aaf = "api.hitbtc.com" nocase wide ascii
60 | 		$aag = "api.hitbtc.com" nocase wide ascii
61 | 		$aah = "api.huobi.com" nocase wide ascii
62 | 		$aai = "ice3x.com/api" nocase wide ascii
63 | 		$aaj = "api.itbit.com" nocase wide ascii
64 | 		$aak = "www.jubi.com/api" nocase wide ascii
65 | 		$aal = "kuna.io" nocase wide ascii
66 | 		$aam = "api.lakebtc.com" nocase wide ascii
67 | 		$aan = "api.lbank.info" nocase wide ascii
68 | 		$aao = "api.liquid.com" nocase wide ascii
69 | 		$aap = "api.livecoin.net" nocase wide ascii
70 | 		$aaq = "api.mybitx.com/api" nocase wide ascii
71 | 		$aar = "mixcoins.com/api" nocase wide ascii
72 | 		$aas = "novaexchange.com/remote" nocase wide ascii
73 | 		$aat = "paymium.com/api" nocase wide ascii
74 | 		$aau = "api.quadrigacx.com" nocase wide ascii
75 | 		$aav = "www.rightbtc.com/api" nocase wide ascii
76 | 		$aaw = "www.southxchange.com/api" nocase wide ascii
77 | 		$aax = "api.theocean.trade/api" nocase wide ascii
78 | 		$aay = "api.therocktrading.com" nocase wide ascii
79 | 		$aaz = "www.tidebit.com" nocase wide ascii
80 | 		$ba = "open-api.uex.com/open/api" nocase wide ascii
81 | 		$bb = "api.vaultoro.com" nocase wide ascii
82 | 		$bc = "cryptottlivewebapi.xbtce.net:8443/api" nocase wide ascii
83 | 		$bd = "yunbi.com" nocase wide ascii
84 | 		$be = "api.zaif.jp" nocase wide ascii
85 | 
86 |     condition:
87 |        any of them
88 | }


--------------------------------------------------------------------------------
/docs/inputs.rst:
--------------------------------------------------------------------------------
 1 | Inputs
 2 | ======
 3 | 
 4 | This page details all the configuration options per input. 
 5 | 
 6 | There are a few generic options for each input. 
 7 | - **enabled**: This turns the input on and off. 
 8 | - **store_all**: Store every paste, even when no rule matches (overrides the default of storing only on a rule match).
 9 | - **module**: This is used internally by pastehunter.
10 | 
11 | Pastebin
12 | ------------
13 | To use the pastebin API you need an API key. These need to be purchased and are almost always on some sort of offer!
14 | https://pastebin.com/pro The API uses your IP to authenticate instead of a key. You will need to whitelist your IP at https://pastebin.com/api_scraping_faq
15 | 
16 | - **api_scrape**: The URL endpoint for the list of recent paste ids.
17 | - **api_raw**: The URL endpoint for the raw paste.
18 | - **paste_limit**: How many pasteids to fetch from the recent list. 
19 | - **store_all**: Store all pastes regardless of a rule match.
20 | 
21 | Github Gists
22 | ---------------
23 | Github has an API that can be used at no cost to query recent gists. There are two options here. 
24 | 
25 | - Without an access key - You will have a low rate limit.
26 | - With an access key - You will have a higher rate limit. 
27 | 
28 | The unauthenticated option is not suitable for pastehunter running full time. 
29 | To create your key visit https://github.com/settings/tokens
30 | 
31 | *YOU DO NOT NEED TO GIVE IT ANY ACCESS PERMISSIONS*
32 | 
33 | - **api_token**: The token you generated.
34 | - **api_limit**: Rate limit to prevent being blocked.
35 | - **store_all**: Store all pastes regardless of a rule match.
36 | - **user_blacklist**: Do not process gists created by these usernames.
37 | - **file_blacklist**: Do not process gists that match these filenames.
38 | 
39 | Github Activity
40 | ---------------
41 | Github's activity feed is a list of public changes made. We specifically filter on commits. It can be accessed in a similar manner to gists:
42 | 
43 | - Without an access key - You will have a low rate limit.
44 | - With an access key - You will have a higher rate limit.
45 | 
46 | Again, the unauthenticated option is not suitable for pastehunter running full time, particularly if you're also running the gist
47 | input. However, the same token may be used for both inputs.
48 | 
49 | - **api_token**: The token you generated.
50 | - **api_limit**: Rate limit to prevent being blocked.
51 | - **store_all**: Store all pastes regardless of a rule match.
52 | - **user_blacklist**: Do not process gists created by these usernames.
53 | - **ignore_bots**: Ignore users with ``[bot]`` in their username (only actual bots can do this)
54 | - **file_blacklist**: Do not process gists that match these filenames. Supports glob syntax.
55 | 
56 | Slexy
57 | ---------
58 | 
59 | Slexy has some heavy rate limits (30 requests per 30 seconds), but may still return interesting results.
60 | 
61 | - **store_all**: Store all pastes regardless of a rule match.
62 | - **api_scrape**: The URL endpoint for the list of recent pastes.
63 | - **api_raw**: The URL endpoint for the raw paste.
64 | - **api_view**: The URL endpoint to view the paste.
65 | 
66 | ix.io
67 | ---------
68 | 
69 | ix.io is a smaller site used primarily for console/command line pastes.
70 | 
71 | - **store_all**: Store all pastes regardless of a rule match.
72 | 
73 | StackExchange
74 | -------------
75 | 
76 | The same API is used to query them all. Similar to github there is a public API which has a reduced rate limit 
77 | or an App API which has a higher cap. There is a cap on 10,000 requests per day per IP, so pulling all would be impractical. 
78 | Generate a key at https://stackapps.com/.
79 | 
80 | There are over 170 exchanges that form stackexchange. The following sites are the most likely to expose privileged information.
81 | 
82 | * stackoverflow
83 | * serverfault
84 | * superuser
85 | * webapps
86 | * webmasters
87 | * dba
88 | 
89 | - **site_list**: List of site shorttitles that will be scraped. 
90 | - **api_key**: API App key as generated above.
91 | - **store_filter**: This is the stackexchange filter that determines what fields are returned. It must contain the body element.
92 | - **pagesize**: How many questions to pull from the latest list. 
93 | - **store_all**: Store all pastes regardless of a rule match.


--------------------------------------------------------------------------------
/pastehunter/inputs/slexy.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | from datetime import datetime
  4 | from time import sleep
  5 | from typing import Any, Dict, Optional, List, Union
  6 | 
  7 | from pastehunter.inputs.base_input import BasePasteSite
  8 | 
# Module-level handle to the application's 'pastehunter' logger.
logger = logging.getLogger('pastehunter')
 10 | 
 11 | 
 12 | class SlexyPasteSite(BasePasteSite):
 13 | 
 14 |     def __init__(self, conf):
 15 |         self.url = None
 16 |         self.site = "slexy.org"
 17 |         url_slexy = "https://" + self.site
 18 |         self.url_recent = url_slexy + "/recent"
 19 |         self.url_view = url_slexy + "/view"
 20 |         self.url_raw = url_slexy + "/raw"
 21 |         self.fetch_timeout = conf.get('fetch_timeout', 15)
 22 | 
 23 |     def make_request(self, url: str, timeout: Optional[int] = 15, headers: Optional[Dict[str, Any]] = None):
 24 |         req = super(SlexyPasteSite, self).make_request(url, timeout, {
 25 |             'Referer': self.url_recent,
 26 |             'User-Agent': 'PasteHunter'
 27 |         })
 28 | 
 29 |         ratelimit_limit = int(req.headers.get('RateLimit-Limit', 30))
 30 |         remaining = int(req.headers.get('RateLimit-Remaining', 30))
 31 |         logger.debug('Remaining Slexy Ratelimit: {0}'.format(remaining))
 32 | 
 33 |         if req.status_code == 429:
 34 |             delay = req.headers.get('Retry-After', 60)
 35 |             sleep(delay)
 36 |             return self.make_request(url, timeout)
 37 |         # If ratelimit_limit = 60, 60/60 = 1
 38 |         # If ratelimit_limit = 30, 60/30 = 2
 39 |         sleep(30 / ratelimit_limit)
 40 |         return req.text
 41 | 
 42 |     def get_timestamp(self, data):
 43 |         pattern = 'Timestamp: <b>(.*?)</b>'
 44 |         ts = re.findall(pattern, data)[0]
 45 |         return datetime.strptime(ts, "%Y-%m-%d %H:%M:%S %z").isoformat()
 46 | 
 47 |     def get_paste_id(self, paste_obj: Dict[str, Any]) -> Union[str, int]:
 48 |         return paste_obj.get('pasteid')
 49 | 
 50 |     def remap_raw_item(self, raw_item: [str, Dict]) -> Dict[str, Any]:
 51 |         timestamp = self.get_timestamp(raw_item)
 52 |         paste_id = self.get_paste_id(raw_item)
 53 |         raw_url = self.get_raw_link(raw_item, paste_id)
 54 |         self.get_paste_id(raw_item)
 55 |         return {
 56 |             'confname': 'slexy',
 57 |             'scrape_url': raw_url,
 58 |             'pasteid': paste_id,
 59 |             'pastesite': self.site,
 60 |             '@timestamp': timestamp
 61 |         }
 62 | 
 63 |     def get_raw_data(self, raw_url):
 64 |         return self.make_request(raw_url, self.fetch_timeout)
 65 | 
 66 |     def get_paste_for_id(self, paste_id: Any) -> str:
 67 |         return self.make_request("%s/%s" % (self.url_view, paste_id), self.fetch_timeout)
 68 | 
 69 |     def get_raw_link(self, data, pid):
 70 |         pattern = '<a href="/raw/%s(.*?)"' % pid
 71 |         token = re.findall(pattern, data)[0]
 72 |         return "%s/%s%s" % (self.url_raw, pid, token)
 73 | 
 74 |     def get_recent_items(self, input_history: List[str]):
 75 |         data = self.make_request(self.url_recent, self.fetch_timeout)
 76 |         pids = re.findall('<td><a href="/view/(.*?)">', data)
 77 |         return list(set(pids))
 78 | 
 79 | 
 80 | def recent_pastes(conf, input_history):
 81 |     history = []
 82 |     paste_list = []
 83 |     my_scraper = SlexyPasteSite(conf['inputs']['slexy'])
 84 |     recent_pids = my_scraper.get_recent_items(input_history)
 85 |     pid_to_process = set()
 86 |     for pid in recent_pids:
 87 |         if pid in input_history:
 88 |             history.append(pid)
 89 |         else:
 90 |             pid_to_process.add(pid)
 91 |     try:
 92 |         for pid in pid_to_process:
 93 |             paste_data = my_scraper.get_paste_for_id(pid)
 94 |             raw = my_scraper.get_raw_link(paste_data, pid)
 95 |             paste_data = {
 96 |                 'confname': 'slexy',
 97 |                 'scrape_url': raw,
 98 |                 'pasteid': pid,
 99 |                 'pastesite': my_scraper.site,
100 |                 '@timestamp': my_scraper.get_timestamp(paste_data)
101 |             }
102 |             paste_list.append(paste_data)
103 |         return paste_list, history
104 |     except Exception as e:
105 |         logger.error("Unable to parse paste results: %s", e)
106 |         return paste_list, history
107 | 


--------------------------------------------------------------------------------
/pastehunter/YaraRules/base64.yar:
--------------------------------------------------------------------------------
  1 | /*
  2 |     This rule will look for base64 encoded data.
  3 | */
  4 | 
// Flags data whose first bytes are the base64 encoding of an "MZ" header.
rule b64_exe
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        // Every listed prefix (TVo*, TVp*, TVq*, TVr*) base64-decodes to bytes
        // beginning with "MZ"; the fourth character pins part of the third byte.
        $b64_exe = /\bTV(oA|pB|pQ|qA|qQ|ro)/
        // Double b64 = VFZxUU
    condition:
        // Only a hit at offset 0 counts, i.e. the data must start with the header.
        $b64_exe at 0

}
 19 | 
// Flags data that starts with the base64 encoding of an ELF header.
rule b64_elf
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        $b64_elf = "f0VM" // decodes to 0x7f 'E' 'L' (first three bytes of the ELF magic)
    condition:
        // Only a hit at offset 0 counts.
        $b64_elf at 0

}
 33 | 
// Flags data that starts with the base64 encoding of a "PK" (zip) header.
rule b64_zip
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        $b64_zip = "UEs" // decodes to "PK"
    condition:
        // Only a hit at offset 0 counts.
        $b64_zip at 0

}
 47 | 
// Flags data that starts with the base64 encoding of a "Rar" header.
rule b64_rar
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        $b64_rar = "UmFy" // decodes to "Rar"
    condition:
        // Only a hit at offset 0 counts.
        $b64_rar at 0

}
 61 | 
 62 | 
// Flags data that starts with the base64 encoding of a gzip header.
rule b64_gzip
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        $b64_gzip = "H4sI" // decodes to 0x1f 0x8b 0x08 (gzip magic + deflate method)
    condition:
        // Only a hit at offset 0 counts.
        $b64_gzip at 0

}
 76 | 
 77 | rule b64_url
 78 | {
 79 |     meta:
 80 |         author = "@KevTheHermit"
 81 |         info = "Part of PasteHunter"
 82 |         reference = "https://github.com/kevthehermit/PasteHunter"
 83 | 
 84 |     strings:
 85 |         $a1 = "aHR0cDov" // http/s
 86 |         $a2 = "SFRUUDov" // HTTP/S
 87 |         $a3 = "d3d3Lg" // www.
 88 |         $a4 = "V1dXLg" // WWW.
 89 | 
 90 |         // ignore vendor certs in this rule. The certs rule will pick them up if we want them
 91 |         $not1 = "GlobalSign Root CA" nocase
 92 | 
 93 |         // Ignore data: uris. These are common in html, css, and svg files.
 94 |         $not2 = /data:[a-z0-9\/]+;(base64,)?aHR0cDov/ nocase
 95 |         $not3 = /data:[a-z0-9\/]+;(base64,)?SFRUUDov/ nocase
 96 |         $not4 = /data:[a-z0-9\/]+;(base64,)?d3d3Lg/ nocase
 97 |         $not5 = /data:[a-z0-9\/]+;(base64,)?V1dXLg/ nocase
 98 | 
 99 |     condition:
100 |         any of ($a*) and not any of ($not*)
101 | 
102 | }
103 | 
// Flags data that starts with the base64 encoding of the d0 cf 11 e0 header
// (the byte sequence noted in the string comment below).
rule b64_doc
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        $b64_doc = "0M8R4" // d0cf11
    condition:
        // Only a hit at offset 0 counts.
        $b64_doc at 0

}
117 | 
// Flags data that starts with the base64 encoding of an RTF header.
rule b64_rtf
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        $b64_rtf = "e1xydGY" // {\rtf
    condition:
        // Only a hit at offset 0 counts.
        $b64_rtf at 0

}
131 | 
// Flags base64 data that starts with a zip ("PK") header and also contains
// at least three of the encoded member names listed below.
rule b64_docx
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        $b64_zip = "UEs" // decodes to "PK"
        $docx1 = "d29yZC9fcmVsc" // word/_rel
        $docx2 = "Zm9udFRhYmxl" // fontTable
        $docx3 = "ZG9jUHJvcHM" // docProps
        $docx4 = "Q29udGVudF9UeXBlcw" // Content_Types
        $docx5 = "c2V0dGluZ3M" // settings
    condition:
        // The zip header must sit at offset 0; the $docx* strings may be anywhere.
        $b64_zip at 0 and 3 of ($docx*)

}
150 | 
// Flags base64 data that starts with an encoded XML declaration and also
// contains at least three of the encoded document markers listed below.
rule b64_xml_doc
{
    meta:
        author = "@KevTheHermit"
        info = "Part of PasteHunter"
        reference = "https://github.com/kevthehermit/PasteHunter"

    strings:
        $b64_xml = "PD94bWwg" // decodes to "<?xml "
        $docx1 = "b3BlbmRvY3VtZW50" // opendocument
        $docx2 = "InBhcmFncmFwaCI" // "paragraph"
        $docx3 = "b2ZmaWNlL3dvcmQv" // office/word/
        $docx4 = "RG9jdW1lbnRQcm9wZXJ0aWVz" // DocumentProperties
    condition:
        // The XML declaration must sit at offset 0; the $docx* strings may be anywhere.
        $b64_xml at 0 and 3 of ($docx*)

}


--------------------------------------------------------------------------------
/pastehunter/outputs/smtp_output.py:
--------------------------------------------------------------------------------
  1 | import smtplib
  2 | import email.encoders
  3 | import email.header
  4 | import email.mime.base
  5 | import email.mime.multipart
  6 | import email.mime.text
  7 | from email.utils import formatdate
  8 | from email.mime.multipart import MIMEMultipart
  9 | import json
 10 | import logging
 11 | 
 12 | from pastehunter.common import parse_config
# Module-level handle to the application's 'pastehunter' logger.
logger = logging.getLogger('pastehunter')

# Parsed once when this module is imported; SMTPOutput.__init__ reads its
# settings from config['outputs']['smtp_output'].
config = parse_config()
 16 | 
 17 | class SMTPOutput():
 18 |     def __init__(self):
 19 |         smtp_object = config['outputs']['smtp_output']
 20 |         self.smtp_host = smtp_object['smtp_host']
 21 |         self.smtp_port = smtp_object['smtp_port']
 22 |         self.smtp_security = smtp_object['smtp_security']
 23 |         self.smtp_user = smtp_object['smtp_user']
 24 |         self.smtp_pass = smtp_object['smtp_pass']
 25 |         if 'recipients' in smtp_object:
 26 |             self.recipients = smtp_object['recipients']
 27 |         else:
 28 |             # maintain compatibility with older single recipient config format
 29 |             self.recipients = {'main': {'address': smtp_object['recipient'],
 30 |                                         'rule_list': smtp_object['rule_list'],
 31 |                                         'mandatory_rule_list': []}}
 32 | 
 33 | 
 34 |     def _send_mail(self, send_to_address, paste_data):
 35 |         logger.info("crafting email for {0}".format(send_to_address))
 36 | 
 37 |         # Create the message
 38 |         msg = MIMEMultipart()
 39 |         msg['Subject'] = 'PasteHunter Alert {0}'.format(', '.join(paste_data['YaraRule']))
 40 |         msg['From'] = self.smtp_user
 41 |         msg['To'] = send_to_address
 42 |         msg["Date"] = formatdate(localtime=True)
 43 | 
 44 |         # Attach the body
 45 |         body = 'Rules : {0}\n' \
 46 |                'Paste : {1} from {2}\n\n' \
 47 |                'A Copy of the paste has been attached'.format(', '.join(paste_data['YaraRule']),
 48 |                                                               paste_data['pasteid'],
 49 |                                                               paste_data['pastesite'])
 50 |         msg.attach(email.mime.text.MIMEText(body, 'plain'))
 51 | 
 52 |         # Attach the raw paste as JSON
 53 |         attachment = email.mime.base.MIMEBase('application', 'json')
 54 |         json_body = json.dumps(paste_data)
 55 |         attachment.set_payload(json_body)
 56 |         email.encoders.encode_base64(attachment)
 57 |         attachment.add_header('Content-Disposition', 'attachment; filename="Alert-{0}.json"'.format(paste_data['pasteid']))
 58 |         msg.attach(attachment)
 59 | 
 60 |         # Connect to the SMTP server and send
 61 |         if self.smtp_security == 'ssl':
 62 |             smtp_conn = smtplib.SMTP_SSL(self.smtp_host, self.smtp_port)
 63 |         else:
 64 |             smtp_conn = smtplib.SMTP(self.smtp_host, self.smtp_port)
 65 |         smtp_conn.ehlo()
 66 |         if self.smtp_security == 'tls':
 67 |             smtp_conn.starttls()
 68 |         smtp_conn.login(self.smtp_user, self.smtp_pass)
 69 |         smtp_conn.send_message(msg)
 70 |         smtp_conn.quit()
 71 | 
 72 |         logger.info("Sent mail to {0} with rules {1}".format(send_to_address,
 73 |                                                               ', '.join(paste_data['YaraRule'])))
 74 | 
 75 | 
 76 |     def _check_recipient_rules(self, paste_data, recipient_name):
 77 | 
 78 |             # Read each recipient's config
 79 |             recipient = self.recipients[recipient_name]
 80 |             recipient_address = recipient['address']
 81 |             all_rules_mandatory = False
 82 |             if len(recipient['mandatory_rule_list']):
 83 |                 recipient_rule_list = recipient['mandatory_rule_list']
 84 |                 all_rules_mandatory = True
 85 |             else:
 86 |                 recipient_rule_list = recipient['rule_list']
 87 | 
 88 |             # Check if the recipient has special rule 'all' meaning it gets all alerts
 89 |             if 'all' in recipient_rule_list:
 90 |                 self._send_mail(recipient_address, paste_data)
 91 |                 return
 92 | 
 93 |             # Check if all of the recipient's rules need to be found in the alert
 94 |             if all_rules_mandatory:
 95 |                 if all(elem in paste_data['YaraRule'] for elem in recipient_rule_list):
 96 |                     self._send_mail(recipient_address, paste_data)
 97 |                 return
 98 | 
 99 |             # Nominal case, check if at least one rule is found in the alert
100 |             if any(elem in paste_data['YaraRule'] for elem in recipient_rule_list):
101 |                 self._send_mail(recipient_address, paste_data)
102 |                 return
103 | 
104 | 
105 |     def store_paste(self, paste_data):
106 |         for recipient_name in self.recipients:
107 |             self._check_recipient_rules(paste_data, recipient_name)
108 | 


--------------------------------------------------------------------------------
/pastehunter/inputs/ixio.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import re
  3 | from datetime import datetime
  4 | from typing import List, Any, Dict, Union, Pattern
  5 | 
  6 | from pastehunter.common import base62_decode, base62_encode
  7 | from pastehunter.inputs.base_input import BasePasteSite
  8 | 
# Module-level handle to the application's 'pastehunter' logger.
logger = logging.getLogger('pastehunter')
 10 | 
 11 | 
 12 | class IxDotIoSite(BasePasteSite):
 13 |     # Yeah, yeah, I know, no regex for HTML parsing...
 14 |     # If we end up doing a lot more of this, then maybe we'll use beautifulsoup or something.
 15 |     # Capturing groups:
 16 |     # 1. Paste ID
 17 |     # 2. Timestamp
 18 |     _ITEM_ID_RE: Pattern = re.compile('<div class="t">[\\sa-zA-Z0-9]+'
 19 |                                          '<a href="/(.*?)">\\[r][^\r\n]+'
 20 |                                          '\\s+@ (.*?)[\r\n]')
 21 | 
 22 |     def __init__(self, conf):
 23 |         self.conf = conf
 24 |         self.site = "ix.io"
 25 |         url_main = "http://" + self.site
 26 |         self.url_recent = url_main + "/user/"
 27 |         self.view_pattern = url_main + "/{}/"
 28 |         self.raw_pattern = url_main + "/{}"
 29 |         self.url = None
 30 | 
 31 |     def remap_raw_item(self, raw_item: [str, Dict]) -> Dict[str, Any]:
 32 |         pid = raw_item['pid']
 33 |         paste_data = {
 34 |             # at a
 35 |             'filename': str(pid),
 36 |             'confname': 'ixio',
 37 |             'pastesite': self.site,
 38 |             'pasteid': pid,
 39 |         }
 40 |         # Timezone is UTC/Zulu
 41 |         date = datetime.strptime(raw_item['date'], '%a %b %d %H:%M:%S %Y').isoformat()
 42 |         paste_data['@timestamp'] = date
 43 |         encoded_pid = self.get_paste_id(paste_data)
 44 |         paste_data['scrape_url'] = self.raw_pattern.format(encoded_pid)
 45 |         return paste_data
 46 | 
 47 |     def get_paste_for_id(self, paste_id: Any) -> str:
 48 |         self.make_request(self.raw_pattern.format(paste_id))
 49 | 
 50 |     def get_paste_id(self, paste_obj: Dict[str, Any]) -> str:
 51 |         decoded = paste_obj.get('pasteid')
 52 |         return base62_encode(decoded)
 53 | 
 54 |     def get_recent_items(self, input_history: List[str]):
 55 | 
 56 |         history = []
 57 |         paste_list = []
 58 |         try:
 59 |             recent_page = self.make_request(self.url_recent)
 60 |             item_data = self.get_data_for_page(recent_page.text)
 61 | 
 62 |             for val in item_data:
 63 |                 # Track paste ids to prevent dupes
 64 |                 pid = val['pid']
 65 |                 history.append(pid)
 66 |                 if pid in input_history:
 67 |                     continue
 68 |                 paste_data = self.remap_raw_item(val)
 69 |                 paste_list.append(paste_data)
 70 | 
 71 |             return paste_list, history
 72 | 
 73 |         except Exception as e:
 74 |             logger.error("Unable to parse ixio items: {0}".format(e))
 75 |             return paste_list, history
 76 | 
 77 |     def get_data_for_page(self, page_data: str) -> List[Dict[str, Union[int, str]]]:
 78 |         page: List[Dict[str, Union[int, str]]] = []
 79 |         last_item_id = -1
 80 |         regex_matches = self._ITEM_ID_RE.findall(page_data)
 81 |         # We are going to reverse the order because ix pages are structured newest -> oldest, and this makes it simpler.
 82 |         regex_matches.reverse()
 83 |         for encoded_id, created_at in regex_matches:
 84 |             # Okay so the logic here is a bit tricky. Basically, ix's all user page only returns anonymous pastes
 85 |             # BUT! We can infer the paste ids that aren't present by filling in the blanks, because ix IDs are
 86 |             # incremental. So first, we base62 decode the value so we can use it as an int
 87 |             item_id = base62_decode(encoded_id)
 88 |             # Then, we check if we've seen another value. If this is our first, we can skip a lot of this logic.
 89 |             # (we probably don't want to go back and grab every ix paste historically for most use cases)
 90 |             if last_item_id == -1:
 91 |                 page.append({'pid': item_id, 'date': created_at})
 92 |                 last_item_id = item_id
 93 |             # If there has been a delta, let's traverse it.
 94 |             elif item_id - last_item_id > 1:
 95 |                 # We've already hit last_item_id so we skip that and fill in the delta
 96 |                 for i in range(last_item_id + 1, item_id + 1):
 97 |                     # Copy the created date as a best guess
 98 |                     page.append({'pid': i, 'date': created_at})
 99 |                 last_item_id = item_id
100 |             else:
101 |                 # If there's no delta, just add this nromally
102 |                 page.append({'pid': item_id, 'date': created_at})
103 |                 last_item_id = item_id
104 |         return page
105 | 
106 | 
def recent_pastes(conf, input_history):
    """Module entry point: return ``(paste_list, history)`` for ix.io.

    :param conf: Config object handed straight to ``IxDotIoSite``.
    :param input_history: Paste ids handled on earlier runs.
    """
    return IxDotIoSite(conf).get_recent_items(input_history)
112 | 


--------------------------------------------------------------------------------
/docs/installation.rst:
--------------------------------------------------------------------------------
  1 | Installation
  2 | ============
  3 | 
  4 | There are a few ways to install PasteHunter. Pip is the recommended route for stable releases.
  5 | 
  6 | 
  7 | Pip Installation
  8 | ------------------
  9 | **Note** Pip or setup.py installation will require ``gcc`` and ``wheel``.
 10 | 
 11 | Pip installation is supported for versions after 1.2.1. This can easily be done using:
 12 | 
 13 | ``pip install pastehunter``
 14 | 
 15 | You will then need to configure pastehunter. To do this, use::
 16 | 
 17 |     mkdir -p ~/.config
 18 |     wget https://raw.githubusercontent.com/kevthehermit/PasteHunter/master/settings.json.sample -O ~/.config/pastehunter.json
 19 | 
 20 | Then modify ~/.config/pastehunter.json to match your desired settings and run the project using ``pastehunter-cli``
 21 | 
 22 | Local Installation
 23 | ------------------
 24 | 
 25 | Pastehunter
 26 | ^^^^^^^^^^^
 27 | If you want to run the latest stable version grab the latest release from https://github.com/kevthehermit/PasteHunter/releases.
 28 | If you want to run the development version clone the repository or download the latest archive. 
 29 | 
 30 | Pastehunter has very few dependencies. You can install all the python libraries using the requirements.txt file and ``sudo pip3 install -r requirements.txt``
 31 | 
 32 | 
 33 | Yara
 34 | ^^^^
 35 | Yara is the scanning engine that scans each paste. Use the official documentation to install yara and the python3 library. 
 36 | https://yara.readthedocs.io/en/latest/gettingstarted.html#compiling-and-installing-yara
 37 | 
 38 | All yara rules are stored in the YaraRules directory. An index.yar file is created at run time that includes all additional yar files in this directory. 
 39 | To add or remove yara rules, simply add or remove the rule file from this directory. 
 40 | 
 41 | 
 42 | 
 43 | Elastic Search
 44 | ^^^^^^^^^^^^^^
 45 | If you want to use the elastic search output module you will need to install elastic search. Pastehunter has been tested with version 6.x of Elasticsearch.
 46 | To install, follow the official directions on https://www.elastic.co/guide/en/elasticsearch/reference/current/deb.html.
 47 | 
 48 | You will also need the elasticsearch python library which can be installed using ``sudo pip3 install elasticsearch``.
 49 | 
 50 | Kibana
 51 | ^^^^^^
 52 | Kibana is the frontend search to Elasticsearch. If you have enabled the Elasticsearch module you probably want this. 
 53 | To install, follow the official directions on https://www.elastic.co/guide/en/kibana/current/deb.html.
 54 | 
 55 | 
 56 | 
 57 | Docker Installation
 58 | -------------------
 59 | You will find a Dockerfile that will build the latest stable version of PasteHunter. 
 60 | 
 61 | 
 62 | This can be used with the included docker-compose.yml file. 
 63 | A sample podspec for Kubernetes is coming soon. 
 64 | 
 65 | 
 66 | Configuration
 67 | -------------
 68 | **See** :doc:`this page <./migrating>` **for help migrating configs from older versions (<1.2.1)**
 69 | 
 70 | Before you can get up and running you will need to set up the basic config. 
 71 | Copy the settings.json.sample to settings.json and edit with your editor of choice. 
 72 | 
 73 | Yara
 74 | ^^^^
 75 | 
 76 | - **rule_path**: defaults to the YaraRules directory in the PasteHunter root.
 77 | - **blacklist**: If set to true, any pastes that match this rule will be ignored.
 78 | - **test_rules**: Occasionally I release some early test rules. Set this to ``true`` to use them.
 79 | 
 80 | log
 81 | ^^^
 82 | 
 83 | Logging for the application is configured here. 
 84 | 
 85 | - **log_to_file**: true or false, default is stdout.
 86 | - **log_file**: filename to log out to.
 87 | - **logging_level**: numerical value for logging level see the table below.
 88 | - **log_path**: path on disk to write log_file to.
 89 | - **format**: python logging format string - https://docs.python.org/3/library/logging.html#formatter-objects
 90 | 
 91 | ======== =========
 92 | Level    Numerical
 93 | ======== =========
 94 | CRITICAL 50
 95 | ERROR    40
 96 | WARNING  30
 97 | INFO     20
 98 | DEBUG    10
 99 | NOTSET   0
100 | ======== =========
101 | 
102 | general
103 | ^^^^^^^
104 | 
105 | General config options here.
106 | 
107 | - **run_frequency**: Sleep delay between fetching list of inputs to download. This helps rate limits. 
108 | 
109 | 
110 | For Input, Output and Postprocess settings please refer to the relevant sections of the docs. 
111 |     
112 | 
113 | Starting
114 | --------
115 | 
116 | You can run pastehunter by calling the script by name. 
117 | 
118 | ``python3 pastehunter-cli``
119 | 
120 | Service
121 | ^^^^^^^
122 | 
123 | You can install pastehunter as a service if you're planning on running it for long periods of time. An example systemd service file is shown below.
124 | 
125 | Create a new service file ``/etc/systemd/system/pastehunter.service``
126 | 
127 | Add the following text, updating it as appropriate for your setup and paying attention to file paths and usernames::
128 | 
129 | 
130 |     [Unit]
131 |     Description=PasteHunter
132 |     
133 |     [Service]
134 |     WorkingDirectory=/opt/PasteHunter
135 |     ExecStart=/usr/bin/python3 /opt/PasteHunter/pastehunter-cli
136 |     User=localuser
137 |     Group=localuser
138 |     Restart=always
139 |     
140 |     [Install]
141 |     WantedBy=multi-user.target
142 | 
143 | 
144 | Before starting the service, ensure you have tested the pastehunter app on the command line and identified any errors. Once you're ready, update systemctl with ``systemctl daemon-reload``, enable the new service with ``systemctl enable pastehunter.service`` and start the service with ``systemctl start pastehunter``.
145 | 


--------------------------------------------------------------------------------
/docs/conf.py:
--------------------------------------------------------------------------------
  1 | # -*- coding: utf-8 -*-
  2 | #
  3 | # Configuration file for the Sphinx documentation builder.
  4 | #
  5 | # This file does only contain a selection of the most common options. For a
  6 | # full list see the documentation:
  7 | # http://www.sphinx-doc.org/en/master/config
  8 | 
  9 | # -- Path setup --------------------------------------------------------------
 10 | 
 11 | # If extensions (or modules to document with autodoc) are in another directory,
 12 | # add these directories to sys.path here. If the directory is relative to the
 13 | # documentation root, use os.path.abspath to make it absolute, like shown here.
 14 | #
 15 | # import os
 16 | # import sys
 17 | # sys.path.insert(0, os.path.abspath('.'))
 18 | 
 19 | 
 20 | # -- Project information -----------------------------------------------------
 21 | 
 22 | project = 'PasteHunter'
 23 | copyright = '2019, Kevin Breen'
 24 | author = 'Kevin Breen'
 25 | 
 26 | # The short X.Y version
 27 | version = '1.3'
 28 | # The full version, including alpha/beta/rc tags
 29 | release = '1.3.2'
 30 | 
 31 | 
 32 | # -- General configuration ---------------------------------------------------
 33 | 
 34 | # If your documentation needs a minimal Sphinx version, state it here.
 35 | #
 36 | # needs_sphinx = '1.0'
 37 | 
 38 | # Add any Sphinx extension module names here, as strings. They can be
 39 | # extensions coming with Sphinx (named 'sphinx.ext.*') or your custom
 40 | # ones.
 41 | extensions = [
 42 | ]
 43 | 
 44 | # Add any paths that contain templates here, relative to this directory.
 45 | templates_path = ['_templates']
 46 | 
 47 | # The suffix(es) of source filenames.
 48 | # You can specify multiple suffix as a list of string:
 49 | #
 50 | # source_suffix = ['.rst', '.md']
 51 | source_suffix = '.rst'
 52 | 
 53 | # The master toctree document.
 54 | master_doc = 'index'
 55 | 
 56 | # The language for content autogenerated by Sphinx. Refer to documentation
 57 | # for a list of supported languages.
 58 | #
 59 | # This is also used if you do content translation via gettext catalogs.
 60 | # Usually you set "language" from the command line for these cases.
 61 | language = None
 62 | 
 63 | # List of patterns, relative to source directory, that match files and
 64 | # directories to ignore when looking for source files.
 65 | # This pattern also affects html_static_path and html_extra_path.
 66 | exclude_patterns = ['_build', 'Thumbs.db', '.DS_Store']
 67 | 
 68 | # The name of the Pygments (syntax highlighting) style to use.
 69 | pygments_style = None
 70 | 
 71 | 
 72 | # -- Options for HTML output -------------------------------------------------
 73 | 
 74 | # The theme to use for HTML and HTML Help pages.  See the documentation for
 75 | # a list of builtin themes.
 76 | #
 77 | html_theme = 'sphinx_rtd_theme'
 78 | 
 79 | # Theme options are theme-specific and customize the look and feel of a theme
 80 | # further.  For a list of options available for each theme, see the
 81 | # documentation.
 82 | #
 83 | html_theme_options = {
 84 |     'canonical_url': '',
 85 |     #'analytics_id': 'UA-XXXXXXX-1',  #  Provided by Google in your dashboard
 86 |     'logo_only': False,
 87 |     'display_version': True,
 88 |     'prev_next_buttons_location': 'bottom',
 89 |     'style_external_links': False,
 90 |     # Toc options
 91 |     'collapse_navigation': True,
 92 |     'sticky_navigation': True,
 93 |     'navigation_depth': 4,
 94 |     'includehidden': True,
 95 |     'titles_only': False
 96 | }
 97 | 
 98 | # Add any paths that contain custom static files (such as style sheets) here,
 99 | # relative to this directory. They are copied after the builtin static files,
100 | # so a file named "default.css" will overwrite the builtin "default.css".
101 | html_static_path = ['_static']
102 | 
103 | # Custom sidebar templates, must be a dictionary that maps document names
104 | # to template names.
105 | #
106 | # The default sidebars (for documents that don't match any pattern) are
107 | # defined by theme itself.  Builtin themes are using these templates by
108 | # default: ``['localtoc.html', 'relations.html', 'sourcelink.html',
109 | # 'searchbox.html']``.
110 | #
111 | # html_sidebars = {}
112 | 
113 | 
114 | # -- Options for HTMLHelp output ---------------------------------------------
115 | 
116 | # Output file base name for HTML help builder.
117 | htmlhelp_basename = 'PasteHunterdoc'
118 | 
119 | 
120 | # -- Options for LaTeX output ------------------------------------------------
121 | 
122 | latex_elements = {
123 |     # The paper size ('letterpaper' or 'a4paper').
124 |     #
125 |     # 'papersize': 'letterpaper',
126 | 
127 |     # The font size ('10pt', '11pt' or '12pt').
128 |     #
129 |     # 'pointsize': '10pt',
130 | 
131 |     # Additional stuff for the LaTeX preamble.
132 |     #
133 |     # 'preamble': '',
134 | 
135 |     # Latex figure (float) alignment
136 |     #
137 |     # 'figure_align': 'htbp',
138 | }
139 | 
140 | # Grouping the document tree into LaTeX files. List of tuples
141 | # (source start file, target name, title,
142 | #  author, documentclass [howto, manual, or own class]).
143 | latex_documents = [
144 |     (master_doc, 'PasteHunter.tex', 'PasteHunter Documentation',
145 |      'Kevin Breen', 'manual'),
146 | ]
147 | 
148 | 
149 | # -- Options for manual page output ------------------------------------------
150 | 
151 | # One entry per manual page. List of tuples
152 | # (source start file, name, description, authors, manual section).
153 | man_pages = [
154 |     (master_doc, 'pastehunter', 'PasteHunter Documentation',
155 |      [author], 1)
156 | ]
157 | 
158 | 
159 | # -- Options for Texinfo output ----------------------------------------------
160 | 
161 | # Grouping the document tree into Texinfo files. List of tuples
162 | # (source start file, target name, title, author,
163 | #  dir menu entry, description, category)
164 | texinfo_documents = [
165 |     (master_doc, 'PasteHunter', 'PasteHunter Documentation',
166 |      author, 'PasteHunter', 'One line description of project.',
167 |      'Miscellaneous'),
168 | ]
169 | 
170 | 
171 | # -- Options for Epub output -------------------------------------------------
172 | 
173 | # Bibliographic Dublin Core info.
174 | epub_title = project
175 | 
176 | # The unique identifier of the text. This can be a ISBN number
177 | # or the project homepage.
178 | #
179 | # epub_identifier = ''
180 | 
181 | # A unique identification for the text.
182 | #
183 | # epub_uid = ''
184 | 
185 | # A list of files that should not be packed into the epub file.
186 | epub_exclude_files = ['search.html']
187 | 


--------------------------------------------------------------------------------
/pastehunter/inputs/github.py:
--------------------------------------------------------------------------------
  1 | import logging
  2 | import math
  3 | import re
  4 | from datetime import datetime
  5 | 
  6 | import fnmatch
  7 | import requests
  8 | 
# Future work/improvement that can happen here: support PR diffs, they contain a patch URL
# Set some logging options
# Shared 'pastehunter' logger; the 'requests' logger is restricted to ERROR and above.
logger = logging.getLogger('pastehunter')
logging.getLogger('requests').setLevel(logging.ERROR)

# Public events feed that recent_pastes() pages through.
api_uri = 'https://api.github.com/events'
# This event refers to a commit being pushed, and is
# probably the most significant thing we're concerned about.
event_types = ['PushEvent']
api_version = 'application/vnd.github.v3+json'  # Set Accept header to force api v3
# Important note from github:
# 'We delay the public events feed by five minutes, which means the most recent event returned by the public events API actually occurred at least five minutes ago.'

# Beware, git diffs can sometimes be very large files, including binaries and zips.
# NOTE(review): diff_size_limit is not referenced by any function in this module.
#                MB    KB     B
diff_size_limit = 500 * 1000 * 1000

# Captures the path segment that follows '/blob/' in a file's blob_url;
# used by get_blob_hash() to derive a paste id.
blob_hash_pattern = re.compile('https://github.com/.*/blob/(.*?)/.*')
 27 | 
 28 | 
 29 | def _make_request(url, headers):
 30 |     req = requests.get(url, headers=headers)
 31 |     reset_date = datetime.utcfromtimestamp(float(req.headers['X-RateLimit-Reset'])).isoformat()
 32 |     logger.info('Remaining Limit: {0}. Resets at {1}'.format(req.headers['X-RateLimit-Remaining'],
 33 |                                                               reset_date))
 34 | 
 35 |     if req.status_code == 200:
 36 |         return req.json()
 37 | 
 38 |     if req.status_code == 401:
 39 |         logger.error('Auth Failed')
 40 |         return None
 41 | 
 42 |     elif req.status_code == 403:
 43 |         logger.error('Login Attempts Exceeded')
 44 |         return None
 45 | 
 46 | def get_blob_hash(file_dict):
 47 |     blob_url = file_dict.get('blob_url')
 48 |     return blob_hash_pattern.findall(blob_url)[0]
 49 | 
 50 | def recent_pastes(conf, input_history):
 51 |     oauth_token = conf['inputs']['github']['api_token']
 52 |     conf_limit = conf['inputs']['github']['api_limit']
 53 |     gh_limit = min(conf_limit, 300)
 54 |     # From GitHub Docs (https://developer.github.com/v3/activity/events/#list-public-events):
 55 |     # Events support pagination, however the per_page option is unsupported. The fixed page size is 30 items. Fetching up to ten pages is supported, for a total of 300 events.
 56 |     # We modify this to be 100 per page, but the limit is still 300.
 57 |     if gh_limit != conf_limit:
 58 |         logger.warning('gh_limit exceeds github items returned from public feed. Limiting to 300.')
 59 |     headers = {'user-agent': 'PasteHunter',
 60 |                'Accept': api_version,
 61 |                'Authorization': 'token {0}'.format(oauth_token)}
 62 | 
 63 |     # calculate number of pages
 64 |     page_count = int(math.ceil(gh_limit / 100))
 65 | 
 66 |     result_pages = []
 67 |     history = []
 68 |     paste_list = []
 69 | 
 70 |     gh_file_blacklist = conf['inputs']['github']['file_blacklist']
 71 |     gh_user_blacklist = conf['inputs']['github']['user_blacklist']
 72 |     ignore_bots = conf['inputs']['github']['ignore_bots']
 73 | 
 74 |     try:
 75 |         # Get the required amount of entries via pagination
 76 |         for page_num in range(1, page_count + 1):
 77 |             url = '{0}?page={1}&per_page=100'.format(api_uri, page_num)
 78 |             logger.debug('Fetching page: {0}'.format(page_num))
 79 |             req = _make_request(url, headers)
 80 |             if req is not None:
 81 |                 result_pages.append(req)
 82 | 
 83 |         # Parse results
 84 | 
 85 |         for page in result_pages:
 86 |             for event_meta in page:
 87 |                 # Track paste ids to prevent dupes
 88 |                 event_id = event_meta['id']
 89 |                 history.append(event_id)
 90 |                 if event_id in input_history:
 91 |                     continue
 92 |                 if event_meta['type'] not in event_types:
 93 |                     logger.debug('Skipping event {} due to unwanted type "{}"'.format(event_id, event_meta['type']))
 94 |                 # Actor may have been deleted or changed
 95 |                 if 'actor' in event_meta:
 96 |                     # If the username is None, this will return false, while event_meta['login'] would error.
 97 |                     if event_meta.get('actor').get('login') in gh_user_blacklist:
 98 |                         logger.info('Blacklisting GitHub event from user: {0}'.format(event_meta.get('login')))
 99 |                         continue
100 |                     login = event_meta.get('actor').get('login')
101 |                     if ignore_bots and login and login.endswith("[bot]"):
102 |                         logger.info('Ignoring GitHub event from bot user: {}'.format(login))
103 |                         continue
104 | 
105 |                 payload = event_meta.get('payload')
106 |                 if not 'commits' in payload:
107 |                     # Debug, because this is high output
108 |                     logger.debug('Skipping event {} due to no commits.'.format(event_id))
109 |                     continue
110 |                 for commit_meta in payload.get('commits'):
111 |                     commit_url = commit_meta.get('url')
112 |                     commit_data = _make_request(commit_url, headers)
113 |                     if not commit_data:
114 |                         logger.info('No data returned for url {}. Skipping...'.format(commit_url))
115 |                         continue
116 |                     if commit_data.get('committer') and commit_data.get('committer').get('login') in gh_user_blacklist:
117 |                         logger.info('Blacklisting GitHub event from user: {0}'.format(event_meta['owner']['login']))
118 |                         continue
119 |                     for file_obj in commit_data.get('files'):
120 |                         is_blacklisted = False
121 |                         file_path = file_obj.get('filename')
122 |                         for pattern in gh_file_blacklist:
123 |                             if fnmatch.fnmatch(file_path, pattern):
124 |                                 logger.info('Blacklisting file {0} from event {1} (matched pattern "{2}")'.format(file_path, event_id, pattern))
125 |                                 is_blacklisted = True
126 |                                 break
127 | 
128 |                         if is_blacklisted:
129 |                             continue
130 | 
131 |                         github_data = file_obj
132 |                         github_data['confname'] = 'github'
133 |                         github_data['@timestamp'] = event_meta['created_at']
134 |                         github_data['pasteid'] = get_blob_hash(file_obj) or event_id
135 |                         github_data['user'] = event_meta.get('actor').get('login')
136 |                         github_data['pastesite'] = 'github.com'
137 |                         github_data['scrape_url'] = file_obj.get('raw_url')
138 |                         # remove some original keys just to keep it a bit cleaner
139 |                         del github_data['raw_url']
140 |                         paste_list.append(github_data)
141 | 
142 |         # Return results and history
143 |         return paste_list, history
144 |     except Exception as e:
145 |         logger.exception('Unable to parse paste results: {0}'.format(e), e)
146 |         return paste_list, history
147 | 


--------------------------------------------------------------------------------
/settings.json.sample:
--------------------------------------------------------------------------------
  1 | {
  2 |   "inputs": {
  3 |     "pastebin":{
  4 |       "enabled": true,
  5 |       "module": "pastehunter.inputs.pastebin",
  6 |       "api_scrape": "https://scrape.pastebin.com/api_scraping.php",
  7 |       "api_raw": "https://scrape.pastebin.com/api_scrape_item.php?i=",
  8 |       "paste_limit": 100,
  9 |       "store_all": false
 10 |     },
 11 |     "ixio":{
 12 |       "enabled": false,
 13 |       "module": "pastehunter.inputs.ixio",
 14 |       "store_all": false
 15 |     },
 16 |     "dumpz": {
 17 |       "enabled": false,
 18 |       "comment": "This api endpoint has been removed.",
 19 |       "module": "pastehunter.inputs.dumpz",
 20 |       "api_scrape": "https://dumpz.org/api/recent",
 21 |       "api_raw": "https://dumpz.org/api/dump",
 22 |       "paste_limit": 100,
 23 |       "store_all": false
 24 |     },
 25 |     "gists": {
 26 |       "enabled": true,
 27 |       "module": "pastehunter.inputs.gists",
 28 |       "api_token": "",
 29 |       "api_limit": 200,
 30 |       "store_all": false,
 31 |       "user_blacklist": [],
 32 |       "file_blacklist": ["grahamcofborg-eval-package-list", "Changed Paths"]
 33 |     },
 34 |     "github": {
 35 |       "enabled": false,
 36 |       "module": "pastehunter.inputs.github",
 37 |       "api_token": "",
 38 |       "api_limit": 300,
 39 |       "store_all": false,
 40 |       "ignore_bots": false,
 41 |       "user_blacklist": [],
 42 |       "file_blacklist": ["node_modules/*", "__pycache__/*", "*/grahamcofborg-eval-package-list", "*.lock", "*.3ds", "*.3g2", "*.3gp", "*.7z", "*.DS_Store", "*.a", "*.aac", "*.adp", "*.ai", "*.aif", "*.aiff", "*.alz", "*.ape", "*.apk", "*.ar", "*.arj", "*.asf", "*.au", "*.avi", "*.bak", "*.bh", "*.bin", "*.bk", "*.bmp", "*.btif", "*.bz2", "*.bzip2", "*.cab", "*.caf", "*.cgm", "*.class", "*.cmx", "*.cpio", "*.cr2", "*.cur", "*.dat", "*.deb", "*.dex", "*.djvu", "*.dll", "*.dmg", "*.dng", "*.doc", "*.docm", "*.docx", "*.dot", "*.dotm", "*.dra", "*.dsk", "*.dts", "*.dtshd", "*.dvb", "*.dwg", "*.dxf", "*.ecelp4800", "*.ecelp7470", "*.ecelp9600", "*.egg", "*.eol", "*.eot", "*.epub", "*.exe", "*.f4v", "*.fbs", "*.fh", "*.fla", "*.flac", "*.fli", "*.flv", "*.fpx", "*.fst", "*.fvt", "*.g3", "*.gif", "*.graffle", "*.gz", "*.gzip", "*.h261", "*.h263", "*.h264", "*.ico", "*.ief", "*.img", "*.ipa", "*.iso", "*.jar", "*.jpeg", "*.jpg", "*.jpgv", "*.jpm", "*.jxr","*.ktx", "*.lha", "*.lvp", "*.lz", "*.lzh", "*.lzma", "*.lzo", "*.m3u", "*.m4a", "*.m4v", "*.mar", "*.mdi", "*.mht", "*.mid", "*.midi", "*.mj2", "*.mka", "*.mkv", "*.mmr", "*.mng", "*.mobi", "*.mov", "*.movie", "*.mp3", "*.mp4", "*.mp4a", "*.mpeg", "*.mpg", "*.mpga", "*.mxu", "*.nef", "*.npx", "*.numbers", "*.o", "*.oga", "*.ogg", "*.ogv", "*.otf", "*.pages", "*.pbm", "*.pcx", "*.pdf", "*.pea", "*.pgm", "*.pic", "*.png", "*.pnm", "*.pot", "*.potm", "*.potx", "*.ppa", "*.ppam", "*.ppm", "*.pps", "*.ppsm", "*.ppsx", "*.ppt", "*.pptm", "*.pptx", "*.psd", "*.pya", "*.pyc", "*.pyo", "*.pyv", "*.qt", "*.rar", "*.ras", "*.raw", "*.rgb", "*.rip", "*.rlc", "*.rmf", "*.rmvb", "*.rtf", "*.rz", "*.s3m", "*.s7z", "*.scpt", "*.sgi", "*.shar", "*.sil", "*.sketch", "*.slk", "*.smv", "*.so", "*.sub", "*.swf", "*.tar", "*.tbz", "*.tbz2", "*.tga", "*.tgz", "*.thmx", "*.tif", "*.tiff", "*.tlz", "*.ttc", "*.ttf", "*.txz", "*.udf", "*.uvh", "*.uvi", "*.uvm", "*.uvp", "*.uvs", "*.uvu", "*.viv", "*.vob", "*.war", "*.wav", "*.wax", 
"*.wbmp", "*.wdp", "*.weba", "*.webm", "*.webp", "*.whl", "*.wim", "*.wm", "*.wma", "*.wmv", "*.wmx", "*.woff", "*.woff2", "*.wvx", "*.xbm", "*.xif", "*.xla", "*.xlam", "*.xls", "*.xlsb", "*.xlsm", "*.xlsx", "*.xlt", "*.xltm", "*.xltx", "*.xm", "*.xmind", "*.xpi", "*.xpm", "*.xwd", "*.xz", "*.z", "*.zip", "*.zipx", "*.css", "*.scss", "*.uasset"]
 43 |     },
 44 |     "slexy":{
 45 |       "enabled": true,
 46 |       "module": "pastehunter.inputs.slexy",
 47 |       "store_all": false,
 48 |       "api_scrape": "http://slexy.org/recent",
 49 |       "api_raw": "http://slexy.org/raw",
 50 |       "api_view": "http://slexy.org/view"
 51 |     },
 52 |     "stackexchange":{
 53 |       "enabled": false,
 54 |       "module": "pastehunter.inputs.stackexchange",
 55 |       "site_list": ["stackoverflow","serverfault", "superuser", "webapps", "webmasters", "dba"],
 56 |       "api_key": "",
 57 |       "store_filter": "!)r_ttsG0v3bE1vo3*8Ki",
 58 |       "pagesize": 100,
 59 |       "store_all": true,
 60 |       "api_scrape": "https://api.stackexchange.com/2.2/questions"
 61 |     }
 62 |   },
 63 |   "outputs": {
 64 |     "elastic_output": {
 65 |       "enabled": true,
 66 |       "module": "pastehunter.outputs.elastic_output",
 67 |       "classname": "ElasticOutput",
 68 |       "elastic_index": "paste-test",
 69 |       "elastic_host": "172.16.10.10",
 70 |       "elastic_port": 9200,
 71 |       "elastic_user": "elastic",
 72 |       "elastic_pass": "changeme",
 73 |       "elastic_ssl": false,
 74 |       "weekly_index": true
 75 |     },
 76 |     "splunk_output": {
 77 |       "enabled": false,
 78 |       "module": "pastehunter.outputs.splunk_output",
 79 |       "classname": "SplunkOutput",
 80 |       "splunk_host": "host",
 81 |       "splunk_port": 8089,
 82 |       "splunk_user": "user",
 83 |       "splunk_pass": "pass",
 84 |       "splunk_index": "paste-test",
 85 |       "splunk_sourcetype": "pastehunter",
 86 |       "store_raw": true
 87 |     },
 88 |     "json_output": {
 89 |       "enabled": false,
 90 |       "module": "pastehunter.outputs.json_output",
 91 |       "classname": "JsonOutput",
 92 |       "output_path": "logs/json/",
 93 |       "store_raw": true,
 94 |       "encode_raw": true
 95 |     },
 96 |     "csv_output": {
 97 |       "enabled": false,
 98 |       "module": "pastehunter.outputs.csv_output",
 99 |       "classname": "CSVOutput",
100 |       "output_path": "logs/csv/"
101 |     },
102 |     "syslog_output": {
103 |       "enabled": false,
104 |       "module": "pastehunter.outputs.syslog_output",
105 |       "classname": "SyslogOutput",
106 |       "host": "192.168.1.1",
107 |       "port": 514
108 |     },
109 |     "smtp_output": {
110 |       "enabled": false,
111 |       "module": "pastehunter.outputs.smtp_output",
112 |       "classname": "SMTPOutput",
113 |       "smtp_host": "smtp.server.com",
114 |       "smtp_port": 25,
115 |       "smtp_security": "starttls",
116 |       "smtp_user": "smtpusername",
117 |       "smtp_pass": "smtppassword",
118 |       "recipients" : {
119 |         "recipient_1": {
120 |           "address": "emailaddress that gets the alerts",
121 |           "rule_list": ["custom_keywords"],
122 |           "mandatory_rule_list": []
123 |         },
124 |         "recipient_2": {
125 |           "address": "emailaddress that gets the alerts",
126 |           "rule_list": [],
127 |           "mandatory_rule_list": ["keyword1", "keyword2"]
128 |         }
129 |       }
130 |     },
131 |     "slack_output": {
132 |       "enabled": false,
133 |       "module": "pastehunter.outputs.slack_output",
134 |       "classname": "SlackOutput",
135 |       "webhook_url": "",
136 |       "rule_list": ["custom_keywords"]
137 |     },
138 |     "http_output": {
139 |       "enabled": false,
140 |       "module": "pastehunter.outputs.http_output",
141 |       "classname": "HttpOutput",
142 |       "endpoint_url": "",
143 |       "headers": {},
144 |       "http_auth": false,
145 |       "http_user": "",
146 |       "http_password": "",
147 |       "ignore_fields": [],
148 |       "timezone": "Z"
149 |     },
150 |     "twilio_output": {
151 |       "enabled": false,
152 |       "module": "pastehunter.outputs.twilio_output",
153 |       "classname": "TwilioOutput",
154 |       "account_sid": "",
155 |       "auth_token": "",
156 |       "twilio_sender": "",
157 |       "recipient_list": [],
158 |       "rule_list": ["custom_keywords"]
159 |     }
160 |   },
161 |   "yara": {
162 |     "default_rules": true,
163 |     "custom_rules": "none",
164 |     "exclude_rules": [],
165 |     "blacklist": true,
166 |     "test_rules": false
167 |   },
168 |   "log": {
169 |     "log_to_file": false,
170 |     "log_file": "pastehunter",
171 |     "logging_level": 20,
172 |     "log_path": "logs",
173 |     "format": "%(asctime)s [%(threadName)-12.12s] %(levelname)s:%(message)s"
174 |   },
175 |   "general": {
176 |     "run_frequency": 300,
177 |     "process_timeout": 5
178 |   },
179 |   "sandboxes": {
180 |     "cuckoo": {
181 |       "enabled": false,
182 |       "module": "pastehunter.sandboxes.cuckoo",
183 |       "api_host": "127.0.0.1",
184 |       "api_port": 8080
185 |     },
186 |     "viper": {
187 |       "enabled": false,
188 |       "module": "pastehunter.sandboxes.viper",
189 |       "api_host": "127.0.0.1",
190 |       "api_port": 8080
191 |     }
192 |   },
193 |   "post_process": {
194 |     "post_email": {
195 |       "enabled": true,
196 |       "module": "pastehunter.postprocess.post_email",
197 |       "rule_list": ["email_filter"]
198 |     },
199 |     "post_b64": {
200 |       "enabled": true,
201 |       "module": "pastehunter.postprocess.post_b64",
202 |       "rule_list": ["b64_exe", "b64_rar", "b64_zip", "b64_gzip"]
203 |     },
204 |     "post_entropy": {
205 |       "enabled": false,
206 |       "module": "pastehunter.postprocess.post_entropy",
207 |       "rule_list": ["ALL"]
208 |     },
209 |     "post_compress": {
210 |       "enabled": false,
211 |       "module": "pastehunter.postprocess.post_compress",
212 |       "rule_list": ["ALL"]
213 |     }
214 |   }
215 | }
216 | 


--------------------------------------------------------------------------------
/test/test_ix.py:
--------------------------------------------------------------------------------
  1 | from inputs.ixio import IxDotIoSite
  2 | test_data = '''
  3 | <body>
  4 | 2CmG
  5 | <a href="/2CmG">[r]</a> <a href="/2CmG/">[h]</a>
  6 | @ Thu Oct 29 07:00:19 2020
  7 | </div>
  8 | </div>
  9 | <div class="t">
 10 | 2CmF
 11 | <a href="/2CmF">[r]</a> <a href="/2CmF/">[h]</a>
 12 | @ Thu Oct 29 07:00:14 2020
 13 | </div>
 14 | </div>
 15 | <div class="t">
 16 | 2CmE
 17 | <a href="/2CmE">[r]</a> <a href="/2CmE/">[h]</a>
 18 | @ Thu Oct 29 07:00:13 2020
 19 | </div>
 20 | </div>
 21 | <div class="t">
 22 | 2CmD
 23 | <a href="/2CmD">[r]</a> <a href="/2CmD/">[h]</a>
 24 | @ Thu Oct 29 06:47:22 2020
 25 | </div>
 26 | </div>
 27 | <div class="t">
 28 | 2CmC
 29 | <a href="/2CmC">[r]</a> <a href="/2CmC/">[h]</a>
 30 | @ Thu Oct 29 06:26:48 2020
 31 | </div>
 32 | </div>
 33 | <div class="t">
 34 | 2CmB
 35 | <a href="/2CmB">[r]</a> <a href="/2CmB/">[h]</a>
 36 | @ Thu Oct 29 06:21:48 2020
 37 | </div>
 38 | </div>
 39 | <div class="t">
 40 | 2CmA
 41 | <a href="/2CmA">[r]</a> <a href="/2CmA/">[h]</a>
 42 | @ Thu Oct 29 06:19:33 2020
 43 | </div>
 44 | </div>
 45 | <div class="t">
 46 | 2Cmz
 47 | <a href="/2Cmz">[r]</a> <a href="/2Cmz/">[h]</a>
 48 | @ Thu Oct 29 06:08:17 2020
 49 | </div>
 50 | </div>
 51 | <div class="t">
 52 | 2Cmy
 53 | <a href="/2Cmy">[r]</a> <a href="/2Cmy/">[h]</a>
 54 | @ Thu Oct 29 06:00:16 2020
 55 | </div>
 56 | </div>
 57 | <div class="t">
 58 | 2Cmx
 59 | <a href="/2Cmx">[r]</a> <a href="/2Cmx/">[h]</a>
 60 | @ Thu Oct 29 06:00:14 2020
 61 | </div>
 62 | </div>
 63 | <div class="t">
 64 | 2Cmw
 65 | <a href="/2Cmw">[r]</a> <a href="/2Cmw/">[h]</a>
 66 | @ Thu Oct 29 05:03:04 2020
 67 | </div>
 68 | </div>
 69 | <div class="t">
 70 | 2Cmv
 71 | <a href="/2Cmv">[r]</a> <a href="/2Cmv/">[h]</a>
 72 | @ Thu Oct 29 05:03:02 2020
 73 | </div>
 74 | </div>
 75 | <div class="t">
 76 | 2Cmu
 77 | <a href="/2Cmu">[r]</a> <a href="/2Cmu/">[h]</a>
 78 | @ Thu Oct 29 05:00:09 2020
 79 | </div>
 80 | </div>
 81 | <div class="t">
 82 | 2Cmt
 83 | <a href="/2Cmt">[r]</a> <a href="/2Cmt/">[h]</a>
 84 | @ Thu Oct 29 04:58:15 2020
 85 | </div>
 86 | </div>
 87 | <div class="t">
 88 | 2Cms
 89 | <a href="/2Cms">[r]</a> <a href="/2Cms/">[h]</a>
 90 | @ Thu Oct 29 04:57:39 2020
 91 | </div>
 92 | </div>
 93 | <div class="t">
 94 | 2Cmr
 95 | <a href="/2Cmr">[r]</a> <a href="/2Cmr/">[h]</a>
 96 | @ Thu Oct 29 04:57:31 2020
 97 | </div>
 98 | </div>
 99 | <div class="t">
100 | 2Cmq
101 | <a href="/2Cmq">[r]</a> <a href="/2Cmq/">[h]</a>
102 | @ Thu Oct 29 04:57:24 2020
103 | </div>
104 | </div>
105 | <div class="t">
106 | 2Cmp
107 | <a href="/2Cmp">[r]</a> <a href="/2Cmp/">[h]</a>
108 | @ Thu Oct 29 04:51:45 2020
109 | </div>
110 | </div>
111 | <div class="t">
112 | 2Cmo
113 | <a href="/2Cmo">[r]</a> <a href="/2Cmo/">[h]</a>
114 | @ Thu Oct 29 04:10:10 2020
115 | </div>
116 | </div>
117 | <div class="t">
118 | 2Cmn
119 | <a href="/2Cmn">[r]</a> <a href="/2Cmn/">[h]</a>
120 | @ Thu Oct 29 04:09:34 2020
121 | </div>
122 | </div>
123 | <div class="t">
124 | 2Cmm
125 | <a href="/2Cmm">[r]</a> <a href="/2Cmm/">[h]</a>
126 | @ Thu Oct 29 04:02:17 2020
127 | </div>
128 | </div>
129 | <div class="t">
130 | 2Cml
131 | <a href="/2Cml">[r]</a> <a href="/2Cml/">[h]</a>
132 | @ Thu Oct 29 04:00:14 2020
133 | </div>
134 | </div>
135 | <div class="t">
136 | 2Cmk
137 | <a href="/2Cmk">[r]</a> <a href="/2Cmk/">[h]</a>
138 | @ Thu Oct 29 04:00:04 2020
139 | </div>
140 | </div>
141 | <div class="t">
142 | 2Cmj
143 | <a href="/2Cmj">[r]</a> <a href="/2Cmj/">[h]</a>
144 | @ Thu Oct 29 03:58:55 2020
145 | </div>
146 | </div>
147 | <div class="t">
148 | 2Cmi
149 | <a href="/2Cmi">[r]</a> <a href="/2Cmi/">[h]</a>
150 | @ Thu Oct 29 03:57:40 2020
151 | </div>
152 | </div>
153 | <div class="t">
154 | 2Cmh
155 | <a href="/2Cmh">[r]</a> <a href="/2Cmh/">[h]</a>
156 | @ Thu Oct 29 03:50:57 2020
157 | </div>
158 | </div>
159 | <div class="t">
160 | 2Cmg
161 | <a href="/2Cmg">[r]</a> <a href="/2Cmg/">[h]</a>
162 | @ Thu Oct 29 03:42:28 2020
163 | </div>
164 | </div>
165 | <div class="t">
166 | 2Cmf
167 | <a href="/2Cmf">[r]</a> <a href="/2Cmf/">[h]</a>
168 | @ Thu Oct 29 03:40:56 2020
169 | </div>
170 | </div>
171 | <div class="t">
172 | 2Cme
173 | <a href="/2Cme">[r]</a> <a href="/2Cme/">[h]</a>
174 | @ Thu Oct 29 03:27:14 2020
175 | </div>
176 | </div>
177 | <div class="t">
178 | 2Cmd
179 | <a href="/2Cmd">[r]</a> <a href="/2Cmd/">[h]</a>
180 | @ Thu Oct 29 03:26:44 2020
181 | </div>
182 | </div>
183 | <div class="t">
184 | 2Cmc
185 | <a href="/2Cmc">[r]</a> <a href="/2Cmc/">[h]</a>
186 | @ Thu Oct 29 03:26:29 2020
187 | </div>
188 | </div>
189 | <div class="t">
190 | 2Cmb
191 | <a href="/2Cmb">[r]</a> <a href="/2Cmb/">[h]</a>
192 | @ Thu Oct 29 03:22:12 2020
193 | </div>
194 | </div>
195 | <div class="t">
196 | 2Cma
197 | <a href="/2Cma">[r]</a> <a href="/2Cma/">[h]</a>
198 | @ Thu Oct 29 03:19:14 2020
199 | </div>
200 | </div>
201 | <div class="t">
202 | 2Cm9
203 | <a href="/2Cm9">[r]</a> <a href="/2Cm9/">[h]</a>
204 | @ Thu Oct 29 03:19:00 2020
205 | </div>
206 | </div>
207 | <div class="t">
208 | 2Cm8
209 | <a href="/2Cm8">[r]</a> <a href="/2Cm8/">[h]</a>
210 | @ Thu Oct 29 03:18:46 2020
211 | </div>
212 | </div>
213 | <div class="t">
214 | 2Cm7
215 | <a href="/2Cm7">[r]</a> <a href="/2Cm7/">[h]</a>
216 | @ Thu Oct 29 03:18:05 2020
217 | </div>
218 | </div>
219 | <div class="t">
220 | 2Cm6
221 | <a href="/2Cm6">[r]</a> <a href="/2Cm6/">[h]</a>
222 | @ Thu Oct 29 03:00:16 2020
223 | </div>
224 | </div>
225 | <div class="t">
226 | 2Cm5
227 | <a href="/2Cm5">[r]</a> <a href="/2Cm5/">[h]</a>
228 | @ Thu Oct 29 02:59:56 2020
229 | </div>
230 | </div>
231 | <div class="t">
232 | 2Cm4
233 | <a href="/2Cm4">[r]</a> <a href="/2Cm4/">[h]</a>
234 | @ Thu Oct 29 02:54:27 2020
235 | </div>
236 | </div>
237 | <div class="t">
238 | 2Cm3
239 | <a href="/2Cm3">[r]</a> <a href="/2Cm3/">[h]</a>
240 | @ Thu Oct 29 02:30:04 2020
241 | </div>
242 | </div>
243 | <div class="t">
244 | 2Cm1
245 | <a href="/2Cm1">[r]</a> <a href="/2Cm1/">[h]</a>
246 | @ Thu Oct 29 02:09:03 2020
247 | </div>
248 | </div>
249 | <div class="t">
250 | 2Cm0
251 | <a href="/2Cm0">[r]</a> <a href="/2Cm0/">[h]</a>
252 | @ Thu Oct 29 02:04:08 2020
253 | </div>
254 | </div>
255 | <div class="t">
256 | 2ClZ
257 | <a href="/2ClZ">[r]</a> <a href="/2ClZ/">[h]</a>
258 | @ Thu Oct 29 02:02:27 2020
259 | </div>
260 | </div>
261 | <div class="t">
262 | 2ClY
263 | <a href="/2ClY">[r]</a> <a href="/2ClY/">[h]</a>
264 | @ Thu Oct 29 02:00:14 2020
265 | </div>
266 | </div>
267 | <div class="t">
268 | 2ClX
269 | <a href="/2ClX">[r]</a> <a href="/2ClX/">[h]</a>
270 | @ Thu Oct 29 02:00:13 2020
271 | </div>
272 | </div>
273 | <div class="t">
274 | 2ClW
275 | <a href="/2ClW">[r]</a> <a href="/2ClW/">[h]</a>
276 | @ Thu Oct 29 02:00:08 2020
277 | </div>
278 | </div>
279 | <div class="t">
280 | 2ClV
281 | <a href="/2ClV">[r]</a> <a href="/2ClV/">[h]</a>
282 | @ Thu Oct 29 01:56:47 2020
283 | </div>
284 | </div>
285 | <div class="t">
286 | 2ClU
287 | <a href="/2ClU">[r]</a> <a href="/2ClU/">[h]</a>
288 | @ Thu Oct 29 01:41:09 2020
289 | </div>
290 | </div>
291 | <div class="t">
292 | 2ClS
293 | <a href="/2ClS">[r]</a> <a href="/2ClS/">[h]</a>
294 | @ Thu Oct 29 01:30:02 2020
295 | </div>
296 | </div>
297 | <div class="t">
298 | 2ClR
299 | <a href="/2ClR">[r]</a> <a href="/2ClR/">[h]</a>
300 | @ Thu Oct 29 01:19:24 2020
301 | </div>
302 | </div>
303 | <div class="t">
304 | 2ClQ
305 | <a href="/2ClQ">[r]</a> <a href="/2ClQ/">[h]</a>
306 | @ Thu Oct 29 01:17:03 2020
307 | </div>
308 | </div>
309 | <div class="t">
310 | 2ClP
311 | <a href="/2ClP">[r]</a> <a href="/2ClP/">[h]</a>
312 | @ Thu Oct 29 01:00:13 2020
313 | </div>
314 | </div>
315 | <div class="t">
316 | 2ClO
317 | <a href="/2ClO">[r]</a> <a href="/2ClO/">[h]</a>
318 | @ Thu Oct 29 01:00:09 2020
319 | </div>
320 | </div>
321 | <div class="t">
322 | 2ClN
323 | <a href="/2ClN">[r]</a> <a href="/2ClN/">[h]</a>
324 | @ Thu Oct 29 00:46:53 2020
325 | </div>
326 | </div>
327 | <div class="t">
328 | 2ClM
329 | <a href="/2ClM">[r]</a> <a href="/2ClM/">[h]</a>
330 | @ Thu Oct 29 00:42:01 2020
331 | </div>
332 | </div>
333 | <div class="t">
334 | 2ClL
335 | <a href="/2ClL">[r]</a> <a href="/2ClL/">[h]</a>
336 | @ Thu Oct 29 00:27:03 2020
337 | </div>
338 | </div>
339 | <div class="t">
340 | 2ClK
341 | <a href="/2ClK">[r]</a> <a href="/2ClK/">[h]</a>
342 | @ Thu Oct 29 00:26:44 2020
343 | </div>
344 | </div>
345 | <div class="t">
346 | 2ClJ
347 | <a href="/2ClJ">[r]</a> <a href="/2ClJ/">[h]</a>
348 | @ Thu Oct 29 00:26:25 2020
349 | </div>
350 | </div>
351 | <div class="t">
352 | 2ClI
353 | <a href="/2ClI">[r]</a> <a href="/2ClI/">[h]</a>
354 | @ Thu Oct 29 00:26:05 2020
355 | </div>
356 | </div>
357 | <div class="t">
358 | 2ClH
359 | <a href="/2ClH">[r]</a> <a href="/2ClH/">[h]</a>
360 | @ Thu Oct 29 00:16:21 2020
361 | </div>
362 | </div>
363 | <div class="t">
364 | 2ClG
365 | <a href="/2ClG">[r]</a> <a href="/2ClG/">[h]</a>
366 | @ Thu Oct 29 00:16:07 2020
367 | </div>
368 | </div>
369 | <div class="t">
370 | 2ClF
371 | <a href="/2ClF">[r]</a> <a href="/2ClF/">[h]</a>
372 | @ Thu Oct 29 00:00:14 2020
373 | </div>
374 | </div>
375 | <div class="t">
376 | 2ClE
377 | <a href="/2ClE">[r]</a> <a href="/2ClE/">[h]</a>
378 | @ Thu Oct 29 00:00:07 2020
379 | </div>
380 | </div>
381 | <div class="t">
382 | 2ClD
383 | <a href="/2ClD">[r]</a> <a href="/2ClD/">[h]</a>
384 | @ Wed Oct 28 23:56:36 2020
385 | </div>
386 | </div>
387 | <div class="t">
388 | 2ClC
389 | <a href="/2ClC">[r]</a> <a href="/2ClC/">[h]</a>
390 | @ Wed Oct 28 23:54:07 2020
391 | </div>
392 | 2ClB
393 | <a href="/2ClB">[r]</a> <a href="/2ClB/">[h]</a>
394 | @ Wed Oct 28 23:53:07 2020
395 | </div>
396 | 2ClA
397 | <a href="/2ClA">[r]</a> <a href="/2ClA/">[h]</a>
398 | @ Wed Oct 28 23:51:55 2020
399 | </div>
400 | </div>
401 | <div class="t">
402 | 2Clz
403 | <a href="/2Clz">[r]</a> <a href="/2Clz/">[h]</a>
404 | @ Wed Oct 28 23:50:24 2020
405 | </div>
406 | </div>
407 | <div class="t">
408 | 2Cly
409 | <a href="/2Cly">[r]</a> <a href="/2Cly/">[h]</a>
410 | @ Wed Oct 28 23:44:58 2020
411 | </div>
412 | </div>
413 | <div class="t">
414 | 2Clx
415 | <a href="/2Clx">[r]</a> <a href="/2Clx/">[h]</a>
416 | @ Wed Oct 28 23:40:54 2020
417 | </div>
418 | </div>
419 | <div class="t">
420 | 2Clw
421 | <a href="/2Clw">[r]</a> <a href="/2Clw/">[h]</a>
422 | @ Wed Oct 28 23:40:13 2020
423 | </div>
424 | </div>
425 | <div class="t">
426 | 2Clv
427 | <a href="/2Clv">[r]</a> <a href="/2Clv/">[h]</a>
428 | @ Wed Oct 28 23:38:37 2020
429 | </div>
430 | </div>
431 | <div class="t">
432 | 2Clu
433 | <a href="/2Clu">[r]</a> <a href="/2Clu/">[h]</a>
434 | @ Wed Oct 28 23:37:22 2020
435 | </div>
436 | </div>
437 | <div class="t">
438 | 2Clt
439 | <a href="/2Clt">[r]</a> <a href="/2Clt/">[h]</a>
440 | @ Wed Oct 28 23:31:22 2020
441 | </div>
442 | </div>
443 | <div class="t">
444 | 2Cls
445 | <a href="/2Cls">[r]</a> <a href="/2Cls/">[h]</a>
446 | @ Wed Oct 28 23:30:27 2020
447 | </div>
448 | </div>
449 | <div class="t">
450 | 2Clr
451 | <a href="/2Clr">[r]</a> <a href="/2Clr/">[h]</a>
452 | @ Wed Oct 28 23:25:57 2020
453 | </div>
454 | </div>
455 | <div class="t">
456 | 2Clq
457 | <a href="/2Clq">[r]</a> <a href="/2Clq/">[h]</a>
458 | @ Wed Oct 28 23:25:24 2020
459 | </div>
460 | </div>
461 | <div class="t">
462 | 2Clo
463 | <a href="/2Clo">[r]</a> <a href="/2Clo/">[h]</a>
464 | @ Wed Oct 28 23:07:09 2020
465 | </div>
466 | </div>
467 | <div class="t">
468 | 2Cln
469 | <a href="/2Cln">[r]</a> <a href="/2Cln/">[h]</a>
470 | @ Wed Oct 28 23:05:48 2020
471 | </div>
472 | </div>
473 | <div class="t">
474 | 2Clm
475 | <a href="/2Clm">[r]</a> <a href="/2Clm/">[h]</a>
476 | @ Wed Oct 28 23:02:16 2020
477 | </div>
478 | </div>
479 | <div class="t">
480 | 2Cll
481 | <a href="/2Cll">[r]</a> <a href="/2Cll/">[h]</a>
482 | @ Wed Oct 28 23:00:14 2020
483 | </div>
484 | </div>
485 | <div class="t">
486 | 2Clk
487 | <a href="/2Clk">[r]</a> <a href="/2Clk/">[h]</a>
488 | @ Wed Oct 28 23:00:07 2020
489 | </div>
490 | </div>
491 | <div class="t">
492 | 2Clj
493 | <a href="/2Clj">[r]</a> <a href="/2Clj/">[h]</a>
494 | @ Wed Oct 28 22:35:28 2020
495 | </div>
496 | </div>
497 | <div class="t">
498 | 2Cli
499 | <a href="/2Cli">[r]</a> <a href="/2Cli/">[h]</a>
500 | @ Wed Oct 28 22:32:50 2020
501 | </div>
502 | </div>
503 | <div class="t">
504 | 2Clh
505 | <a href="/2Clh">[r]</a> <a href="/2Clh/">[h]</a>
506 | @ Wed Oct 28 22:27:14 2020
507 | </div>
508 | </div>
509 | <div class="t">
510 | 2Clg
511 | <a href="/2Clg">[r]</a> <a href="/2Clg/">[h]</a>
512 | @ Wed Oct 28 22:16:44 2020
513 | </div>
514 | </div>
515 | <div class="t">
516 | 2Clf
517 | <a href="/2Clf">[r]</a> <a href="/2Clf/">[h]</a>
518 | @ Wed Oct 28 22:15:30 2020
519 | </div>
520 | </div>
521 | <div class="t">
522 | 2Cle
523 | <a href="/2Cle">[r]</a> <a href="/2Cle/">[h]</a>
524 | @ Wed Oct 28 22:14:18 2020
525 | </div>
526 | </div>
527 | <div class="t">
528 | 2Cld
529 | <a href="/2Cld">[r]</a> <a href="/2Cld/">[h]</a>
530 | @ Wed Oct 28 22:13:33 2020
531 | </div>
532 | </div>
533 | <div class="t">
534 | 2Clc
535 | <a href="/2Clc">[r]</a> <a href="/2Clc/">[h]</a>
536 | @ Wed Oct 28 22:11:11 2020
537 | </div>
538 | </div>
539 | <div class="t">
540 | 2Clb
541 | <a href="/2Clb">[r]</a> <a href="/2Clb/">[h]</a>
542 | issue #15767 @ Wed Oct 28 22:09:53 2020
543 | </div>
544 | </div>
545 | <div class="t">
546 | 2Cla
547 | <a href="/2Cla">[r]</a> <a href="/2Cla/">[h]</a>
548 | @ Wed Oct 28 22:08:25 2020
549 | </div>
550 | </div>
551 | <div class="t">
552 | 2Cl9
553 | <a href="/2Cl9">[r]</a> <a href="/2Cl9/">[h]</a>
554 | @ Wed Oct 28 22:04:26 2020
555 | </div>
556 | </div>
557 | <div class="t">
558 | 2Cl7
559 | <a href="/2Cl7">[r]</a> <a href="/2Cl7/">[h]</a>
560 | @ Wed Oct 28 22:00:23 2020
561 | </div>
562 | </div>
563 | <div class="t">
564 | 2Cl6
565 | <a href="/2Cl6">[r]</a> <a href="/2Cl6/">[h]</a>
566 | @ Wed Oct 28 22:00:13 2020
567 | </div>
568 | </div>
569 | <div class="t">
570 | 2Cl5
571 | <a href="/2Cl5">[r]</a> <a href="/2Cl5/">[h]</a>
572 | @ Wed Oct 28 22:00:09 2020
573 | </div>
574 | </div>
575 | <div class="t">
576 | 2Cl4
577 | <a href="/2Cl4">[r]</a> <a href="/2Cl4/">[h]</a>
578 | @ Wed Oct 28 21:59:27 2020
579 | </div>
580 | </div>
581 | <div class="t">
582 | 2Cl3
583 | <a href="/2Cl3">[r]</a> <a href="/2Cl3/">[h]</a>
584 | 0001-DTS-sun8i-h2-plus-orangepi-zero-added-audio-codec.patch @ Wed Oct 28 21:58:51 2020
585 | </div>
586 | </div>
587 | <div class="t">
588 | 2Cl2
589 | <a href="/2Cl2">[r]</a> <a href="/2Cl2/">[h]</a>
590 | @ Wed Oct 28 21:58:17 2020
591 | </div>
592 | </div>
593 | <div class="t">
594 | 2Cl1
595 | <a href="/2Cl1">[r]</a> <a href="/2Cl1/">[h]</a>
596 | @ Wed Oct 28 21:56:42 2020
597 | </div>
598 | </div>
599 | <div class="t">
600 | </body>
601 | '''
602 | 
603 | 
def test_page_items():
    """Check the paste ids extracted from the sample listing in ``test_data``.

    Every item returned by ``IxDotIoSite.get_data_for_page`` must expose a
    ``'pid'`` entry, and the collected pids must equal the consecutive
    integers 624031..624133 in that order.
    """
    parsed_ids = [
        entry['pid']
        for entry in IxDotIoSite(None).get_data_for_page(test_data)
    ]
    expected_ids = list(range(624031, 624134))
    assert parsed_ids == expected_ids
608 | 


--------------------------------------------------------------------------------
/pastehunter-cli:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/python3
  2 | import errno
  3 | import hashlib
  4 | import importlib
  5 | import json
  6 | import logging
  7 | import multiprocessing
  8 | import os
  9 | import signal
 10 | import sys
 11 | import time
 12 | from io import BytesIO
 13 | from logging import handlers
 14 | from time import sleep
 15 | from urllib.parse import unquote_plus
 16 | 
 17 | import requests
 18 | import yara
 19 | import pastehunter
 20 | from pastehunter.common import parse_config
 21 | 
 22 | VERSION = '1.4.0'
 23 | 
 24 | # Decided not to make this configurable as it currently really only applies to pastebin but may change in functionality later.
 25 | # If someone would like this as a config key, please feel free to open an issue or a PR :)
 26 | # TODO: @Plazmaz
 27 | MAX_ITEM_RETRIES = 5
 28 | 
 29 | # Setup Default logging
 30 | root = logging.getLogger()
 31 | ch = logging.StreamHandler()
 32 | ch.setLevel(logging.DEBUG)
 33 | formatter = logging.Formatter('%(levelname)s:%(filename)s:%(message)s')
 34 | ch.setFormatter(formatter)
 35 | root.addHandler(ch)
 36 | 
 37 | logger = logging.getLogger('pastehunter')
 38 | logger.setLevel(logging.INFO)
 39 | 
 40 | # Version info
 41 | logger.info("Starting PasteHunter Version: {}".format(VERSION))
 42 | 
 43 | # Parse the config file
 44 | logger.info("Reading Configs")
 45 | conf = parse_config()
 46 | 
 47 | # If the config failed to parse
 48 | if not conf:
 49 |     sys.exit()
 50 | 
 51 | 
 52 | class TimeoutError(Exception):
 53 |     pass
 54 | 
 55 | 
 56 | class timeout:
 57 |     def __init__(self, seconds=1, error_message='Timeout'):
 58 |         self.seconds = seconds
 59 |         self.error_message = error_message
 60 | 
 61 |     def handle_timeout(self, signum, frame):
 62 |         raise TimeoutError("Process timeout: {0}".format(self.error_message))
 63 | 
 64 |     def __enter__(self):
 65 |         signal.signal(signal.SIGALRM, self.handle_timeout)
 66 |         signal.alarm(self.seconds)
 67 | 
 68 |     def __exit__(self, type, value, traceback):
 69 |         signal.alarm(0)
 70 | 
 71 | 
 72 | # Set up the log file
 73 | if "log" in conf and conf["log"]["log_to_file"]:
 74 |     if conf["log"]["log_path"] != "":
 75 |         logfile = "{0}/{1}.log".format(conf["log"]["log_path"], conf["log"]["log_file"])
 76 |         # Assure directory exists
 77 |         try:
 78 |             os.makedirs(conf["log"]["log_path"], exist_ok=True)  # Python>3.2
 79 |         except TypeError:
 80 |             try:
 81 |                 os.makedirs(conf["log"]["log_path"])
 82 |             except OSError as exc:  # Python >2.5
 83 |                 if exc.errno == errno.EEXIST and os.path.isdir(conf["log"]["log_path"]):
 84 |                     pass
 85 |                 else:
 86 |                     logger.error("Can not create log file {0}: {1}".format(conf["log"]["log_path"], exc))
 87 |     else:
 88 |         logfile = "{0}.log".format(conf["log"]["log_file"])
 89 |     fileHandler = handlers.RotatingFileHandler(logfile, mode='a+', maxBytes=(1048576 * 5), backupCount=7)
 90 |     if conf["log"]["format"] != "":
 91 |         fileFormatter = logging.Formatter("{0}".format(conf["log"]["format"]))
 92 |         fileHandler.setFormatter(fileFormatter)
 93 |     else:
 94 |         fileHandler.setFormatter(formatter)
 95 |     fileHandler.setLevel(conf["log"]["logging_level"])
 96 |     logger.addHandler(fileHandler)
 97 |     logger.info("Enabled Log File: {0}".format(logfile))
 98 | else:
 99 |     logger.info("Logging to file disabled.")
100 | 
101 | # Override Log level if needed
102 | if "logging_level" in conf["log"]:
103 |     log_level = conf["log"]["logging_level"]
104 | elif "logging_level" in conf["general"]:
105 |     # For old configs
106 |     log_level = conf["general"]["logging_level"]
107 | else:
108 |     # For older configs
109 |     logger.error("Log Level not in config file. Update your base config file!")
110 |     log_level = 20
111 | 
112 | logger.info("Setting Log Level to {0}".format(log_level))
113 | logging.getLogger('requests').setLevel(log_level)
114 | logging.getLogger('elasticsearch').setLevel(log_level)
115 | logging.getLogger('pastehunter').setLevel(log_level)
116 | 
# Configure Inputs: collect the module path of every enabled input so the
# main loop can import and poll them.
logger.info("Configure Inputs")
input_list = []
for source_name, source_conf in conf["inputs"].items():
    if not source_conf["enabled"]:
        continue
    input_list.append(source_conf["module"])
    logger.info("Enabled Input: {0}".format(source_name))

# Configure Outputs: import each enabled output module and keep one instance
# of its configured class.
logger.info("Configure Outputs")
outputs = []
for sink_name, sink_conf in conf["outputs"].items():
    if not sink_conf["enabled"]:
        continue
    logger.info("Enabled Output: {0}".format(sink_name))
    sink_module = importlib.import_module(sink_conf["module"])
    sink_class = getattr(sink_module, sink_conf["classname"])
    outputs.append(sink_class())
135 | 
136 | 
def yara_index(default_rules, custom_rules, exclude_rules, blacklist, test_rules):
    """Build the ``namespace -> rule file path`` mapping used to compile rules.

    Args:
        default_rules: directory holding the bundled rules, or a falsy value
            to skip them.
        custom_rules: directory holding user rules, or a falsy value to skip
            them. Every entry in this directory is added.
        exclude_rules: container of file names to leave out of the default
            rules.
        blacklist: when falsy, ``blacklist.yar`` is left out.
        test_rules: when falsy, ``test_rules.yar`` is left out.

    Returns:
        dict mapping ``'namespace<N>'`` (N counting up from 0 in listing
        order) to the path of each selected rule file.
    """
    # Default rule files that are only loaded when their flag is set, together
    # with the message logged when they are enabled.
    gated = {
        'blacklist.yar': (blacklist, "Enable Blacklist Rules"),
        'test_rules.yar': (test_rules, "Enable Test Rules"),
    }

    index = {}

    if default_rules:
        for rule_file in os.listdir(default_rules):
            if rule_file in exclude_rules:
                continue
            if rule_file in gated:
                enabled, notice = gated[rule_file]
                if not enabled:
                    continue
                logger.info(notice)
            # Keys are unique, so the current size is the next namespace number.
            index['namespace{0}'.format(len(index))] = os.path.join(default_rules, rule_file)
            logger.info("Adding rules from {0}".format(rule_file))

    if custom_rules:
        for rule_file in os.listdir(custom_rules):
            index['namespace{0}'.format(len(index))] = os.path.join(custom_rules, rule_file)
            logger.info("Adding custom rules from {0}".format(rule_file))

    return index
163 | 
164 | 
def paste_scanner(paste_data, rules_buff):
    """Fetch one paste, scan it with yara and pass matches to the outputs.

    Steps: load the compiled rules from ``rules_buff``, obtain the raw paste
    text, run the yara match, enrich ``paste_data`` with hashes/size/rule
    names, run enabled post-process modules and finally store the paste on
    every configured output when there is at least one result.

    Args:
        paste_data: dict describing the paste. Keys read here are
            ``'pastesite'``, ``'pasteid'``, ``'confname'``, ``'scrape_url'``
            (or ``'body'`` for stackexchange sites) and the optional
            ``'filename'``. The dict is mutated: ``'MD5'``, ``'SHA256'``,
            ``'raw_paste'``, ``'YaraRule'`` and ``'size'`` are added and
            ``'body'``/``'confname'`` are removed.
        rules_buff: seekable binary buffer containing compiled yara rules.

    Returns:
        True when the paste was processed (or blacklisted while ``store_all``
        is set); False when the yara scan failed, the paste was blacklisted,
        the process timeout fired or an unexpected error occurred.
    """
    # Grab yara rules from passed buffer; rewind first because the buffer is
    # reused for every job.
    rules_buff.seek(0)
    rules = yara.load(file=rules_buff)
    try:
        with timeout(seconds=conf['general']['process_timeout']):
            # Start a timer
            start_time = time.time()
            paste_site = paste_data['pastesite']
            logger.debug("Found New {0} paste {1}".format(paste_site, paste_data['pasteid']))
            # get raw paste and hash them
            try:

                # Stack questions dont have a raw endpoint
                if ('stackexchange' in conf['inputs']) and (paste_site in conf['inputs']['stackexchange']['site_list']):
                    # The body is already included in the first request so we do not need a second call to the API.

                    # Unescape the code block strings in the json body.
                    raw_body = paste_data['body']
                    raw_paste_data = unquote_plus(raw_body)

                    # now remove the old body key as we dont need it any more
                    del paste_data['body']

                else:
                    raw_paste_uri = paste_data['scrape_url']
                    if not raw_paste_uri:
                        logger.info('Unable to retrieve paste, no uri found.')
                        logger.debug(json.dumps(paste_data))
                        raw_paste_data = ""
                    else:
                        raw_paste_data = ""
                        headers = {}
                        if paste_site == 'slexy.org':
                            headers['User-Agent'] = 'PasteHunter'

                        attempt_count = 0
                        while attempt_count < MAX_ITEM_RETRIES:
                            attempt_count += 1
                            req = requests.get(raw_paste_uri, headers=headers)
                            if req.status_code == 200:
                                raw_paste_data = req.text
                                if attempt_count > 1:
                                    logger.warning('Successfully resolved 429 exception')
                                break

                            # We may want to handle other status codes in the future,
                            # for now 429 is the only code we retry for, just to avoid issues with
                            # rate limiting and hammering sites for 404s or outages
                            elif req.status_code == 429:
                                logger.warning('Encountered unexpected 429 when requesting item at %s'
                                               + ' for site "%s". Retrying (attempt %d)...', raw_paste_uri,
                                               paste_site, attempt_count + 1)
                                sleep(10)
                            else:
                                logger.error("Request returned unexpected response code %d: %s", req.status_code,
                                             req.text)
                                # Not retryable: stop instead of re-requesting
                                # the same failing URL back to back.
                                break
                        else:
                            # Only reached when every attempt was answered
                            # with 429 (the loop ended without a break).
                            logger.error("Unable to resolve 429 exception after %d retries, giving up on item %s.",
                                         MAX_ITEM_RETRIES, raw_paste_uri)

            # Let the process timeout reach the outer handler instead of
            # treating it as an empty paste.
            except TimeoutError:
                raise

            # Any fetch failure (SSL errors included) yields an empty paste.
            except Exception as e:
                logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
                raw_paste_data = ""

            # Pastebin Cache
            if raw_paste_data == "File is not ready for scraping yet. Try again in 1 minute.":
                logger.info("Paste is still cached sleeping to try again")
                sleep(45)
                # get raw paste and hash them
                raw_paste_uri = paste_data['scrape_url']
                try:
                    raw_paste_data = requests.get(raw_paste_uri).text
                except TimeoutError:
                    raise
                except Exception as e:
                    logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
                    raw_paste_data = ""

            # Process the paste data here
            try:
                # Scan with yara
                matches = rules.match(data=raw_paste_data, externals={'filename': paste_data.get('filename', '')})
            except Exception as e:
                logger.error("Unable to scan raw paste : {0} - {1}".format(paste_data['pasteid'], e))
                return False

            results = []
            for match in matches:
                # For keywords get the word from the matched string
                if match.rule == 'core_keywords' or match.rule == 'custom_keywords':
                    for s in match.strings:
                        rule_match = s[1].lstrip('$')
                        if rule_match not in results:
                            results.append(rule_match)
                    results.append(str(match.rule))

                # Placeholder branch for the base64 rules; currently handled
                # exactly like any other rule.
                elif match.rule.startswith('b64'):
                    results.append(match.rule)

                # Else use the rule name
                else:
                    results.append(match.rule)

            # Store additional fields for passing on to post processing
            encoded_paste_data = raw_paste_data.encode('utf-8')
            md5 = hashlib.md5(encoded_paste_data).hexdigest()
            sha256 = hashlib.sha256(encoded_paste_data).hexdigest()
            paste_data['MD5'] = md5
            paste_data['SHA256'] = sha256
            paste_data['raw_paste'] = raw_paste_data
            # Same list object as ``results``: later appends show up here too.
            paste_data['YaraRule'] = results
            # Set the size for all pastes - This will override any size set by the source
            paste_data['size'] = len(raw_paste_data)

            # Store all OverRides other options.
            paste_site = paste_data['confname']
            store_all = conf['inputs'][paste_site]['store_all']
            # remove the confname key as its not really needed past this point
            del paste_data['confname']

            # Blacklist Check
            # If any of the blacklist rules appear then empty the result set
            blacklisted = False
            if conf['yara']['blacklist'] and 'blacklist' in results:
                results = []
                blacklisted = True
                logger.info("Blacklisted {0} paste {1}".format(paste_data['pastesite'], paste_data['pasteid']))

                # NOTE(review): both branches return before the outputs run,
                # so a blacklisted paste is never stored even with store_all
                # - confirm this is intended.
                if store_all is True:
                    results.append('no_match')
                    return True
                return False

            # Post Process

            # If post module is enabled and the paste has a matching rule.
            post_results = paste_data
            for post_process, post_values in conf["post_process"].items():
                if post_values["enabled"]:
                    if any(i in results for i in post_values["rule_list"]) or "ALL" in post_values["rule_list"]:
                        if not blacklisted:
                            logger.info("Running Post Module {0} on {1}".format(post_values["module"], paste_data["pasteid"]))
                            post_module = importlib.import_module(post_values["module"])
                            post_results = post_module.run(results,
                                                           raw_paste_data,
                                                           paste_data
                                                           )

            # Throw everything back to paste_data for ease.
            paste_data = post_results

            # If we have a result add some meta data and send to storage
            # If results is empty, ie no match, and store_all is True,
            # then append "no_match" to results. This will then force output.

            if store_all is True:
                if len(results) == 0:
                    results.append('no_match')

            if len(results) > 0:
                for output in outputs:
                    # Best effort: a failing output must not stop the others.
                    try:
                        output.store_paste(paste_data)
                    except Exception as e:
                        logger.error("Unable to store {0} to {1} with error {2}".format(paste_data["pasteid"], output, e))

            end_time = time.time()
            logger.debug("Processing Finished for {0} in {1} seconds".format(
                paste_data["pasteid"],
                (end_time - start_time)
            ))
            return True
    except TimeoutError:
        return False
    except Exception as e:
        # Use the application logger (so the log file handler sees it) and
        # report the failure explicitly.
        logger.error(e)
        return False
358 | 
359 | 
def main():
    """Compile the yara rules, then poll every enabled input forever.

    Each cycle loads ``paste_history.tmp`` (if present), asks every input
    module for its recent pastes, submits one ``paste_scanner`` job per paste
    to a 5-process pool, writes the updated history back, waits for the
    submitted jobs and sleeps for ``conf['general']['run_frequency']``
    seconds. A KeyboardInterrupt terminates the pool. The process exits if the
    rules cannot be compiled.
    """
    logger.info("Compile Yara Rules")
    try:
        if conf['yara']['default_rules']:
            pastehunter_path = pastehunter.__path__[0]
            default_rules = os.path.join(pastehunter_path, "YaraRules")
        else:
            default_rules = False

        # The literal string "none" disables custom rules.
        if conf["yara"]["custom_rules"] != "none":
            custom_rules = conf["yara"]["custom_rules"]
        else:
            custom_rules = False

        rule_files = yara_index(
            default_rules,
            custom_rules,
            conf['yara']['exclude_rules'],
            conf['yara']['blacklist'],
            conf['yara']['test_rules']
        )

        rules = yara.compile(filepaths=rule_files, externals={'filename': ''})

        # Used for sharing across processes
        rules_buff = BytesIO()
        rules.save(file=rules_buff)

    except Exception as e:
        logger.exception("Unable to Create Yara index: {0}".format(e))
        sys.exit()

    # Worker pool that runs paste_scanner jobs.
    pool = multiprocessing.Pool(processes=5)

    # Now Fill the Queue
    try:
        while True:
            queue_count = 0
            # Track only the jobs submitted in this cycle. Keeping handles
            # from earlier cycles would grow without bound and make every
            # cycle re-wait on jobs that finished long ago.
            results = []

            # Paste History
            logger.info("Populating Queue")
            if os.path.exists('paste_history.tmp'):
                with open('paste_history.tmp') as json_file:
                    paste_history = json.load(json_file)
            else:
                paste_history = {}

            for input_name in input_list:
                if input_name in paste_history:
                    input_history = paste_history[input_name]
                else:
                    input_history = []

                # One failing input must not stop the others.
                try:

                    i = importlib.import_module(input_name)
                    # Get list of recent pastes
                    logger.info("Fetching paste list from {0}".format(input_name))
                    paste_list, history = i.recent_pastes(conf, input_history)
                    for paste in paste_list:
                        # Create a new async job for the existing pool and apply it to "results"
                        results.append(pool.apply_async(paste_scanner, (paste, rules_buff)))
                        queue_count += 1
                    paste_history[input_name] = history
                except Exception as e:
                    logger.error("Unable to fetch list from {0}: {1}".format(input_name, e))

            logger.debug("Writing History")
            # Write History
            with open('paste_history.tmp', 'w') as outfile:
                json.dump(paste_history, outfile)
            logger.info("Added {0} Items to the queue".format(queue_count))

            # Wait for all work to finish
            for result in results:
                result.wait()

            # Slow it down a little
            logger.info("Sleeping for " + str(conf['general']['run_frequency']) + " Seconds")
            sleep(conf['general']['run_frequency'])

    except KeyboardInterrupt:
        logger.info("Stopping Processes")
        pool.terminate()
        pool.join()


if __name__ == '__main__':
    main()
452 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
  1 |                     GNU GENERAL PUBLIC LICENSE
  2 |                        Version 3, 29 June 2007
  3 | 
  4 |  Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
  5 |  Everyone is permitted to copy and distribute verbatim copies
  6 |  of this license document, but changing it is not allowed.
  7 | 
  8 |                             Preamble
  9 | 
 10 |   The GNU General Public License is a free, copyleft license for
 11 | software and other kinds of works.
 12 | 
 13 |   The licenses for most software and other practical works are designed
 14 | to take away your freedom to share and change the works.  By contrast,
 15 | the GNU General Public License is intended to guarantee your freedom to
 16 | share and change all versions of a program--to make sure it remains free
 17 | software for all its users.  We, the Free Software Foundation, use the
 18 | GNU General Public License for most of our software; it applies also to
 19 | any other work released this way by its authors.  You can apply it to
 20 | your programs, too.
 21 | 
 22 |   When we speak of free software, we are referring to freedom, not
 23 | price.  Our General Public Licenses are designed to make sure that you
 24 | have the freedom to distribute copies of free software (and charge for
 25 | them if you wish), that you receive source code or can get it if you
 26 | want it, that you can change the software or use pieces of it in new
 27 | free programs, and that you know you can do these things.
 28 | 
 29 |   To protect your rights, we need to prevent others from denying you
 30 | these rights or asking you to surrender the rights.  Therefore, you have
 31 | certain responsibilities if you distribute copies of the software, or if
 32 | you modify it: responsibilities to respect the freedom of others.
 33 | 
 34 |   For example, if you distribute copies of such a program, whether
 35 | gratis or for a fee, you must pass on to the recipients the same
 36 | freedoms that you received.  You must make sure that they, too, receive
 37 | or can get the source code.  And you must show them these terms so they
 38 | know their rights.
 39 | 
 40 |   Developers that use the GNU GPL protect your rights with two steps:
 41 | (1) assert copyright on the software, and (2) offer you this License
 42 | giving you legal permission to copy, distribute and/or modify it.
 43 | 
 44 |   For the developers' and authors' protection, the GPL clearly explains
 45 | that there is no warranty for this free software.  For both users' and
 46 | authors' sake, the GPL requires that modified versions be marked as
 47 | changed, so that their problems will not be attributed erroneously to
 48 | authors of previous versions.
 49 | 
 50 |   Some devices are designed to deny users access to install or run
 51 | modified versions of the software inside them, although the manufacturer
 52 | can do so.  This is fundamentally incompatible with the aim of
 53 | protecting users' freedom to change the software.  The systematic
 54 | pattern of such abuse occurs in the area of products for individuals to
 55 | use, which is precisely where it is most unacceptable.  Therefore, we
 56 | have designed this version of the GPL to prohibit the practice for those
 57 | products.  If such problems arise substantially in other domains, we
 58 | stand ready to extend this provision to those domains in future versions
 59 | of the GPL, as needed to protect the freedom of users.
 60 | 
 61 |   Finally, every program is threatened constantly by software patents.
 62 | States should not allow patents to restrict development and use of
 63 | software on general-purpose computers, but in those that do, we wish to
 64 | avoid the special danger that patents applied to a free program could
 65 | make it effectively proprietary.  To prevent this, the GPL assures that
 66 | patents cannot be used to render the program non-free.
 67 | 
 68 |   The precise terms and conditions for copying, distribution and
 69 | modification follow.
 70 | 
 71 |                        TERMS AND CONDITIONS
 72 | 
 73 |   0. Definitions.
 74 | 
 75 |   "This License" refers to version 3 of the GNU General Public License.
 76 | 
 77 |   "Copyright" also means copyright-like laws that apply to other kinds of
 78 | works, such as semiconductor masks.
 79 | 
 80 |   "The Program" refers to any copyrightable work licensed under this
 81 | License.  Each licensee is addressed as "you".  "Licensees" and
 82 | "recipients" may be individuals or organizations.
 83 | 
 84 |   To "modify" a work means to copy from or adapt all or part of the work
 85 | in a fashion requiring copyright permission, other than the making of an
 86 | exact copy.  The resulting work is called a "modified version" of the
 87 | earlier work or a work "based on" the earlier work.
 88 | 
 89 |   A "covered work" means either the unmodified Program or a work based
 90 | on the Program.
 91 | 
 92 |   To "propagate" a work means to do anything with it that, without
 93 | permission, would make you directly or secondarily liable for
 94 | infringement under applicable copyright law, except executing it on a
 95 | computer or modifying a private copy.  Propagation includes copying,
 96 | distribution (with or without modification), making available to the
 97 | public, and in some countries other activities as well.
 98 | 
 99 |   To "convey" a work means any kind of propagation that enables other
100 | parties to make or receive copies.  Mere interaction with a user through
101 | a computer network, with no transfer of a copy, is not conveying.
102 | 
103 |   An interactive user interface displays "Appropriate Legal Notices"
104 | to the extent that it includes a convenient and prominently visible
105 | feature that (1) displays an appropriate copyright notice, and (2)
106 | tells the user that there is no warranty for the work (except to the
107 | extent that warranties are provided), that licensees may convey the
108 | work under this License, and how to view a copy of this License.  If
109 | the interface presents a list of user commands or options, such as a
110 | menu, a prominent item in the list meets this criterion.
111 | 
112 |   1. Source Code.
113 | 
114 |   The "source code" for a work means the preferred form of the work
115 | for making modifications to it.  "Object code" means any non-source
116 | form of a work.
117 | 
118 |   A "Standard Interface" means an interface that either is an official
119 | standard defined by a recognized standards body, or, in the case of
120 | interfaces specified for a particular programming language, one that
121 | is widely used among developers working in that language.
122 | 
123 |   The "System Libraries" of an executable work include anything, other
124 | than the work as a whole, that (a) is included in the normal form of
125 | packaging a Major Component, but which is not part of that Major
126 | Component, and (b) serves only to enable use of the work with that
127 | Major Component, or to implement a Standard Interface for which an
128 | implementation is available to the public in source code form.  A
129 | "Major Component", in this context, means a major essential component
130 | (kernel, window system, and so on) of the specific operating system
131 | (if any) on which the executable work runs, or a compiler used to
132 | produce the work, or an object code interpreter used to run it.
133 | 
134 |   The "Corresponding Source" for a work in object code form means all
135 | the source code needed to generate, install, and (for an executable
136 | work) run the object code and to modify the work, including scripts to
137 | control those activities.  However, it does not include the work's
138 | System Libraries, or general-purpose tools or generally available free
139 | programs which are used unmodified in performing those activities but
140 | which are not part of the work.  For example, Corresponding Source
141 | includes interface definition files associated with source files for
142 | the work, and the source code for shared libraries and dynamically
143 | linked subprograms that the work is specifically designed to require,
144 | such as by intimate data communication or control flow between those
145 | subprograms and other parts of the work.
146 | 
147 |   The Corresponding Source need not include anything that users
148 | can regenerate automatically from other parts of the Corresponding
149 | Source.
150 | 
151 |   The Corresponding Source for a work in source code form is that
152 | same work.
153 | 
154 |   2. Basic Permissions.
155 | 
156 |   All rights granted under this License are granted for the term of
157 | copyright on the Program, and are irrevocable provided the stated
158 | conditions are met.  This License explicitly affirms your unlimited
159 | permission to run the unmodified Program.  The output from running a
160 | covered work is covered by this License only if the output, given its
161 | content, constitutes a covered work.  This License acknowledges your
162 | rights of fair use or other equivalent, as provided by copyright law.
163 | 
164 |   You may make, run and propagate covered works that you do not
165 | convey, without conditions so long as your license otherwise remains
166 | in force.  You may convey covered works to others for the sole purpose
167 | of having them make modifications exclusively for you, or provide you
168 | with facilities for running those works, provided that you comply with
169 | the terms of this License in conveying all material for which you do
170 | not control copyright.  Those thus making or running the covered works
171 | for you must do so exclusively on your behalf, under your direction
172 | and control, on terms that prohibit them from making any copies of
173 | your copyrighted material outside their relationship with you.
174 | 
175 |   Conveying under any other circumstances is permitted solely under
176 | the conditions stated below.  Sublicensing is not allowed; section 10
177 | makes it unnecessary.
178 | 
179 |   3. Protecting Users' Legal Rights From Anti-Circumvention Law.
180 | 
181 |   No covered work shall be deemed part of an effective technological
182 | measure under any applicable law fulfilling obligations under article
183 | 11 of the WIPO copyright treaty adopted on 20 December 1996, or
184 | similar laws prohibiting or restricting circumvention of such
185 | measures.
186 | 
187 |   When you convey a covered work, you waive any legal power to forbid
188 | circumvention of technological measures to the extent such circumvention
189 | is effected by exercising rights under this License with respect to
190 | the covered work, and you disclaim any intention to limit operation or
191 | modification of the work as a means of enforcing, against the work's
192 | users, your or third parties' legal rights to forbid circumvention of
193 | technological measures.
194 | 
195 |   4. Conveying Verbatim Copies.
196 | 
197 |   You may convey verbatim copies of the Program's source code as you
198 | receive it, in any medium, provided that you conspicuously and
199 | appropriately publish on each copy an appropriate copyright notice;
200 | keep intact all notices stating that this License and any
201 | non-permissive terms added in accord with section 7 apply to the code;
202 | keep intact all notices of the absence of any warranty; and give all
203 | recipients a copy of this License along with the Program.
204 | 
205 |   You may charge any price or no price for each copy that you convey,
206 | and you may offer support or warranty protection for a fee.
207 | 
208 |   5. Conveying Modified Source Versions.
209 | 
210 |   You may convey a work based on the Program, or the modifications to
211 | produce it from the Program, in the form of source code under the
212 | terms of section 4, provided that you also meet all of these conditions:
213 | 
214 |     a) The work must carry prominent notices stating that you modified
215 |     it, and giving a relevant date.
216 | 
217 |     b) The work must carry prominent notices stating that it is
218 |     released under this License and any conditions added under section
219 |     7.  This requirement modifies the requirement in section 4 to
220 |     "keep intact all notices".
221 | 
222 |     c) You must license the entire work, as a whole, under this
223 |     License to anyone who comes into possession of a copy.  This
224 |     License will therefore apply, along with any applicable section 7
225 |     additional terms, to the whole of the work, and all its parts,
226 |     regardless of how they are packaged.  This License gives no
227 |     permission to license the work in any other way, but it does not
228 |     invalidate such permission if you have separately received it.
229 | 
230 |     d) If the work has interactive user interfaces, each must display
231 |     Appropriate Legal Notices; however, if the Program has interactive
232 |     interfaces that do not display Appropriate Legal Notices, your
233 |     work need not make them do so.
234 | 
235 |   A compilation of a covered work with other separate and independent
236 | works, which are not by their nature extensions of the covered work,
237 | and which are not combined with it such as to form a larger program,
238 | in or on a volume of a storage or distribution medium, is called an
239 | "aggregate" if the compilation and its resulting copyright are not
240 | used to limit the access or legal rights of the compilation's users
241 | beyond what the individual works permit.  Inclusion of a covered work
242 | in an aggregate does not cause this License to apply to the other
243 | parts of the aggregate.
244 | 
245 |   6. Conveying Non-Source Forms.
246 | 
247 |   You may convey a covered work in object code form under the terms
248 | of sections 4 and 5, provided that you also convey the
249 | machine-readable Corresponding Source under the terms of this License,
250 | in one of these ways:
251 | 
252 |     a) Convey the object code in, or embodied in, a physical product
253 |     (including a physical distribution medium), accompanied by the
254 |     Corresponding Source fixed on a durable physical medium
255 |     customarily used for software interchange.
256 | 
257 |     b) Convey the object code in, or embodied in, a physical product
258 |     (including a physical distribution medium), accompanied by a
259 |     written offer, valid for at least three years and valid for as
260 |     long as you offer spare parts or customer support for that product
261 |     model, to give anyone who possesses the object code either (1) a
262 |     copy of the Corresponding Source for all the software in the
263 |     product that is covered by this License, on a durable physical
264 |     medium customarily used for software interchange, for a price no
265 |     more than your reasonable cost of physically performing this
266 |     conveying of source, or (2) access to copy the
267 |     Corresponding Source from a network server at no charge.
268 | 
269 |     c) Convey individual copies of the object code with a copy of the
270 |     written offer to provide the Corresponding Source.  This
271 |     alternative is allowed only occasionally and noncommercially, and
272 |     only if you received the object code with such an offer, in accord
273 |     with subsection 6b.
274 | 
275 |     d) Convey the object code by offering access from a designated
276 |     place (gratis or for a charge), and offer equivalent access to the
277 |     Corresponding Source in the same way through the same place at no
278 |     further charge.  You need not require recipients to copy the
279 |     Corresponding Source along with the object code.  If the place to
280 |     copy the object code is a network server, the Corresponding Source
281 |     may be on a different server (operated by you or a third party)
282 |     that supports equivalent copying facilities, provided you maintain
283 |     clear directions next to the object code saying where to find the
284 |     Corresponding Source.  Regardless of what server hosts the
285 |     Corresponding Source, you remain obligated to ensure that it is
286 |     available for as long as needed to satisfy these requirements.
287 | 
288 |     e) Convey the object code using peer-to-peer transmission, provided
289 |     you inform other peers where the object code and Corresponding
290 |     Source of the work are being offered to the general public at no
291 |     charge under subsection 6d.
292 | 
293 |   A separable portion of the object code, whose source code is excluded
294 | from the Corresponding Source as a System Library, need not be
295 | included in conveying the object code work.
296 | 
297 |   A "User Product" is either (1) a "consumer product", which means any
298 | tangible personal property which is normally used for personal, family,
299 | or household purposes, or (2) anything designed or sold for incorporation
300 | into a dwelling.  In determining whether a product is a consumer product,
301 | doubtful cases shall be resolved in favor of coverage.  For a particular
302 | product received by a particular user, "normally used" refers to a
303 | typical or common use of that class of product, regardless of the status
304 | of the particular user or of the way in which the particular user
305 | actually uses, or expects or is expected to use, the product.  A product
306 | is a consumer product regardless of whether the product has substantial
307 | commercial, industrial or non-consumer uses, unless such uses represent
308 | the only significant mode of use of the product.
309 | 
310 |   "Installation Information" for a User Product means any methods,
311 | procedures, authorization keys, or other information required to install
312 | and execute modified versions of a covered work in that User Product from
313 | a modified version of its Corresponding Source.  The information must
314 | suffice to ensure that the continued functioning of the modified object
315 | code is in no case prevented or interfered with solely because
316 | modification has been made.
317 | 
318 |   If you convey an object code work under this section in, or with, or
319 | specifically for use in, a User Product, and the conveying occurs as
320 | part of a transaction in which the right of possession and use of the
321 | User Product is transferred to the recipient in perpetuity or for a
322 | fixed term (regardless of how the transaction is characterized), the
323 | Corresponding Source conveyed under this section must be accompanied
324 | by the Installation Information.  But this requirement does not apply
325 | if neither you nor any third party retains the ability to install
326 | modified object code on the User Product (for example, the work has
327 | been installed in ROM).
328 | 
329 |   The requirement to provide Installation Information does not include a
330 | requirement to continue to provide support service, warranty, or updates
331 | for a work that has been modified or installed by the recipient, or for
332 | the User Product in which it has been modified or installed.  Access to a
333 | network may be denied when the modification itself materially and
334 | adversely affects the operation of the network or violates the rules and
335 | protocols for communication across the network.
336 | 
337 |   Corresponding Source conveyed, and Installation Information provided,
338 | in accord with this section must be in a format that is publicly
339 | documented (and with an implementation available to the public in
340 | source code form), and must require no special password or key for
341 | unpacking, reading or copying.
342 | 
343 |   7. Additional Terms.
344 | 
345 |   "Additional permissions" are terms that supplement the terms of this
346 | License by making exceptions from one or more of its conditions.
347 | Additional permissions that are applicable to the entire Program shall
348 | be treated as though they were included in this License, to the extent
349 | that they are valid under applicable law.  If additional permissions
350 | apply only to part of the Program, that part may be used separately
351 | under those permissions, but the entire Program remains governed by
352 | this License without regard to the additional permissions.
353 | 
354 |   When you convey a copy of a covered work, you may at your option
355 | remove any additional permissions from that copy, or from any part of
356 | it.  (Additional permissions may be written to require their own
357 | removal in certain cases when you modify the work.)  You may place
358 | additional permissions on material, added by you to a covered work,
359 | for which you have or can give appropriate copyright permission.
360 | 
361 |   Notwithstanding any other provision of this License, for material you
362 | add to a covered work, you may (if authorized by the copyright holders of
363 | that material) supplement the terms of this License with terms:
364 | 
365 |     a) Disclaiming warranty or limiting liability differently from the
366 |     terms of sections 15 and 16 of this License; or
367 | 
368 |     b) Requiring preservation of specified reasonable legal notices or
369 |     author attributions in that material or in the Appropriate Legal
370 |     Notices displayed by works containing it; or
371 | 
372 |     c) Prohibiting misrepresentation of the origin of that material, or
373 |     requiring that modified versions of such material be marked in
374 |     reasonable ways as different from the original version; or
375 | 
376 |     d) Limiting the use for publicity purposes of names of licensors or
377 |     authors of the material; or
378 | 
379 |     e) Declining to grant rights under trademark law for use of some
380 |     trade names, trademarks, or service marks; or
381 | 
382 |     f) Requiring indemnification of licensors and authors of that
383 |     material by anyone who conveys the material (or modified versions of
384 |     it) with contractual assumptions of liability to the recipient, for
385 |     any liability that these contractual assumptions directly impose on
386 |     those licensors and authors.
387 | 
388 |   All other non-permissive additional terms are considered "further
389 | restrictions" within the meaning of section 10.  If the Program as you
390 | received it, or any part of it, contains a notice stating that it is
391 | governed by this License along with a term that is a further
392 | restriction, you may remove that term.  If a license document contains
393 | a further restriction but permits relicensing or conveying under this
394 | License, you may add to a covered work material governed by the terms
395 | of that license document, provided that the further restriction does
396 | not survive such relicensing or conveying.
397 | 
398 |   If you add terms to a covered work in accord with this section, you
399 | must place, in the relevant source files, a statement of the
400 | additional terms that apply to those files, or a notice indicating
401 | where to find the applicable terms.
402 | 
403 |   Additional terms, permissive or non-permissive, may be stated in the
404 | form of a separately written license, or stated as exceptions;
405 | the above requirements apply either way.
406 | 
407 |   8. Termination.
408 | 
409 |   You may not propagate or modify a covered work except as expressly
410 | provided under this License.  Any attempt otherwise to propagate or
411 | modify it is void, and will automatically terminate your rights under
412 | this License (including any patent licenses granted under the third
413 | paragraph of section 11).
414 | 
415 |   However, if you cease all violation of this License, then your
416 | license from a particular copyright holder is reinstated (a)
417 | provisionally, unless and until the copyright holder explicitly and
418 | finally terminates your license, and (b) permanently, if the copyright
419 | holder fails to notify you of the violation by some reasonable means
420 | prior to 60 days after the cessation.
421 | 
422 |   Moreover, your license from a particular copyright holder is
423 | reinstated permanently if the copyright holder notifies you of the
424 | violation by some reasonable means, this is the first time you have
425 | received notice of violation of this License (for any work) from that
426 | copyright holder, and you cure the violation prior to 30 days after
427 | your receipt of the notice.
428 | 
429 |   Termination of your rights under this section does not terminate the
430 | licenses of parties who have received copies or rights from you under
431 | this License.  If your rights have been terminated and not permanently
432 | reinstated, you do not qualify to receive new licenses for the same
433 | material under section 10.
434 | 
435 |   9. Acceptance Not Required for Having Copies.
436 | 
437 |   You are not required to accept this License in order to receive or
438 | run a copy of the Program.  Ancillary propagation of a covered work
439 | occurring solely as a consequence of using peer-to-peer transmission
440 | to receive a copy likewise does not require acceptance.  However,
441 | nothing other than this License grants you permission to propagate or
442 | modify any covered work.  These actions infringe copyright if you do
443 | not accept this License.  Therefore, by modifying or propagating a
444 | covered work, you indicate your acceptance of this License to do so.
445 | 
446 |   10. Automatic Licensing of Downstream Recipients.
447 | 
448 |   Each time you convey a covered work, the recipient automatically
449 | receives a license from the original licensors, to run, modify and
450 | propagate that work, subject to this License.  You are not responsible
451 | for enforcing compliance by third parties with this License.
452 | 
453 |   An "entity transaction" is a transaction transferring control of an
454 | organization, or substantially all assets of one, or subdividing an
455 | organization, or merging organizations.  If propagation of a covered
456 | work results from an entity transaction, each party to that
457 | transaction who receives a copy of the work also receives whatever
458 | licenses to the work the party's predecessor in interest had or could
459 | give under the previous paragraph, plus a right to possession of the
460 | Corresponding Source of the work from the predecessor in interest, if
461 | the predecessor has it or can get it with reasonable efforts.
462 | 
463 |   You may not impose any further restrictions on the exercise of the
464 | rights granted or affirmed under this License.  For example, you may
465 | not impose a license fee, royalty, or other charge for exercise of
466 | rights granted under this License, and you may not initiate litigation
467 | (including a cross-claim or counterclaim in a lawsuit) alleging that
468 | any patent claim is infringed by making, using, selling, offering for
469 | sale, or importing the Program or any portion of it.
470 | 
471 |   11. Patents.
472 | 
473 |   A "contributor" is a copyright holder who authorizes use under this
474 | License of the Program or a work on which the Program is based.  The
475 | work thus licensed is called the contributor's "contributor version".
476 | 
477 |   A contributor's "essential patent claims" are all patent claims
478 | owned or controlled by the contributor, whether already acquired or
479 | hereafter acquired, that would be infringed by some manner, permitted
480 | by this License, of making, using, or selling its contributor version,
481 | but do not include claims that would be infringed only as a
482 | consequence of further modification of the contributor version.  For
483 | purposes of this definition, "control" includes the right to grant
484 | patent sublicenses in a manner consistent with the requirements of
485 | this License.
486 | 
487 |   Each contributor grants you a non-exclusive, worldwide, royalty-free
488 | patent license under the contributor's essential patent claims, to
489 | make, use, sell, offer for sale, import and otherwise run, modify and
490 | propagate the contents of its contributor version.
491 | 
492 |   In the following three paragraphs, a "patent license" is any express
493 | agreement or commitment, however denominated, not to enforce a patent
494 | (such as an express permission to practice a patent or covenant not to
495 | sue for patent infringement).  To "grant" such a patent license to a
496 | party means to make such an agreement or commitment not to enforce a
497 | patent against the party.
498 | 
499 |   If you convey a covered work, knowingly relying on a patent license,
500 | and the Corresponding Source of the work is not available for anyone
501 | to copy, free of charge and under the terms of this License, through a
502 | publicly available network server or other readily accessible means,
503 | then you must either (1) cause the Corresponding Source to be so
504 | available, or (2) arrange to deprive yourself of the benefit of the
505 | patent license for this particular work, or (3) arrange, in a manner
506 | consistent with the requirements of this License, to extend the patent
507 | license to downstream recipients.  "Knowingly relying" means you have
508 | actual knowledge that, but for the patent license, your conveying the
509 | covered work in a country, or your recipient's use of the covered work
510 | in a country, would infringe one or more identifiable patents in that
511 | country that you have reason to believe are valid.
512 | 
513 |   If, pursuant to or in connection with a single transaction or
514 | arrangement, you convey, or propagate by procuring conveyance of, a
515 | covered work, and grant a patent license to some of the parties
516 | receiving the covered work authorizing them to use, propagate, modify
517 | or convey a specific copy of the covered work, then the patent license
518 | you grant is automatically extended to all recipients of the covered
519 | work and works based on it.
520 | 
521 |   A patent license is "discriminatory" if it does not include within
522 | the scope of its coverage, prohibits the exercise of, or is
523 | conditioned on the non-exercise of one or more of the rights that are
524 | specifically granted under this License.  You may not convey a covered
525 | work if you are a party to an arrangement with a third party that is
526 | in the business of distributing software, under which you make payment
527 | to the third party based on the extent of your activity of conveying
528 | the work, and under which the third party grants, to any of the
529 | parties who would receive the covered work from you, a discriminatory
530 | patent license (a) in connection with copies of the covered work
531 | conveyed by you (or copies made from those copies), or (b) primarily
532 | for and in connection with specific products or compilations that
533 | contain the covered work, unless you entered into that arrangement,
534 | or that patent license was granted, prior to 28 March 2007.
535 | 
536 |   Nothing in this License shall be construed as excluding or limiting
537 | any implied license or other defenses to infringement that may
538 | otherwise be available to you under applicable patent law.
539 | 
540 |   12. No Surrender of Others' Freedom.
541 | 
542 |   If conditions are imposed on you (whether by court order, agreement or
543 | otherwise) that contradict the conditions of this License, they do not
544 | excuse you from the conditions of this License.  If you cannot convey a
545 | covered work so as to satisfy simultaneously your obligations under this
546 | License and any other pertinent obligations, then as a consequence you may
547 | not convey it at all.  For example, if you agree to terms that obligate you
548 | to collect a royalty for further conveying from those to whom you convey
549 | the Program, the only way you could satisfy both those terms and this
550 | License would be to refrain entirely from conveying the Program.
551 | 
552 |   13. Use with the GNU Affero General Public License.
553 | 
554 |   Notwithstanding any other provision of this License, you have
555 | permission to link or combine any covered work with a work licensed
556 | under version 3 of the GNU Affero General Public License into a single
557 | combined work, and to convey the resulting work.  The terms of this
558 | License will continue to apply to the part which is the covered work,
559 | but the special requirements of the GNU Affero General Public License,
560 | section 13, concerning interaction through a network will apply to the
561 | combination as such.
562 | 
563 |   14. Revised Versions of this License.
564 | 
565 |   The Free Software Foundation may publish revised and/or new versions of
566 | the GNU General Public License from time to time.  Such new versions will
567 | be similar in spirit to the present version, but may differ in detail to
568 | address new problems or concerns.
569 | 
570 |   Each version is given a distinguishing version number.  If the
571 | Program specifies that a certain numbered version of the GNU General
572 | Public License "or any later version" applies to it, you have the
573 | option of following the terms and conditions either of that numbered
574 | version or of any later version published by the Free Software
575 | Foundation.  If the Program does not specify a version number of the
576 | GNU General Public License, you may choose any version ever published
577 | by the Free Software Foundation.
578 | 
579 |   If the Program specifies that a proxy can decide which future
580 | versions of the GNU General Public License can be used, that proxy's
581 | public statement of acceptance of a version permanently authorizes you
582 | to choose that version for the Program.
583 | 
584 |   Later license versions may give you additional or different
585 | permissions.  However, no additional obligations are imposed on any
586 | author or copyright holder as a result of your choosing to follow a
587 | later version.
588 | 
589 |   15. Disclaimer of Warranty.
590 | 
591 |   THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
592 | APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
593 | HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
594 | OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
595 | THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
596 | PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
597 | IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
598 | ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
599 | 
600 |   16. Limitation of Liability.
601 | 
602 |   IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
603 | WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
604 | THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
605 | GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
606 | USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
607 | DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
608 | PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
609 | EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
610 | SUCH DAMAGES.
611 | 
612 |   17. Interpretation of Sections 15 and 16.
613 | 
614 |   If the disclaimer of warranty and limitation of liability provided
615 | above cannot be given local legal effect according to their terms,
616 | reviewing courts shall apply local law that most closely approximates
617 | an absolute waiver of all civil liability in connection with the
618 | Program, unless a warranty or assumption of liability accompanies a
619 | copy of the Program in return for a fee.
620 | 
621 |                      END OF TERMS AND CONDITIONS
622 | 
623 |             How to Apply These Terms to Your New Programs
624 | 
625 |   If you develop a new program, and you want it to be of the greatest
626 | possible use to the public, the best way to achieve this is to make it
627 | free software which everyone can redistribute and change under these terms.
628 | 
629 |   To do so, attach the following notices to the program.  It is safest
630 | to attach them to the start of each source file to most effectively
631 | state the exclusion of warranty; and each file should have at least
632 | the "copyright" line and a pointer to where the full notice is found.
633 | 
634 |     {one line to give the program's name and a brief idea of what it does.}
635 |     Copyright (C) {year}  {name of author}
636 | 
637 |     This program is free software: you can redistribute it and/or modify
638 |     it under the terms of the GNU General Public License as published by
639 |     the Free Software Foundation, either version 3 of the License, or
640 |     (at your option) any later version.
641 | 
642 |     This program is distributed in the hope that it will be useful,
643 |     but WITHOUT ANY WARRANTY; without even the implied warranty of
644 |     MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
645 |     GNU General Public License for more details.
646 | 
647 |     You should have received a copy of the GNU General Public License
648 |     along with this program.  If not, see <http://www.gnu.org/licenses/>.
649 | 
650 | Also add information on how to contact you by electronic and paper mail.
651 | 
652 |   If the program does terminal interaction, make it output a short
653 | notice like this when it starts in an interactive mode:
654 | 
655 |     {project}  Copyright (C) {year}  {fullname}
656 |     This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
657 |     This is free software, and you are welcome to redistribute it
658 |     under certain conditions; type `show c' for details.
659 | 
660 | The hypothetical commands `show w' and `show c' should show the appropriate
661 | parts of the General Public License.  Of course, your program's commands
662 | might be different; for a GUI interface, you would use an "about box".
663 | 
664 |   You should also get your employer (if you work as a programmer) or school,
665 | if any, to sign a "copyright disclaimer" for the program, if necessary.
666 | For more information on this, and how to apply and follow the GNU GPL, see
667 | <http://www.gnu.org/licenses/>.
668 | 
669 |   The GNU General Public License does not permit incorporating your program
670 | into proprietary programs.  If your program is a subroutine library, you
671 | may consider it more useful to permit linking proprietary applications with
672 | the library.  If this is what you want to do, use the GNU Lesser General
673 | Public License instead of this License.  But first, please read
674 | <http://www.gnu.org/philosophy/why-not-lgpl.html>.


--------------------------------------------------------------------------------