├── scumblr_spillguard
    ├── tests
    │   ├── __init__.py
    │   ├── test_spill.py
    │   ├── conftest.py
    │   └── vectors.py
    ├── exceptions.py
    ├── __init__.py
    ├── secrets.py
    ├── __about__.py
    ├── utils.py
    ├── bitbucket.py
    ├── scumblr.py
    ├── github.py
    └── handler.py
├── AUTHORS
├── things.txt
├── setup.cfg
├── package.json
├── serverless.example.yml
├── setup.py
├── README.md
└── .gitignore


/scumblr_spillguard/tests/__init__.py:
--------------------------------------------------------------------------------
1 | 


--------------------------------------------------------------------------------
/AUTHORS:
--------------------------------------------------------------------------------
1 | Jeremy jeremy@murkylabs.com
2 | Kevin kevgliss@gmail.com


--------------------------------------------------------------------------------
/things.txt:
--------------------------------------------------------------------------------
1 | aslkjfsa
2 | aslkjas
3 | salkjasf
4 | sflkjsxoxb-
5 | alksjlksa
6 | salsfakjfsa
7 | sfalkjsaflkfsa
8 | 


--------------------------------------------------------------------------------
/setup.cfg:
--------------------------------------------------------------------------------
 1 | [metadata]
 2 | description-file = README.md
 3 | 
 4 | [wheel]
 5 | universal = 0
 6 | 
 7 | [egg_info]
 8 | tag_build =
 9 | tag_date = 0
10 | tag_svn_revision = 0


--------------------------------------------------------------------------------
/scumblr_spillguard/exceptions.py:
--------------------------------------------------------------------------------
 1 | class GeneralFailure(Exception):
 2 |     pass
 3 | 
 4 | 
 5 | class AuthenticationError(GeneralFailure):
 6 |     pass
 7 | 
 8 | 
 9 | class AuthorizationError(GeneralFailure):
10 |     pass
11 | 
12 | 
13 | class ThrottledError(GeneralFailure):
14 |     pass


--------------------------------------------------------------------------------
/scumblr_spillguard/__init__.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import logging
 3 | 
 4 | logging.basicConfig()
 5 | log = logging.getLogger()  # the root logger; the package's modules import it as ``log``
 6 | log.setLevel(os.environ.get('LOG_LEVEL', 'DEBUG'))  # level name comes from LOG_LEVEL, DEBUG when unset
 7 | 
 8 | logging.getLogger('boto3').setLevel(logging.CRITICAL)  # only CRITICAL records from the AWS SDK loggers
 9 | logging.getLogger('botocore').setLevel(logging.CRITICAL)
10 | 


--------------------------------------------------------------------------------
/scumblr_spillguard/secrets.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import base64
 3 | import boto3
 4 | from scumblr_spillguard import log
 5 | 
 6 | 
 7 | def get_secret(name):
 8 |     """Retrieves secret from KMS using the name env variable."""
 9 |     log.info('Fetching secret from env var. VAR: {}'.format(name))
10 |     kms = boto3.session.Session().client("kms")
11 |     return kms.decrypt(CiphertextBlob=base64.b64decode(os.environ[name]))["Plaintext"]
12 | 


--------------------------------------------------------------------------------
/scumblr_spillguard/__about__.py:
--------------------------------------------------------------------------------
 1 | from __future__ import absolute_import, division, print_function
 2 | 
 3 | __all__ = [
 4 |     "__title__", "__summary__", "__uri__", "__version__", "__author__",
 5 |     "__email__", "__license__", "__copyright__",
 6 | ]
 7 | 
 8 | __title__ = "scumblr-spillguard"
 9 | __summary__ = ("Secret monitoring of github commits.")
10 | __uri__ = "https://github.com/Netflix-Skunkworks/scumblr-spillguard"
11 | 
12 | __version__ = "0.1.0"  # NOTE(review): package.json declares 1.0.0 - confirm which one is authoritative
13 | 
14 | __author__ = "The Scumblr developers"
15 | __email__ = "security@netflix.com"
16 | 
17 | __license__ = "Apache License, Version 2.0"
18 | __copyright__ = "Copyright 2018 {0}".format(__author__)


--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
 1 | {
 2 |   "name": "scumblr-spillguard",
 3 |   "version": "1.0.0",
 4 |   "description": "A github webhook for Scumblr",
 5 |   "main": "index.js",
 6 |   "scripts": {
 7 |     "test": "python test.py"
 8 |   },
 9 |   "repository": {
10 |     "type": "git",
11 |     "url": "git+https://github.com/Netflix-Skunkworks/scumblr-spillguard.git"
12 |   },
13 |   "keywords": [
14 |     "python",
15 |     "aws",
16 |     "lambda",
17 |     "serverless"
18 |   ],
19 |   "author": "kevgliss",
20 |   "license": "Apache",
21 |   "bugs": {
22 |     "url": "https://github.com/Netflix-Skunkworks/scumblr-spillguard/issues"
23 |   },
24 |   "homepage": "https://github.com/Netflix-Skunkworks/scumblr-spillguard#readme",
25 |   "dependencies": {
26 |     "serverless-python-requirements": "^2.2.1"
27 |   }
28 | }


--------------------------------------------------------------------------------
/serverless.example.yml:
--------------------------------------------------------------------------------
 1 | service: scumbler-spillguard
 2 | 
 3 | provider:
 4 |   name: aws
 5 |   runtime: python3.6
 6 |   memorySize: 512
 7 |   timeout: 300
 8 |   awsKmsKeyArn: <YOUR-KMS-KEY-HERE>
 9 | 
10 | functions:
11 |   githubWebhookListener:
12 |     events:
13 |       - http:
14 |           path: github
15 |           method: post
16 |     handler: scumblr_spillguard.handler.github
17 |     description: Forwards matching events to Scumblr
18 |     vpc:
19 |       securityGroupIds:
20 |         - <YOUR-SECURITY-GROUP-ID-HERE>
21 |       subnetIds:
22 |         - <YOUR-SUBNET-IDS-HERE>
23 |     environment:
24 |       ENCRYPTED_GITHUB_TOKEN: <YOUR-KMS-ENCRYPTED-GITHUB-OAUTH-TOKEN-HERE>
25 |       ENCRYPTED_SCUMBLR_KEY: <YOUR-KMS-ENCRYPTED-SCUMBLR-CLIENT-KEY-HERE>
26 |       ENCRYPTED_WEBHOOK_SECRET: <YOUR-KMS-ENCRYPTED-GITHUB-WEBHOOK-HERE>
27 |       SCUMBLR_URL: <YOUR-SCUMBLR-URL-HERE>
28 | 
29 | plugins:
30 |   - serverless-python-requirements


--------------------------------------------------------------------------------
/scumblr_spillguard/utils.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import tempfile
 3 | import ipaddress
 4 | from contextlib import contextmanager
 5 | 
 6 | from scumblr_spillguard import log
 7 | from scumblr_spillguard.exceptions import AuthorizationError
 8 | 
 9 | 
10 | def validate_ip(source_ip, whitelist):
11 |     """Determine if we are getting a request from a whitelisted ip."""
12 |     log.debug("Validating source IP")
13 |     for cidr in whitelist:
14 |         if ipaddress.IPv4Address(source_ip) in ipaddress.IPv4Network(cidr):
15 |             log.debug("{} is in {}".format(source_ip, cidr))
16 |             return True
17 |         else:
18 |             log.debug("{} is NOT in {}".format(source_ip, cidr))
19 | 
20 |     raise AuthorizationError()
21 | 
22 | 
23 | @contextmanager
24 | def mktempfile():
25 |     with tempfile.NamedTemporaryFile(delete=False) as f:
26 |         name = f.name
27 |     try:
28 |         yield name
29 |     finally:
30 |         try:
31 |             os.unlink(name)
32 |         except OSError as e:
33 |             log.debug("No file {0}".format(name))
34 | 
35 | 
36 | 
37 | 


--------------------------------------------------------------------------------
/setup.py:
--------------------------------------------------------------------------------
 1 | """
 2 | Scumblr-Spillguard
 3 | ==================
 4 | 
 5 | Processes github commit events to look for known secrets.
 6 | """
 7 | import sys
 8 | import os.path
 9 | 
10 | from setuptools import setup, find_packages
11 | 
12 | 
13 | ROOT = os.path.realpath(os.path.join(os.path.dirname(__file__)))
14 | 
15 | # When executing the setup.py, we need to be able to import ourselves; this
16 | # means that we need to add the directory containing setup.py (ROOT) to sys.path.
17 | sys.path.insert(0, ROOT)
18 | 
19 | about = {}
20 | with open(os.path.join(ROOT, "scumblr_spillguard", "__about__.py")) as f:
21 |     exec(f.read(), about)  # run __about__.py into ``about`` so the metadata below can be read from it
22 | 
23 | 
24 | install_requires = [
25 |     'requests==2.11.1',
26 |     'retrying',
27 |     'raven_python_lambda'
28 | ]
29 | 
30 | tests_require = [
31 |     'pytest',
32 |     'responses'
33 | ]
34 | 
35 | 
36 | setup(
37 |     name=about["__title__"],
38 |     version=about["__version__"],
39 |     author=about["__author__"],
40 |     author_email=about["__email__"],
41 |     url=about["__uri__"],
42 |     description=about["__summary__"],
43 |     long_description='See README.md',  # literal placeholder; the README file itself is not read here
44 |     packages=find_packages(),
45 |     include_package_data=True,
46 |     zip_safe=False,
47 |     install_requires=install_requires,
48 |     extras_require={
49 |         'tests': tests_require  # exposed as the optional "tests" extra
50 |     },
51 |     keywords=['github', 'secret_management'],
52 |     classifiers=[
53 |         'Programming Language :: Python',
54 |         'Programming Language :: Python :: 3',
55 |         'Programming Language :: Python :: 3.6',
56 |     ],
57 | )


--------------------------------------------------------------------------------
/scumblr_spillguard/tests/test_spill.py:
--------------------------------------------------------------------------------
 1 | import pytest
 2 | from scumblr_spillguard.tests.vectors import *
 3 | from scumblr_spillguard.exceptions import *
 4 | 
 5 | 
 6 | def generate_github_hmac(event):  # test helper: sign event['body'] and store the signature on the event
 7 |     import hmac
 8 |     import json
 9 |     import hashlib
10 |     from scumblr_spillguard.secrets import get_secret
11 | 
12 |     message_hmac = hmac.new(
13 |         get_secret('WEBHOOK_SECRET'),  # NOTE(review): github.authorize reads 'ENCRYPTED_WEBHOOK_SECRET' - confirm
14 |         json.dumps(event['body']).encode('utf-8'),  # NOTE(review): authorize hashes body.encode(), i.e. a str body
15 |         hashlib.sha1
16 |     )
17 | 
18 |     event['headers']['X-Hub-Signature'] = 'sha1=' + message_hmac.hexdigest()  # mutates the caller's event
19 |     return event
20 | 
21 | 
22 | def test_scumblr_get_config(mocked_env, github_scumblr_config):  # get_config should return the mocked JSON as-is
23 |     from scumblr_spillguard.scumblr import get_config
24 |     assert get_config('GithubEventAnalyzer') == GITHUB_SCUMBLR_CONFIG_RESPONSE
25 | 
26 | 
27 | def test_scumblr_send_results(mocked_env, github_scumblr_result):
28 |     from scumblr_spillguard.scumblr import send_results
29 |     assert send_results() == {}
30 | 
31 | 
32 | def test_github_validate(mocked_env):
33 |     from scumblr_spillguard.github import validate
34 |     validate(generate_github_hmac(GITHUB_APIGATEWAY_EVENT))
35 | 
36 |     with pytest.raises(AuthorizationError):
37 |         e = GITHUB_APIGATEWAY_EVENT.copy()
38 |         e['requestContext']['identity']['sourceIp'] = '192.168.1.1'
39 |         validate(e)
40 | 
41 | 
42 | def test_github_authorize(mocked_env):
43 |     from scumblr_spillguard.github import authorize
44 | 
45 |     e = generate_github_hmac(GITHUB_APIGATEWAY_EVENT)
46 |     authorize(e['body'], e['headers'], e['requestContext']['identity']['sourceIp'])
47 | 
48 | 


--------------------------------------------------------------------------------
/scumblr_spillguard/bitbucket.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import requests
 4 | from retrying import retry
 5 | 
 6 | from scumblr_spillguard import log
 7 | from scumblr_spillguard.secrets import get_secret
 8 | from scumblr_spillguard.exceptions import GeneralFailure
 9 | 
10 | 
11 | def get_rest_url(url):
12 |     """Breaks the base url of the event and reassembles them into a base rest url."""
13 |     rest = ['1.0', 'api', 'rest']
14 |     parts = url.split('/')[2:]
15 | 
16 |     for r in rest:
17 |         parts.insert(1, r)
18 | 
19 |     parts.insert(0, 'https:/')
20 |     return '/'.join(parts)
21 | 
22 | 
23 | def reconstruct_contents(lines):
24 |     return '\n'.join([l['text'] for l in lines['lines']])
25 | 
26 | 
27 | def get_file_url(url):
28 |     """Formats the file URL into something we can actually fetch."""
29 |     parts = url.split('/')[:-2]
30 |     parts.append('browse')
31 | 
32 |     sha, path = url.split('/')[-1:][0].split('#')
33 |     parts.append(path)
34 | 
35 |     url = '/'.join(parts) + '?at={}'.format(sha)
36 |     return url
37 | 
38 | 
39 | def request(url):
40 |     """Attempt to make a stash request."""
41 |     user = os.environ['BITBUCKET_USER']
42 |     password = get_secret('ENCRYPTED_BITBUCKET_PASSWORD').decode('utf-8')
43 | 
44 |     url = get_rest_url(url)
45 | 
46 |     log.debug('Bitbucket Request. Url: {} User: {}'.format(url, user))
47 |     response = requests.get(url, auth=(user, password))
48 | 
49 |     if not response.ok:
50 |         raise GeneralFailure('Request to Bitbucket failed. URL: {0}'.format(url))
51 | 
52 |     log.debug('Bitbucket Response. Status: {0} Data: {1}'.format(
53 |         response.status_code,
54 |         json.dumps(response.json(), indent=2)
55 |     ))
56 | 
57 |     return response.json()
58 | 


--------------------------------------------------------------------------------
/scumblr_spillguard/scumblr.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import json
 3 | import requests
 4 | 
 5 | from scumblr_spillguard import log
 6 | from scumblr_spillguard.utils import mktempfile
 7 | from scumblr_spillguard.secrets import get_secret
 8 | from scumblr_spillguard.exceptions import GeneralFailure
 9 | 
10 | CWD = os.path.dirname(os.path.dirname(os.path.realpath(__file__)))  # the directory that contains the package
11 | 
12 | SCUMBLR_URL = os.environ["SCUMBLR_URL"]  # required: importing this module raises KeyError when it is unset
13 | SCUMBLR_CLIENT_PATH = os.path.join(CWD, os.environ.get("SCUMBLR_CLIENT_PATH", "SCUMBLR_CLIENT.cert"))  # client cert file
14 | 
15 | 
16 | def get_config(name):
17 |     """Return the current scumblr task configuration."""
18 |     return request(
19 |         '/tasks/search?q[task_type_eq]=ScumblrTask::{0}&resolve_system_metadata=true'.format(
20 |             name
21 |         ))
22 | 
23 | 
24 | def send_results(results):
25 |     """Send analysis results back to scumblr."""
26 |     return request(
27 |         '/tasks/{task_id}/run'.format(task_id=results['task_id']), data=results)
28 | 
29 | 
30 | # TODO add retry logic here too?
31 | def request(url, data=None):
32 |     """Attempt to make a scumblr request."""
33 |     with mktempfile() as tmpfile:
34 |         with open(tmpfile, 'w') as f:
35 |             f.write(get_secret("ENCRYPTED_SCUMBLR_KEY").decode('utf-8'))
36 | 
37 |         if data:
38 |             data = json.dumps(data, indent=2)
39 |             log.debug("Scumblr Request. URL: {0} Data: {1}".format(
40 |                 url,
41 |                 data
42 |             ))
43 | 
44 |             response = requests.post(SCUMBLR_URL + url, cert=(
45 |                 SCUMBLR_CLIENT_PATH,
46 |                 tmpfile), data=data)
47 |         else:
48 |             log.debug("Scumblr Request. URL: {0}".format(
49 |                 url
50 |             ))
51 |             response = requests.get(SCUMBLR_URL + url, cert=(
52 |                 SCUMBLR_CLIENT_PATH,
53 |                 tmpfile))
54 | 
55 |     if not response.ok:
56 |         log.debug(response.content)
57 |         raise GeneralFailure("Request to Scumblr failed. URL: {0} Data: {1}".format(
58 |             url, data
59 |         ))
60 | 
61 |     log.debug("Scumblr Response. Status: {0}".format(
62 |         response.status_code,
63 |     ))
64 | 
65 |     if not data:
66 |         return response.json()
67 | 
68 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # Scumblr GithubSpillGuard
 2 | [![serverless](http://public.serverless.com/badges/v3.svg)](http://www.serverless.com)
 3 | 
 4 | A simple GitHub webhook integration. It uses Scumblr as the source of terms to check incoming commits against. If a hit is found, it sends the results to Scumblr for remediation.
 5 | 
 6 | The service depends on external packages (`requests` and `retrying`) and exposes one REST API endpoint:
 7 | 
 8 | | **Endpoint** |**Description**|
 9 | |-------|------|
10 | | `POST /github` | Analyzes github commits based on Scumblr configuration  |
11 | 
12 | 
13 | ## Serverless Setup
14 | | **Step** | **Command** |**Description**|
15 | |---|-------|------|
16 | |  1. | `npm install -g serverless` | Install Serverless CLI  |
17 | |  2. | `npm install` | Install our package and its dependencies |
18 | 
19 | 
20 | ## AWS Setup
21 | Serverless requires different IAM credentials to deploy depending on what infrastructure already exists. If you have never used Serverless before, you will need `admin` credentials to deploy this lambda.
22 | 
23 | 1. Create KMS Key
24 | 2. Create scumblr-spill-guard security group
25 | 
26 | 
27 | ## Configure serverless.yml
28 | Copy `serverless.example.yml` to `serverless.yml` and replace the placeholder values with your own.
29 | 
30 | ### KMS Encryption
31 | To encrypt your variables with your KMS key, run:
32 | 
33 | `aws kms encrypt --key-id <YOUR-KEY-ID> --plaintext fileb://ExamplePlaintextFile --output text --query CiphertextBlob`
34 | 
35 | 
36 | # Usage
37 | ## Deployment
38 | 
39 | 	sls deploy
40 | 
41 | ### Invocation
42 | 
43 | 	curl <host>/github
44 | 
45 | # Tips & Tricks
46 | 
47 | ### `help` command
48 | Just use it on anything:
49 | 
50 | 	sls  help
51 | or
52 | 
53 | 	sls <command> --help
54 | 
55 | ### `deploy function` command
56 | Deploy only one function:
57 | 
58 | 	sls deploy function -f <function-name>
59 | 
60 | ### `logs` command
61 | Tail the logs of a function:
62 | 
63 | 	sls logs -f <function-name> -t
64 | 
65 | ### `info` command
66 | Information about the service (stage, region, endpoints, functions):
67 | 
68 | 	sls info
69 | 
70 | ### `invoke` command
71 | Run a specific function with a provided input and get the logs
72 | 
73 | 	sls invoke -f <function-name> -p event.json -l
74 | 
75 | 
76 | ## Development
77 | | **Step** | **Command** |**Description**|
78 | |---|-------|------|
79 | |  1. | `mkvirtualenv posts` | Create virtual environment |
80 | |  2. | `pip install -r requirements.txt` | Install dependencies|
81 | 
82 | 
83 | # Thanks
84 | Big thanks to Jeremy for the project idea and initial implementation
85 | 
86 | 
87 | 


--------------------------------------------------------------------------------
/scumblr_spillguard/tests/conftest.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import boto3
 3 | import pytest
 4 | import responses
 5 | 
 6 | from moto import mock_kms
 7 | 
 8 | from scumblr_spillguard.tests.vectors import *
 9 | 
10 | 
11 | @pytest.fixture(scope='function')
12 | def mocked_env():
13 |     os.environ["SCUMBLR_URL"] = SCUMBLR_URL
14 |     os.environ["SCUMBLR_CERT_PATH"] = '.'
15 | 
16 |     with mock_kms():
17 |         client = boto3.client('kms')
18 |         os.environ["WEBHOOK_SECRET"] = client.encrypt(
19 |             KeyId='1234abcd-12ab-34cd-56ef-1234567890ab',
20 |             Plaintext=b'bytes'
21 |         )['CiphertextBlob'].decode('utf-8')
22 |         os.environ["GITHUB_OAUTH_TOKEN"] = client.encrypt(
23 |             KeyId='1234abcd-12ab-34cd-56ef-1234567890ac',
24 |             Plaintext=b'bytes'
25 |         )['CiphertextBlob'].decode('utf-8')
26 | 
27 |         os.environ["GITHUB_OAUTH_TOKEN"] = client.encrypt(
28 |             KeyId='1234abcd-12ab-34cd-56ef-1234567890ae',
29 |             Plaintext=b'bytes'
30 |         )['CiphertextBlob'].decode('utf-8')
31 | 
32 |         os.environ["SCUMBLR_KEY"] = client.encrypt(
33 |             KeyId='1234abcd-12ab-34cd-56ef-1234567890af',
34 |             Plaintext=b'bytes'
35 |         )['CiphertextBlob'].decode('utf-8')
36 |         yield
37 | 
38 | 
39 | @pytest.fixture(scope='function')
40 | def mocked_responses():  # yields a RequestsMock that the response fixtures below register URLs on
41 |     with responses.RequestsMock(assert_all_requests_are_fired=False) as rsps:  # unused registrations are allowed
42 |         yield rsps
43 | 
44 | 
45 | @pytest.fixture(scope='function')
46 | def github_scumblr_config(mocked_responses):
47 |     url = '{}/tasks/search?q[task_type_eq]=ScumblrTask::GithubEventAnalyzer&resolve_system_metadata=true'
48 | 
49 |     mocked_responses.add(
50 |         responses.GET,
51 |         url.format(SCUMBLR_URL),
52 |         json=GITHUB_SCUMBLR_CONFIG_RESPONSE,
53 |         status=200
54 |     )
55 | 
56 | 
57 | @pytest.fixture(scope='function')
58 | def github_scumblr_result(mocked_responses):
59 |     url = '{}/tasks/105/run'
60 | 
61 |     mocked_responses.add(
62 |         responses.POST,
63 |         url.format(SCUMBLR_URL),
64 |         json={},
65 |         status=200
66 |     )
67 | 
68 | 
69 | @pytest.fixture(scope='function')
70 | def github_blob_response(mocked_responses):
71 |     url = '{}/'
72 | 
73 |     mocked_responses.add(
74 |         responses.GET,
75 |         url.format(GITHUB_URL),
76 |         json=GITHUB_BLOB_RESPONSE,
77 |         status=200
78 |     )
79 | 
80 | 
81 | @pytest.fixture(scope='function')
82 | def github_commit_response(mocked_responses):
83 |     url = '{}'
84 |     mocked_responses.add(
85 |         responses.GET,
86 |         url.format(GITHUB_URL),
87 |         json=GITHUB_COMMIT_RESPONSE,
88 |         status=200
89 |     )
90 | 


--------------------------------------------------------------------------------
/scumblr_spillguard/tests/vectors.py:
--------------------------------------------------------------------------------
 1 | SCUMBLR_URL = 'http://scumblr.test'
 2 | GITHUB_URL = 'http://api.github.com'
 3 | 
 4 | GITHUB_SCUMBLR_CONFIG_RESPONSE = [{
 5 |     'id': 105,
 6 |     'task_type': 'ScumblrTask::GithubEventAnalyzer',
 7 |     'options': {
 8 |         'severity': 'high',
 9 |         'github_terms': {
10 |             'exec': 'ProcessUtil.getRuntime().exec',
11 |             'exec1': 'Runtime.getRuntime().exec',
12 |             'slack token': 'xoxb',
13 |             'slack token1': 'xoxp-'
14 |         }
15 |     }
16 | }]
17 | 
18 | GITHUB_SCUMBLR_RESULT = {
19 |     'task_id': 105,
20 |     'config': GITHUB_SCUMBLR_CONFIG_RESPONSE[0],
21 |     'findings': [
22 |         {
23 |             'commit_id': '779e77e65338156c35f8e053d54f696d464a32e6',
24 |             'findings': [{
25 |                 'content_urls': 'https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/contents/kevin?ref=779e77e65338156c35f8e053d54f696d464a32e6',
26 |                 'hits': ['slack token']
27 |             }]
28 |         }]
29 | }
30 | 
31 | GITHUB_APIGATEWAY_EVENT = {
32 |     'resource': '/github',
33 |     'headers': {
34 |         'X-Hub-Signature': 'sha1=6af368b8a04b7a39d6469ef4faaab2721f07f177'
35 |     },
36 |     'requestContext': {
37 |         'identity': {
38 |             'sourceIp': '192.30.252.3'
39 |         }
40 |     },
41 |     'body': {
42 |         'sha': '6af368b8a04b7a39d6469ef4faaab2721f07f177',
43 |         'size': 91,
44 |         'url': 'https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/git/blobs/6af368b8a04b7a39d6469ef4faaab2721f07f177',
45 |         'content': 'YWxrZmphc2RrbGZhanMKc2FmbGtmc2FqbGtmYXNqeG94Yi0KCmxmYXNqbGtm\nYWpmc2EKZnNhbGtzYWZqbGtmYWpzCmZzYWpmc2FrbGpmc2Fsa3NhZgp0ZXN0\nCg==\n',
46 |         'encoding': 'base64'
47 |     }
48 | }
49 | 
50 | GITHUB_COMMIT_RESPONSE = {
51 |     'sha': '779e77e65338156c35f8e053d54f696d464a32e6',
52 |     'url': 'https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/commits/779e77e65338156c35f8e053d54f696d464a32e6',
53 |     'files': [
54 |         {
55 |             'sha': '6af368b8a04b7a39d6469ef4faaab2721f07f177',
56 |             'blob_url': 'https://github.com/Netflix-Skunkworks/test-gh-spillguard/blob/779e77e65338156c35f8e053d54f696d464a32e6/kevin',
57 |             'raw_url': 'https://github.com/Netflix-Skunkworks/test-gh-spillguard/raw/779e77e65338156c35f8e053d54f696d464a32e6/kevin',
58 |             'contents_url': 'https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/contents/kevin?ref=779e77e65338156c35f8e053d54f696d464a32e6',
59 |             'patch': '@@ -4,3 +4,4 @@ saflkfsajlkfasjxoxb-\n lfasjlkfajfsa\n fsalksafjlkfajs\n fsajfsakljfsalksaf\n+test'
60 |         }
61 |     ]
62 | }
63 | 
64 | ROCKETCI_SNS_EVENT = {}
65 | 
66 | BITBUCKET_APIGATEWAY_EVENT = {}
67 | 


--------------------------------------------------------------------------------
/scumblr_spillguard/github.py:
--------------------------------------------------------------------------------
 1 | import hmac
 2 | import json
 3 | import hashlib
 4 | import requests
 5 | from retrying import retry
 6 | 
 7 | from scumblr_spillguard import log
 8 | from scumblr_spillguard.utils import validate_ip
 9 | from scumblr_spillguard.secrets import get_secret
10 | from scumblr_spillguard.exceptions import GeneralFailure, ThrottledError, AuthorizationError
11 | 
12 | 
13 | GITHUB_CIDR_WHITELIST = ['192.30.252.0/22', '185.199.108.0/22', '140.82.112.0/20']  # source ranges authorize() accepts; NOTE(review): hard-coded - confirm they are current
14 | 
15 | 
16 | def github_thottled(exception):
17 |     """We should retry if we think we can successfully complete the request within the lambda timeout."""
18 |     log.exception(exception)
19 |     return isinstance(exception, ThrottledError)
20 | 
21 | 
22 | def validate(event):
23 |     """Ensure the incoming event is a github event."""
24 |     authorize(event['body'], event['headers'], event['requestContext']['identity']['sourceIp'])
25 |     if event.get('resource') == '/github':
26 |         if event.get('requestContext'):
27 |             if event['requestContext'].get('identity'):
28 |                 if event['requestContext']['identity'].get('userAgent'):
29 |                     if event['requestContext']['identity']['userAgent'].startswith('GitHub-Hookshot'):
30 |                         return
31 | 
32 |     raise GeneralFailure('Invalid event. Event: {}'.format(event))
33 | 
34 | 
35 | def authorize(body, headers, source_ip):
36 |     """Ensures that we have a valid github webhook."""
37 |     validate_ip(source_ip, GITHUB_CIDR_WHITELIST)
38 | 
39 |     sha_name, signature = headers['X-Hub-Signature'].split('=')
40 |     if sha_name != 'sha1':
41 |         raise AuthorizationError('Signature algorithm is not SHA1')
42 | 
43 |     message_hmac = hmac.new(
44 |         get_secret('ENCRYPTED_WEBHOOK_SECRET'),
45 |         body.encode('utf-8'),
46 |         hashlib.sha1
47 |     )
48 | 
49 |     if not hmac.compare_digest(signature, message_hmac.hexdigest()):
50 |         raise AuthorizationError('Computed HMAC {} does not match signature {}'.format(message_hmac.hexdigest(), signature))
51 | 
52 |     log.debug('Computed HMAC {} matches signature {}'.format(message_hmac.hexdigest(), signature))
53 | 
54 | 
55 | @retry(retry_on_exception=github_thottled, wait_random_min=1000, wait_random_max=10000)
56 | def request(url):
57 |     """Attempt to make a Github request."""
58 |     params = {'access_token': get_secret('ENCRYPTED_GITHUB_TOKEN')}
59 | 
60 |     log.debug('Github Request. Url: {}'.format(url))
61 | 
62 |     response = requests.get(url, params=params)
63 | 
64 |     if not response.ok:
65 |         raise GeneralFailure('Request to Github failed. URL: {0}'.format(url))
66 | 
67 |     if response.headers['X-RateLimit-Remaining'] == 0:
68 |         log.info('Throttled by Github. X-RateLimit-Limit: {0}'.format(
69 |             response.headers['X-RateLimit-Limit']))
70 |         raise ThrottledError()
71 | 
72 |     log.debug('Github Response. Status: {0} Data: {1}'.format(
73 |         response.status_code,
74 |         json.dumps(response.json(), indent=2)
75 |     ))
76 | 
77 |     return response.json()
78 | 


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
  1 | 
  2 | # Created by https://www.gitignore.io/api/python,visualstudiocode,node,serverless
  3 | 
  4 | .idea
  5 | *.cert
  6 | *.key
  7 | 
  8 | ### Node ###
  9 | # Logs
 10 | logs
 11 | *.log
 12 | npm-debug.log*
 13 | yarn-debug.log*
 14 | yarn-error.log*
 15 | 
 16 | # Runtime data
 17 | pids
 18 | *.pid
 19 | *.seed
 20 | *.pid.lock
 21 | 
 22 | # Directory for instrumented libs generated by jscoverage/JSCover
 23 | lib-cov
 24 | 
 25 | # Coverage directory used by tools like istanbul
 26 | coverage
 27 | 
 28 | # nyc test coverage
 29 | .nyc_output
 30 | 
 31 | # Grunt intermediate storage (http://gruntjs.com/creating-plugins#storing-task-files)
 32 | .grunt
 33 | 
 34 | # Bower dependency directory (https://bower.io/)
 35 | bower_components
 36 | 
 37 | # node-waf configuration
 38 | .lock-wscript
 39 | 
 40 | # Compiled binary addons (http://nodejs.org/api/addons.html)
 41 | build/Release
 42 | 
 43 | # Dependency directories
 44 | node_modules/
 45 | jspm_packages/
 46 | 
 47 | # Typescript v1 declaration files
 48 | typings/
 49 | 
 50 | # Optional npm cache directory
 51 | .npm
 52 | 
 53 | # Optional eslint cache
 54 | .eslintcache
 55 | 
 56 | # Optional REPL history
 57 | .node_repl_history
 58 | 
 59 | # Output of 'npm pack'
 60 | *.tgz
 61 | 
 62 | # Yarn Integrity file
 63 | .yarn-integrity
 64 | 
 65 | # dotenv environment variables file
 66 | .env
 67 | 
 68 | 
 69 | ### Python ###
 70 | # Byte-compiled / optimized / DLL files
 71 | __pycache__/
 72 | *.py[cod]
 73 | *$py.class
 74 | 
 75 | # C extensions
 76 | *.so
 77 | 
 78 | # Distribution / packaging
 79 | .Python
 80 | env/
 81 | build/
 82 | develop-eggs/
 83 | dist/
 84 | downloads/
 85 | eggs/
 86 | .eggs/
 87 | lib/
 88 | lib64/
 89 | parts/
 90 | sdist/
 91 | var/
 92 | wheels/
 93 | *.egg-info/
 94 | .installed.cfg
 95 | *.egg
 96 | 
 97 | # PyInstaller
 98 | #  Usually these files are written by a python script from a template
 99 | #  before PyInstaller builds the exe, so as to inject date/other infos into it.
100 | *.manifest
101 | *.spec
102 | 
103 | # Installer logs
104 | pip-log.txt
105 | pip-delete-this-directory.txt
106 | 
107 | # Unit test / coverage reports
108 | htmlcov/
109 | .tox/
110 | .coverage
111 | .coverage.*
112 | .cache
113 | nosetests.xml
114 | coverage.xml
115 | *,cover
116 | .hypothesis/
117 | 
118 | # Translations
119 | *.mo
120 | *.pot
121 | 
122 | # Django stuff:
123 | local_settings.py
124 | 
125 | # Flask stuff:
126 | instance/
127 | .webassets-cache
128 | 
129 | # Scrapy stuff:
130 | .scrapy
131 | 
132 | # Sphinx documentation
133 | docs/_build/
134 | 
135 | # PyBuilder
136 | target/
137 | 
138 | # Jupyter Notebook
139 | .ipynb_checkpoints
140 | 
141 | # pyenv
142 | .python-version
143 | 
144 | # celery beat schedule file
145 | celerybeat-schedule
146 | 
147 | # SageMath parsed files
148 | *.sage.py
149 | 
150 | # dotenv
151 | 
152 | # virtualenv
153 | .venv
154 | venv/
155 | ENV/
156 | 
157 | # Spyder project settings
158 | .spyderproject
159 | .spyproject
160 | 
161 | # Rope project settings
162 | .ropeproject
163 | 
164 | # mkdocs documentation
165 | /site
166 | 
167 | ### Serverless ###
168 | # Ignore build directory
169 | .serverless
170 | .requirements
171 | 
172 | *.test.yml
173 | serverless.yml
174 | serverless_configs/*
175 | 
176 | 
177 | ### VisualStudioCode ###
178 | .vscode
179 | 
180 | # End of https://www.gitignore.io/api/python,visualstudiocode,node,serverless


--------------------------------------------------------------------------------
/scumblr_spillguard/handler.py:
--------------------------------------------------------------------------------
  1 | import re
  2 | import json
  3 | import base64
  4 | from raven_python_lambda import RavenLambdaWrapper
  5 | 
  6 | from scumblr_spillguard import log
  7 | from scumblr_spillguard import scumblr, github, bitbucket
  8 | 
  9 | 
 10 | def find_violations(contents, terms):
 11 |     """Find any violations in a given file."""
 12 |     hits = []
 13 | 
 14 |     for name, pattern in terms.items():
 15 |         match = re.search(pattern, contents, flags=re.MULTILINE | re.DOTALL)
 16 | 
 17 |         if match:
 18 |             log.debug("Contents hit on pattern {}".format(pattern))
 19 |             hits.append(name)
 20 | 
 21 |     return hits
 22 | 
 23 | 
 24 | def process_task_configs(commit, configs):
 25 |     """Iterates over all items in config analyzing each.
 26 | 
 27 |     Scumblr expects the following format::
 28 | 
 29 |     {
 30 |       "task_id": 105,
 31 |       "config": {
 32 |         "id": 105,
 33 |         "task_type": "ScumblrTask::GithubEventAnalyzer",
 34 |         "options": {
 35 |           "severity": "high",
 36 |           "github_terms": {
 37 |             "exec": "ProcessUtil.getRuntime().exec",
 38 |             "exec1": "Runtime.getRuntime().exec",
 39 |             "slack token": "xoxb",
 40 |             "slack token1": "xoxp-"
 41 |           }
 42 |         }
 43 |       },
 44 |       "commit": {
 45 |         "ref": "refs/heads/master",
 46 |         "head_commit": {
 47 |           "committer": {
 48 |             "name": "GitHub",
 49 |             "email": "noreply@github.com",
 50 |             "username": "web-flow"
 51 |           }
 52 |         },
 53 |         "repository": {
 54 |           "html_url": "https://github.com/Netflix-Skunkworks/test-gh-spillguard"
 55 |         }
 56 |       },
 57 |       "hits": true,
 58 |       "findings": [
 59 |         {
 60 |           "commit_id": "74ff78c232c8d8516f42c69767d30b5ef37e4041",
 61 |           "findings": [
 62 |             {
 63 |               "content_urls": "https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/contents/test1?ref=74ff78c232c8d8516f42c69767d30b5ef37e4041",
 64 |               "hits": [
 65 |                 "slack token"
 66 |               ]
 67 |             }
 68 |           ]
 69 |         }
 70 |       ]
 71 |     }
 72 | 
 73 |     """
 74 |     for config in configs:
 75 |         result = {
 76 |             'task_id': config['id'],
 77 |             'task_type': config['task_type'],
 78 |             'findings': [],
 79 |             'config': {
 80 |                 'id': config['id'],
 81 |                 'task_type': config['task_type'],
 82 |                 'options': config['options']
 83 |             },
 84 |             'commit': {
 85 |                 'ref': commit['ref'],
 86 |                 'head_commit': {
 87 |                     'committer': commit['committer']
 88 |                 },
 89 |                 'repository': {'html_url': commit['html_url']}
 90 |             }
 91 |         }
 92 | 
 93 |         log.info('Working on config. Config: {0}'.format(
 94 |             json.dumps(config, indent=2)
 95 |         ))
 96 | 
 97 |         hits = find_violations(commit['contents'],
 98 |                                config['options']['github_terms'])  # todo 'github_terms' should be generic 'terms'
 99 | 
100 |         if hits:
101 |             result['findings'].append(
102 |                 {
103 |                     'commit_id': commit['sha'],
104 |                     'findings': [
105 |                         {
106 |                             'hits': hits,
107 |                             'content_urls': commit['contents_url'].split('?')[0]
108 |                         }
109 |                     ]}
110 |             )
111 | 
112 |         if result['findings']:
113 |             log.error('Has findings. Sending result to scumblr. Result: {0}'.format(json.dumps(result, indent=2)))
114 |             scumblr.send_results(result)
115 | 
116 |         log.warning('Finished working on config. Result: {0}'.format(
117 |             json.dumps(result, indent=2),
118 |         ))
119 | 
120 | 
@RavenLambdaWrapper()
def github_handler(event, context):
    """
    Lambda entry point for Github commit (push) webhook events.

    Processing steps::

    1) Answer scheduled ``aws.events`` invocations immediately (prewarm).
    2) Validate the incoming event with ``github.validate``.
    3) Fetch the task configuration from Scumblr.
    4) Fetch each pushed commit from Github.
    5) Fetch every file of the commit via the blob api and decode it.
    6) Analyze the contents and report findings via ``process_task_configs``.

    :param event: Lambda event; its ``body`` is the JSON webhook payload.
    :param context: Lambda context (not used).
    :return: Response dict with status code ``'200'`` and an empty JSON body.
    """
    log.debug('Entering lambda handler with event: {}'.format(json.dumps(event, indent=2)))

    # github has a very low timeout (10s); scheduled invocations only exist to
    # keep the function warm so the service does not time out
    if event.get('source') == 'aws.events':
        return {'statusCode': '200', 'body': '{}'}

    github.validate(event)
    payload = json.loads(event['body'])
    repository = payload['repository']

    # both url templates end in a '{/sha}' placeholder that has to be removed
    placeholder_len = len('{/sha}')
    commit_url = repository['commits_url'][:-placeholder_len]
    blobs_url = repository['blobs_url'][:-placeholder_len]

    # get search terms from scumblr
    config = scumblr.get_config('GithubEventAnalyzer')

    pushed_commits = payload['commits']
    log.debug('Body contains {} commits'.format(len(pushed_commits)))

    for pushed in pushed_commits:
        commit_data = github.request('/'.join((commit_url, pushed['id'])))

        for changed in commit_data['files']:
            blob = github.request('/'.join((blobs_url, changed['sha'])))
            encoded = blob['content']
            try:
                # normalize commit data for the analyzer
                commit_data['contents'] = base64.b64decode(encoded).decode('utf-8', 'ignore')
                commit_data['contents_url'] = changed['contents_url']
                commit_data['committer'] = pushed['committer']
                commit_data['ref'] = payload['ref']
                commit_data['html_url'] = repository['html_url']
            except Exception as e:
                # a file that cannot be prepared is logged and skipped
                log.exception(e)
            else:
                process_task_configs(commit_data, config)

    return {'statusCode': '200', 'body': '{}'}
172 | 
173 | 
@RavenLambdaWrapper()
def rocketci_handler(event, context):
    """
    Lambda entry point for RocketCI commit events delivered as SNS records.

    Processing steps::

    1) Decode the JSON message of every SNS record in the event.
    2) Skip messages that are not ``create_commit`` events from ``stash-stable``.
    3) Fetch the task configuration from Scumblr.
    4) Fetch the commit's changes from Stash/Bitbucket.
    5) Fetch and reconstruct the contents of every changed file.
    6) Analyze the contents and report findings via ``process_task_configs``.

    :param event: Lambda event containing ``Records`` with ``Sns`` messages.
    :param context: Lambda context (not used).
    :return: Response dict with status code ``'200'`` and an empty JSON body.
    """
    log.debug('Entering lambda handler with event: {}'.format(json.dumps(event, indent=2)))

    for record in event['Records']:
        message = json.loads(record['Sns']['Message'])

        # only commit creation events coming from stash-stable are analyzed
        if message.get('eventSource') != 'stash-stable':
            continue
        if message.get('codeEventType') != 'create_commit':
            continue

        log.debug('Got Message: {}'.format(json.dumps(message, indent=2)))

        # get search terms from scumblr
        config = scumblr.get_config('GithubEventAnalyzer')  # TODO separate out terms

        source = message['source']
        commit_data = bitbucket.request(source['url'] + '/changes')

        for change in commit_data['values']:
            file_url = bitbucket.get_file_url(change['links']['self'][0]['href'])
            file_contents = bitbucket.reconstruct_contents(bitbucket.request(file_url))

            # normalize commit data
            commit_data['contents'] = file_contents
            commit_data['contents_url'] = file_url
            commit_data['sha'] = source['sha']
            commit_data['committer'] = source['author']['email']
            commit_data['ref'] = source['refId']
            commit_data['html_url'] = source['url']

            # send to scumblr
            process_task_configs(commit_data, config)

    return {'statusCode': '200', 'body': '{}'}
218 | 
219 | 
@RavenLambdaWrapper()
def bitbucket_handler(event, context):
    """
    Lambda entry point for Bitbucket commit webhooks (not implemented yet).

    Calling this handler always raises ``NotImplementedError``. The intended
    flow of processing is::

    1) Receive Bitbucket Webhook.
    2) Validate event for SourceIp, User-Agent and HMAC digest using a pre-shared secret.
    3) Fetch terms from Scumblr for processing.
    4) Fetch commit information from Stash.
    5) Fetch full file information via the blob api.
    6) Analyze blob with terms defined by the Scumblr configuration.
    7) Return analysis results to Scumblr.

    :param event: Lambda event (not used).
    :param context: Lambda context (not used).
    :raises NotImplementedError: always.
    """
    raise NotImplementedError()
240 | 


--------------------------------------------------------------------------------