├── scumblr_spillguard ├── tests │ ├── __init__.py │ ├── test_spill.py │ ├── conftest.py │ └── vectors.py ├── exceptions.py ├── __init__.py ├── secrets.py ├── __about__.py ├── utils.py ├── bitbucket.py ├── scumblr.py ├── github.py └── handler.py ├── AUTHORS ├── things.txt ├── setup.cfg ├── package.json ├── serverless.example.yml ├── setup.py ├── README.md └── .gitignore /scumblr_spillguard/tests/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /AUTHORS: -------------------------------------------------------------------------------- 1 | Jeremy jeremy@murkylabs.com 2 | Kevin kevgliss@gmail.com -------------------------------------------------------------------------------- /things.txt: -------------------------------------------------------------------------------- 1 | aslkjfsa 2 | aslkjas 3 | salkjasf 4 | sflkjsxoxb- 5 | alksjlksa 6 | salsfakjfsa 7 | sfalkjsaflkfsa 8 | -------------------------------------------------------------------------------- /setup.cfg: -------------------------------------------------------------------------------- 1 | [metadata] 2 | description-file = README.md 3 | 4 | [wheel] 5 | universal = 0 6 | 7 | [egg_info] 8 | tag_build = 9 | tag_date = 0 10 | tag_svn_revision = 0 -------------------------------------------------------------------------------- /scumblr_spillguard/exceptions.py: -------------------------------------------------------------------------------- 1 | class GeneralFailure(Exception): 2 | pass 3 | 4 | 5 | class AuthenticationError(GeneralFailure): 6 | pass 7 | 8 | 9 | class AuthorizationError(GeneralFailure): 10 | pass 11 | 12 | 13 | class ThrottledError(GeneralFailure): 14 | pass -------------------------------------------------------------------------------- /scumblr_spillguard/__init__.py: -------------------------------------------------------------------------------- 1 | import os 2 | import 
logging 3 | 4 | logging.basicConfig() 5 | log = logging.getLogger() 6 | log.setLevel(os.environ.get('LOG_LEVEL', 'DEBUG')) 7 | 8 | logging.getLogger('boto3').setLevel(logging.CRITICAL) 9 | logging.getLogger('botocore').setLevel(logging.CRITICAL) 10 | -------------------------------------------------------------------------------- /scumblr_spillguard/secrets.py: -------------------------------------------------------------------------------- 1 | import os 2 | import base64 3 | import boto3 4 | from scumblr_spillguard import log 5 | 6 | 7 | def get_secret(name): 8 | """Retrieves secret from KMS using the name env variable.""" 9 | log.info('Fetching secret from env var. VAR: {}'.format(name)) 10 | kms = boto3.session.Session().client("kms") 11 | return kms.decrypt(CiphertextBlob=base64.b64decode(os.environ[name]))["Plaintext"] 12 | -------------------------------------------------------------------------------- /scumblr_spillguard/__about__.py: -------------------------------------------------------------------------------- 1 | from __future__ import absolute_import, division, print_function 2 | 3 | __all__ = [ 4 | "__title__", "__summary__", "__uri__", "__version__", "__author__", 5 | "__email__", "__license__", "__copyright__", 6 | ] 7 | 8 | __title__ = "scumblr-spillguard" 9 | __summary__ = ("Secret monitoring of github commits.") 10 | __uri__ = "https://github.com/Netflix-Skunkworks/scumblr-spillguard" 11 | 12 | __version__ = "0.1.0" 13 | 14 | __author__ = "The Scumblr developers" 15 | __email__ = "security@netflix.com" 16 | 17 | __license__ = "Apache License, Version 2.0" 18 | __copyright__ = "Copyright 2018 {0}".format(__author__) -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "scumblr-spillguard", 3 | "version": "1.0.0", 4 | "description": "A github webhook for Scumblr", 5 | "main": "index.js", 6 | "scripts": { 7 | 
"test": "python test.py" 8 | }, 9 | "repository": { 10 | "type": "git", 11 | "url": "git+https://github.com/Netflix-Skunkworks/scumblr-spillguard.git" 12 | }, 13 | "keywords": [ 14 | "python", 15 | "aws", 16 | "lambda", 17 | "serverless" 18 | ], 19 | "author": "kevgliss", 20 | "license": "Apache", 21 | "bugs": { 22 | "url": "https://github.com/Netflix-Skunkworks/scumblr-spillguard/issues" 23 | }, 24 | "homepage": "https://github.com/Netflix-Skunkworks/scumblr-spillguard#readme", 25 | "dependencies": { 26 | "serverless-python-requirements": "^2.2.1" 27 | } 28 | } -------------------------------------------------------------------------------- /serverless.example.yml: -------------------------------------------------------------------------------- 1 | service: scumbler-spillguard 2 | 3 | provider: 4 | name: aws 5 | runtime: python3.6 6 | memorySize: 512 7 | timeout: 300 8 | awsKmsKeyArn: 9 | 10 | functions: 11 | githubWebhookListener: 12 | events: 13 | - http: 14 | path: github 15 | method: post 16 | handler: scumblr_spillguard.handler.github 17 | description: Forwards matching events to Scumblr 18 | vpc: 19 | securityGroupIds: 20 | - 21 | subnetIds: 22 | - 23 | environment: 24 | ENCRYPTED_GITHUB_TOKEN: 25 | ENCRYPTED_SCUMBLR_KEY: 26 | ENCRYPTED_WEBHOOK_SECRET: 27 | SCUMBLR_URL: 28 | 29 | plugins: 30 | - serverless-python-requirements -------------------------------------------------------------------------------- /scumblr_spillguard/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tempfile 3 | import ipaddress 4 | from contextlib import contextmanager 5 | 6 | from scumblr_spillguard import log 7 | from scumblr_spillguard.exceptions import AuthorizationError 8 | 9 | 10 | def validate_ip(source_ip, whitelist): 11 | """Determine if we are getting a request from a whitelisted ip.""" 12 | log.debug("Validating source IP") 13 | for cidr in whitelist: 14 | if ipaddress.IPv4Address(source_ip) in 
@contextmanager
def mktempfile():
    """Yield the path of a fresh, empty temporary file and remove it afterwards.

    The file is created and closed before its path is handed to the caller,
    so the caller is free to reopen it by name.  When the ``with`` block
    exits -- normally or because of an exception -- the file is deleted.

    Deletion is best-effort: a failure to remove the file is logged and
    never replaces an exception raised inside the ``with`` block.
    """
    # delete=False: the handle is closed when this inner ``with`` ends, but
    # the file itself has to stay on disk for the caller to use.
    with tempfile.NamedTemporaryFile(delete=False) as f:
        name = f.name

    try:
        yield name
    finally:
        try:
            os.unlink(name)
        except FileNotFoundError:
            # The caller already removed (or moved) the file; nothing to do.
            log.debug("No file {0}".format(name))
        except OSError as e:
            # Any other error means the file is still on disk.  Callers may
            # have written sensitive material to it, so do not hide this
            # behind a debug-level "No file" message.
            log.warning("Could not remove temporary file {0}: {1}".format(name, e))
17 | sys.path.insert(0, ROOT) 18 | 19 | about = {} 20 | with open(os.path.join(ROOT, "scumblr_spillguard", "__about__.py")) as f: 21 | exec(f.read(), about) 22 | 23 | 24 | install_requires = [ 25 | 'requests==2.11.1', 26 | 'retrying', 27 | 'raven_python_lambda' 28 | ] 29 | 30 | tests_require = [ 31 | 'pytest', 32 | 'responses' 33 | ] 34 | 35 | 36 | setup( 37 | name=about["__title__"], 38 | version=about["__version__"], 39 | author=about["__author__"], 40 | author_email=about["__email__"], 41 | url=about["__uri__"], 42 | description=about["__summary__"], 43 | long_description='See README.md', 44 | packages=find_packages(), 45 | include_package_data=True, 46 | zip_safe=False, 47 | install_requires=install_requires, 48 | extras_require={ 49 | 'tests': tests_require 50 | }, 51 | keywords=['github', 'secret_management'], 52 | classifiers=[ 53 | 'Programming Language :: Python', 54 | 'Programming Language :: Python :: 3', 55 | 'Programming Language :: Python :: 3.6', 56 | ], 57 | ) -------------------------------------------------------------------------------- /scumblr_spillguard/tests/test_spill.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from scumblr_spillguard.tests.vectors import * 3 | from scumblr_spillguard.exceptions import * 4 | 5 | 6 | def generate_github_hmac(event): 7 | import hmac 8 | import json 9 | import hashlib 10 | from scumblr_spillguard.secrets import get_secret 11 | 12 | message_hmac = hmac.new( 13 | get_secret('WEBHOOK_SECRET'), 14 | json.dumps(event['body']).encode('utf-8'), 15 | hashlib.sha1 16 | ) 17 | 18 | event['headers']['X-Hub-Signature'] = 'sha1=' + message_hmac.hexdigest() 19 | return event 20 | 21 | 22 | def test_scumblr_get_config(mocked_env, github_scumblr_config): 23 | from scumblr_spillguard.scumblr import get_config 24 | assert get_config('GithubEventAnalyzer') == GITHUB_SCUMBLR_CONFIG_RESPONSE 25 | 26 | 27 | def test_scumblr_send_results(mocked_env, 
def get_file_url(url):
    """Format a change link into a file URL we can actually fetch.

    The incoming link must have the shape ``<base>/<segment>/<sha>#<path>``.
    The two trailing ``/``-separated segments (``<segment>/<sha>``) are
    replaced by ``browse/<path>`` and the sha is appended as the ``at``
    query parameter::

        <base>/browse/<path>?at=<sha>

    :param url: change link whose fragment is the path of the changed file.
    :returns: the rewritten URL as a string.
    :raises ValueError: if ``url`` has no ``#<path>`` fragment.
    """
    # Split the fragment off first.  The file path may itself contain '/'
    # (any file in a sub-directory), so it must not take part in the
    # '/'-based segmentation below.
    location, separator, path = url.partition('#')
    if not separator:
        raise ValueError('Expected a "#<path>" fragment in url: {}'.format(url))

    segments = location.split('/')
    sha = segments[-1]

    # Drop '<segment>/<sha>' and point at the browse endpoint instead.
    parts = segments[:-2]
    parts.append('browse')
    parts.append(path)

    return '/'.join(parts) + '?at={}'.format(sha)
42 | password = get_secret('ENCRYPTED_BITBUCKET_PASSWORD').decode('utf-8') 43 | 44 | url = get_rest_url(url) 45 | 46 | log.debug('Bitbucket Request. Url: {} User: {}'.format(url, user)) 47 | response = requests.get(url, auth=(user, password)) 48 | 49 | if not response.ok: 50 | raise GeneralFailure('Request to Bitbucket failed. URL: {0}'.format(url)) 51 | 52 | log.debug('Bitbucket Response. Status: {0} Data: {1}'.format( 53 | response.status_code, 54 | json.dumps(response.json(), indent=2) 55 | )) 56 | 57 | return response.json() 58 | -------------------------------------------------------------------------------- /scumblr_spillguard/scumblr.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import requests 4 | 5 | from scumblr_spillguard import log 6 | from scumblr_spillguard.utils import mktempfile 7 | from scumblr_spillguard.secrets import get_secret 8 | from scumblr_spillguard.exceptions import GeneralFailure 9 | 10 | CWD = os.path.dirname(os.path.dirname(os.path.realpath(__file__))) 11 | 12 | SCUMBLR_URL = os.environ["SCUMBLR_URL"] 13 | SCUMBLR_CLIENT_PATH = os.path.join(CWD, os.environ.get("SCUMBLR_CLIENT_PATH", "SCUMBLR_CLIENT.cert")) 14 | 15 | 16 | def get_config(name): 17 | """Return the current scumblr task configuration.""" 18 | return request( 19 | '/tasks/search?q[task_type_eq]=ScumblrTask::{0}&resolve_system_metadata=true'.format( 20 | name 21 | )) 22 | 23 | 24 | def send_results(results): 25 | """Send analysis results back to scumblr.""" 26 | return request( 27 | '/tasks/{task_id}/run'.format(task_id=results['task_id']), data=results) 28 | 29 | 30 | # TODO add retry logic here too? 31 | def request(url, data=None): 32 | """Attempt to make a scumblr request.""" 33 | with mktempfile() as tmpfile: 34 | with open(tmpfile, 'w') as f: 35 | f.write(get_secret("ENCRYPTED_SCUMBLR_KEY").decode('utf-8')) 36 | 37 | if data: 38 | data = json.dumps(data, indent=2) 39 | log.debug("Scumblr Request. 
URL: {0} Data: {1}".format( 40 | url, 41 | data 42 | )) 43 | 44 | response = requests.post(SCUMBLR_URL + url, cert=( 45 | SCUMBLR_CLIENT_PATH, 46 | tmpfile), data=data) 47 | else: 48 | log.debug("Scumblr Request. URL: {0}".format( 49 | url 50 | )) 51 | response = requests.get(SCUMBLR_URL + url, cert=( 52 | SCUMBLR_CLIENT_PATH, 53 | tmpfile)) 54 | 55 | if not response.ok: 56 | log.debug(response.content) 57 | raise GeneralFailure("Request to Scumblr failed. URL: {0} Data: {1}".format( 58 | url, data 59 | )) 60 | 61 | log.debug("Scumblr Response. Status: {0}".format( 62 | response.status_code, 63 | )) 64 | 65 | if not data: 66 | return response.json() 67 | 68 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scumblr GithubSpillGuard 2 | [![serverless](http://public.serverless.com/badges/v3.svg)](http://www.serverless.com) 3 | 4 | A simple github webhook integration. Uses Scumblr as a source of terms to check incoming commits for. If a hit is found, sends the results to Scumblr for remediation. 5 | 6 | The service has a dependency on external packages (`requests` and `retrying`) and it exposes 1 REST API endpoint: 7 | 8 | | **Endpoint** |**Description**| 9 | |-------|------| 10 | | `POST /github` | Analyzes github commits based on Scumblr configuration | 11 | 12 | 13 | ## Serverless Setup 14 | | **Step** | **Command** |**Description**| 15 | |---|-------|------| 16 | | 1. | `npm install -g serverless` | Install Serverless CLI | 17 | | 2. | `npm install` | Install our package and its dependencies | 18 | 19 | 20 | ## AWS Setup 21 | Serverless requires different IAM credentials to deploy depending on what infrastructure exists. If we assume that you have never used serverless before, you will need `admin` credentials to deploy this lambda. 22 | 23 | 1. Create KMS Key 24 | 2.
@pytest.fixture(scope='function')
def mocked_env(monkeypatch):
    """Provide the environment variables and mocked KMS the tests rely on.

    For the duration of one test this fixture:

    * sets ``SCUMBLR_URL`` and ``SCUMBLR_CERT_PATH``;
    * encrypts a dummy plaintext with the mocked KMS client once per secret
      and stores the ciphertext, base64 encoded, in the environment;
    * keeps the KMS mock active until the test finishes.

    All variables are set through ``monkeypatch`` so they are restored when
    the test ends instead of leaking into later tests.
    """
    import base64  # local import: only this fixture needs it

    monkeypatch.setenv("SCUMBLR_URL", SCUMBLR_URL)
    monkeypatch.setenv("SCUMBLR_CERT_PATH", '.')

    # (environment variable, KMS key id) -- one entry per secret.
    secrets = (
        ("WEBHOOK_SECRET", '1234abcd-12ab-34cd-56ef-1234567890ab'),
        ("GITHUB_OAUTH_TOKEN", '1234abcd-12ab-34cd-56ef-1234567890ae'),
        ("SCUMBLR_KEY", '1234abcd-12ab-34cd-56ef-1234567890af'),
    )

    # NOTE(review): the application modules appear to look their secrets up
    # under ENCRYPTED_* names; each secret is exported under both names with
    # the same ciphertext so either lookup resolves to the same plaintext.
    # Confirm against github.py / scumblr.py.
    aliases = {
        "WEBHOOK_SECRET": "ENCRYPTED_WEBHOOK_SECRET",
        "GITHUB_OAUTH_TOKEN": "ENCRYPTED_GITHUB_TOKEN",
        "SCUMBLR_KEY": "ENCRYPTED_SCUMBLR_KEY",
    }

    with mock_kms():
        client = boto3.client('kms')

        for name, key_id in secrets:
            blob = client.encrypt(
                KeyId=key_id,
                Plaintext=b'bytes'
            )['CiphertextBlob']

            # Ciphertext is arbitrary bytes.  The consumer is expected to
            # base64-decode the variable before calling KMS, so store it
            # base64 encoded rather than trying to decode it as UTF-8.
            value = base64.b64encode(blob).decode('ascii')
            monkeypatch.setenv(name, value)
            monkeypatch.setenv(aliases[name], value)

        yield
json=GITHUB_COMMIT_RESPONSE, 88 | status=200 89 | ) 90 | -------------------------------------------------------------------------------- /scumblr_spillguard/tests/vectors.py: -------------------------------------------------------------------------------- 1 | SCUMBLR_URL = 'http://scumblr.test' 2 | GITHUB_URL = 'http://api.github.com' 3 | 4 | GITHUB_SCUMBLR_CONFIG_RESPONSE = [{ 5 | 'id': 105, 6 | 'task_type': 'ScumblrTask::GithubEventAnalyzer', 7 | 'options': { 8 | 'severity': 'high', 9 | 'github_terms': { 10 | 'exec': 'ProcessUtil.getRuntime().exec', 11 | 'exec1': 'Runtime.getRuntime().exec', 12 | 'slack token': 'xoxb', 13 | 'slack token1': 'xoxp-' 14 | } 15 | } 16 | }] 17 | 18 | GITHUB_SCUMBLR_RESULT = { 19 | 'task_id': 105, 20 | 'config': GITHUB_SCUMBLR_CONFIG_RESPONSE[0], 21 | 'findings': [ 22 | { 23 | 'commit_id': '779e77e65338156c35f8e053d54f696d464a32e6', 24 | 'findings': [{ 25 | 'content_urls': 'https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/contents/kevin?ref=779e77e65338156c35f8e053d54f696d464a32e6', 26 | 'hits': ['slack token'] 27 | }] 28 | }] 29 | } 30 | 31 | GITHUB_APIGATEWAY_EVENT = { 32 | 'resource': '/github', 33 | 'headers': { 34 | 'X-Hub-Signature': 'sha1=6af368b8a04b7a39d6469ef4faaab2721f07f177' 35 | }, 36 | 'requestContext': { 37 | 'identity': { 38 | 'sourceIp': '192.30.252.3' 39 | } 40 | }, 41 | 'body': { 42 | 'sha': '6af368b8a04b7a39d6469ef4faaab2721f07f177', 43 | 'size': 91, 44 | 'url': 'https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/git/blobs/6af368b8a04b7a39d6469ef4faaab2721f07f177', 45 | 'content': 'YWxrZmphc2RrbGZhanMKc2FmbGtmc2FqbGtmYXNqeG94Yi0KCmxmYXNqbGtm\nYWpmc2EKZnNhbGtzYWZqbGtmYWpzCmZzYWpmc2FrbGpmc2Fsa3NhZgp0ZXN0\nCg==\n', 46 | 'encoding': 'base64' 47 | } 48 | } 49 | 50 | GITHUB_COMMIT_RESPONSE = { 51 | 'sha': '779e77e65338156c35f8e053d54f696d464a32e6', 52 | 'url': 'https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/commits/779e77e65338156c35f8e053d54f696d464a32e6', 53 | 
'files': [ 54 | { 55 | 'sha': '6af368b8a04b7a39d6469ef4faaab2721f07f177', 56 | 'blob_url': 'https://github.com/Netflix-Skunkworks/test-gh-spillguard/blob/779e77e65338156c35f8e053d54f696d464a32e6/kevin', 57 | 'raw_url': 'https://github.com/Netflix-Skunkworks/test-gh-spillguard/raw/779e77e65338156c35f8e053d54f696d464a32e6/kevin', 58 | 'contents_url': 'https://api.github.com/repos/Netflix-Skunkworks/test-gh-spillguard/contents/kevin?ref=779e77e65338156c35f8e053d54f696d464a32e6', 59 | 'patch': '@@ -4,3 +4,4 @@ saflkfsajlkfasjxoxb-\n lfasjlkfajfsa\n fsalksafjlkfajs\n fsajfsakljfsalksaf\n+test' 60 | } 61 | ] 62 | } 63 | 64 | ROCKETCI_SNS_EVENT = {} 65 | 66 | BITBUCKET_APIGATEWAY_EVENT = {} 67 | -------------------------------------------------------------------------------- /scumblr_spillguard/github.py: -------------------------------------------------------------------------------- 1 | import hmac 2 | import json 3 | import hashlib 4 | import requests 5 | from retrying import retry 6 | 7 | from scumblr_spillguard import log 8 | from scumblr_spillguard.utils import validate_ip 9 | from scumblr_spillguard.secrets import get_secret 10 | from scumblr_spillguard.exceptions import GeneralFailure, ThrottledError, AuthorizationError 11 | 12 | 13 | GITHUB_CIDR_WHITELIST = ['192.30.252.0/22', '185.199.108.0/22', '140.82.112.0/20'] 14 | 15 | 16 | def github_thottled(exception): 17 | """We should retry if we think we can successfully complete the request within the lambda timeout.""" 18 | log.exception(exception) 19 | return isinstance(exception, ThrottledError) 20 | 21 | 22 | def validate(event): 23 | """Ensure the incoming event is a github event.""" 24 | authorize(event['body'], event['headers'], event['requestContext']['identity']['sourceIp']) 25 | if event.get('resource') == '/github': 26 | if event.get('requestContext'): 27 | if event['requestContext'].get('identity'): 28 | if event['requestContext']['identity'].get('userAgent'): 29 | if 
@retry(retry_on_exception=github_thottled, wait_random_min=1000, wait_random_max=10000)
def request(url):
    """Attempt to make a Github request and return the decoded JSON body.

    :param url: absolute Github API URL to GET.
    :returns: the parsed JSON response.
    :raises ThrottledError: when Github rejects the call because the rate
        limit is exhausted.  ``github_thottled`` tells the ``retry``
        decorator to retry exactly these, after a random 1-10 second wait.
    :raises GeneralFailure: for any other error response (not retried).
    """
    token = get_secret('ENCRYPTED_GITHUB_TOKEN')
    if isinstance(token, bytes):
        token = token.decode('utf-8')

    # Send the token in the Authorization header instead of the
    # ``access_token`` query parameter: it keeps the credential out of URLs
    # (and therefore out of logs), and Github no longer accepts query
    # parameter authentication.  strip() guards against a trailing newline
    # in the stored plaintext, which is not valid inside a header value.
    headers = {'Authorization': 'token {}'.format(token.strip())}

    log.debug('Github Request. Url: {}'.format(url))

    response = requests.get(url, headers=headers)

    if not response.ok:
        # Rate limiting is reported as an *error* response with the
        # remaining quota at zero, so it has to be detected here, before the
        # generic failure.  Header values are strings, hence the '0'.
        throttled = (
            response.status_code in (403, 429)
            and response.headers.get('X-RateLimit-Remaining') == '0'
        )
        if throttled:
            log.info('Throttled by Github. X-RateLimit-Limit: {0}'.format(
                response.headers.get('X-RateLimit-Limit')))
            raise ThrottledError()

        raise GeneralFailure('Request to Github failed. URL: {0}'.format(url))

    data = response.json()

    log.debug('Github Response. Status: {0} Data: {1}'.format(
        response.status_code,
        json.dumps(data, indent=2)
    ))

    return data
*.egg-info/ 94 | .installed.cfg 95 | *.egg 96 | 97 | # PyInstaller 98 | # Usually these files are written by a python script from a template 99 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 100 | *.manifest 101 | *.spec 102 | 103 | # Installer logs 104 | pip-log.txt 105 | pip-delete-this-directory.txt 106 | 107 | # Unit test / coverage reports 108 | htmlcov/ 109 | .tox/ 110 | .coverage 111 | .coverage.* 112 | .cache 113 | nosetests.xml 114 | coverage.xml 115 | *,cover 116 | .hypothesis/ 117 | 118 | # Translations 119 | *.mo 120 | *.pot 121 | 122 | # Django stuff: 123 | local_settings.py 124 | 125 | # Flask stuff: 126 | instance/ 127 | .webassets-cache 128 | 129 | # Scrapy stuff: 130 | .scrapy 131 | 132 | # Sphinx documentation 133 | docs/_build/ 134 | 135 | # PyBuilder 136 | target/ 137 | 138 | # Jupyter Notebook 139 | .ipynb_checkpoints 140 | 141 | # pyenv 142 | .python-version 143 | 144 | # celery beat schedule file 145 | celerybeat-schedule 146 | 147 | # SageMath parsed files 148 | *.sage.py 149 | 150 | # dotenv 151 | 152 | # virtualenv 153 | .venv 154 | venv/ 155 | ENV/ 156 | 157 | # Spyder project settings 158 | .spyderproject 159 | .spyproject 160 | 161 | # Rope project settings 162 | .ropeproject 163 | 164 | # mkdocs documentation 165 | /site 166 | 167 | ### Serverless ### 168 | # Ignore build directory 169 | .serverless 170 | .requirements 171 | 172 | *.test.yml 173 | serverless.yml 174 | serverless_configs/* 175 | 176 | 177 | ### VisualStudioCode ### 178 | .vscode 179 | 180 | # End of https://www.gitignore.io/api/python,visualstudiocode,node,serverless -------------------------------------------------------------------------------- /scumblr_spillguard/handler.py: -------------------------------------------------------------------------------- 1 | import re 2 | import json 3 | import base64 4 | from raven_python_lambda import RavenLambdaWrapper 5 | 6 | from scumblr_spillguard import log 7 | from scumblr_spillguard import 
def find_violations(contents, terms):
    """Find any violations in a given file.

    Each value in ``terms`` is used as a regular expression and searched
    for anywhere in ``contents`` (``re.MULTILINE | re.DOTALL``).

    :param contents: text of the file to scan.
    :param terms: mapping of term name -> pattern; ``None`` or an empty
        mapping yields no hits.
    :returns: list with the names of all terms whose pattern was found,
        in the iteration order of ``terms``.
    """
    hits = []

    for name, pattern in (terms or {}).items():
        try:
            match = re.search(pattern, contents, flags=re.MULTILINE | re.DOTALL)
        except re.error as e:
            # One malformed term must not abort the scan for every other
            # term (and every other file).  Fall back to treating the term
            # as literal text, which is the most likely intent of a pattern
            # such as "foo(".
            log.warning("Invalid pattern {} for term {} ({}); matching it literally".format(
                pattern, name, e))
            match = pattern in contents

        if match:
            log.debug("Contents hit on pattern {}".format(pattern))
            hits.append(name)

    return hits
@RavenLambdaWrapper()
def github_handler(event, context):
    """
    Lambda entry point for Github push webhooks.

    Processing steps::

        1) Log the incoming event.
        2) Answer scheduled "aws.events" invocations immediately (pre-warm).
        3) Validate the event via ``github.validate``.
        4) Fetch the task configuration from Scumblr.
        5) For every commit in the payload, fetch the commit from Github.
        6) For every file of that commit, fetch its blob, base64-decode it
           and hand it to ``process_task_configs`` together with the
           Scumblr configuration.

    :param event: Lambda event (API Gateway proxy event or scheduled event).
    :param context: Lambda context object (unused).
    :return: a ``200`` response with an empty JSON body.
    """
    log.debug('Entering lambda handler with event: {}'.format(json.dumps(event, indent=2)))

    # github has a very low timeout (10s); scheduled events only exist to keep
    # the function warm, so answer them without doing any work
    if event.get('source') == 'aws.events':
        return {'statusCode': '200', 'body': '{}'}

    github.validate(event)
    payload = json.loads(event['body'])

    # both templates end in the "{/sha}" placeholder; cut it off
    placeholder_length = len('{/sha}')
    commits_base = payload['repository']['commits_url'][:-placeholder_length]
    blobs_base = payload['repository']['blobs_url'][:-placeholder_length]

    # get search terms from scumblr
    task_configs = scumblr.get_config('GithubEventAnalyzer')

    log.debug('Body contains {} commits'.format(len(payload['commits'])))

    for pushed in payload['commits']:
        details = github.request('/'.join((commits_base, pushed['id'])))

        for changed in details['files']:
            blob = github.request('/'.join((blobs_base, changed['sha'])))
            encoded = blob['content']

            try:
                details['contents'] = base64.b64decode(encoded).decode('utf-8', 'ignore')
                details['contents_url'] = changed['contents_url']
                details['committer'] = pushed['committer']
                details['ref'] = payload['ref']
                details['html_url'] = payload['repository']['html_url']
            except Exception as e:
                # skip files we cannot normalize, keep going with the rest
                log.exception(e)
            else:
                process_task_configs(details, task_configs)

    return {'statusCode': '200', 'body': '{}'}
187 | 188 | :param event: 189 | :param context: 190 | :return: 191 | """ 192 | log.debug('Entering lambda handler with event: {}'.format(json.dumps(event, indent=2))) 193 | for r in event['Records']: 194 | body = json.loads(r['Sns']['Message']) 195 | if body.get('eventSource') == 'stash-stable': 196 | if body.get('codeEventType') == 'create_commit': 197 | # get search terms from scumblr 198 | log.debug('Got Message: {}'.format(json.dumps(body, indent=2))) 199 | config = scumblr.get_config('GithubEventAnalyzer') # TODO separate out terms 200 | commit_data = bitbucket.request(body['source']['url'] + '/' + 'changes') 201 | 202 | for f in commit_data['values']: 203 | file_url = bitbucket.get_file_url(f['links']['self'][0]['href']) 204 | data = bitbucket.reconstruct_contents(bitbucket.request(file_url)) 205 | 206 | # normalize commit data 207 | commit_data['contents'] = data 208 | commit_data['contents_url'] = file_url 209 | commit_data['sha'] = body['source']['sha'] 210 | commit_data['committer'] = body['source']['author']['email'] 211 | commit_data['ref'] = body['source']['refId'] 212 | commit_data['html_url'] = body['source']['url'] 213 | 214 | # send to scumblr 215 | process_task_configs(commit_data, config) 216 | 217 | return {'statusCode': '200', 'body': '{}'} 218 | 219 | 220 | @RavenLambdaWrapper() 221 | def bitbucket_handler(event, context): 222 | """ 223 | Handles processing of bitbucket commit events. 224 | 225 | The general flow of processing is as follows:: 226 | 227 | 1) Receive Bitbucket Webhook. 228 | 2) Validate event for SourceIp, User-Agent and HMAC digest using a pre-shared secret. 229 | 3) Fetch terms from Scumblr for processing. 230 | 4) Fetch commit information from Stash. 231 | 5) Fetch full file information via the blob api. 232 | 6) Analyze blob with terms defined by the Scumblr configuration. 233 | 7) Return analysis results to Scumblr. 
234 | 235 | :param event: 236 | :param context: 237 | :return: 238 | """ 239 | raise NotImplementedError 240 | --------------------------------------------------------------------------------