├── src ├── libs │ ├── __init__.py │ ├── __pycache__ │ │ ├── github.cpython-37.pyc │ │ ├── init.cpython-37.pyc │ │ ├── regexs.cpython-37.pyc │ │ ├── rules.cpython-37.pyc │ │ ├── slack.cpython-37.pyc │ │ ├── utils.cpython-37.pyc │ │ ├── __init__.cpython-37.pyc │ │ └── directory.cpython-37.pyc │ ├── init.py │ ├── slack.py │ ├── rules.py │ ├── directory.py │ ├── utils.py │ ├── github.py │ └── regexs.py ├── parse │ ├── __init__.py │ ├── __pycache__ │ │ ├── config.cpython-37.pyc │ │ └── __init__.cpython-37.pyc │ └── config.py ├── requirements.txt ├── rules │ └── template.yaml ├── config.ini └── gitmonitor.py ├── images ├── diagram.png └── GitMonitor-logo.png └── README.md /src/libs/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/parse/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | gitpython 3 | pyyaml -------------------------------------------------------------------------------- /images/diagram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/images/diagram.png -------------------------------------------------------------------------------- /images/GitMonitor-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/images/GitMonitor-logo.png -------------------------------------------------------------------------------- /src/libs/__pycache__/github.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/libs/__pycache__/github.cpython-37.pyc -------------------------------------------------------------------------------- /src/libs/__pycache__/init.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/libs/__pycache__/init.cpython-37.pyc -------------------------------------------------------------------------------- /src/libs/__pycache__/regexs.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/libs/__pycache__/regexs.cpython-37.pyc -------------------------------------------------------------------------------- /src/libs/__pycache__/rules.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/libs/__pycache__/rules.cpython-37.pyc -------------------------------------------------------------------------------- /src/libs/__pycache__/slack.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/libs/__pycache__/slack.cpython-37.pyc -------------------------------------------------------------------------------- /src/libs/__pycache__/utils.cpython-37.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/libs/__pycache__/utils.cpython-37.pyc -------------------------------------------------------------------------------- /src/libs/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/libs/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /src/parse/__pycache__/config.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/parse/__pycache__/config.cpython-37.pyc -------------------------------------------------------------------------------- /src/libs/__pycache__/directory.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/libs/__pycache__/directory.cpython-37.pyc -------------------------------------------------------------------------------- /src/parse/__pycache__/__init__.cpython-37.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Agent00049/GitMonitor/master/src/parse/__pycache__/__init__.cpython-37.pyc -------------------------------------------------------------------------------- /src/libs/init.py: -------------------------------------------------------------------------------- 1 | type_search = ['repos', 'code', 'commit'] 2 | config_file = "config.ini" 3 | list_info = ["full_name", "html_url", "clone_url", "updated_at"] 4 | ignore_file = ["LICENSE", '.gitignore'] 5 | ignore_ex = ['png', 'jpg', 'jpeg', 'gif', 'svg', 'exe'] 6 | special = ['\"', '\'', '(', ")", " ", "&"] 7 | logs = {} 8 | possible = {} -------------------------------------------------------------------------------- /src/parse/config.py: -------------------------------------------------------------------------------- 1 | from configparser import ConfigParser 2 | 3 | 4 | def get_file_config(file): 5 | confi = {} 6 | config = ConfigParser() 7 | config.read(file) 8 | sections = config.sections() 9 | for section in sections: 10 | options = config.options(section) 11 | for option in options: 12 | value = config.get(section, option) 13 | name = str(section) + "_" + str(option) 14 | confi[name] = value 15 | return confi -------------------------------------------------------------------------------- /src/rules/template.yaml: -------------------------------------------------------------------------------- 1 | #Comment any options you don't want to use, uncomment if you want to use it 2 | 3 | # ID or rule 4 | id: Project_X_Matching 5 | # Define the keyword 6 | key: X 7 | # Programming language is included when searching 8 | language: 9 | - java 10 | # File name is included when searching 11 | filename: 12 | - apiController.java 13 | # File extension is included when searching 14 | extension: 15 | - java 16 | ignore: 17 | # Programming language is excluded when searching 18 | language: 19 | - php 20 | # File extension is excluded when searching 21 | filename: 22 | - LICENSE 23 | #File extension is excluded when searching 24 | extension: 25 | - html 26 | - txt 27 | -------------------------------------------------------------------------------- /src/config.ini: -------------------------------------------------------------------------------- 1 | [git] 2 | user = 3 | pass = 4 | url_code 
= https://api.github.com/search/code?q={}+in:file&sort=indexed&order=desc 5 | url_repos = https://api.github.com/search/repositories?q={}+size:>0+is:public&sort=indexed&order=desc 6 | url_commit = https://api.github.com/search/commits?q={}+is:public&sort=indexed&order=desc 7 | rpp = 50 8 | 9 | [slack] 10 | webhooks = 11 | 12 | [path] 13 | rule = 14 | source = 15 | log = 16 | 17 | [msg] 18 | start = ====================**********==================== 19 | 20 | *Start scanning at {}* 21 | _Clone completed successfully:_ 22 | end = ====================**********==================== 23 | 24 | *Scanning Done at {}* 25 | _Detected possible repository:_ 26 | all = ====================**********==================== 27 | 28 | *All repository:* -------------------------------------------------------------------------------- /src/libs/slack.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | 4 | 5 | def get_message(dic, rule_id): 6 | msg = '' 7 | if len(dic) == 0: 8 | msg = ">Nothing new" 9 | for i in dic: 10 | if "matched" not in dic[i]: 11 | msg += ">{}\n".format(dic[i]['html_url']) 12 | else: 13 | msg += ">{} - Matched: {}\n".format(dic[i]['html_url'], dic[i]["matched"]) 14 | msg = "[Rule ID: {}]\n{}".format(rule_id, msg) 15 | return msg 16 | 17 | 18 | def send_message(msg, conf): 19 | headers = {'Content-type': 'application/json'} 20 | data = {'text': msg} 21 | requests.post(conf['slack_webhooks'], data=json.dumps(data), headers=headers) 22 | 23 | 24 | def send_list(dic, conf, rule_id): 25 | message = get_message(dic, rule_id) 26 | send_message(message, conf) 27 | -------------------------------------------------------------------------------- /src/libs/rules.py: -------------------------------------------------------------------------------- 1 | import yaml 2 | 3 | 4 | def get_rule(rule_file): 5 | a_yaml_file = open(rule_file) 6 | parsed_yaml_file = yaml.load(a_yaml_file, Loader=yaml.FullLoader) 7 | if "key" not in parsed_yaml_file: 8 | print("ERROR: Not found key in rule file - {}".format(rule_file)) 9 | exit() 10 | if "id" not in parsed_yaml_file: 11 | print("ERROR: Not found id in rule file - {}".format(rule_file)) 12 | exit() 13 | return parsed_yaml_file 14 | 15 | 16 | def build_query(rule_file): 17 | rule = get_rule(rule_file) 18 | queries = [] 19 | key = "\"{}\"".format(rule['key'].strip()) 20 | for k in rule: 21 | if k.strip() not in ['key', 'id', 'ignore'] and rule[k] is not None: 22 | for i in rule[k]: 23 | query = key + " {}:{}".format(k, i) 24 | queries.append(query) 25 | if k == 'ignore': 26 | query = key 27 | for j in rule[k]: 28 | if rule[k][j] is not None: 29 | for l in rule[k][j]: 30 | query += " -{}:{}".format(j, l) 31 | if query != key: 32 | queries.append(query) 33 | if len(queries) == 0: 34 | queries.append(key) 35 | return queries, rule['id'].strip() 36 | -------------------------------------------------------------------------------- /src/libs/directory.py: -------------------------------------------------------------------------------- 1 | import os 2 | import shutil 3 | from libs.utils import get_filename, get_extension 4 | from libs.init import ignore_file, special, ignore_ex 5 | from libs.regexs import to_match 6 | 7 | 8 | def check_regex(path2file, matches): 9 | for m in to_match: 10 | if m['match_type'] in matches: 11 | continue 12 | cmd = os.popen("egrep -e \"{}\" {}".format(m['match_regex'], path2file)).read() 13 | if cmd != "": 14 | print("{}-{}".format(m['match_type'], path2file)) 15 | 
matches.append(m['match_type']) 16 | 17 | 18 | def find_sensitive(path2src): 19 | matches = [] 20 | for r, d, f in os.walk(path2src): 21 | for file in f: 22 | path2file = os.path.join(r, file) 23 | for s in special: 24 | if s in path2file: 25 | path2file = path2file.replace(s, "\\" + s) 26 | filename = get_filename(path2file) 27 | ex = get_extension(filename) 28 | if "/.git/" not in path2file and filename not in ignore_file and ex not in ignore_ex: 29 | check_regex(path2file, matches) 30 | return matches 31 | 32 | 33 | def handle_directory(logs, conf, possible, fn): 34 | matches = [] 35 | if logs[fn]['state'] == "new": 36 | logs[fn]['state'] = 'old' 37 | path2src = "{}/{}".format(conf['path_source'], fn) 38 | matches = find_sensitive(path2src) 39 | shutil.rmtree(path2src) 40 | if len(matches) != 0: 41 | m = "" 42 | for i in matches: 43 | m += i + ", " 44 | possible[fn] = {"html_url": logs[fn]['html_url'], 'matched': m.rstrip(", ")} -------------------------------------------------------------------------------- /src/libs/utils.py: -------------------------------------------------------------------------------- 1 | import os 2 | import json 3 | import datetime 4 | from parse.config import get_file_config 5 | 6 | 7 | def get_config(config_file): 8 | if not os.path.exists(config_file): 9 | print("Can not find " + config_file) 10 | exit() 11 | return get_file_config(config_file) 12 | 13 | 14 | def exists_file(filename): 15 | if not os.path.isfile(filename): 16 | return False 17 | return True 18 | 19 | 20 | def exists_dir(path): 21 | if not os.path.exists(path): 22 | return False 23 | return True 24 | 25 | 26 | def write_file(filename, log): 27 | with open(filename, 'w') as f: 28 | json.dump(log, f) 29 | f.close() 30 | 31 | 32 | def get_json(log_name): 33 | log = {} 34 | if exists_file(log_name): 35 | with open(log_name) as f: 36 | log = f.read() 37 | log = json.loads(log) 38 | f.close() 39 | return log 40 | 41 | 42 | def get_extension(filename): 43 | tmp = filename.split(".") 44 | l = len(tmp) 45 | return tmp[l-1] 46 | 47 | 48 | def get_filename(path2file): 49 | tmp = path2file.split("/") 50 | l = len(tmp) 51 | return tmp[l-1] 52 | 53 | 54 | def get_time(): 55 | current = datetime.datetime.now() 56 | time = current.strftime("%d/%m/%Y") 57 | return time 58 | 59 | 60 | def initialization(config_file): 61 | conf = get_config(config_file) 62 | if not exists_dir(conf['path_source']): 63 | os.mkdir(conf['path_source']) 64 | if not exists_dir(conf['path_rule']): 65 | print("ERROR: Not found rule folder.") 66 | exit() 67 | conf['path_source'] = conf['path_source'].rstrip("/") 68 | return conf 69 | 70 | 71 | def readfile(filename): 72 | with open(filename) as f: 73 | contents = f.readlines() 74 | return contents 75 | 76 | -------------------------------------------------------------------------------- /src/gitmonitor.py: -------------------------------------------------------------------------------- 1 | import time 2 | import os 3 | from libs.utils import initialization, write_file, get_json, get_time 4 | from libs.init import config_file, possible, type_search 5 | from libs.github import get_page_number, search_repository, handle_page 6 | from libs.slack import send_message, send_list 7 | from libs.rules import build_query 8 | 9 | 10 | conf = initialization(config_file) 11 | logs = get_json("{}/{}".format(conf['path_source'], conf['path_log'])) 12 | send_message(conf['msg_start'].format(get_time()), conf) 13 | for r, d, f in os.walk(conf['path_rule']): 14 | for file in f: 15 | clone = {} 16 | 
rule_file = os.path.join(r, file) 17 | if rule_file[-5:] != ".yaml": 18 | continue 19 | queries, rule_id = build_query(rule_file) 20 | if rule_id not in logs: 21 | logs[rule_id] = {} 22 | if rule_id not in possible: 23 | possible[rule_id] = {} 24 | for typ in type_search: 25 | for query in queries: 26 | if typ != "code": 27 | query = query.split(" ", 1)[0] 28 | pages, rep = get_page_number(query, conf, typ) 29 | if pages is None or rep is None: 30 | continue 31 | handle_page(rep['items'], logs[rule_id], conf, clone, possible[rule_id]) 32 | if pages > 2: 33 | for i in range(2, pages): 34 | repo = search_repository(query, conf, typ, "&per_page={}&page={}".format(conf['git_rpp'], str(i))) 35 | if repo is None: 36 | continue 37 | time.sleep(10) 38 | handle_page(repo['items'], logs[rule_id], conf, clone, possible[rule_id]) 39 | if typ != "code": 40 | break 41 | send_list(clone, conf, rule_id) 42 | send_message(conf['msg_end'].format(get_time()), conf) 43 | for rule_id in possible: 44 | send_list(possible[rule_id], conf, rule_id) 45 | send_message(conf['msg_all'], conf) 46 | for rule_id in logs: 47 | send_list(logs[rule_id], conf, rule_id) 48 | write_file("{}/{}".format(conf['path_source'], conf['path_log']), logs) -------------------------------------------------------------------------------- /src/libs/github.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import json 3 | import math 4 | import git 5 | import time 6 | from libs.init import list_info 7 | from libs.utils import write_file 8 | from libs.slack import send_message 9 | from libs.directory import handle_directory 10 | 11 | 12 | def handle_error_search(repo, conf): 13 | if repo['message'] == "Validation Failed": 14 | return True 15 | elif "API rate limit exceeded for user" in repo['message'] or "You have triggered an abuse detection mechanism" in repo['message']: 16 | time.sleep(15) 17 | return False 18 | else: 19 | send_message("*ERROR*: {}".format(repo['message']), conf) 20 | exit() 21 | 22 | 23 | def search_repository(query, conf, typ, uri=""): 24 | url = conf['git_url_' + typ].format(query) + uri 25 | headers = {"Accept": "application/vnd.github.cloak-preview"} 26 | r = requests.get(url, auth=(conf['git_user'], conf['git_pass']), headers=headers) 27 | while "total_count" not in json.loads(r.text) and "message" in json.loads(r.text): 28 | if handle_error_search(json.loads(r.text), conf): 29 | return None 30 | r = requests.get(url, auth=(conf['git_user'], conf['git_pass']), headers=headers) 31 | return json.loads(r.text) 32 | 33 | 34 | def get_page_number(query, conf, typ): 35 | rpp = int(conf['git_rpp']) 36 | repos = search_repository(query, conf, typ) 37 | if repos is None: 38 | return None, None 39 | total = repos['total_count'] 40 | if total > 1000: 41 | total = 1000 42 | pages = math.ceil(total / rpp) + 1 43 | return pages, search_repository(query, conf, typ, "&per_page={}&page=1".format(conf['git_rpp'])) 44 | 45 | 46 | def handle_error_download(conf, e, logs): 47 | print("ERROR:" + str(e)) 48 | if "enough space" in str(e): 49 | send_message("*ERROR*: Not enough space when cloning repository", conf) 50 | write_file("{}/{}".format(conf['path_source'], conf['file_log']), logs) 51 | exit() 52 | pass 53 | 54 | 55 | def handle_page(rp_items, logs, conf, cloned, possible): 56 | for rp in rp_items: 57 | rpi = {} 58 | if "repository" in rp: 59 | r = requests.get(rp['repository']['url'], auth=(conf['git_user'], conf['git_pass'])) 60 | rp = json.loads(r.text) 61 | for i in 
list_info: 62 | rpi[i] = rp[i] 63 | folder_name = "{}_{}".format(rpi['full_name'].split("/")[0], rpi['full_name'].split("/")[1]) 64 | if folder_name not in logs or rpi["updated_at"] != logs[folder_name]['updated_at']: 65 | try: 66 | print("Cloning " + rpi['clone_url']) 67 | git.Repo.clone_from(rpi['clone_url'], "{}/{}".format(conf['path_source'], folder_name)) 68 | js = {"html_url": rpi['html_url'], "updated_at": rpi['updated_at'], "state": 'new'} 69 | logs[folder_name] = js 70 | cloned[folder_name] = {"html_url": rpi['html_url']} 71 | handle_directory(logs, conf, possible, folder_name) 72 | except Exception as e: 73 | handle_error_download(conf, e, logs) -------------------------------------------------------------------------------- /src/libs/regexs.py: -------------------------------------------------------------------------------- 1 | to_match = [ 2 | {'match_regex': 'password', 'match_type': 'Password'}, 3 | {'match_regex': '(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)(\.(25[0-5]|2[0-4][0-9]|[01]?[0-9][0-9]?)){3,}', 'match_type': 'IP'}, 4 | {'match_regex': 'username', 'match_type': 'Username'}, 5 | {'match_regex': '([A-Za-z0-9._%%+-]+[a-zA-Z0-9])@([A-Za-z0-9.-]+[a-zA-Z0-9])\.([A-Za-z]{2,})', 'match_type': 'Email'}, 6 | {'match_regex': '(A3T[A-Z0-9]|AKIA|AGPA|AIDA|AROA|AIPA|ANPA|ANVA|ASIA)[A-Z0-9]{16,}', 'match_type': "AWS Client ID"}, 7 | {'match_regex': '(?i)aws(.{0,20})\?[\\\'\\\"][0-9a-zA-Z\/+]{40,}[\\\'\\\"]', 'match_type': "AWS Secret Key"}, 8 | {'match_regex': 'amzn\.mws\.[0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12,}', 'match_type': "AWS MWS key"}, 9 | {'match_regex': '\-\-\-\-\-BEGIN PRIVATE KEY\-\-\-\-\-', 'match_type': "PKCS8"}, 10 | {'match_regex': '\-\-\-\-\-BEGIN RSA PRIVATE KEY\-\-\-\-\-', 'match_type': "RSA"}, 11 | {'match_regex': '\-\-\-\-\-BEGIN OPENSSH PRIVATE KEY\-\-\-\-\-', 'match_type': "SSH"}, 12 | {'match_regex': '\-\-\-\-\-BEGIN PGP PRIVATE KEY BLOCK\-\-\-\-\-', 'match_type': "PGP"}, 13 | {'match_regex': '(?i)(facebook|fb)(.{0,20})?[\\\'\\\"][0-9a-f]{32,}[\\\'\\\"]', 'match_type': "Facebook Secret Key"}, 14 | {'match_regex': '(?i)(facebook|fb)(.{0,20})?[\\\'\\\"][0-9]{13,17}[\\\'\\\"]', 'match_type': "Facebook Client ID"}, 15 | {'match_regex': '[f|F][a|A][c|C][e|E][b|B][o|O][o|O][k|K].*[\\\'|\\\"][0-9a-f]{32,}[\\\'|\\\"]', 'match_type': "Facebook Oauth"}, 16 | {'match_regex': 'EAACEdEose0cBA[0-9A-Za-z]+', 'match_type': "Facebook access token"}, 17 | {'match_regex': '(?i)twitter(.{0,20})?[\\\'\\\"][0-9a-z]{35,44}[\\\'\\\"]', 'match_type': "Twitter Secret Key"}, 18 | {'match_regex': '(?i)twitter(.{0,20})?[\\\'\\\"][0-9a-z]{18,25}[\\\'\\\"]', 'match_type': "Twitter Client ID"}, 19 | {'match_regex': '[t|T][w|W][i|I][t|T][t|T][e|E][r|R].{0,30}[\\\'\\\"\\s][0-9a-zA-Z]{35,44}[\\\'\\\"\\s]', 'match_type': "Twitter Oauth"}, 20 | {'match_regex': '(?i)github(.{0,20})?[\\\'\\\"][0-9a-zA-Z]{35,40}[\\\'\\\"]', 'match_type': "Github"}, 21 | {'match_regex': '(?i)linkedin(.{0,20})?[\\\'\\\"][0-9a-z]{12,}[\\\'\\\"]', 'match_type': "LinkedIn Client ID"}, 22 | {'match_regex': '(?i)linkedin(.{0,20})?[\\\'\\\"][0-9a-z]{16,}[\\\'\\\"]', 'match_type': "LinkedIn Secret Key"}, 23 | {'match_regex': 'xox[baprs]-([0-9a-zA-Z]{10,48})?', 'match_type': "Slack"}, 24 | {'match_regex': '\-\-\-\-\-BEGIN EC PRIVATE KEY\-\-\-\-\-', 'match_type': "EC"}, 25 | {'match_regex': '(?i)api_key(.{0,20})?[\\\'|\\\"][0-9a-zA-Z]{32,45}[\\\'|\\\"]', 'match_type': "Generic API key"}, 26 | {'match_regex': '(?i)secret(.{0,20})?[\\\'|\\\"][0-9a-zA-Z]{32,45}[\\\'|\\\"]', 'match_type': "Generic Secret"}, 27 
| {'match_regex': 'AIza[0-9A-Za-z\\\-\_]{35,}', 'match_type': "Google API key"}, 28 | {'match_regex': '(?i)(google|gcp|youtube|drive|yt)(.{0,20})?[\\\'\\\"][AIza[0-9a-z\\\-\_]{35,}][\\\'\\\"]', 'match_type': "Google Cloud Platform API key"}, 29 | {'match_regex': '(?i)(google|gcp|auth)(.{0,20})?[\\\'\\\"][0-9]+-[0-9a-z_]{32,}\.apps\.googleusercontent\.com[\\\'\\\"]', 'match_type': "Google OAuth"}, 30 | {'match_regex': 'ya29\.[0-9A-Za-z\-_]+', 'match_type': "Google OAuth access token"}, 31 | {'match_regex': '(?i)heroku(.{0,20})?[\\\'\\\"][0-9a-f]{8}-[0-9a-f]{4}-[0-9a-f]{4}-[0-9a-f]{12}[\\\'\\\"]', 'match_type': "Heroku API key"}, 32 | {'match_regex': '(?i)(mailchimp|mc)(.{0,20})?[\\\'\\\"][0-9a-f]{32,}-us[0-9]{1,2}[\\\'\\\"]', 'match_type': "MailChimp API key"}, 33 | {'match_regex': '(?i)(mailgun|mg)(.{0,20})?[\\\'\\\"][0-9a-z]{32,}[\\\'\\\"]', 'match_type': "Mailgun API key"}, 34 | {'match_regex': '[a-zA-Z]{3,10}:\/\/[^\/\s:@]{3,20}:[^\/\s:@]{3,20}@.{1,100}\/?.?', 'match_type': "Password in URL"}, 35 | {'match_regex': 'access_token\$production\$[0-9a-z]{16,}\$[0-9a-f]{32,}', 'match_type': "PayPal Braintree access token"}, 36 | {'match_regex': 'sk_live_[0-9a-z]{32,}', 'match_type': "Picatic API key"}, 37 | {'match_regex': 'https\:\/\/hooks.slack.com\/services\/T[a-zA-Z0-9_]{8}\/B[a-zA-Z0-9_]{10,}\/[a-zA-Z0-9_]{24,}', 'match_type': "Slack Webhook"}, 38 | {'match_regex': '(?i)stripe(.{0,20})?[\\\'\\\"][sk|rk]_live_[0-9a-zA-Z]{24,}', 'match_type': "Stripe API key"}, 39 | {'match_regex': 'sq0atp-[0-9A-Za-z\-_]{22,}', 'match_type': "Square access token"}, 40 | {'match_regex': 'sq0csp-[0-9A-Za-z\\-_]{43,}', 'match_type': "Square OAuth secret"}, 41 | {'match_regex': '(?i)twilio(.{0,20})?[\\\'\\\"][0-9a-f]{32,}[\\\'\\\"]', 'match_type': "Twilio API key"} 42 | ] -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GitMonitor is a Github scanning system to look for leaked sensitive information based on rules 2 | 3 | [![License: GPL v3](https://img.shields.io/badge/License-GPL%20v3-blue.svg)](https://www.gnu.org/licenses/gpl-3.0) 4 |

![GitMonitor](images/GitMonitor-logo.png)
## Summary

GitMonitor is a GitHub scanning system that looks for leaked sensitive information based on rules. There are already a lot of very good tools for finding sensitive information leaked on GitHub, and I still use some of them myself. However, I think they lack a few features:

+ Scanning driven by rules.
+ A rules mechanism flexible enough to filter results by brand name, file name, file extension, and programming language, and to exclude specific extensions and languages (Searching rules). Repositories that match the rules are then cloned locally and scanned for sensitive information using regular expressions (Sensitive filtering rules). You can drive this with keywords related to your company's brand, your internal projects, email prefixes, or anything else you define in the rules.
+ The ability to run on a schedule with a flexible reporting mechanism.

That is why I created GitMonitor. GitMonitor uses two different sets of rules to find what you need: the Searching rules look for repositories that may be related to your organization, internal projects, or anything else, and clone the matches locally; the Sensitive filtering rules then check whether those repositories contain sensitive information. Finally, the tool reports the results via Slack. Combined with a cron job, GitMonitor becomes a monitoring system that tracks sensitive information about your organization leaked on GitHub and delivers the findings to Slack.

## Features

+ Search repositories based on rules (Searching rules). You can write rules to find repositories that may be related to your company; repositories matching the rules are cloned locally.
+ Use regexes (Sensitive filtering rules) to search the cloned repositories for sensitive information and classify the findings.
+ Report via Slack.
+ Rules and regexes are defined separately.
+ Users can define rules and regexes easily and intuitively.

![Working Diagram](images/diagram.png)

## Requirements

+ Python 3 and python3-pip

Tested on Ubuntu 18.04.

## Setup

+ Install the requirements:

```bash
python3 -m pip install -r requirements.txt
```

Please make sure you have PyYAML version 5.x or higher installed.

+ Fill in the required information in the configuration file (config.ini). Under `[git]`, `user` and `pass` are the GitHub username and password (or personal access token) used to authenticate the API requests; `[slack] webhooks` is the Slack incoming-webhook URL that receives the reports. Under `[path]`, `rule` is the directory that holds your `.yaml` rules, `source` is the working directory where matched repositories are cloned, and `log` is the name of the JSON log file kept inside that directory:

```ini
[git]
user =
pass =
url_code = https://api.github.com/search/code?q={}+in:file&sort=indexed&order=desc
url_repos = https://api.github.com/search/repositories?q={}+size:>0+is:public&sort=indexed&order=desc
url_commit = https://api.github.com/search/commits?q={}+is:public&sort=indexed&order=desc
rpp = 50

[slack]
webhooks =

[path]
rule =
source =
log =

[msg]
start = ====================**********====================

    *Start scanning at {}*
    _Clone completed successfully:_
end = ====================**********====================

    *Scanning Done at {}*
    _Detected possible repository:_
all = ====================**********====================

```
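
Internally, the tool flattens config.ini into a single dictionary keyed by `<section>_<option>`, so `[git] user` becomes `conf['git_user']`, `[path] rule` becomes `conf['path_rule']`, and so on; this is how the rest of the code refers to settings and is worth knowing if you add options of your own. A minimal sketch of that flattening, mirroring src/parse/config.py:

```python
from configparser import ConfigParser


def get_file_config(path):
    """Flatten every section/option pair of an INI file into '<section>_<option>' keys."""
    parser = ConfigParser()
    parser.read(path)
    conf = {}
    for section in parser.sections():
        for option in parser.options(section):
            # e.g. [git] user -> conf['git_user'], [path] rule -> conf['path_rule']
            conf["{}_{}".format(section, option)] = parser.get(section, option)
    return conf


# Example: the scanner builds its search URLs from these flattened keys,
# e.g. conf['git_url_code'].format(query) for a code search.
conf = get_file_config("config.ini")
```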

+ Write the rules (Searching rules) and put them in the rules directory:

```yaml
id: Project_X_Matching
key: X
language:
  - java
#filename:
#  - LICENSE
#extension:
#  - py
#  - md
ignore:
#  language:
#    - php
  filename:
    - LICENSE
  extension:
    - html
    - txt
```

+ Define the regular expressions (Sensitive filtering rules) in the libs/regexs.py file.

+ Run:

```bash
python3 gitmonitor.py
```

+ You can schedule the tool to run automatically by adding it to a cron job.

## My Team

+ [Tony](https://github.com/crazykid95) - Project Lead
+ [musashi137](https://github.com/musashi137) - Core Dev

## Special Thanks

+ [GitMAD](https://github.com/deepdivesec/GitMAD) for the regex-based sensitive information search mechanism

## Contributing

Many areas of this project could be improved or changed significantly while refactoring the current code and implementing new features. Feedback, improvements, and pull requests from the community are highly appreciated and will be accepted.

In general, we follow the "fork-and-pull" Git workflow:

1. Fork the repo on GitHub
2. Clone the project to your own machine
3. Commit changes to your own branch
4. Push your work back up to your fork
5. Submit a Pull request so that we can review your changes

NOTE: Be sure to merge the latest from "upstream" before making a pull request!

--------------------------------------------------------------------------------