├── truffleHog ├── __init__.py ├── regexChecks.py └── truffleHog.py ├── .config └── config.template ├── requirements.txt ├── .gitignore ├── LICENSE ├── scripts ├── parseGitrobSignatures.py ├── converted_gitron.txt ├── bitbucket.py └── gitrob-signatures.txt ├── conduct.md ├── README.md └── bitchecker.py /truffleHog/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /.config/config.template: -------------------------------------------------------------------------------- 1 | [BITBUCKET] 2 | username = provide 3 | secret = provide 4 | owner = provide 5 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | logging 2 | json2html 3 | json2csv 4 | GitPython 5 | unittest2 6 | pytest-cov 7 | codecov 8 | bitbucket 9 | requests_oauthlib 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | 3 | # Ignore dev folders and files 4 | .idea 5 | *.sublime* 6 | # Ignore folders 7 | **/repos/ 8 | **/checks/ 9 | **/tmp/ 10 | **/results/ 11 | **/logs/ 12 | **/venv/ 13 | 14 | # Ignore files with extensions 15 | *.json 16 | *.csv 17 | *.out 18 | *.db 19 | *.cfg 20 | *.log 21 | *.pyc 22 | 23 | # Ignore partiuclar files 24 | /.config/config.cfg 25 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2018 unk1nd0n3 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /scripts/parseGitrobSignatures.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # __version__ = '0.2' 3 | 4 | import json 5 | 6 | 7 | def read_json_file(path): 8 | """ 9 | Func for store json formatted data to local file 10 | :param path: string 11 | :return: none 12 | """ 13 | try: 14 | return json.loads(open(path).read()) 15 | except IOError: 16 | print("File is missed. Please check") 17 | 18 | 19 | def write_to_file(filename, data): 20 | """ 21 | Func for store dictionary to local file 22 | :param filename: string 23 | :param data: dictionary 24 | :return: None 25 | """ 26 | json_outfile = open(filename, 'w') 27 | json_outfile.write(data) 28 | json_outfile.close() 29 | 30 | 31 | def main(): 32 | """ 33 | 34 | :return: 35 | """ 36 | signatures = read_json_file('gitrob-signatures.txt') 37 | converted = '' 38 | for signature in signatures: 39 | regex = signature['pattern'].replace('\\A', '').replace('\\z', '').replace('\.?', '.?') 40 | line = '"{0}": re.compile(\'{1}\'),\n'.format(signature['caption'], regex) 41 | converted += line 42 | # print converted 43 | write_to_file('converted_gitron.txt', converted) 44 | return converted 45 | 46 | 47 | 48 | if __name__ == '__main__': 49 | main() -------------------------------------------------------------------------------- /conduct.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | In the interest of fostering an open and welcoming environment, we as contributors and maintainers pledge to making participation in our project and our community a harassment-free experience for everyone, regardless of age, body size, disability, ethnicity, gender identity and expression, level of experience, nationality, personal appearance, race, religion, or sexual identity and orientation. 6 | 7 | ## Our Standards 8 | 9 | Examples of behavior that contributes to creating a positive environment include: 10 | 11 | * Using welcoming and inclusive language 12 | * Being respectful of differing viewpoints and experiences 13 | * Gracefully accepting constructive criticism 14 | * Focusing on what is best for the community 15 | * Showing empathy towards other community members 16 | 17 | Examples of unacceptable behavior by participants include: 18 | 19 | * The use of sexualized language or imagery and unwelcome sexual attention or advances 20 | * Trolling, insulting/derogatory comments, and personal or political attacks 21 | * Public or private harassment 22 | * Publishing others' private information, such as a physical or electronic address, without explicit permission 23 | * Other conduct which could reasonably be considered inappropriate in a professional setting 24 | 25 | ## Our Responsibilities 26 | 27 | Project maintainers are responsible for clarifying the standards of acceptable behavior and are expected to take appropriate and fair corrective action in response to any instances of unacceptable behavior. 28 | 29 | Project maintainers have the right and responsibility to remove, edit, or reject comments, commits, code, wiki edits, issues, and other contributions that are not aligned to this Code of Conduct, or to ban temporarily or permanently any contributor for other behaviors that they deem inappropriate, threatening, offensive, or harmful. 30 | 31 | ## Scope 32 | 33 | This Code of Conduct applies both within project spaces and in public spaces when an individual is representing the project or its community. Examples of representing a project or community include using an official project e-mail address, posting via an official social media account, or acting as an appointed representative at an online or offline event. Representation of a project may be further defined and clarified by project maintainers. 34 | 35 | ## Enforcement 36 | 37 | Instances of abusive, harassing, or otherwise unacceptable behavior may be reported by contacting the project team at unkindone@gmail.com. The project team will review and investigate all complaints, and will respond in a way that it deems appropriate to the circumstances. The project team is obligated to maintain confidentiality with regard to the reporter of an incident. Further details of specific enforcement policies may be posted separately. 38 | 39 | Project maintainers who do not follow or enforce the Code of Conduct in good faith may face temporary or permanent repercussions as determined by other members of the project's leadership. 40 | 41 | ## Attribution 42 | 43 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], version 1.4, available at [http://contributor-covenant.org/version/1/4][version] 44 | 45 | [homepage]: http://contributor-covenant.org 46 | [version]: http://contributor-covenant.org/version/1/4/ 47 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Bitbucket Trufflehog 2 | 3 | Multithreading search through Bitbucket git repositories for sensitive data (see TruffleHog/regexChecks.py), digging deep into commit history, branches and filenames. 4 | This is effective at finding secrets accidentally committed. 5 | Regex patterns were expanded with rules from PasteHunter, Gitrob and mien own 6 | 7 | ## Getting Started 8 | 9 | These instructions will get you a copy of the project up and running on your local machine for development and testing purposes. 10 | See deployment for notes on how to deploy the project on a live system. 11 | 12 | ### Prerequisites and Installing ### 13 | Install all Py modules described in requirements.txt. 14 | 15 | ``` 16 | clone repository 17 | cd repository 18 | pip install virtualenv 19 | virtualenv venv 20 | source venv/bin/activate 21 | pip install -r requirements.txt 22 | ``` 23 | 24 | ### Configuration ### 25 | Rename config template .config/config.template to bitbucket-creds-checker/.config/config.cfg and add you credentials 26 | ```bash 27 | [BITBUCKET] 28 | username = ADD_YOUR_OWN_USERNAME 29 | secret = ADD_YOUR_OWN. Create separate App password in Bitbucket account with custom permissions 30 | owner = ADD_YOUR_OWN 31 | 32 | ``` 33 | 34 | ### Running ### 35 | How to use tool please read original Readme from TruffleHog: https://github.com/dxa4481/truffleHog 36 | 37 | 38 | ### Customizing ### 39 | Custom regexes can be added to the following file: 40 | ``` 41 | truffleHog/truffleHog/regexChecks.py 42 | ``` 43 | 44 | 45 | ### Running Examples ### 46 | Check Bibucket account with regex but without entropy, starting from 1st repo slug 47 | (sorted by ASC) and saving output to: html, csv 48 | ``` 49 | python bitchecker.py --regex --csv --html --entropy=False --starts_with 0 50 | ``` 51 | Check Bibucket account with regex but with entropy, maximum 5 last commits and saving output to: csv 52 | ``` 53 | python bitchecker.py --regex --csv --entropy=True --max_depth 5 54 | ``` 55 | Check Bibucket account with regex but with entropy, not cloning or fetching repository and 56 | saving output to: csv 57 | ``` 58 | python bitchecker.py --regex --csv --entropy=True --not_clone 59 | ``` 60 | Prepare report and statistic only for previous Bitbucker account analyze. See report in folder results/ 61 | ``` 62 | python bitchecker.py --regex --csv --entropy=True --report 63 | ``` 64 | 65 | ### Help ### 66 | 67 | ``` 68 | usage: bitchecker.py [-h] [--json] [--html] [--csv] [--regex] 69 | [--entropy DO_ENTROPY] [--since_commit SINCE_COMMIT] 70 | [--max_depth MAX_DEPTH] [--starts_with STARTS_WITH] 71 | [--report] [--not_clone] 72 | 73 | Find secrets hidden in the depths of git. 74 | 75 | optional arguments: 76 | -h, --help show this help message and exit 77 | --json Output in JSON 78 | --html Output in HTML 79 | --csv Output in CSV 80 | --regex Enable high signal regex checks 81 | --entropy DO_ENTROPY Enable entropy checks 82 | --since_commit SINCE_COMMIT 83 | Only scan from a given commit hash 84 | --max_depth MAX_DEPTH 85 | Max commit depth to go back when searching for secrets 86 | --starts_with STARTS_WITH 87 | Perform checks starting from N repository 88 | --report Calculate statistic if you've ready file with checks 89 | --not_clone No clone or fetch repositories (in case they were 90 | cloned before 91 | ### ToDo 92 | --- Add multithreading support. Very actual for large git repositories 93 | --- Improve regex pattern for sensitive data search -------------------------------------------------------------------------------- /scripts/converted_gitron.txt: -------------------------------------------------------------------------------- 1 | "Private SSH key": re.compile('.*_rsa'), 2 | "Private SSH key": re.compile('.*_dsa'), 3 | "Private SSH key": re.compile('.*_ed25519'), 4 | "Private SSH key": re.compile('.*_ecdsa'), 5 | "SSH configuration file": re.compile('.?ssh/config'), 6 | "Potential cryptographic private key": re.compile('pem'), 7 | "Potential cryptographic private key": re.compile('key(pair)?'), 8 | "Potential cryptographic key bundle": re.compile('pkcs12'), 9 | "Potential cryptographic key bundle": re.compile('pfx'), 10 | "Potential cryptographic key bundle": re.compile('p12'), 11 | "Potential cryptographic key bundle": re.compile('asc'), 12 | "Pidgin OTR private key": re.compile('otr.private_key'), 13 | "Shell command history file": re.compile('.?(bash_|zsh_|z)?history'), 14 | "MySQL client command history file": re.compile('.?mysql_history'), 15 | "PostgreSQL client command history file": re.compile('.?psql_history'), 16 | "PostgreSQL password file": re.compile('.?pgpass'), 17 | "Ruby IRB console history file": re.compile('.?irb_history'), 18 | "Pidgin chat client account configuration file": re.compile('.?purple\/accounts\.xml'), 19 | "Hexchat/XChat IRC client server list configuration file": re.compile('.?xchat2?\/servlist_?\.conf'), 20 | "Irssi IRC client configuration file": re.compile('.?irssi\/config'), 21 | "Recon-ng web reconnaissance framework API key database": re.compile('.?recon-ng\/keys\.db'), 22 | "DBeaver SQL database manager configuration file": re.compile('.?dbeaver-data-sources.xml'), 23 | "Mutt e-mail client configuration file": re.compile('.?muttrc'), 24 | "S3cmd configuration file": re.compile('.?s3cfg'), 25 | "AWS CLI credentials file": re.compile('.?aws/credentials'), 26 | "T command-line Twitter client configuration file": re.compile('.?trc'), 27 | "OpenVPN client configuration file": re.compile('ovpn'), 28 | "Well, this is awkward... Gitrob configuration file": re.compile('.?gitrobrc'), 29 | "Shell configuration file": re.compile('.?(bash|zsh)rc'), 30 | "Shell profile configuration file": re.compile('.?(bash_|zsh_)?profile'), 31 | "Shell command alias configuration file": re.compile('.?(bash_|zsh_)?aliases'), 32 | "Ruby On Rails secret token configuration file": re.compile('secret_token.rb'), 33 | "OmniAuth configuration file": re.compile('omniauth.rb'), 34 | "Carrierwave configuration file": re.compile('carrierwave.rb'), 35 | "Ruby On Rails database schema file": re.compile('schema.rb'), 36 | "Potential Ruby On Rails database configuration file": re.compile('database.yml'), 37 | "Django configuration file": re.compile('settings.py'), 38 | "PHP configuration file": re.compile('(.*)?config(\.inc)?\.php'), 39 | "KeePass password manager database file": re.compile('kdb'), 40 | "1Password password manager database file": re.compile('agilekeychain'), 41 | "Apple Keychain database file": re.compile('keychain'), 42 | "GNOME Keyring database file": re.compile('key(store|ring)'), 43 | "Log file": re.compile('log'), 44 | "Network traffic capture file": re.compile('pcap'), 45 | "SQL dump file": re.compile('sql(dump)?'), 46 | "GnuCash database file": re.compile('gnucash'), 47 | "Contains word: backup": re.compile('backup'), 48 | "Contains word: dump": re.compile('dump'), 49 | "Contains word: password": re.compile('password'), 50 | "Contains word: credential": re.compile('credential'), 51 | "Contains word: secret": re.compile('secret'), 52 | "Contains words: private, key": re.compile('private.*key'), 53 | "Jenkins publish over SSH plugin file": re.compile('jenkins.plugins.publish_over_ssh.BapSshPublisherPlugin.xml'), 54 | "Potential Jenkins credentials file": re.compile('credentials.xml'), 55 | "Apache htpasswd file": re.compile('.?htpasswd'), 56 | "Configuration file for auto-login process": re.compile('(\.|_)?netrc'), 57 | "KDE Wallet Manager database file": re.compile('kwallet'), 58 | "Potential MediaWiki configuration file": re.compile('LocalSettings.php'), 59 | "Tunnelblick VPN configuration file": re.compile('tblk'), 60 | "Rubygems credentials file": re.compile('.?gem/credentials'), 61 | "Potential MSBuild publish profile": re.compile('*\.pubxml(\.user)?'), 62 | "Sequel Pro MySQL database manager bookmark file": re.compile('Favorites.plist'), 63 | "Little Snitch firewall configuration file": re.compile('configuration.user.xpl'), 64 | "Day One journal file": re.compile('dayone'), 65 | "Potential jrnl journal file": re.compile('journal.txt'), 66 | "Tugboat DigitalOcean management tool configuration": re.compile('.?tugboat'), 67 | "git-credential-store helper credentials file": re.compile('.?git-credentials'), 68 | "Git configuration file": re.compile('.?gitconfig'), 69 | "Chef Knife configuration file": re.compile('knife.rb'), 70 | "Chef private key": re.compile('.?chef/(.*)\.pem'), 71 | "cPanel backup ProFTPd credentials file": re.compile('proftpdpasswd'), 72 | "Robomongo MongoDB manager configuration file": re.compile('robomongo.json'), 73 | "FileZilla FTP configuration file": re.compile('filezilla.xml'), 74 | "FileZilla FTP recent servers file": re.compile('recentservers.xml'), 75 | "Ventrilo server configuration file": re.compile('ventrilo_srv.ini'), 76 | "Docker configuration file": re.compile('.?dockercfg'), 77 | "NPM configuration file": re.compile('.?npmrc'), 78 | "Terraform variable config file": re.compile('terraform.tfvars'), 79 | "Environment configuration file": re.compile('.?env'), 80 | -------------------------------------------------------------------------------- /scripts/bitbucket.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # git+git://github.com/Sheeprider/BitBucket-api.git 3 | 4 | __all__ = ['Bitbucket', ] 5 | 6 | try: 7 | from urlparse import parse_qs 8 | except ImportError: 9 | from urllib.parse import parse_qs 10 | 11 | import json 12 | import re 13 | 14 | from requests import Request, Session 15 | from requests_oauthlib import OAuth1 16 | import requests 17 | 18 | from .issue import Issue 19 | from .repository import Repository 20 | from .service import Service 21 | from .ssh import SSH 22 | from .deploy_key import DeployKey 23 | 24 | 25 | # ======== 26 | # = URLs = 27 | # ======== 28 | URLS = { 29 | 'BASE': 'https://api.bitbucket.org/2.0/%s', 30 | # Get user profile and repos 31 | 'GET_USER': 'users/%(username)s/', 32 | 'GET_USER_PRIVILEGES': 'user/privileges', 33 | # Search repo 34 | # 'SEARCH_REPO': 'repositories/?name=%(search)s', 35 | # Get tags & branches 36 | 'GET_TAGS': 'repositories/%(username)s/%(repo_slug)s/tags/', 37 | 'GET_BRANCHES': 'repositories/%(username)s/%(repo_slug)s/branches/', 38 | 39 | 'REQUEST_TOKEN': 'oauth/request_token/', 40 | 'AUTHENTICATE': 'oauth/authenticate?oauth_token=%(token)s', 41 | 'ACCESS_TOKEN': 'oauth/access_token/' 42 | } 43 | 44 | 45 | class Bitbucket(object): 46 | """ This class lets you interact with the bitbucket public API. """ 47 | def __init__(self, username='', password='', repo_name_or_slug=''): 48 | self.username = username 49 | self.password = password 50 | self.repo_slug = repo_name_or_slug 51 | self.repo_tree = {} 52 | self.URLS = URLS 53 | 54 | self.repository = Repository(self) 55 | self.service = Service(self) 56 | self.ssh = SSH(self) 57 | self.issue = Issue(self) 58 | self.deploy_key = DeployKey(self) 59 | 60 | self.access_token = None 61 | self.access_token_secret = None 62 | self.consumer_key = None 63 | self.consumer_secret = None 64 | self.oauth = None 65 | 66 | # =================== 67 | # = Getters/Setters = 68 | # =================== 69 | 70 | @property 71 | def auth(self): 72 | """ Return credentials for current Bitbucket user. """ 73 | if self.oauth: 74 | return self.oauth 75 | return (self.username, self.password) 76 | 77 | @property 78 | def username(self): 79 | """Return your repository's username.""" 80 | return self._username 81 | 82 | @username.setter 83 | def username(self, value): 84 | try: 85 | if isinstance(value, basestring): 86 | self._username = unicode(value) 87 | except NameError: 88 | self._username = value 89 | 90 | if value is None: 91 | self._username = None 92 | 93 | @username.deleter 94 | def username(self): 95 | del self._username 96 | 97 | @property 98 | def password(self): 99 | """Return your repository's password.""" 100 | return self._password 101 | 102 | @password.setter 103 | def password(self, value): 104 | try: 105 | if isinstance(value, basestring): 106 | self._password = unicode(value) 107 | except NameError: 108 | self._password = value 109 | 110 | if value is None: 111 | self._password = None 112 | 113 | @password.deleter 114 | def password(self): 115 | del self._password 116 | 117 | @property 118 | def repo_slug(self): 119 | """Return your repository's slug name.""" 120 | return self._repo_slug 121 | 122 | @repo_slug.setter 123 | def repo_slug(self, value): 124 | if value is None: 125 | self._repo_slug = None 126 | else: 127 | try: 128 | if isinstance(value, basestring): 129 | value = unicode(value) 130 | except NameError: 131 | pass 132 | value = value.lower() 133 | self._repo_slug = re.sub(r'[^a-z0-9_-]+', '-', value) 134 | 135 | @repo_slug.deleter 136 | def repo_slug(self): 137 | del self._repo_slug 138 | 139 | # ======================== 140 | # = Oauth authentication = 141 | # ======================== 142 | 143 | def authorize(self, consumer_key, consumer_secret, callback_url=None, 144 | access_token=None, access_token_secret=None): 145 | """ 146 | Call this with your consumer key, secret and callback URL, to 147 | generate a token for verification. 148 | """ 149 | self.consumer_key = consumer_key 150 | self.consumer_secret = consumer_secret 151 | 152 | if not access_token and not access_token_secret: 153 | if not callback_url: 154 | return (False, "Callback URL required") 155 | oauth = OAuth1( 156 | consumer_key, 157 | client_secret=consumer_secret, 158 | callback_uri=callback_url) 159 | r = requests.post(self.url('REQUEST_TOKEN'), auth=oauth) 160 | if r.status_code == 200: 161 | creds = parse_qs(r.content) 162 | 163 | self.access_token = creds.get('oauth_token')[0] 164 | self.access_token_secret = creds.get('oauth_token_secret')[0] 165 | else: 166 | return (False, r.content) 167 | else: 168 | self.finalize_oauth(access_token, access_token_secret) 169 | 170 | return (True, None) 171 | 172 | def verify(self, verifier, consumer_key=None, consumer_secret=None, 173 | access_token=None, access_token_secret=None): 174 | """ 175 | After converting the token into verifier, call this to finalize the 176 | authorization. 177 | """ 178 | # Stored values can be supplied to verify 179 | self.consumer_key = consumer_key or self.consumer_key 180 | self.consumer_secret = consumer_secret or self.consumer_secret 181 | self.access_token = access_token or self.access_token 182 | self.access_token_secret = access_token_secret or self.access_token_secret 183 | 184 | oauth = OAuth1( 185 | self.consumer_key, 186 | client_secret=self.consumer_secret, 187 | resource_owner_key=self.access_token, 188 | resource_owner_secret=self.access_token_secret, 189 | verifier=verifier) 190 | r = requests.post(self.url('ACCESS_TOKEN'), auth=oauth) 191 | if r.status_code == 200: 192 | creds = parse_qs(r.content) 193 | else: 194 | return (False, r.content) 195 | 196 | self.finalize_oauth(creds.get('oauth_token')[0], 197 | creds.get('oauth_token_secret')[0]) 198 | return (True, None) 199 | 200 | def finalize_oauth(self, access_token, access_token_secret): 201 | """ Called internally once auth process is complete. """ 202 | self.access_token = access_token 203 | self.access_token_secret = access_token_secret 204 | 205 | # Final OAuth object 206 | self.oauth = OAuth1( 207 | self.consumer_key, 208 | client_secret=self.consumer_secret, 209 | resource_owner_key=self.access_token, 210 | resource_owner_secret=self.access_token_secret) 211 | 212 | # ====================== 213 | # = High lvl functions = 214 | # ====================== 215 | 216 | def dispatch(self, method, url, auth=None, params=None, **kwargs): 217 | """ Send HTTP request, with given method, 218 | credentials and data to the given URL, 219 | and return the success and the result on success. 220 | """ 221 | r = Request( 222 | method=method, 223 | url=url, 224 | auth=auth, 225 | params=params, 226 | data=kwargs) 227 | s = Session() 228 | resp = s.send(r.prepare()) 229 | status = resp.status_code 230 | text = resp.text 231 | error = resp.reason 232 | if status >= 200 and status < 300: 233 | if text: 234 | try: 235 | return (True, json.loads(text)) 236 | except TypeError: 237 | pass 238 | except ValueError: 239 | pass 240 | return (True, text) 241 | elif status >= 300 and status < 400: 242 | return ( 243 | False, 244 | 'Unauthorized access, ' 245 | 'please check your credentials.') 246 | elif status >= 400 and status < 500: 247 | return (False, 'Service not found.') 248 | elif status >= 500 and status < 600: 249 | return (False, 'Server error.') 250 | else: 251 | return (False, error) 252 | 253 | def url(self, action, **kwargs): 254 | """ Construct and return the URL for a specific API service. """ 255 | # TODO : should be static method ? 256 | return self.URLS['BASE'] % self.URLS[action] % kwargs 257 | 258 | # ===================== 259 | # = General functions = 260 | # ===================== 261 | 262 | def get_user(self, username=None): 263 | """ Returns user informations. 264 | If username is not defined, tries to return own informations. 265 | """ 266 | username = username or self.username or '' 267 | url = self.url('GET_USER', username=username) 268 | response = self.dispatch('GET', url) 269 | try: 270 | return (response[0], response[1]['user']) 271 | except TypeError: 272 | pass 273 | return response 274 | 275 | def get_tags(self, repo_slug=None): 276 | """ Get a single repository on Bitbucket and return its tags.""" 277 | repo_slug = repo_slug or self.repo_slug or '' 278 | url = self.url('GET_TAGS', username=self.username, repo_slug=repo_slug) 279 | return self.dispatch('GET', url, auth=self.auth) 280 | 281 | def get_branches(self, repo_slug=None): 282 | """ Get a single repository on Bitbucket and return its branches.""" 283 | repo_slug = repo_slug or self.repo_slug or '' 284 | url = self.url('GET_BRANCHES', 285 | username=self.username, 286 | repo_slug=repo_slug) 287 | return self.dispatch('GET', url, auth=self.auth) 288 | 289 | def get_privileges(self): 290 | """ Get privledges for this user. """ 291 | url = self.url('GET_USER_PRIVILEGES') 292 | return self.dispatch('GET', url, auth=self.auth) 293 | -------------------------------------------------------------------------------- /truffleHog/regexChecks.py: -------------------------------------------------------------------------------- 1 | import re 2 | 3 | regexes_txt = { 4 | # Compnay internal domains 5 | "Internal Dev subdomain": re.compile('([a-z0-9]+[.]*[.]addyourDEVdomain[.]com)'), 6 | "Internal MGMT subdomain": re.compile('([a-z0-9]+[.]*[.]addyourMGMTdomain[.]com)'), 7 | "Internal PROD subdomain": re.compile('([a-z0-9]+[.]*[.]addyourPRODdomain[.]com)'), 8 | # Core keywords 9 | "Slack Token": re.compile('(xox(p|b|o|a)-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32})'), 10 | "RSA private key": re.compile('BEGIN RSA PRIVATE KEY'), 11 | "SSH(OPENSSH) private key": re.compile('BEGIN OPENSSH PRIVATE KEY'), 12 | "SSH(DSA) private key": re.compile('BEGIN DSA PRIVATE KEY'), 13 | "SSH(EC) private key": re.compile('BEGIN EC PRIVATE KEY'), 14 | "PGP private key block": re.compile('BEGIN PGP PRIVATE KEY'), 15 | "Hacked by": re.compile('hacked by'), 16 | "Tor onion URI": re.compile('.*[.]onion'), 17 | # Credentials 18 | "Contains hardcoded username": re.compile('(?i)(db_)?(user|username)[\'|"]?\s*i?s?e?q?[!:=]+\s*[^{%()+$\\n\\r]+?.+?[\'|"]+?[^{%()+$\\n\\r]+?'), 19 | "Contains hardcoded password": re.compile('(?i)(pass|password)[\'|"]?\s*i?s?e?q?[!:=]+\s*[^{%()+$\\n\\r]+?.+?[\'|"]+?[^{%()+$\\n\\r]+?'), 20 | "Possible password leak": re.compile('\b([@a-zA-Z0-9._-]{5,})(:|\|)(.*)\b'), 21 | "Possible email leak": re.compile('[a-zA-Z0-9_.+-]+?@[a-zA-Z0-9-]+?\.[a-zA-Z0-9-.]+'), 22 | "Contains hardcoded credential": re.compile('(?i)credential[\'|"]?\s*i?s?e?q?[!:=]+\s*[^{%()+$\\n\\r]+?.+?[\'|"]+?[^{%()+$\\n\\r]+?'), 23 | "Contains hardcoded secret": re.compile('(?i)secret[\'|"]?\s*i?s?e?q?[!:=]+\s*[^{%()+$\\n\\r]+?.+?[\'|"]+?[^{%()+$\\n\\r]+?'), 24 | "Facebook Oauth": re.compile('(?i)facebook.*[\'|"][0-9a-f]{32}[\'|"]'), 25 | "Twitter Oauth": re.compile('(?i)twitter.*[\'|"][0-9a-zA-Z]{35,44}[\'|"]'), 26 | "GitHub": re.compile('(?i)github.*[[\'|"]0-9a-zA-Z]{35,40}[\'|"]'), 27 | "Google Oauth": re.compile('("(?i)client_secret":"[a-zA-Z0-9-_]{24}")'), 28 | "AWS API Key": re.compile('AKIA[0-9A-Z]{16}'), 29 | "Google API Key": re.compile('\bAIza.{35}\b'), 30 | "API Key hash32": re.compile('\b[a-fA-F\d]{32}\b'), 31 | "API Key hash64": re.compile('\b[a-fA-F\d]{64}\b'), 32 | "Heroku API Key": re.compile('[h|H][e|E][r|R][o|O][k|K][u|U].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}'), 33 | "Generic Secret": re.compile('(?i)secret.*[\'|"][0-9a-zA-Z]{32,45}[\'|"]'), 34 | # Database sensitive data 35 | "Database connection string": re.compile('\b(mongodb|http|https|ftp|mysql|postgresql|oracle)://(\S*):(\S*)@(\S*)\b'), 36 | "DML: GRANT ALL PRIVILEGES": re.compile('GRANT ALL PRIVILEGES'), 37 | "DML: IDENTIFIED BY": re.compile('IDENTIFIED BY'), 38 | "DML: GRANT SELECT": re.compile('GRANT SELECT'), 39 | "DML: CREATE USER": re.compile('CREATE USER'), 40 | # Php obfuscation 41 | "Php: dangerous function eval": re.compile('eval\('), 42 | "Php: dangerous function gzinflate": re.compile('gzinflate\('), 43 | "Php: dangerous function base64_decode": re.compile('base64_decode\('), 44 | "Php: dangerous function base64_decode in HEX": re.compile('\\142\\x61\\163\\145\\x36\\x34\\137\\144\\x65\\x63\\x6f\\x64\\x65'), 45 | "Php: dangerous function str_rot13": re.compile('str_rot13\(') 46 | } 47 | 48 | regexes_fs = { 49 | # Application config files 50 | "File: SQL dump file": re.compile('\S+\.sql(dump)?'), 51 | "Sequel Pro MySQL database manager bookmark file": re.compile('Favorites.plist'), 52 | "MySQL client command history file": re.compile('.?mysql_history'), 53 | "PostgreSQL client command history file": re.compile('.?psql_history'), 54 | "PostgreSQL password file": re.compile('.?pgpass'), 55 | "SSH configuration file": re.compile('.?ssh/config'), 56 | "Pidgin OTR private key": re.compile('otr.private_key'), 57 | "Shell command history file": re.compile('.?(bash_|zsh_|z)?history'), 58 | "Ruby IRB console history file": re.compile('.?irb_history'), 59 | "Recon-ng web reconnaissance framework API key database": re.compile('.?recon-ng/keys\.db'), 60 | "S3cmd configuration file": re.compile('.?s3cfg'), 61 | # "Files with private keys": re.compile('private.*key'), 62 | "AWS CLI credentials file": re.compile('.?aws/credentials'), 63 | "T command-line Twitter client configuration file": re.compile('.?trc'), 64 | "OpenVPN client configuration file": re.compile('ovpn'), 65 | "Shell configuration file": re.compile('.?(bash|zsh)rc'), 66 | "Shell profile configuration file": re.compile('.?(bash_|zsh_)?profile$'), 67 | "Shell command alias configuration file": re.compile('.?(bash_|zsh_)?aliases$'), 68 | "Ruby On Rails secret token configuration file": re.compile('secret_token\.rb$'), 69 | "Ruby On Rails database schema file": re.compile('schema\.rb$'), 70 | "Potential Ruby On Rails database configuration file": re.compile('database\.yml$'), 71 | "Django configuration file": re.compile('settings\.py$'), 72 | "PHP configuration file": re.compile('(.*)?config\.(inc|php)$'), 73 | "KeePass password manager database file": re.compile('kdb'), 74 | "1Password password manager database file": re.compile('agilekeychain'), 75 | "Apple Keychain database file": re.compile('keychain$'), 76 | "GNOME Keyring database file": re.compile('key(store|ring)$'), 77 | "Log file": re.compile('\w+\.log$'), 78 | "Network traffic capture file": re.compile('pcap$'), 79 | "GnuCash database file": re.compile('gnucash$'), 80 | "Contains word: backup": re.compile('(?!.*(py|sh|php|rb)$)(?=.*backup[^/]*)^(.*)'), 81 | "Contains word: dump": re.compile('(?!.*(py|sh|php|rb)$)(?=.*dump[^/]*)^(.*)'), 82 | "Jenkins publish over SSH plugin file": re.compile('jenkins.plugins.publish_over_ssh.BapSshPublisherPlugin.xml'), 83 | "Potential Jenkins credentials file": re.compile('credentials\.xml$'), 84 | "Apache htpasswd file": re.compile('.?htpasswd$'), 85 | "Configuration file for auto-login process": re.compile('(\.|_)?netrc$'), 86 | "KDE Wallet Manager database file": re.compile('kwallet$'), 87 | "Potential MediaWiki configuration file": re.compile('(?i)LocalSettings\.php$'), 88 | "Tunnelblick VPN configuration file": re.compile('tblk'), 89 | "Rubygems credentials file": re.compile('.?gem/credentials'), 90 | "Potential MSBuild publish profile": re.compile('.*\.pubxml(\.user)?'), 91 | "Little Snitch firewall configuration file": re.compile('configuration.user.xpl'), 92 | "Day One journal file": re.compile('dayone'), 93 | "Potential jrnl journal file": re.compile('journal.txt'), 94 | "Tugboat DigitalOcean management tool configuration": re.compile('.?tugboat'), 95 | "git-credential-store helper credentials file": re.compile('.?git-credentials'), 96 | "Git configuration file": re.compile('.?gitconfig'), 97 | "Chef Knife configuration file": re.compile('knife.rb'), 98 | "Chef private key": re.compile('.?chef/(.*)\.pem'), 99 | "cPanel backup ProFTPd credentials file": re.compile('proftpdpasswd'), 100 | "Robomongo MongoDB manager configuration file": re.compile('robomongo.json'), 101 | "FileZilla FTP configuration file": re.compile('filezilla.xml'), 102 | "FileZilla FTP recent servers file": re.compile('recentservers.xml'), 103 | "Ventrilo server configuration file": re.compile('ventrilo_srv.ini'), 104 | "Docker configuration file": re.compile('.?dockercfg'), 105 | "NPM configuration file": re.compile('.?npmrc'), 106 | "Terraform variable config file": re.compile('terraform.tfvars'), 107 | "Environment configuration file": re.compile('.?env'), 108 | "Potential cryptographic private key pem": re.compile('pem'), 109 | "Potential cryptographic private key": re.compile('private.*key(pair)?'), 110 | "Potential cryptographic key bundle ": re.compile('(pkcs12|pfx|p12|asc)'), 111 | "Private SSH key": re.compile('.*_(rsa|dsa|ed25519|ecdsa)'), 112 | "Pidgin chat client account configuration file": re.compile('.?purple/accounts\.xml'), 113 | "Hexchat/XChat IRC client server list configuration file": re.compile('.?xchat2?/servlist_?\.conf'), 114 | "Irssi IRC client configuration file": re.compile('.?irssi/config'), 115 | "DBeaver SQL database manager configuration file": re.compile('.?dbeaver-data-sources.xml'), 116 | "Mutt e-mail client configuration file": re.compile('.?muttrc'), 117 | "OmniAuth configuration file": re.compile('omniauth\.rb'), 118 | "Carrierwave configuration file": re.compile('carrierwave\.rb'), 119 | "Well, this is awkward... Gitrob configuration file": re.compile('.?gitrobrc') 120 | } 121 | 122 | regexes_txt_raw = { 123 | # Compnay internal domains 124 | "Internal DEV subdomain": '([a-z0-9]+[.]*[.]addyourDEVdomain[.]com)', 125 | "Internal MGMT subdomain": '([a-z0-9]+[.]*[.]addyourMGMTdomain[.]com)', 126 | "Internal PROD subdomain": '([a-z0-9]+[.]*[.]addyourPRODdomain[.]com)', 127 | # Core keywords 128 | "Slack Token": '(xox[p|b|o|a]-[0-9]{12}-[0-9]{12}-[0-9]{12}-[a-z0-9]{32})', 129 | "RSA private key": 'BEGIN RSA PRIVATE KEY', 130 | "SSH(OPENSSH) private key": 'BEGIN OPENSSH PRIVATE KEY', 131 | "SSH(DSA) private key": 'BEGIN DSA PRIVATE KEY', 132 | "SSH(EC) private key": 'BEGIN EC PRIVATE KEY', 133 | "PGP private key block": 'BEGIN PGP PRIVATE KEY', 134 | "Hacked by": 'hacked by', 135 | "Tor onion URI": '.*[.]onion', 136 | # Credentials 137 | "Contains word username": '(?i)(db_)?(user(name)?|usrname)[\'|"]?\s*?[:=,]+\s*[\'|"]+[^{]+?\S+?[^}][\'|"]', 138 | "Contains word: password": '(?i)pass(word)?[\'|"]?\s*?[:=,]+\s*[\'|"]+[^{]+?\S+?[^}][\'|"]', 139 | "Possible password leak": '\b([@a-zA-Z0-9._-]{5,})(:|\|)(.*)\b', 140 | "Possible email leak": '[a-zA-Z0-9_.+-]+?@[a-zA-Z0-9-]+?\.[a-zA-Z0-9-.]+', 141 | "Contains word: credential": '(?i)credential?\s*[:=,]+\s*?[\'|"].*?[\'|"]', 142 | "Contains word: secret": '(?i)sec(ret)?\s*?=\s*?[\'|"].*?[\'|"]', 143 | "Facebook Oauth": '(?i)facebook.*[\'|"][0-9a-f]{32}[\'|"]', 144 | "Twitter Oauth": '(?i)twitter.*[\'|"][0-9a-zA-Z]{35,44}[\'|"]', 145 | "GitHub": '(?i)github.*[[\'|"]0-9a-zA-Z]{35,40}[\'|"]', 146 | "Google Oauth": '("(?i)client_secret":"[a-zA-Z0-9-_]{24}")', 147 | "AWS API Key": 'AKIA[0-9A-Z]{16}', 148 | "Google API Key": '\bAIza.{35}\b', 149 | "API Key hash32": '\b[a-fA-F\d]{32}\b', 150 | "API Key hash64": '\b[a-fA-F\d]{64}\b', 151 | "Heroku API Key": '[h|H][e|E][r|R][o|O][k|K][u|U].*[0-9A-F]{8}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{4}-[0-9A-F]{12}', 152 | "Generic Secret": '(?i)secret.*[\'|"][0-9a-zA-Z]{32,45}[\'|"]', 153 | # Database sensitive data 154 | "Database connection string": '\b(mongodb|http|https|ftp|mysql|postgresql|oracle):\/\/(\S*):(\S*)@(\S*)\b', 155 | "DML: GRANT ALL PRIVILEGES": 'GRANT ALL PRIVILEGES', 156 | "DML: IDENTIFIED BY": 'IDENTIFIED BY', 157 | "DML: GRANT SELECT": 'GRANT SELECT', 158 | "DML: CREATE USER": 'CREATE USER', 159 | # Php obfuscation 160 | "Php: dangerous function eval": 'eval\(', 161 | "Php: dangerous function gzinflate": 'gzinflate\(', 162 | "Php: dangerous function base64_decode": 'base64_decode\(', 163 | "Php: dangerous function base64_decode HEX": '\\142\\x61\\163\\145\\x36\\x34\\137\\144\\x65\\x63\\x6f\\x64\\x65', 164 | "Php: dangerous function str_rot13": 'str_rot13\(' 165 | } 166 | 167 | regexes_fs_raw = { 168 | # Application config files 169 | "File: SQL dump file": 'sql(dump)?', 170 | "Sequel Pro MySQL database manager bookmark file": 'Favorites.plist', 171 | "MySQL client command history file": '.?mysql_history', 172 | "PostgreSQL client command history file": '.?psql_history', 173 | "PostgreSQL password file": '.?pgpass', 174 | "SSH configuration file": '.?ssh/config', 175 | "Pidgin OTR private key": 'otr.private_key', 176 | "Shell command history file": '.?(bash_|zsh_|z)?history', 177 | "Ruby IRB console history file": '.?irb_history', 178 | "Recon-ng web reconnaissance framework API key database": '.?recon-ng\/keys\.db', 179 | "S3cmd configuration file": '.?s3cfg', 180 | # "Files with private keys": 'private.*key', 181 | "AWS CLI credentials file": '.?aws/credentials', 182 | "T command-line Twitter client configuration file": '.?trc', 183 | "OpenVPN client configuration file": 'ovpn', 184 | "Shell configuration file": '.?(bash|zsh)rc', 185 | "Shell profile configuration file": '.?(bash_|zsh_)?profile', 186 | "Shell command alias configuration file": '.?(bash_|zsh_)?aliases', 187 | "Ruby On Rails secret token configuration file": 'secret_token\.rb', 188 | "Ruby On Rails database schema file": 'schema\.rb', 189 | "Potential Ruby On Rails database configuration file": 'database\.yml', 190 | "Django configuration file": 'settings\.py', 191 | "PHP configuration file": '(.*)?config(\.inc)?\.php', 192 | "KeePass password manager database file": 'kdb', 193 | "1Password password manager database file": 'agilekeychain', 194 | "Apple Keychain database file": 'keychain', 195 | "GNOME Keyring database file": 'key(store|ring)', 196 | "Log file": 'log', 197 | "Network traffic capture file": 'pcap', 198 | "GnuCash database file": 'gnucash', 199 | "Contains word: backup": 'backup', 200 | "Contains word: dump": 'dump', 201 | "Jenkins publish over SSH plugin file": 'jenkins.plugins.publish_over_ssh.BapSshPublisherPlugin.xml', 202 | "Potential Jenkins credentials file": 'credentials.xml', 203 | "Apache htpasswd file": '.?htpasswd', 204 | "Configuration file for auto-login process": '(\.|_)?netrc', 205 | "KDE Wallet Manager database file": 'kwallet', 206 | "Potential MediaWiki configuration file": 'LocalSettings\.php', 207 | "Tunnelblick VPN configuration file": 'tblk', 208 | "Rubygems credentials file": '.?gem/credentials', 209 | "Potential MSBuild publish profile": '.*\.pubxml(\.user)?', 210 | "Little Snitch firewall configuration file": 'configuration.user.xpl', 211 | "Day One journal file": 'dayone', 212 | "Potential jrnl journal file": 'journal.txt', 213 | "Tugboat DigitalOcean management tool configuration": '.?tugboat', 214 | "git-credential-store helper credentials file": '.?git-credentials', 215 | "Git configuration file": '.?gitconfig', 216 | "Chef Knife configuration file": 'knife.rb', 217 | "Chef private key": '.?chef/(.*)\.pem', 218 | "cPanel backup ProFTPd credentials file": 'proftpdpasswd', 219 | "Robomongo MongoDB manager configuration file": 'robomongo.json', 220 | "FileZilla FTP configuration file": 'filezilla.xml', 221 | "FileZilla FTP recent servers file": 'recentservers.xml', 222 | "Ventrilo server configuration file": 'ventrilo_srv.ini', 223 | "Docker configuration file": '.?dockercfg', 224 | "NPM configuration file": '.?npmrc', 225 | "Terraform variable config file": 'terraform.tfvars', 226 | "Environment configuration file": '.?env', 227 | "Potential cryptographic private key pem": 'pem', 228 | "Potential cryptographic private key": 'private.*key(pair)?', 229 | "Potential cryptographic key bundle": '(pkcs12|pfx|p12|asc)', 230 | "Private SSH key": '.*_(rsa|dsa|ed25519|ecdsa)', 231 | "Pidgin chat client account configuration file": '.?purple\/accounts\.xml', 232 | "Hexchat/XChat IRC client server list configuration file": '.?xchat2?\/servlist_?\.conf', 233 | "Irssi IRC client configuration file": '.?irssi\/config', 234 | "DBeaver SQL database manager configuration file": '.?dbeaver-data-sources.xml', 235 | "Mutt e-mail client configuration file": '.?muttrc', 236 | "OmniAuth configuration file": 'omniauth\.rb', 237 | "Carrierwave configuration file": 'carrierwave\.rb', 238 | "Well, this is awkward... Gitrob configuration file": '.?gitrobrc' 239 | } 240 | -------------------------------------------------------------------------------- /truffleHog/truffleHog.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python 2 | # -*- coding: utf-8 -*- 3 | import sys 4 | import math 5 | import datetime 6 | import argparse 7 | import os 8 | import json 9 | import stat 10 | import time 11 | import re 12 | from regexChecks import regexes_txt, regexes_fs 13 | from git import Repo 14 | 15 | BASE64_CHARS = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/=" 16 | HEX_CHARS = "1234567890abcdefABCDEF" 17 | 18 | # Get current date 19 | CURR_TIME = time.strftime("%Y-%m-%d %H:%M:%S", time.gmtime()) 20 | 21 | 22 | def str2bool(v): 23 | if not v: 24 | return True 25 | if v.lower() in ('yes', 'true', 't', 'y', '1'): 26 | return True 27 | elif v.lower() in ('no', 'false', 'f', 'n', '0'): 28 | return False 29 | else: 30 | raise argparse.ArgumentTypeError('Boolean value expected.') 31 | 32 | 33 | def del_rw(action, name, exc): 34 | os.chmod(name, stat.S_IWRITE) 35 | os.remove(name) 36 | 37 | 38 | def shannon_entropy(data, iterator): 39 | """ 40 | Borrowed from http://blog.dkbza.org/2007/05/scanning-data-for-entropy-anomalies.html 41 | """ 42 | if not data: 43 | return 0 44 | entropy = 0 45 | for x in iterator: 46 | p_x = float(data.count(x))/len(data) 47 | if p_x > 0: 48 | entropy += - p_x*math.log(p_x, 2) 49 | return entropy 50 | 51 | 52 | def get_strings_of_set(word, char_set, threshold=20): 53 | count = 0 54 | letters = "" 55 | strings = [] 56 | for char in word: 57 | if char in char_set: 58 | letters += char 59 | count += 1 60 | else: 61 | if count > threshold: 62 | strings.append(letters) 63 | letters = "" 64 | count = 0 65 | if count > threshold: 66 | strings.append(letters) 67 | return strings 68 | 69 | 70 | class bcolors: 71 | HEADER = '\033[95m' 72 | OKBLUE = '\033[94m' 73 | OKGREEN = '\033[92m' 74 | WARNING = '\033[93m' 75 | FAIL = '\033[91m' 76 | ENDC = '\033[0m' 77 | BOLD = '\033[1m' 78 | UNDERLINE = '\033[4m' 79 | 80 | 81 | def print_results(printJson, issue): 82 | commit_time = issue['date'] 83 | branch_name = issue['branch'] 84 | prev_commit = issue['commit'] 85 | printableDiff = issue['printDiff'] 86 | commitHash = issue['commitHash'] 87 | reason = issue['reason'] 88 | path = issue['path'] 89 | 90 | if printJson: 91 | print(json.dumps(issue, sort_keys=True, indent=4)) 92 | else: 93 | print("~~~~~~~~~~~~~~~~~~~~~") 94 | reason = "{}Reason: {}{}".format(bcolors.OKGREEN, reason, bcolors.ENDC) 95 | print(reason) 96 | dateStr = "{}Date: {}{}".format(bcolors.OKGREEN, commit_time, bcolors.ENDC) 97 | print(dateStr) 98 | hashStr = "{}Hash: {}{}".format(bcolors.OKGREEN, commitHash, bcolors.ENDC) 99 | print(hashStr) 100 | filePath = "{}Filepath: {}{}".format(bcolors.OKGREEN, path, bcolors.ENDC) 101 | print(filePath) 102 | 103 | if sys.version_info >= (3, 0): 104 | branchStr = "{}Branch: {}{}".format(bcolors.OKGREEN, branch_name, bcolors.ENDC) 105 | print(branchStr) 106 | commitStr = "{}Commit: {}{}".format(bcolors.OKGREEN, prev_commit, bcolors.ENDC) 107 | print(commitStr) 108 | print(printableDiff) 109 | else: 110 | branchStr = "{}Branch: {}{}".format(bcolors.OKGREEN, branch_name.encode('utf-8'), bcolors.ENDC) 111 | print(branchStr) 112 | commitStr = "{}Commit: {}{}".format(bcolors.OKGREEN, prev_commit.encode('utf-8'), bcolors.ENDC) 113 | print(commitStr) 114 | print(printableDiff.encode('utf-8')) 115 | print("~~~~~~~~~~~~~~~~~~~~~") 116 | 117 | 118 | def find_entropy(printableDiff, commit_time, branch_name, prev_commit, blob, commitHash, git_url, json_repos): 119 | stringsFound = [] 120 | lines = printableDiff.split("\n") 121 | for line in lines: 122 | for word in line.split(): 123 | base64_strings = get_strings_of_set(word, BASE64_CHARS) 124 | hex_strings = get_strings_of_set(word, HEX_CHARS) 125 | for string in base64_strings: 126 | b64Entropy = shannon_entropy(string, BASE64_CHARS) 127 | if b64Entropy > 4.5: 128 | stringsFound.append(string) 129 | printableDiff = printableDiff.replace(string, bcolors.WARNING + string + bcolors.ENDC) 130 | for string in hex_strings: 131 | hexEntropy = shannon_entropy(string, HEX_CHARS) 132 | if hexEntropy > 3: 133 | stringsFound.append(string) 134 | printableDiff = printableDiff.replace(string, bcolors.WARNING + string + bcolors.ENDC) 135 | entropicDiff = None 136 | if len(stringsFound) > 0: 137 | entropicDiff = {} 138 | entropicDiff['gitUrl'] = git_url 139 | entropicDiff['gitSlug'] = json_repos[git_url]['slug'] 140 | entropicDiff['project'] = json_repos[git_url]['project'] 141 | entropicDiff['projectName'] = json_repos[git_url]['project_name'] 142 | entropicDiff['language'] = json_repos[git_url]['language'] 143 | entropicDiff['date'] = commit_time 144 | entropicDiff['creation_date'] = CURR_TIME 145 | entropicDiff['path'] = blob.b_path if blob.b_path else blob.a_path 146 | entropicDiff['author'] = prev_commit.committer if prev_commit.committer else prev_commit.author.email 147 | entropicDiff['branch'] = branch_name 148 | entropicDiff['type'] = 'Entropy' 149 | entropicDiff['commit'] = prev_commit.message 150 | entropicDiff['diff'] = blob.diff.decode('utf-8', errors='replace') 151 | entropicDiff['stringsFound'] = stringsFound 152 | entropicDiff['printDiff'] = printableDiff 153 | entropicDiff['commitHash'] = commitHash 154 | entropicDiff['reason'] = "High Entropy" 155 | return entropicDiff 156 | 157 | 158 | def idx_bound_verification(bound, idx, printableDiff): 159 | """ 160 | Check if expanded boundaries in git diff are True 161 | :param bound: 162 | :param idx: 163 | :param printableDiff: 164 | :return: 165 | """ 166 | lower_idx, upper_idx = (index - bound for index in idx) 167 | lower_boundary, upper_boundary = False, False 168 | while not lower_boundary: 169 | try: 170 | printableDiff[lower_idx] 171 | lower_boundary = True 172 | except ValueError: 173 | lower_idx += 1 174 | while not upper_boundary: 175 | try: 176 | printableDiff[upper_idx] 177 | upper_boundary = True 178 | except ValueError: 179 | upper_idx -= 1 180 | return lower_idx, upper_idx 181 | 182 | 183 | def regex_txt_check(printableDiff, commit_time, branch_name, prev_commit, blob, commitHash, git_url, json_repos): 184 | regex_matches = [] 185 | # Set bound for expanded code match in git diff 186 | bound = 30 187 | for key in regexes_txt.keys(): 188 | found_strings_search = regexes_txt[key].search(printableDiff) 189 | 190 | # for found_string in found_strings: 191 | # found_diff += bcolors.WARNING + str(found_string) + bcolors.ENDC + '\n' 192 | # for found_string_exp in found_strings_expand: 193 | # found_diff_exp += bcolors.OKGREEN + str(found_string_exp) + bcolors.ENDC + '\n' 194 | # if regexes_txt[key].group: 195 | if found_strings_search: 196 | # found_strings, found_strings_exp, found_strings_clr = '', '', '' 197 | idx = found_strings_search.regs[0] 198 | found_string = re.sub(r'(\r|\n)', '', str(printableDiff[idx[0]:idx[1]])) 199 | # found_strings += found_string 200 | # found_strings_clr += bcolors.WARNING + found_string + bcolors.ENDC 201 | lower_idx, upper_idx = idx_bound_verification(bound, idx, printableDiff) 202 | found_string_exp = re.sub(r'(\r|\n)', '', str(printableDiff[lower_idx:upper_idx])) 203 | # found_strings_exp += found_string_exp 204 | 205 | found_regex = {} 206 | found_regex['gitUrl'] = git_url 207 | found_regex['gitSlug'] = json_repos[git_url]['slug'] 208 | found_regex['project'] = json_repos[git_url]['project'] 209 | found_regex['projectName'] = json_repos[git_url]['project_name'] 210 | found_regex['language'] = json_repos[git_url]['language'] 211 | found_regex['commit_date'] = commit_time 212 | found_regex['audit_date'] = CURR_TIME 213 | try: 214 | found_regex['path'] = blob.a_blob.abspath if blob.a_blob.abspath else blob.a_path 215 | except AttributeError: 216 | found_regex['path'] = blob.b_blob.abspath if blob.b_blob.abspath else blob.abspath 217 | found_regex['branch'] = branch_name 218 | found_regex['commit'] = re.sub(r'(\r|\n)', '', prev_commit.message) 219 | found_regex['author'] = prev_commit.committer.name if prev_commit.committer.name else prev_commit.author.email 220 | diff = re.compile('(^.+?)\n').findall(printableDiff) 221 | found_regex['diff'] = "Diff details: " + str(diff) + '\nMatched string in diff context:\n' + \ 222 | "-----begin omitted-----\n" + found_string_exp + "\n-----end omitted-----", 223 | found_regex['type'] = 'MatchStringInDiff' 224 | found_regex['stringsFound'] = found_string 225 | # found_regex['printDiff'] = '' 226 | found_regex['reason'] = key 227 | found_regex['commitHash'] = commitHash 228 | regex_matches.append(found_regex) 229 | return regex_matches 230 | 231 | 232 | def regex_fs_check_tree(commit_time, branch_name, prev_commit, commitHash, git_url, json_repos): 233 | regex_matches = [] 234 | for file_git in prev_commit.tree.blobs: 235 | for key in regexes_fs: 236 | repo_path = file_git.abspath.split("/repos/")[-1] 237 | found_strings = regexes_fs[key].search(repo_path) 238 | if found_strings: 239 | # found_strings, found_strings_exp, found_strings_clr = '', '', '' 240 | for idx in found_strings.regs: 241 | found_string = re.sub(r'(\r|\n)', '', str(repo_path[idx[0]:idx[1]])) 242 | 243 | found_regex = {} 244 | found_regex['gitUrl'] = git_url 245 | found_regex['gitSlug'] = json_repos[git_url]['slug'] 246 | found_regex['project'] = json_repos[git_url]['project'] 247 | found_regex['projectName'] = json_repos[git_url]['project_name'] 248 | found_regex['language'] = json_repos[git_url]['language'] 249 | found_regex['commit_date'] = commit_time 250 | found_regex['audit_date'] = CURR_TIME 251 | found_regex['path'] = repo_path 252 | found_regex['branch'] = branch_name 253 | found_regex['author'] = prev_commit.committer.name if prev_commit.committer.name else prev_commit.author.email 254 | found_regex['commit'] = re.sub(r'(\r|\n)', '', prev_commit.message) 255 | found_regex['diff'] = '' 256 | found_regex['type'] = 'MatchInFilename' 257 | found_regex['stringsFound'] = found_string 258 | found_regex['reason'] = key 259 | found_regex['commitHash'] = commitHash 260 | regex_matches.append(found_regex) 261 | return regex_matches 262 | 263 | 264 | # def searchSensitiveFilesInRepo(project_path, git_url, json_repos): 265 | # """ 266 | # Deprecated function 267 | # :param project_path: 268 | # :param git_url: 269 | # :param json_repos: 270 | # :return: 271 | # """ 272 | # fs_objects = os.listdir(project_path) 273 | # # repo = Repo(project_path) 274 | # # changed = [item.a_path for item in repo.index.diff(None) ] 275 | # foundIssues = [] 276 | # for fs_object in fs_objects: 277 | # found_regexes = regex_fs_check_tree(fs_object, git_url, json_repos) 278 | # foundIssues += found_regexes 279 | # return foundIssues 280 | 281 | 282 | def find_strings(project_path, git_url, json_repos, since_commit=None, max_depth=None, do_regex=False, do_entropy=True): 283 | """ 284 | Serch sensitive data in git commit diffs 285 | :param project_path: string 286 | :param git_url: string 287 | :param json_repos: dictionary 288 | :param since_commit: integer 289 | :param max_depth: integer 290 | :param do_regex: boolean 291 | :param do_entropy: boolean 292 | :return: dictionary 293 | """ 294 | repo = Repo(project_path) 295 | already_searched = set() 296 | 297 | found_issues = [] 298 | for remote_branch in repo.remotes.origin.fetch(): 299 | since_commit_reached = False 300 | branch_name = remote_branch.name.split('/')[1] 301 | try: 302 | repo.git.checkout(remote_branch, b=branch_name) 303 | except: 304 | pass 305 | prev_commit = None 306 | for curr_commit in repo.iter_commits(max_count=max_depth): 307 | commitHash = curr_commit.hexsha 308 | if commitHash == since_commit: 309 | since_commit_reached = True 310 | if since_commit and since_commit_reached: 311 | prev_commit = curr_commit 312 | continue 313 | if not prev_commit: 314 | pass 315 | else: 316 | # Avoid searching the same diffs 317 | hashes = str(prev_commit) + str(curr_commit) 318 | if hashes in already_searched: 319 | prev_commit = curr_commit 320 | continue 321 | already_searched.add(hashes) 322 | 323 | diff = prev_commit.diff(curr_commit, create_patch=True) 324 | for blob in diff: 325 | printableDiff = blob.diff.decode('utf-8', errors='replace') 326 | if printableDiff.startswith("Binary files"): 327 | continue 328 | commit_time = datetime.datetime.fromtimestamp(prev_commit.committed_date).strftime('%Y-%m-%d %H:%M:%S') 329 | foundIssues = [] 330 | if do_entropy: 331 | entropicDiff = find_entropy(printableDiff, commit_time, branch_name, prev_commit, blob, 332 | commitHash, git_url, json_repos) 333 | if entropicDiff: 334 | foundIssues.append(entropicDiff) 335 | if do_regex: 336 | found_regexes = regex_txt_check(printableDiff, commit_time, branch_name, prev_commit, blob, 337 | commitHash, git_url, json_repos) 338 | foundIssues += found_regexes 339 | found_files = regex_fs_check_tree(commit_time, branch_name, prev_commit, commitHash, git_url, 340 | json_repos) 341 | foundIssues += found_files 342 | 343 | for foundIssue in foundIssues: 344 | # print_results(printJson, foundIssue) 345 | # print("Issue is ", foundIssue) 346 | found_issues.append(foundIssue) 347 | 348 | prev_commit = curr_commit 349 | # output["project_path"] = project_path 350 | # shutil.rmtree(project_path, onerror=del_rw) 351 | return found_issues 352 | 353 | -------------------------------------------------------------------------------- /scripts/gitrob-signatures.txt: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "part": "filename", 4 | "type": "regex", 5 | "pattern": "\\A.*_rsa\\z", 6 | "caption": "Private SSH key", 7 | "description": null 8 | }, 9 | { 10 | "part": "filename", 11 | "type": "regex", 12 | "pattern": "\\A.*_dsa\\z", 13 | "caption": "Private SSH key", 14 | "description": null 15 | }, 16 | { 17 | "part": "filename", 18 | "type": "regex", 19 | "pattern": "\\A.*_ed25519\\z", 20 | "caption": "Private SSH key", 21 | "description": null 22 | }, 23 | { 24 | "part": "filename", 25 | "type": "regex", 26 | "pattern": "\\A.*_ecdsa\\z", 27 | "caption": "Private SSH key", 28 | "description": null 29 | }, 30 | { 31 | "part": "path", 32 | "type": "regex", 33 | "pattern": "\\.?ssh/config\\z", 34 | "caption": "SSH configuration file", 35 | "description": null 36 | }, 37 | { 38 | "part": "extension", 39 | "type": "match", 40 | "pattern": "pem", 41 | "caption": "Potential cryptographic private key", 42 | "description": null 43 | }, 44 | { 45 | "part": "extension", 46 | "type": "regex", 47 | "pattern": "\\Akey(pair)?\\z", 48 | "caption": "Potential cryptographic private key", 49 | "description": null 50 | }, 51 | { 52 | "part": "extension", 53 | "type": "match", 54 | "pattern": "pkcs12", 55 | "caption": "Potential cryptographic key bundle", 56 | "description": null 57 | }, 58 | { 59 | "part": "extension", 60 | "type": "match", 61 | "pattern": "pfx", 62 | "caption": "Potential cryptographic key bundle", 63 | "description": null 64 | }, 65 | { 66 | "part": "extension", 67 | "type": "match", 68 | "pattern": "p12", 69 | "caption": "Potential cryptographic key bundle", 70 | "description": null 71 | }, 72 | { 73 | "part": "extension", 74 | "type": "match", 75 | "pattern": "asc", 76 | "caption": "Potential cryptographic key bundle", 77 | "description": null 78 | }, 79 | { 80 | "part": "filename", 81 | "type": "match", 82 | "pattern": "otr.private_key", 83 | "caption": "Pidgin OTR private key", 84 | "description": null 85 | }, 86 | { 87 | "part": "filename", 88 | "type": "regex", 89 | "pattern": "\\A\\.?(bash_|zsh_|z)?history\\z", 90 | "caption": "Shell command history file", 91 | "description": null 92 | }, 93 | { 94 | "part": "filename", 95 | "type": "regex", 96 | "pattern": "\\A\\.?mysql_history\\z", 97 | "caption": "MySQL client command history file", 98 | "description": null 99 | }, 100 | { 101 | "part": "filename", 102 | "type": "regex", 103 | "pattern": "\\A\\.?psql_history\\z", 104 | "caption": "PostgreSQL client command history file", 105 | "description": null 106 | }, 107 | { 108 | "part": "filename", 109 | "type": "regex", 110 | "pattern": "\\A\\.?pgpass\\z", 111 | "caption": "PostgreSQL password file", 112 | "description": null 113 | }, 114 | { 115 | "part": "filename", 116 | "type": "regex", 117 | "pattern": "\\A\\.?irb_history\\z", 118 | "caption": "Ruby IRB console history file", 119 | "description": null 120 | }, 121 | { 122 | "part": "path", 123 | "type": "regex", 124 | "pattern": "\\.?purple\\/accounts\\.xml\\z", 125 | "caption": "Pidgin chat client account configuration file", 126 | "description": null 127 | }, 128 | { 129 | "part": "path", 130 | "type": "regex", 131 | "pattern": "\\.?xchat2?\\/servlist_?\\.conf\\z", 132 | "caption": "Hexchat/XChat IRC client server list configuration file", 133 | "description": null 134 | }, 135 | { 136 | "part": "path", 137 | "type": "regex", 138 | "pattern": "\\.?irssi\\/config\\z", 139 | "caption": "Irssi IRC client configuration file", 140 | "description": null 141 | }, 142 | { 143 | "part": "path", 144 | "type": "regex", 145 | "pattern": "\\.?recon-ng\\/keys\\.db\\z", 146 | "caption": "Recon-ng web reconnaissance framework API key database", 147 | "description": null 148 | }, 149 | { 150 | "part": "filename", 151 | "type": "regex", 152 | "pattern": "\\A\\.?dbeaver-data-sources.xml\\z", 153 | "caption": "DBeaver SQL database manager configuration file", 154 | "description": null 155 | }, 156 | { 157 | "part": "filename", 158 | "type": "regex", 159 | "pattern": "\\A\\.?muttrc\\z", 160 | "caption": "Mutt e-mail client configuration file", 161 | "description": null 162 | }, 163 | { 164 | "part": "filename", 165 | "type": "regex", 166 | "pattern": "\\A\\.?s3cfg\\z", 167 | "caption": "S3cmd configuration file", 168 | "description": null 169 | }, 170 | { 171 | "part": "path", 172 | "type": "regex", 173 | "pattern": "\\.?aws/credentials\\z", 174 | "caption": "AWS CLI credentials file", 175 | "description": null 176 | }, 177 | { 178 | "part": "filename", 179 | "type": "regex", 180 | "pattern": "\\A\\.?trc\\z", 181 | "caption": "T command-line Twitter client configuration file", 182 | "description": null 183 | }, 184 | { 185 | "part": "extension", 186 | "type": "match", 187 | "pattern": "ovpn", 188 | "caption": "OpenVPN client configuration file", 189 | "description": null 190 | }, 191 | { 192 | "part": "filename", 193 | "type": "regex", 194 | "pattern": "\\A\\.?gitrobrc\\z", 195 | "caption": "Well, this is awkward... Gitrob configuration file", 196 | "description": null 197 | }, 198 | { 199 | "part": "filename", 200 | "type": "regex", 201 | "pattern": "\\A\\.?(bash|zsh)rc\\z", 202 | "caption": "Shell configuration file", 203 | "description": "Shell configuration files might contain information such as server hostnames, passwords and API keys." 204 | }, 205 | { 206 | "part": "filename", 207 | "type": "regex", 208 | "pattern": "\\A\\.?(bash_|zsh_)?profile\\z", 209 | "caption": "Shell profile configuration file", 210 | "description": "Shell configuration files might contain information such as server hostnames, passwords and API keys." 211 | }, 212 | { 213 | "part": "filename", 214 | "type": "regex", 215 | "pattern": "\\A\\.?(bash_|zsh_)?aliases\\z", 216 | "caption": "Shell command alias configuration file", 217 | "description": "Shell configuration files might contain information such as server hostnames, passwords and API keys." 218 | }, 219 | { 220 | "part": "filename", 221 | "type": "match", 222 | "pattern": "secret_token.rb", 223 | "caption": "Ruby On Rails secret token configuration file", 224 | "description": "If the Rails secret token is known, it can allow for remote code execution. (http://www.exploit-db.com/exploits/27527/)" 225 | }, 226 | { 227 | "part": "filename", 228 | "type": "match", 229 | "pattern": "omniauth.rb", 230 | "caption": "OmniAuth configuration file", 231 | "description": "The OmniAuth configuration file might contain client application secrets." 232 | }, 233 | { 234 | "part": "filename", 235 | "type": "match", 236 | "pattern": "carrierwave.rb", 237 | "caption": "Carrierwave configuration file", 238 | "description": "Can contain credentials for online storage systems such as Amazon S3 and Google Storage." 239 | }, 240 | { 241 | "part": "filename", 242 | "type": "match", 243 | "pattern": "schema.rb", 244 | "caption": "Ruby On Rails database schema file", 245 | "description": "Contains information on the database schema of a Ruby On Rails application." 246 | }, 247 | { 248 | "part": "filename", 249 | "type": "match", 250 | "pattern": "database.yml", 251 | "caption": "Potential Ruby On Rails database configuration file", 252 | "description": "Might contain database credentials." 253 | }, 254 | { 255 | "part": "filename", 256 | "type": "match", 257 | "pattern": "settings.py", 258 | "caption": "Django configuration file", 259 | "description": "Might contain database credentials, online storage system credentials, secret keys, etc." 260 | }, 261 | { 262 | "part": "filename", 263 | "type": "regex", 264 | "pattern": "\\A(.*)?config(\\.inc)?\\.php\\z", 265 | "caption": "PHP configuration file", 266 | "description": "Might contain credentials and keys." 267 | }, 268 | { 269 | "part": "extension", 270 | "type": "match", 271 | "pattern": "kdb", 272 | "caption": "KeePass password manager database file", 273 | "description": null 274 | }, 275 | { 276 | "part": "extension", 277 | "type": "match", 278 | "pattern": "agilekeychain", 279 | "caption": "1Password password manager database file", 280 | "description": null 281 | }, 282 | { 283 | "part": "extension", 284 | "type": "match", 285 | "pattern": "keychain", 286 | "caption": "Apple Keychain database file", 287 | "description": null 288 | }, 289 | { 290 | "part": "extension", 291 | "type": "regex", 292 | "pattern": "\\Akey(store|ring)\\z", 293 | "caption": "GNOME Keyring database file", 294 | "description": null 295 | }, 296 | { 297 | "part": "extension", 298 | "type": "match", 299 | "pattern": "log", 300 | "caption": "Log file", 301 | "description": "Log files might contain information such as references to secret HTTP endpoints, session IDs, user information, passwords and API keys." 302 | }, 303 | { 304 | "part": "extension", 305 | "type": "match", 306 | "pattern": "pcap", 307 | "caption": "Network traffic capture file", 308 | "description": null 309 | }, 310 | { 311 | "part": "extension", 312 | "type": "regex", 313 | "pattern": "\\Asql(dump)?\\z", 314 | "caption": "SQL dump file", 315 | "description": null 316 | }, 317 | { 318 | "part": "extension", 319 | "type": "match", 320 | "pattern": "gnucash", 321 | "caption": "GnuCash database file", 322 | "description": null 323 | }, 324 | { 325 | "part": "filename", 326 | "type": "regex", 327 | "pattern": "backup", 328 | "caption": "Contains word: backup", 329 | "description": null 330 | }, 331 | { 332 | "part": "filename", 333 | "type": "regex", 334 | "pattern": "dump", 335 | "caption": "Contains word: dump", 336 | "description": null 337 | }, 338 | { 339 | "part": "filename", 340 | "type": "regex", 341 | "pattern": "password", 342 | "caption": "Contains word: password", 343 | "description": null 344 | }, 345 | { 346 | "part": "filename", 347 | "type": "regex", 348 | "pattern": "credential", 349 | "caption": "Contains word: credential", 350 | "description": null 351 | }, 352 | { 353 | "part": "filename", 354 | "type": "regex", 355 | "pattern": "secret", 356 | "caption": "Contains word: secret", 357 | "description": null 358 | }, 359 | { 360 | "part": "filename", 361 | "type": "regex", 362 | "pattern": "private.*key", 363 | "caption": "Contains words: private, key", 364 | "description": null 365 | }, 366 | { 367 | "part": "filename", 368 | "type": "match", 369 | "pattern": "jenkins.plugins.publish_over_ssh.BapSshPublisherPlugin.xml", 370 | "caption": "Jenkins publish over SSH plugin file", 371 | "description": null 372 | }, 373 | { 374 | "part": "filename", 375 | "type": "match", 376 | "pattern": "credentials.xml", 377 | "caption": "Potential Jenkins credentials file", 378 | "description": null 379 | }, 380 | { 381 | "part": "filename", 382 | "type": "regex", 383 | "pattern": "\\A\\.?htpasswd\\z", 384 | "caption": "Apache htpasswd file", 385 | "description": null 386 | }, 387 | { 388 | "part": "filename", 389 | "type": "regex", 390 | "pattern": "\\A(\\.|_)?netrc\\z", 391 | "caption": "Configuration file for auto-login process", 392 | "description": "Might contain username and password." 393 | }, 394 | { 395 | "part": "extension", 396 | "type": "match", 397 | "pattern": "kwallet", 398 | "caption": "KDE Wallet Manager database file", 399 | "description": null 400 | }, 401 | { 402 | "part": "filename", 403 | "type": "match", 404 | "pattern": "LocalSettings.php", 405 | "caption": "Potential MediaWiki configuration file", 406 | "description": null 407 | }, 408 | { 409 | "part": "extension", 410 | "type": "match", 411 | "pattern": "tblk", 412 | "caption": "Tunnelblick VPN configuration file", 413 | "description": null 414 | }, 415 | { 416 | "part": "path", 417 | "type": "regex", 418 | "pattern": "\\.?gem/credentials\\z", 419 | "caption": "Rubygems credentials file", 420 | "description": "Might contain API key for a rubygems.org account." 421 | }, 422 | { 423 | "part": "filename", 424 | "type": "regex", 425 | "pattern": "\\A*\\.pubxml(\\.user)?\\z", 426 | "caption": "Potential MSBuild publish profile", 427 | "description": null 428 | }, 429 | { 430 | "part": "filename", 431 | "type": "match", 432 | "pattern": "Favorites.plist", 433 | "caption": "Sequel Pro MySQL database manager bookmark file", 434 | "description": null 435 | }, 436 | { 437 | "part": "filename", 438 | "type": "match", 439 | "pattern": "configuration.user.xpl", 440 | "caption": "Little Snitch firewall configuration file", 441 | "description": "Contains traffic rules for applications" 442 | }, 443 | { 444 | "part": "extension", 445 | "type": "match", 446 | "pattern": "dayone", 447 | "caption": "Day One journal file", 448 | "description": null 449 | }, 450 | { 451 | "part": "filename", 452 | "type": "match", 453 | "pattern": "journal.txt", 454 | "caption": "Potential jrnl journal file", 455 | "description": null 456 | }, 457 | { 458 | "part": "filename", 459 | "type": "regex", 460 | "pattern": "\\A\\.?tugboat\\z", 461 | "caption": "Tugboat DigitalOcean management tool configuration", 462 | "description": null 463 | }, 464 | { 465 | "part": "filename", 466 | "type": "regex", 467 | "pattern": "\\A\\.?git-credentials\\z", 468 | "caption": "git-credential-store helper credentials file", 469 | "description": null 470 | }, 471 | { 472 | "part": "filename", 473 | "type": "regex", 474 | "pattern": "\\A\\.?gitconfig\\z", 475 | "caption": "Git configuration file", 476 | "description": null 477 | }, 478 | { 479 | "part": "filename", 480 | "type": "match", 481 | "pattern": "knife.rb", 482 | "caption": "Chef Knife configuration file", 483 | "description": "Might contain references to Chef servers" 484 | }, 485 | { 486 | "part": "path", 487 | "type": "regex", 488 | "pattern": "\\.?chef/(.*)\\.pem\\z", 489 | "caption": "Chef private key", 490 | "description": "Can be used to authenticate against Chef servers" 491 | }, 492 | { 493 | "part": "filename", 494 | "type": "match", 495 | "pattern": "proftpdpasswd", 496 | "caption": "cPanel backup ProFTPd credentials file", 497 | "description": "Contains usernames and password hashes for FTP accounts" 498 | }, 499 | { 500 | "part": "filename", 501 | "type": "match", 502 | "pattern": "robomongo.json", 503 | "caption": "Robomongo MongoDB manager configuration file", 504 | "description": "Might contain credentials for MongoDB databases" 505 | }, 506 | { 507 | "part": "filename", 508 | "type": "match", 509 | "pattern": "filezilla.xml", 510 | "caption": "FileZilla FTP configuration file", 511 | "description": "Might contain credentials for FTP servers" 512 | }, 513 | { 514 | "part": "filename", 515 | "type": "match", 516 | "pattern": "recentservers.xml", 517 | "caption": "FileZilla FTP recent servers file", 518 | "description": "Might contain credentials for FTP servers" 519 | }, 520 | { 521 | "part": "filename", 522 | "type": "match", 523 | "pattern": "ventrilo_srv.ini", 524 | "caption": "Ventrilo server configuration file", 525 | "description": "Might contain passwords" 526 | }, 527 | { 528 | "part": "filename", 529 | "type": "regex", 530 | "pattern": "\\A\\.?dockercfg\\z", 531 | "caption": "Docker configuration file", 532 | "description": "Might contain credentials for public or private Docker registries" 533 | }, 534 | { 535 | "part": "filename", 536 | "type": "regex", 537 | "pattern": "\\A\\.?npmrc\\z", 538 | "caption": "NPM configuration file", 539 | "description": "Might contain credentials for NPM registries" 540 | }, 541 | { 542 | "part": "filename", 543 | "type": "match", 544 | "pattern": "terraform.tfvars", 545 | "caption": "Terraform variable config file", 546 | "description": "Might contain credentials for terraform providers" 547 | }, 548 | { 549 | "part": "filename", 550 | "type": "regex", 551 | "pattern": "\\A\\.?env\\z", 552 | "caption": "Environment configuration file", 553 | "description": null 554 | } 555 | ] -------------------------------------------------------------------------------- /bitchecker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | # __version__ = '0.9' 3 | 4 | from __future__ import print_function 5 | import logging 6 | import os 7 | import errno 8 | try: 9 | # for python 3 10 | import urllib.request as urllib_request 11 | except ImportError: 12 | # for python 2 13 | import urllib2 as urllib_request 14 | import json 15 | import time 16 | import re 17 | import datetime 18 | import argparse 19 | from truffleHog import truffleHog 20 | from json2html import * 21 | import csv 22 | from scripts.bitbucket import Bitbucket 23 | from git import Repo, GitCommandError 24 | import multiprocessing as mp 25 | 26 | # Set global 'utf8' support 27 | reload(sys) 28 | sys.setdefaultencoding('utf8') 29 | 30 | 31 | try: 32 | # for python 2 33 | import ConfigParser as configparser 34 | except ImportError: 35 | # for python 3 36 | import configparser 37 | 38 | # General logging configuration 39 | log_file_time = time.strftime("%Y-%m-%d-%H-%M", time.gmtime()) 40 | log_file_name = "logs/" + log_file_time + "-bitbucket-checker.log" 41 | logfile = os.path.realpath(os.path.join(os.path.dirname(__file__), log_file_name)) 42 | print('All logs are stored in file - {0}'.format(logfile)) 43 | 44 | # create logger with 'spam_application' 45 | logger = logging.getLogger('creds-checker') 46 | logger.setLevel(logging.DEBUG) 47 | # create file handler which logs even debug messages 48 | fh = logging.FileHandler(logfile) 49 | fh.setLevel(logging.DEBUG) 50 | 51 | # create formatter and add it to the handlers 52 | formatter = logging.Formatter('%(asctime)s %(levelname)s %(name)s %(message)s') 53 | fh.setFormatter(formatter) 54 | 55 | # add the handlers to the logger 56 | logger.addHandler(fh) 57 | 58 | CONFIG = configparser.ConfigParser() 59 | CONFIG.read('.config/config.cfg') 60 | 61 | 62 | def datetime_handler(x): 63 | """ 64 | For for EC2 datetime.datetime() value handling 65 | :param x: 66 | :return: string 67 | """ 68 | if isinstance(x, datetime): 69 | return x.isoformat() 70 | raise TypeError("Unknown type") 71 | 72 | 73 | def createDirs(): 74 | """ 75 | Create program working dirs 76 | :return: none 77 | """ 78 | # Create directories 79 | directories = ['tmp', 'logs', 'repos', 'backup', '.config', 'results', 'checks', 'stats'] 80 | for directory in directories: 81 | try: 82 | os.makedirs(directory) 83 | except OSError as e: 84 | if e.errno != errno.EEXIST: 85 | raise 86 | 87 | 88 | def write_to_file(filename, directory, data): 89 | """ 90 | Func for store json formatted data to local file 91 | :param filename: string 92 | :param directory: string 93 | :param data: dictionary 94 | :return: string 95 | """ 96 | if not os.path.isdir(directory): 97 | os.makedirs(directory) 98 | filename = directory + '/' + log_file_time + "-" + filename 99 | outfile = open(filename, 'w') 100 | outfile.write(data) 101 | outfile.close() 102 | return filename 103 | 104 | 105 | def write_json_to_file(filename, directory, data, time=True): 106 | """ 107 | Func for store json formatted data to local file 108 | :param filename: string 109 | :param directory: string 110 | :param data: dictionary 111 | :param time: boolean 112 | :return: None 113 | """ 114 | if not os.path.isdir(directory): 115 | os.makedirs(directory) 116 | if time: 117 | filename = directory + '/' + log_file_time + "-" + filename 118 | else: 119 | filename = directory + '/' + filename 120 | json_outfile = open(filename, 'w') 121 | json.dump(data, json_outfile, default=datetime_handler) 122 | json_outfile.close() 123 | return filename 124 | 125 | 126 | def read_json_file(path): 127 | """ 128 | Func for store json formatted data to local file 129 | :param path: string 130 | :return: none 131 | """ 132 | try: 133 | return json.loads(open(path).read()) 134 | except IOError: 135 | print("File is missed. Please check") 136 | 137 | 138 | def json2csvFile(filename, directory, json_input, header): 139 | """ 140 | Convert JSON to CSV 141 | :param filename: string 142 | :param directory: string 143 | :param json_input: boolean 144 | :param header: list 145 | :return: string 146 | """ 147 | file_path = directory + "/" + log_file_time + "-" + filename 148 | # Create header for CSV document 149 | columns = [] 150 | if header and type(header) == list: 151 | columns = header 152 | elif not header: 153 | if type(json_input) == list: 154 | columns = sorted(json_input[0].keys()) 155 | elif type(json_input) == dict: 156 | columns = sorted(json_input.keys()) 157 | json_input = [json_input] 158 | 159 | with open(file_path, 'wb') as csvF: 160 | csv.register_dialect('escaped', escapechar='\\', quotechar="\"", doublequote=True, 161 | quoting=csv.QUOTE_ALL, delimiter=',', skipinitialspace=True) 162 | 163 | csv_writer = csv.DictWriter(csvF, fieldnames=columns, dialect='escaped') 164 | csv_writer.writeheader() 165 | for issue in json_input: 166 | csv_writer.writerow(issue) 167 | return file_path 168 | 169 | 170 | def get_repos_uri(bb_session, owner, pagination): 171 | """ 172 | Get repository URI 173 | :param bb_session: urllib http object 174 | :param owner: string 175 | :param pagination: boolean 176 | :return: list 177 | """ 178 | success, repo = bb_session.repository.all(owner=owner, pagination=pagination) 179 | return success, repo 180 | 181 | 182 | def getRepositoriesURI(response): 183 | """ 184 | Get repositories URIs 185 | :param response: dictionary 186 | :return: list 187 | """ 188 | git_uris = [] 189 | for page in response.keys(): 190 | for value in response[page]['values']: 191 | git_uris.append(value['links']['clone'][0]['href']) 192 | return sorted(git_uris) 193 | 194 | 195 | def prepareReposToJson(json_repos): 196 | """ 197 | Convert information about repo to proper format 198 | :param json_repos: 199 | :return: dictionary 200 | """ 201 | columns = ["created_on", "description", "fork_policy", "full_name", "has_issues", "has_wiki", "is_private", 202 | "language", "links", "mainbranch", "name", "owner", "project", "scm", "size", "slug", "type", 203 | "updated_on", "uuid", "website"] 204 | repos = {} 205 | for page in json_repos.keys(): 206 | for value in json_repos[page]['values']: 207 | new_value = {col: value[col] for col in columns} 208 | new_value['links'] = value['links']['html']['href'] 209 | new_value['project_link'] = value['project']['links']['html']['href'] 210 | new_value['project'] = value['project']['key'] 211 | new_value['project_name'] = value['project']['name'] 212 | new_value['owner_link'] = value['owner']['links']['html']['href'] 213 | new_value['owner'] = value['owner']['username'] 214 | clone_link = 'https://' + re.findall('^https://\S+@(.*)$', value['links']['clone'][0]['href'])[0] 215 | repos[clone_link] = new_value.copy() 216 | return repos 217 | 218 | 219 | def formBitbucketReposLists(bb_session, owner): 220 | """ 221 | Get Bitbucket repositories list 222 | :param bb_session: http authenticated session 223 | :param owner: string 224 | :return: dictionary 225 | """ 226 | # Get all repositories 227 | success, repositories = get_repos_uri(bb_session, owner, pagination=True) 228 | json_file = write_json_to_file('all_repositories.json', 'tmp', repositories, True) 229 | repos_json = prepareReposToJson(repositories) 230 | json_formatted_file = write_json_to_file('formatted_repositories.json', 'tmp', repos_json, True) 231 | json2csvFile('all_repositories.csv', 'tmp', repos_json.values(), False) 232 | logger.info('Repositories were exported to JSON file - {0}'.format(json_file)) 233 | logger.info('Repositories were exported to CSV file - {0}'.format(json_formatted_file)) 234 | return repos_json 235 | 236 | 237 | def clone_git_repo(git_auth_url, git_url, json_repos, count, not_clone): 238 | """ 239 | Clone or fetch bitbucket repository 240 | :param git_auth_url: string 241 | :param git_url: string 242 | :param json_repos: dictionary 243 | :param count: integer 244 | :param not_clone: boolean 245 | :return: string 246 | """ 247 | project_path = os.getcwd() + "/repos/" + json_repos[git_url]["slug"] 248 | project_git_path = project_path + "/.git" 249 | git_slug = git_url.split('/')[-1] 250 | # Clone repository or fetch new commits 251 | if os.path.exists(project_git_path): 252 | logger.info('Rep #{0}. Dir .git dit exists and started to pull Bitbucket repo: {1}'.format(count, git_slug)) 253 | repo = Repo(project_git_path) 254 | if not not_clone: 255 | repo.remote().pull() 256 | logger.info('Rep #{0}. Successfully Fetched Bitbucket repo: {1}'.format(count, git_slug)) 257 | elif not os.path.exists(project_path): 258 | os.makedirs(project_path) 259 | logger.info('Rep #{0}. Created dir and started to clone Bitbucket repo: {1}'.format(count, git_slug)) 260 | Repo.clone_from(git_auth_url, project_path) 261 | logger.info('Rep #{0}. Successfully Cloned Bitbucket repo: {1}'.format(count, git_slug)) 262 | elif not os.path.exists(project_git_path): 263 | logger.info('Rep#{0} Dir exists and started to clone Bitbucket repo: {1}'.format(count, git_slug)) 264 | Repo.clone_from(git_auth_url, project_path) 265 | logger.info('Rep#{0} Successfully Cloned Bitbucket repo: {1}'.format(count, git_slug)) 266 | 267 | return project_path 268 | 269 | 270 | def combine_all_checks(output_csv, output_html): 271 | """ 272 | Save all findings in files with different format 273 | :param output_csv: boolean 274 | :param output_html: boolean 275 | :return: string, dictionary 276 | """ 277 | check_dir = os.path.realpath(os.path.dirname(__file__) + "/checks") 278 | check_files = [os.path.join(check_dir, f) for f in os.listdir(check_dir) 279 | if os.path.isfile(os.path.join(check_dir, f))] 280 | if not check_files: 281 | print('No file with fidnings found. Run script without arg: "--report" first') 282 | exit(0) 283 | found_leaks = [] 284 | header = ["branch", "type", "reason", "stringsFound", "diff", "language", "commit", "gitSlug", "gitUrl", "author", 285 | "commitHash", "audit_date", "commit_date", "stringsFound", "path", "project", "projectName"] 286 | for check in sorted(check_files): 287 | output = read_json_file(check) 288 | found_leaks.extend(output) 289 | 290 | # Save results in different formats 291 | if output_html: 292 | found_leaks_html = json2html.convert(json=found_leaks, encode=True, escape=True) 293 | write_to_file('found-leaks.html', 'results', found_leaks_html) 294 | if output_csv: 295 | csv_file = json2csvFile('found-leaks.csv', 'results', found_leaks, header) 296 | realpath = os.path.realpath(os.path.join(os.path.dirname(__file__), "/results/")) 297 | csv_final_file = realpath + csv_file 298 | return csv_final_file, found_leaks 299 | return False, found_leaks 300 | 301 | 302 | def count_reason_stats(json_file): 303 | """ 304 | Count stats for found leaks in Company repositories 305 | :param json_file: 306 | :return: none 307 | """ 308 | # Count statistic for all findings 309 | reason_count = {} 310 | for leak in json_file: 311 | # General stats 312 | if leak['reason'] not in reason_count.keys(): 313 | reason_count[leak['reason']] = 0 314 | reason_count[leak['reason']] += 1 315 | 316 | reason_stats = [] 317 | header = ["project", "count"] 318 | for k, v in reason_count.iteritems(): 319 | stats = {header[0]: k, header[1]: v} 320 | reason_stats.append(stats.copy()) 321 | 322 | json2csvFile('reason_stats.csv', 'stats', reason_stats, False) 323 | write_json_to_file('reason_stats.json', 'stats', reason_stats, True) 324 | 325 | 326 | def count_project_stats(json_file): 327 | """ 328 | Count stats for found leaks in Company repositories 329 | :param json_file: 330 | :return: none 331 | """ 332 | # Count statistic for all findings 333 | project_count = {} 334 | for leak in json_file: 335 | # Project stats 336 | if leak['project'] not in project_count.keys(): 337 | project_count[leak['project']] = {} 338 | if leak['reason'] not in project_count[leak['project']].keys(): 339 | project_count[leak['project']][leak['reason']] = 0 340 | project_count[leak['project']][leak['reason']] += 1 341 | 342 | project_stats = [] 343 | header = ["project", "reason", "count"] 344 | for proj, reasons in project_count.iteritems(): 345 | for reason, count in reasons.iteritems(): 346 | stats = {header[0]: proj, header[1]: reason, header[2]: count} 347 | project_stats.append(stats.copy()) 348 | 349 | json2csvFile('project_stats.csv', 'stats', project_stats, False) 350 | write_json_to_file('project_stats.json', 'stats', project_stats, True) 351 | 352 | 353 | def count_repo_stats(json_file): 354 | """ 355 | Count stats for found leaks in Company repositories 356 | :param json_file: 357 | :return: none 358 | """ 359 | # Count statistic for all findings 360 | repo_count = {} 361 | for leak in json_file: 362 | # Repository stats 363 | if leak['project'] not in repo_count.keys(): 364 | repo_count[leak['project']] = {} 365 | if leak['gitSlug'] not in repo_count[leak['project']].keys(): 366 | repo_count[leak['project']][leak['gitSlug']] = {} 367 | if leak['reason'] not in repo_count[leak['project']][leak['gitSlug']].keys(): 368 | repo_count[leak['project']][leak['gitSlug']][leak['reason']] = 0 369 | repo_count[leak['project']][leak['gitSlug']][leak['reason']] += 1 370 | 371 | repo_stats = [] 372 | header = ["project", "repository", "reason", "count"] 373 | for project, repositories in repo_count.iteritems(): 374 | for repo, reasons in repositories.iteritems(): 375 | for reason, count in reasons.iteritems(): 376 | stats = {header[0]: project, header[1]: repo, header[2]: reason, header[3]: count} 377 | repo_stats.append(stats.copy()) 378 | 379 | json2csvFile('repo_stats.csv', 'stats', repo_stats, False) 380 | write_json_to_file('repo_stats.json', 'stats', repo_stats, True) 381 | exit(0) 382 | 383 | 384 | def search_bitbucket(count, git_urls, username, secret, args, json_repos, total_rep): 385 | 386 | git_url = git_urls[count] 387 | try: 388 | git_slug = git_url.split('/')[-1][:-4] 389 | git_auth_url = git_url.replace('https://', 'https://' + username + ':' + secret + '@') 390 | do_entropy = truffleHog.str2bool(args.do_entropy) 391 | 392 | project_path = clone_git_repo(git_auth_url, git_url, json_repos, count, args.not_clone) 393 | logger.info('Rep #{0}. Starting to verify Bitbucket repo #{1} from {2} {3}'.format(count, 394 | count, 395 | total_rep, 396 | git_slug)) 397 | # Search sensitive data using regexChecks.regexes_txt in folder: truffleHog 398 | found_leaks = truffleHog.find_strings(project_path, git_url, json_repos, args.since_commit, 399 | args.max_depth, args.do_regex, do_entropy) 400 | fount_leaks_file = str(count) + "-code-" + git_slug + ".json" 401 | if found_leaks: 402 | write_json_to_file(fount_leaks_file, 'checks', found_leaks, False) 403 | 404 | # Search sensitive data using regexChecks.regexes_fs in folder: truffleHog 405 | found_fs_leaks = truffleHog.searchSensitiveFilesInRepo(project_path, git_url, json_repos) 406 | fs_file = str(count) + "-fs-" + git_slug + ".json" 407 | if found_fs_leaks: 408 | write_json_to_file(fs_file, 'checks', found_fs_leaks, False) 409 | logger.info('Rep #{0}. Successfully Verified Bitbucket repo #{1} from {2} {3}\n'.format(count, 410 | count, 411 | total_rep, 412 | git_slug)) 413 | print("Repo#", count, ". Slug. ", git_slug) 414 | count += 1 415 | except GitCommandError as exception: 416 | logger.info('Rep #{0}. Exception in parsing repo {1}. Details are - {2}'.format(count, 417 | git_slug, 418 | str(exception))) 419 | 420 | 421 | def main(): 422 | """ 423 | Main func for Bitbucket sensitive data scanner tool 424 | :return: none 425 | """ 426 | parser = argparse.ArgumentParser(description='Find secrets hidden in the depths of git.') 427 | parser.add_argument("--json", dest="output_json", action="store_true", help="Output in JSON") 428 | parser.add_argument("--html", dest="output_html", action="store_true", help="Output in HTML") 429 | parser.add_argument("--csv", dest="output_csv", action="store_true", help="Output in CSV") 430 | parser.add_argument("--regex", dest="do_regex", action="store_true", help="Enable high signal regex checks") 431 | parser.add_argument("--entropy", dest="do_entropy", help="Enable entropy checks") 432 | parser.add_argument("--since_commit", dest="since_commit", help="Only scan from a given commit hash") 433 | parser.add_argument("--max_depth", dest="max_depth", help="Max commit depth to go back when searching for secrets") 434 | parser.add_argument("--starts_with", dest="starts_with", help="Perform checks starting from N repository") 435 | parser.add_argument("--report", dest="report", action="store_true", 436 | help="Calculate statistic if you've ready file with checks") 437 | parser.add_argument("--not_clone", dest="not_clone", action="store_true", 438 | help="No clone or fetch repositories (in case they were cloned before") 439 | parser.set_defaults(regex=False) 440 | parser.set_defaults(max_depth=10) 441 | parser.set_defaults(since_commit=None) 442 | parser.set_defaults(entropy=False) 443 | parser.set_defaults(output_csv=False) 444 | parser.set_defaults(output_html=False) 445 | parser.set_defaults(output_json=False) 446 | parser.set_defaults(starts_with=0) 447 | parser.set_defaults(stats_only=False) 448 | parser.set_defaults(not_clone=False) 449 | parser.set_defaults(report=False) 450 | args = parser.parse_args() 451 | # Create dirs 452 | createDirs() 453 | # Count statistic only 454 | if args.report: 455 | _, json_file = combine_all_checks(args.output_csv, args.output_html) 456 | # json_file = read_json_file('results/2018-01-04-14-09-found-leaks.json') 457 | count_reason_stats(json_file) 458 | count_project_stats(json_file) 459 | count_repo_stats(json_file) 460 | # Make connection to Bitbucket 461 | username = CONFIG.get('BITBUCKET', 'username') 462 | secret = CONFIG.get('BITBUCKET', 'secret') 463 | owner = CONFIG.get('BITBUCKET', 'owner') 464 | bb_session = Bitbucket(username, secret) 465 | json_repos = formBitbucketReposLists(bb_session, owner) 466 | # json_repos = read_json_file('tmp/2018-02-04-10-52-formatted_repositories.json') 467 | # json_repos = prepareReposToJson(response) 468 | # Check particular repository 469 | git_urls = sorted(json_repos.keys()) 470 | total_rep = len(json_repos.keys()) 471 | logger.info('Fetched %s Bitbucket repositories from Company account.\n' % total_rep) 472 | count = int(args.starts_with) 473 | 474 | # Prepare pool of processes (max by default) 475 | processes = mp.cpu_count() 476 | logger.info('Amount of vCPUs - {0}\n'.format(processes)) 477 | pool = mp.Pool() 478 | results = [pool.apply_async(search_bitbucket, args=(rep_count, git_urls, username, secret, args, json_repos, total_rep)) 479 | for rep_count in range(count, total_rep)] 480 | for proc in results: 481 | proc.get() 482 | 483 | # Final steps 484 | csv_file, _ = combine_all_checks(args.output_csv, args.output_html) 485 | if csv_file: 486 | logger.info('Checks has been completed successfully. See file {0}'.format(csv_file)) 487 | 488 | 489 | if __name__ == '__main__': 490 | main() 491 | --------------------------------------------------------------------------------