├── .gitignore ├── README.md └── githarvester.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | 5 | # C extensions 6 | *.so 7 | 8 | # Distribution / packaging 9 | .Python 10 | env/ 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # PyInstaller 27 | # Usually these files are written by a python script from a template 28 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 29 | *.manifest 30 | *.spec 31 | 32 | # Installer logs 33 | pip-log.txt 34 | pip-delete-this-directory.txt 35 | 36 | # Unit test / coverage reports 37 | htmlcov/ 38 | .tox/ 39 | .coverage 40 | .coverage.* 41 | .cache 42 | nosetests.xml 43 | coverage.xml 44 | *,cover 45 | 46 | # Translations 47 | *.mo 48 | *.pot 49 | 50 | # Django stuff: 51 | *.log 52 | 53 | # Sphinx documentation 54 | docs/_build/ 55 | 56 | # PyBuilder 57 | target/ 58 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GitHarvester 2 |

 3 | $ ./githarvester.py -h
 4 | 
 5 |   _____ _ _     _    _                           _
 6 |  / ____(_) |   | |  | |                         | |
 7 | | |  __ _| |_  | |__| | __ _ _ ____   _____  ___| |_ ___ _ __
 8 | | | |_ | | __| |  __  |/ _` | '__\ \ / / _ \/ __| __/ _ \ '__|
 9 | | |__| | | |_  | |  | | (_| | |   \ V /  __/\__ \ ||  __/ |
10 |  \_____|_|\__| |_|  |_|\__,_|_|    \_/ \___||___/\__\___|_|
11 | 
12 | Version 0.8
13 | By: @metacortex of @dc801
14 | 
15 | usage: githarvester.py [-h] [-a ACCOUNT] [-d DIRECTORY] [-o ORGANIZE]
16 |                        [-p PROJECT] [-r CUSTOM_REGEX] [-s CUSTOM_SEARCH] [-u]
17 |                        [-v] [-w WRITE_FILE]
18 | 
19 | This tool is used for harvesting information from GitHub. By default it looks
20 | for code with the filename of 'wp-config.php' and pulls out auth info
21 | 
22 | optional arguments:
23 |   -h, --help        show this help message and exit
24 |   -a ACCOUNT        Specify a specific user account
25 |   -d DIRECTORY      Download results to a specific directory
26 |   -o ORGANIZE       Organize results by 'new', 'old', 'best', or 'all'
27 |   -p PROJECT        Specific project to search. Use with -a
28 |   -r CUSTOM_REGEX   Custom regex string
29 |   -s CUSTOM_SEARCH  Custom GitHub search string
30 |   -u, --url         Output URL of found object
31 |   -v, --verbose     Turn verbose output on. This will output matched lines
32 |   -w WRITE_FILE     Write results to a file
33 |

#!/usr/bin/env python
"""GitHarvester (githarvester.py): harvest information from GitHub code search.

The script scrapes GitHub's HTML code-search pages, follows every hit to its
raw file and either

* applies a user supplied regular expression (``-r``), or
* by default, pulls the ``DB_NAME`` / ``DB_USER`` / ``DB_PASSWORD`` /
  ``DB_HOST`` settings out of ``wp-config.php`` files.

The source is written so that it parses on both Python 2 and Python 3
(``print`` is always called with exactly one parenthesised argument).
"""

# Import all the things!
import os
import re
import sys

try:  # Python 2 module layout
    from urllib import urlencode
    from urllib import urlopen
    from urllib import urlretrieve  # no longer called; import kept for compatibility
except ImportError:  # Python 3 moved these names
    from urllib.parse import urlencode
    from urllib.request import urlopen
    from urllib.request import urlretrieve

# Optional/third-party modules. A missing module is recorded here and reported
# by main() instead of killing the interpreter at import time, so the module
# can be imported (e.g. for testing the pure helpers) without them.
MISSING_DEPENDENCIES = []
try:
    import argparse
except ImportError:
    argparse = None
    MISSING_DEPENDENCIES.append(
        '[!] argparse is not installed. Try "pip install argparse"')
try:
    from bs4 import BeautifulSoup
except ImportError:
    BeautifulSoup = None
    MISSING_DEPENDENCIES.append(
        '[!] BeautifulSoup is not installed. Try "pip install beautifulsoup4"')
try:
    import pycurl  # not referenced below; still required, as before
except ImportError:
    pycurl = None
    MISSING_DEPENDENCIES.append(
        '[!] pycurl is not installed. Try "pip install pycurl"')

# Parsed command line options; populated by main() and read by the helpers.
args = None

# Human readable label for each GitHub sort order ('' means best match).
ORDER_LABELS = {'asc': 'OLDEST', 'desc': 'NEWEST'}

# wp-config.php settings extracted by the default (non custom-regex) mode.
WP_CONFIG_KEYS = ('DB_NAME', 'DB_USER', 'DB_PASSWORD', 'DB_HOST')

# Raw strings: the art is full of backslashes that must stay literal.
BANNER_ART = (
    r"  _____ _ _     _    _                           _",
    r" / ____(_) |   | |  | |                         | |",
    r"| |  __ _| |_  | |__| | __ _ _ ____   _____  ___| |_ ___ _ __",
    r"| | |_ | | __| |  __  |/ _` | '__\ \ / / _ \/ __| __/ _ \ '__|",
    r"| |__| | | |_  | |  | | (_| | |   \ V /  __/\__ \ ||  __/ |",
    r" \_____|_|\__| |_|  |_|\__,_|_|    \_/ \___||___/\__\___|_|",
)


# Display Startup Banner
def banner():
    """Print the ASCII-art banner followed by version and author lines."""
    print("")
    for line in BANNER_ART:
        print(line)
    print("")
    print("Version 0.8")
    print("By: @metacortex of @dc801")
    print("")


def to_text(data):
    """Return *data* as ``str``.

    ``urlopen(...).read()`` yields ``str`` on Python 2 and ``bytes`` on
    Python 3; only the latter needs decoding before regex matching.
    """
    if isinstance(data, str):
        return data
    return data.decode('utf-8', 'replace')


def build_search_url(search, order, sort, account, project, page=None):
    """Build the GitHub code-search URL shared by all result pages.

    search  -- GitHub search string (the ``q`` parameter)
    order   -- 'asc', 'desc' or '' (the ``o`` parameter)
    sort    -- 'indexed' or '' (the ``s`` parameter)
    account -- account name; only used in the path when *project* is set
    project -- repository name, or a false value for a site-wide search
    page    -- result page number; omitted from the query when None
    """
    if project:
        base = 'https://github.com/' + account + '/' + project + '/search?'
    else:
        base = 'https://github.com/search?'
    # A list of pairs (not a dict) keeps the parameter order deterministic.
    params = [('o', order)]
    if page is not None:
        params.append(('p', page))
    params.extend([
        ('q', search),
        ('s', sort),
        ('type', 'Code'),
        ('ref', 'searchresults'),
    ])
    return base + urlencode(params)


# Parse GitHub search results
def githubsearch(search, regex, order, sort, account, project):
    """Run one search and hand every result page to parseresultpage().

    Prints progress, reads the page count from the pagination bar of the
    first result page and exits with ' [!] Search error' when that bar
    cannot be interpreted.
    """
    # NOTE(review): the 'user:' qualifier is only added for site-wide
    # searches; a project search is already scoped by its URL path. The
    # original indentation of this check is ambiguous -- confirm.
    if account and not project:
        search = 'user:' + account + ' ' + search

    searchurl = build_search_url(search, order, sort, account, project)
    print('[+] Searching Github for ' + search + ' and ordering by '
          + ORDER_LABELS.get(order, 'BEST MATCH'))
    print(searchurl)
    soup = BeautifulSoup(urlopen(searchurl).read(), 'html.parser')

    # Find the bottom nav bar and parse out those links
    pagenav = soup.findAll('div', attrs={'class': 'pagination'})
    if not pagenav:
        print(' [+] Only one page of results')
        parseresultpage(1, search, order, sort, regex, account, project)
        return

    navbarlinks = []
    for nav in pagenav:
        navbarlinks.extend(nav.findAll('a'))
    try:
        # The last link is taken to be "next"; the one before it carries the
        # highest page number.
        totalpages = int(navbarlinks[-2].get_text().strip())
    except (IndexError, ValueError):
        print(' [!] Search error')
        sys.exit(0)
    print(' [+] Returned ' + str(totalpages) + ' total pages')

    # Parse each page of results
    for currentpage in range(1, totalpages + 1):
        parseresultpage(currentpage, search, order, sort, regex, account,
                        project)


def parseresultpage(page, search, order, sort, regex, account, project):
    """Fetch result page *page* and scan the raw file behind every hit.

    Each hit's file page is fetched, its 'raw-url' link followed, and the raw
    URL passed to searchcode() (custom regex given) or wpsearchcode().
    """
    print(' [+] Pulling results from page ' + str(page))
    searchurl = build_search_url(search, order, sort, account, project,
                                 page=page)
    soup = BeautifulSoup(urlopen(searchurl).read(), 'html.parser')

    # Find GitHub div with code results, then the title paragraph of each
    for result in soup.findAll('div', attrs={'class': 'code-list-item'}):
        for title in result.findAll('p', attrs={'class': 'title'}):
            links = title.findAll('a')
            if not links:
                continue  # nothing to follow in this title
            # Prefer the second link, fall back to the only one.
            link = links[1] if len(links) > 1 else links[0]
            # lstrip avoids 'github.com//path' for root-relative hrefs
            resulturl = 'https://github.com/' + str(link['href']).lstrip('/')
            resultsoup = BeautifulSoup(urlopen(resulturl).read(),
                                       'html.parser')
            for rawlink in resultsoup.findAll('a', attrs={'id': 'raw-url'}):
                rawurl = 'https://github.com' + str(rawlink['href'])
                if args.custom_regex:
                    searchcode(rawurl, regex)
                else:
                    wpsearchcode(rawurl, regex)


def searchcode(url, regex):
    """Apply the custom *regex* to the file at *url* and report the first match.

    Depending on the command line options the match is printed (-u / -v),
    appended to the results file (-w) and the whole file is saved into the
    download directory (-d). Files without a match are skipped silently.
    """
    raw = urlopen(url).read()
    try:
        match = re.search(regex, to_text(raw))
    except re.error as err:
        print(' [!] Invalid regex: ' + str(err))
        return
    if match is None:
        return
    result = match.group(0)

    if args.url:
        print(" " + str(url))
    if args.verbose:
        print(" [+] Found the following results")
        print(" " + str(result))

    try:
        if args.write_file and result != '':
            with open(args.write_file, 'a') as outfile:
                outfile.write(result + '\n')

        if args.directory:
            filename = args.directory + "/" + url.replace('/', '-')
            if not os.path.exists(args.directory):
                os.makedirs(args.directory)
            print(" [+] Downloading " + filename)
            # The body is already in memory; write it once instead of
            # downloading the file a second time.
            with open(filename, 'wb') as download:
                download.write(raw)
    except (IOError, OSError) as err:
        # Saving is best effort: report and carry on with the next hit.
        print(' [!] Could not save results: ' + str(err))


def extract_wp_setting(code, key):
    """Return the value of ``define('<key>', ...);`` in *code*, or None.

    For a quoted literal the text between the quotes is returned (escape
    sequences are left as written); for any other expression the raw
    expression text is returned. None means *key* is not defined at all.
    """
    pattern = (
        r"define\(\s*['\"]" + re.escape(key) + r"['\"]\s*,\s*"
        r"(?:'((?:\\.|[^'\\])*)'"       # single-quoted literal
        r"|\"((?:\\.|[^\"\\])*)\""      # double-quoted literal
        r"|([^;]*?))"                   # anything else, up to the closing ');'
        r"\s*\)\s*;"
    )
    match = re.search(pattern, code, re.IGNORECASE)
    if match is None:
        return None
    for value in match.groups():
        if value is not None:
            return value
    return None


def extract_wp_credentials(code):
    """Return a dict of all WP_CONFIG_KEYS found in *code*.

    Returns None unless every one of the four settings is defined.
    """
    credentials = {}
    for key in WP_CONFIG_KEYS:
        value = extract_wp_setting(code, key)
        if value is None:
            return None
        credentials[key] = value
    return credentials


def wpsearchcode(url, regex):
    """Report the WordPress database settings found in the file at *url*.

    *regex* is accepted for signature compatibility with searchcode() and is
    not used. Files that do not define all four settings are skipped.
    """
    credentials = extract_wp_credentials(to_text(urlopen(url).read()))
    if credentials is None:
        return
    db = credentials['DB_NAME']
    user = credentials['DB_USER']
    password = credentials['DB_PASSWORD']
    host = credentials['DB_HOST']

    if args.verbose:
        print(' [+] Found the following credentials')
    # NOTE(review): -u is honoured on its own here, matching searchcode();
    # with -v the output order is unchanged. Original nesting is ambiguous.
    if args.url:
        print(' ' + str(url))
    if args.verbose:
        print(' database: ' + db)
        print(' user: ' + user)
        print(' password: ' + password)
        print(' host: ' + host)

    if args.write_file:
        results = ('Database: ' + db + '\nUser: ' + user + '\nPassword: '
                   + password + '\nHost: ' + host + '\n---\n')
        try:
            with open(args.write_file, 'a') as outfile:
                outfile.write(results)
        except (IOError, OSError) as err:
            print(' [!] Could not save results: ' + str(err))


def main():
    """Parse the command line and run the requested search(es)."""
    global args

    # Formerly checked (and fatal) at import time; same messages, same exit
    # status, but importing the module no longer terminates the interpreter.
    if MISSING_DEPENDENCIES:
        for message in MISSING_DEPENDENCIES:
            print(message)
        sys.exit(0)

    banner()  # Brandwhore

    # Parsing arguments
    parser = argparse.ArgumentParser(description='This tool is used for harvesting information from GitHub. By default it looks for code with the filename of \'wp-config.php\' and pulls out auth info')
    parser.add_argument('-a', action='store', dest='account', help='Specify a specific user account', type=str)
    parser.add_argument('-d', action='store', dest='directory', help='Download results to a specific directory', type=str)
    parser.add_argument('-o', action='store', dest='organize', help='Organize results by \'new\', \'old\', \'best\', or \'all\'', type=str)
    parser.add_argument('-p', action='store', dest='project', help='Specific project to search. Use with -a', type=str)
    parser.add_argument('-r', action='store', dest='custom_regex', help='Custom regex string', type=str)
    parser.add_argument('-s', action='store', dest='custom_search', help='Custom GitHub search string', type=str)
    parser.add_argument('-u', '--url', action='store_true', help='Output URL of found object')
    parser.add_argument('-v', '--verbose', action='store_true', help='Turn verbose output on. This will output matched lines')
    parser.add_argument('-w', action='store', dest='write_file', help='Write results to a file', type=str)
    args = parser.parse_args()

    # With no options at all, show what was found instead of running silently.
    if not len(sys.argv) > 1:
        args.verbose = True

    if args.project and not args.account:
        # The project URL is built from the account name, i.e. the -a option.
        print('[!] Need -a for -p')
        parser.print_help()
        sys.exit(0)

    account = None
    project = None
    if args.account:
        account = args.account
        print('[+] Searching the account ' + account)
        if args.project:
            project = args.project
            print('[+] Searching the ' + project + ' project')

    if args.custom_search:
        search = args.custom_search
        print('[+] Custom search is: ' + str(search))
    else:
        search = 'filename:wp-config.php'
        print('[+] Using default search')

    if args.custom_regex:
        regex = args.custom_regex
        try:
            re.compile(regex)  # fail once, up front, not once per file
        except re.error as err:
            print('[!] Invalid regex: ' + str(err))
            sys.exit(1)
        print('[+] Custom regex is: ' + str(regex))
    else:
        regex = 'regexhere'  # placeholder; wpsearchcode() ignores it
        print('[+] Using default regex')

    # (order, sort) pairs to run for each -o value; anything else, including
    # no -o at all, means a single best-match search.
    best, newest, oldest = ('', ''), ('desc', 'indexed'), ('asc', 'indexed')
    plans = {
        'new': [newest],
        'old': [oldest],
        'best': [best],
        'all': [best, newest, oldest],
    }
    for order, sort in plans.get(args.organize, [best]):
        githubsearch(search, regex, order, sort, account, project)

    print('[+] DONE')


if __name__ == "__main__":
    try:
        main()
    except KeyboardInterrupt:
        print("[!] Keyboard Interrupt. Shutting down")