├── wordlists ├── web-files.txt ├── login.txt └── url-redirection.txt ├── requirements.txt ├── README.md ├── LICENSE ├── .gitignore └── dorker.py /wordlists/web-files.txt: -------------------------------------------------------------------------------- 1 | robots.txt 2 | sitemap.xml -------------------------------------------------------------------------------- /wordlists/login.txt: -------------------------------------------------------------------------------- 1 | login 2 | signin 3 | signup 4 | register -------------------------------------------------------------------------------- /wordlists/url-redirection.txt: -------------------------------------------------------------------------------- 1 | url=https 2 | url=http 3 | u=https 4 | u=http 5 | redirect?https 6 | redirect?http 7 | redirect=https 8 | redirect=http 9 | link=http 10 | link=https -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.8.2 2 | bs4==0.0.1 3 | certifi==2019.11.28 4 | chardet==3.0.4 5 | docopt==0.6.2 6 | idna==2.8 7 | requests==2.22.0 8 | soupsieve==1.9.5 9 | urllib3==1.25.7 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Twitter Follow](https://img.shields.io/twitter/follow/isaacwangethi30?label=Follow%20%40isaacwangethi30&style=social) 2 | 3 | # google-dorker 4 | A script to search for URLs with parameters worth testing for vulnerabilities 5 | 6 | ## Prerequisites 7 | ``` 8 | - Python 3 9 | - Packages in requirements.txt 10 | ``` 11 | 12 | ## Installation 13 | Clone the repo 14 | ``` 15 | git clone https://github.com/izo30/google-dorker.git 16 | ``` 17 | Install the requirements 18 | ``` 19 | python3 -m pip install -r requirements.txt 20 | ``` 21 | Run the app 22 | ``` 23 | python3 dorker.py 24 | ``` 
25 | 26 | ## Built with 27 | Python 28 | 29 | ## Authors 30 | [Isaac Wangethi](https://github.com/izo30 "Isaac Wangethi") 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 izo30 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | .vscode/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /dorker.py: -------------------------------------------------------------------------------- 1 | """Google dorker 1.0 2 | 3 | Usage: 4 | dorker.py 5 | dorker.py (-h | --help) 6 | dorker.py --version 7 | 8 | Arguments: 9 | Domain to be Searched 10 | File containing strings to search in url (located inside wordlists folder) 11 | Number of pages 12 | Number of parallel processes 13 | 14 | Options: 15 | -h --help Show this screen 16 | --version Show version 17 | 18 | """ 19 | 20 | import requests 21 | import re 22 | import sys 23 | import os 24 | from docopt import docopt 25 | from bs4 import BeautifulSoup 26 | from time import time as timer 27 | from functools import partial 28 | from multiprocessing import Pool 29 | 30 | # Search the dork string and retrieve urls 31 | def get_urls(search_string, start): 32 | temp = [] 33 | url = 'http://www.google.com/search' 34 | payload = {'q': search_string, 'start': start} 35 | my_headers = {'User-agent': 'Mozilla/11.0'} 36 | r = requests.get(url, params=payload, headers=my_headers) 37 | soup = BeautifulSoup(r.text, 'html.parser') 38 | divtags = soup.find_all('div', class_='kCrYT') 39 | 40 | for div in divtags: 41 | try: 42 | temp.append(re.search('url\?q=(.+?)\&sa', div.a['href']).group(1)) 43 | 
except: 44 | continue 45 | return temp 46 | 47 | # Join search terms in a single dork string 48 | def create_dork_string(domain, file_name): 49 | 50 | file_path = './wordlists/{}' .format(file_name) 51 | 52 | if not os.path.isfile(file_path): 53 | print('File "' +file_path+ '" does not exist') 54 | sys.exit() 55 | 56 | try: 57 | dork_string = '' 58 | 59 | # Read the contents of file and create a search string 60 | with open(file_path) as fp: 61 | for line in fp: 62 | if dork_string == '': 63 | dork_string = 'inurl:' + line.strip() 64 | continue 65 | dork_string += ' OR inurl:' + line.strip() 66 | return 'site:' + domain + ' ' + dork_string 67 | except: 68 | print('Error occured while reading file') 69 | sys.exit() 70 | 71 | 72 | def main(): 73 | start = timer() 74 | result = [] 75 | 76 | # Command line interface 77 | arguments = docopt(__doc__, version='Google dorker 1.0') 78 | 79 | # Get input 80 | domain = arguments[''] 81 | file_name = arguments[''] 82 | pages = arguments[''] 83 | processes = int(arguments['']) 84 | 85 | # Create search string 86 | search = create_dork_string(domain, file_name) 87 | 88 | # Multi-Processing 89 | make_request = partial(get_urls, search) 90 | pagelist = [str(x*10) for x in range(0, int(pages))] 91 | with Pool(processes) as p: 92 | tmp = p.map(make_request, pagelist) 93 | for x in tmp: 94 | result.extend(x) 95 | 96 | # Remove duplicate urls 97 | result = list(set(result)) 98 | 99 | print(*result, sep='\n') 100 | print('\nTotal URLs Scraped : %s ' % str(len(result))) 101 | print('Script Execution Time : %s ' % (timer() - start, )) 102 | 103 | 104 | if __name__ == '__main__': 105 | main() 106 | --------------------------------------------------------------------------------