├── wordlists ├── web-files.txt ├── login.txt └── url-redirection.txt ├── requirements.txt ├── README.md ├── LICENSE ├── .gitignore └── dorker.py /wordlists/web-files.txt: -------------------------------------------------------------------------------- 1 | robots.txt 2 | sitemap.xml -------------------------------------------------------------------------------- /wordlists/login.txt: -------------------------------------------------------------------------------- 1 | login 2 | signin 3 | signup 4 | register -------------------------------------------------------------------------------- /wordlists/url-redirection.txt: -------------------------------------------------------------------------------- 1 | url=https 2 | url=http 3 | u=https 4 | u=http 5 | redirect?https 6 | redirect?http 7 | redirect=https 8 | redirect=http 9 | link=http 10 | link=https -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | beautifulsoup4==4.8.2 2 | bs4==0.0.1 3 | certifi==2019.11.28 4 | chardet==3.0.4 5 | docopt==0.6.2 6 | idna==2.8 7 | requests==2.22.0 8 | soupsieve==1.9.5 9 | urllib3==1.25.7 10 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | ![Twitter Follow](https://img.shields.io/twitter/follow/isaacwangethi30?label=Follow%20%40isaacwangethi30&style=social) 2 | 3 | # google-dorker 4 | A script to search for URLs with parameters worth testing for vulnerabilities 5 | 6 | ## Prerequisites 7 | ``` 8 | - Python 3 9 | - Packages in requirements.txt 10 | ``` 11 | 12 | ## Installation 13 | Clone the repo 14 | ``` 15 | git clone https://github.com/izo30/google-dorker.git 16 | ``` 17 | Install the requirements 18 | ``` 19 | python3 -m pip install -r requirements.txt 20 | ``` 21 | Run the app 22 | ``` 23 | python3 dorker.py 24 | ``` 
25 | 26 | ## Built with 27 | Python 28 | 29 | ## Authors 30 | [Isaac Wangethi](https://github.com/izo30 "Isaac Wangethi") 31 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2020 izo30 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 
92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | .vscode/ 113 | 114 | # Spyder project settings 115 | .spyderproject 116 | .spyproject 117 | 118 | # Rope project settings 119 | .ropeproject 120 | 121 | # mkdocs documentation 122 | /site 123 | 124 | # mypy 125 | .mypy_cache/ 126 | .dmypy.json 127 | dmypy.json 128 | 129 | # Pyre type checker 130 | .pyre/ 131 | -------------------------------------------------------------------------------- /dorker.py: -------------------------------------------------------------------------------- 1 | """Google dorker 1.0 2 | 3 | Usage: 4 | dorker.py 5 | dorker.py (-h | --help) 6 | dorker.py --version 7 | 8 | Arguments: 9 | Domain to be Searched 10 | File containing strings to search in url (located inside wordlists folder) 11 | Number of pages 12 | Number of parallel processes 13 | 14 | Options: 15 | -h --help Show this screen 16 | --version Show version 17 | 18 | """ 19 | 20 | import requests 21 | import re 22 | import sys 23 | import os 24 | from docopt import docopt 25 | from bs4 import BeautifulSoup 26 | from time import time as timer 27 | from functools import partial 28 | from multiprocessing import Pool 29 | 30 | # Search the dork string and retrieve urls 31 | def get_urls(search_string, start): 32 | temp = [] 33 | url = 'http://www.google.com/search' 34 | payload = {'q': search_string, 'start': start} 35 | my_headers = {'User-agent': 'Mozilla/11.0'} 36 | r = requests.get(url, params=payload, headers=my_headers) 37 | soup = BeautifulSoup(r.text, 'html.parser') 38 | divtags = soup.find_all('div', class_='kCrYT') 39 | 40 | for div in divtags: 41 | try: 42 | temp.append(re.search('url\?q=(.+?)\&sa', div.a['href']).group(1)) 43 | 
except: 44 | continue 45 | return temp 46 | 47 | # Join search terms in a single dork string 48 | def create_dork_string(domain, file_name): 49 | 50 | file_path = './wordlists/{}' .format(file_name) 51 | 52 | if not os.path.isfile(file_path): 53 | print('File "' +file_path+ '" does not exist') 54 | sys.exit() 55 | 56 | try: 57 | dork_string = '' 58 | 59 | # Read the contents of file and create a search string 60 | with open(file_path) as fp: 61 | for line in fp: 62 | if dork_string == '': 63 | dork_string = 'inurl:' + line.strip() 64 | continue 65 | dork_string += ' OR inurl:' + line.strip() 66 | return 'site:' + domain + ' ' + dork_string 67 | except: 68 | print('Error occured while reading file') 69 | sys.exit() 70 | 71 | 72 | def main(): 73 | start = timer() 74 | result = [] 75 | 76 | # Command line interface 77 | arguments = docopt(__doc__, version='Google dorker 1.0') 78 | 79 | # Get input 80 | domain = arguments[''] 81 | file_name = arguments[''] 82 | pages = arguments[''] 83 | processes = int(arguments['']) 84 | 85 | # Create search string 86 | search = create_dork_string(domain, file_name) 87 | 88 | # Multi-Processing 89 | make_request = partial(get_urls, search) 90 | pagelist = [str(x*10) for x in range(0, int(pages))] 91 | with Pool(processes) as p: 92 | tmp = p.map(make_request, pagelist) 93 | for x in tmp: 94 | result.extend(x) 95 | 96 | # Remove duplicate urls 97 | result = list(set(result)) 98 | 99 | print(*result, sep='\n') 100 | print('\nTotal URLs Scraped : %s ' % str(len(result))) 101 | print('Script Execution Time : %s ' % (timer() - start, )) 102 | 103 | 104 | if __name__ == '__main__': 105 | main() 106 | --------------------------------------------------------------------------------