├── __init__.py ├── apis ├── __init__.py ├── api_keys.db ├── api_keys_reference.db ├── api_virustotal.py ├── api_securitytrails.py └── api_hudsonrock.py ├── docs ├── __init__.py └── dpulse-docs │ ├── docs │ ├── contact_dev.md │ ├── demo.md │ ├── pagesearch.md │ ├── basic_scan.md │ ├── logging.md │ ├── snapshotting.md │ ├── index.md │ ├── api.md │ ├── reporting.md │ ├── getting_started.md │ ├── dorking.md │ └── config.md │ └── mkdocs.yml ├── pagesearch ├── __init__.py └── pagesearch_parsers.py ├── dorking ├── __init__.py ├── iot_dorking.db ├── basic_dorking.db ├── files_dorking.db ├── adminpanels_dorking.db ├── webstructure_dorking.db ├── ua_rotator.py ├── proxies_rotator.py ├── db_creator.py └── dorking_handler.py ├── service ├── __init__.py ├── misc.py ├── logs_processing.py ├── files_processing.py ├── config_processing.py ├── db_processing.py └── cli_init.py ├── reporting_modules ├── __init__.py └── html_report_creation.py ├── snapshotting ├── __init__.py ├── html_snapshotting.py ├── screen_snapshotting.py └── archive_snapshotting.py ├── datagather_modules ├── __init__.py ├── networking_processor.py ├── crawl_processor.py └── data_assembler.py ├── report_examples ├── html_report_example │ ├── 01-robots.txt │ └── ps_documents │ │ └── extracted_About%20the%20HackThisSite%20Mirror.txt └── xlsx_report_example │ ├── 01-robots.txt │ ├── hackthissiteorg_(28-12-2024, 09h33m02s).csv │ └── ps_documents │ └── extracted_About%20the%20HackThisSite%20Mirror.txt ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── python-package.yml │ └── codeql.yml ├── .readthedocs.yaml ├── requirements.txt ├── SECURITY.md ├── Dockerfile ├── LICENSE ├── docker-entrypoint.sh ├── pyproject.toml ├── CODE_OF_CONDUCT.md └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /apis/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pagesearch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dorking/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /service/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /reporting_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snapshotting/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /datagather_modules/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- 
/apis/api_keys.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/apis/api_keys.db -------------------------------------------------------------------------------- /dorking/iot_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/iot_dorking.db -------------------------------------------------------------------------------- /apis/api_keys_reference.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/apis/api_keys_reference.db -------------------------------------------------------------------------------- /dorking/basic_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/basic_dorking.db -------------------------------------------------------------------------------- /dorking/files_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/files_dorking.db -------------------------------------------------------------------------------- /dorking/adminpanels_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/adminpanels_dorking.db -------------------------------------------------------------------------------- /dorking/webstructure_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/webstructure_dorking.db -------------------------------------------------------------------------------- /report_examples/html_report_example/01-robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /missions/ 3 | Disallow: /killing/all/humans/ 4 | -------------------------------------------------------------------------------- /report_examples/xlsx_report_example/01-robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /missions/ 3 | Disallow: /killing/all/humans/ 4 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .github 3 | __pycache__/ 4 | *.pyc 5 | *.pyo 6 | *.pyd 7 | .venv/ 8 | .env 9 | dist/ 10 | build/ 11 | *.log 12 | docs/ 13 | report_examples/ 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | 
11 | 
--------------------------------------------------------------------------------
/report_examples/xlsx_report_example/hackthissiteorg_(28-12-2024, 09h33m02s).csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/report_examples/xlsx_report_example/hackthissiteorg_(28-12-2024, 09h33m02s).csv
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | build:
4 |   os: ubuntu-20.04
5 |   tools:
6 |     python: "3.10"
7 |   jobs:
8 |     pre_build:
9 |       - pip install mkdocs mkdocs-material
10 | 
11 | python:
12 |   install:
13 |     - requirements: requirements.txt
14 | 
15 | mkdocs:
16 |   configuration: docs/dpulse-docs/mkdocs.yml
17 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Jinja2>=3.1.6
2 | beautifulsoup4==4.12.2
3 | requests==2.32.4
4 | python-whois==0.9.4
5 | colorama==0.4.6
6 | pyfiglet==1.0.2
7 | rich==13.7.1
8 | MechanicalSoup==1.3.0
9 | builtwith==1.3.4
10 | dnspython==2.6.1
11 | openpyxl==3.1.2
12 | PyMuPDF==1.26.6
13 | selenium==4.28.1
14 | webdriver-manager==4.0.2
15 | undetected_chromedriver==3.5.5
16 | setuptools==80.9.0
17 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/contact_dev.md:
--------------------------------------------------------------------------------
1 | ## Contact developer
2 | 
3 | The DPULSE developer will be glad to receive your feedback, questions and suggestions.
4 | Feel free to contact the developer in whichever way is most convenient for you:
5 | 
6 | * OSINT-TECHNOLOGIES e-mail: osint.technologies@gmail.com
7 | * [Make an issue page on DPULSE GitHub repository](https://github.com/OSINT-TECHNOLOGIES/dpulse/issues/new/choose)
8 | 
9 | 
10 | 
11 | 
12 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: DPULSE Docs
2 | theme:
3 |   name: readthedocs
4 | nav:
5 |   - Home: index.md
6 |   - Getting started: getting_started.md
7 |   - Basic scan: basic_scan.md
8 |   - PageSearch: pagesearch.md
9 |   - Built-in automatic Dorking: dorking.md
10 |   - Built-in API scanning: api.md
11 |   - Snapshotting: snapshotting.md
12 |   - Reporting and report types: reporting.md
13 |   - Configuration file: config.md
14 |   - Logging: logging.md
15 |   - Demo and use-cases: demo.md
16 |   - Contact developer: contact_dev.md
17 | 
--------------------------------------------------------------------------------
/snapshotting/html_snapshotting.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from colorama import Fore, Style
3 | 
4 | def save_page_as_html(url, filename):
5 |     try:
6 |         print(Fore.GREEN + "Getting web page's content" + Style.RESET_ALL)
7 |         response = requests.get(url, timeout=30)  # bounded timeout so a dead host can't hang the scan
8 |         print(Fore.GREEN + "Creating .HTML file" + Style.RESET_ALL)
9 |         with open(filename, 'w', encoding='utf-8') as file:
10 |             file.write(response.text)
11 |         print(Fore.GREEN + ".HTML snapshot was successfully created" + Style.RESET_ALL)
12 |     except Exception as e:
13 |         print(Fore.RED + f"Error: {e}" + Style.RESET_ALL)
14 | 
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 | 
3 | ## Supported Versions
4 | 
5 | The table below lists the versions that will receive security updates and vulnerability fixes as issues are found
6 | 
7 | | Version | Supported          |
8 | | ------- | ------------------ |
9 | | >= 1.2  | :white_check_mark: |
10 | | < 1.2   | :x:                |
11 | 
12 | ## Reporting a Vulnerability
13 | 
14 | You can report vulnerabilities through the "Report a security vulnerability" issue template. Please describe the problem in as much detail as you can. Security reports are our top priority, and we will investigate them as soon as they arrive.
15 | 
--------------------------------------------------------------------------------
/dorking/ua_rotator.py:
--------------------------------------------------------------------------------
1 | import random
2 | import sys
3 | sys.path.append('service')
4 | from config_processing import read_config
5 | from colorama import Fore, Style
6 | 
7 | class UserAgentRotator:
8 |     def __init__(self):
9 |         config_values = read_config()
10 |         self.user_agents = config_values['user_agents']
11 | 
12 |     def get_random_user_agent(self):
13 |         user_agent = random.choice(self.user_agents)  # pick once so the printed and returned User-Agent match
14 |         print(Fore.GREEN + "Changed User-Agent to " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{user_agent}" + Style.RESET_ALL)
15 |         return user_agent
16 | 
17 | user_agent_rotator = UserAgentRotator()
18 | 
--------------------------------------------------------------------------------
/service/misc.py:
--------------------------------------------------------------------------------
1 | import socket
2 | 
3 | def time_processing(end):
4 |     if end < 60:
5 |         endtime = round(end)
6 |         endtime_string = f'approximately {endtime} seconds'
7 |     else:
8 |         time_minutes = round(end / 60)
9 |         if time_minutes == 1:
10 |             endtime_string = f'approximately {time_minutes} minute'
11 |         else:
12 |             endtime_string = f'approximately {time_minutes} minutes'
13 |     return endtime_string
14 | 
15 | def domain_precheck(domain):
16 |     try:
17 |         socket.create_connection((domain, 80), timeout=5)
18 |         return True
19 |     except OSError:
20 |         return False
21 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11-slim
2 | 
3 | ENV PYTHONUNBUFFERED=1 \
4 |     PIP_NO_CACHE_DIR=1 \
5 |     POETRY_NO_INTERACTION=1 \
6 |     POETRY_VIRTUALENVS_CREATE=false \
7 |     POETRY_VERSION=1.8.3
8 | 
9 | WORKDIR /app
10 | 
11 | ENV PYTHONPATH=/app:/app/service:/app/apis:/app/datagather_modules:/app/dorking:/app/pagesearch:/app/reporting_modules:/app/snapshotting:$PYTHONPATH
12 | 
13 | RUN pip install "poetry==${POETRY_VERSION}"
14 | 
15 | COPY pyproject.toml poetry.lock* ./
16 | RUN poetry install --no-root
17 | 
18 | COPY . .
19 | 
20 | COPY docker-entrypoint.sh /app/docker-entrypoint.sh
21 | RUN chmod +x /app/docker-entrypoint.sh
22 | 
23 | ENTRYPOINT ["/app/docker-entrypoint.sh"]
24 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Version [e.g. 22]
29 | 
30 | **Additional context**
31 | Add any other context about the problem here.
32 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/demo.md:
--------------------------------------------------------------------------------
1 | # DPULSE demos & use-cases
2 | 
3 | On this page you can see actual DPULSE demos and use-cases in common cybersecurity and OSINT scenarios
4 | 
5 | ## Demo №1. Starting DPULSE
6 | 
7 | Yes, starting DPULSE is as simple as that. Don't forget to install all requirements before starting DPULSE
8 | 
9 | ![dpulse_start](https://github.com/user-attachments/assets/9ec0ab73-2206-4d38-bae6-e88656e17f95)
10 | 
11 | ## Demo №2. DPULSE basic scan workflow
12 | 
13 | Here you can see the complete process of a DPULSE basic scan from beginning to end. Remember that sometimes your scan might not be ideal and errors may appear. However, they will not interrupt the scanning process; they will only affect the number and variety of results in the final report.
14 | 
15 | ![dpulse_bs](https://github.com/user-attachments/assets/b0ad7827-6dac-4f82-a369-4447a0e1c878)
16 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/pagesearch.md:
--------------------------------------------------------------------------------
1 | # PageSearch mode
2 | 
3 | PageSearch is an extended deep-search function for subdomains which runs in addition to the basic scan. The user can choose whether to use PageSearch during the pre-scan preparation steps.
4 | 
5 | ## PageSearch results
6 | 
7 | PageSearch returns extended information about the subdomains found during the basic scan. The extended information includes the following:
8 | 
9 | 1. More e-mail addresses
10 | 2. API keys
11 | 3. Exposed passwords
12 | 4. Cookies
13 | 5. Hidden forms of data and other web page elements
14 | 6. Documents, config files, database files (and PageSearch can download them!)
15 | 7. User-specified words in PDF files
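16 | 
17 | To give a sense of what this extraction involves, below is a minimal, illustrative sketch of pulling e-mail addresses out of a page with requests and a regular expression. This is not DPULSE's exact implementation (the real logic lives in pagesearch_parsers.py and covers far more artifact types), just the general idea:
18 | 
19 | ```python
20 | import re
21 | import requests
22 | 
23 | EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
24 | 
25 | def extract_emails(url):
26 |     # fetch the page and scan its raw HTML for e-mail-like strings
27 |     response = requests.get(url, timeout=10)
28 |     return sorted(set(EMAIL_RE.findall(response.text)))
29 | 
30 | print(extract_emails("https://example.com"))
31 | ```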
32 | 
33 | PageSearch scan example (this example is not very representative because the scanned site is not a typical real-world domain):
34 | 
35 | ![pagesearch1](https://github.com/user-attachments/assets/ed91f37f-578f-462b-a464-5281dd06ba0c)
36 | 
--------------------------------------------------------------------------------
/service/logs_processing.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from config_processing import read_config
3 | from colorama import Fore, Style
4 | 
5 | config_values = read_config()
6 | logging_level = (config_values['logging_level']).upper()
7 | 
8 | if logging_level == 'DEBUG':
9 |     level = logging.DEBUG
10 | elif logging_level == 'INFO':
11 |     level = logging.INFO
12 | elif logging_level == 'WARNING':
13 |     level = logging.WARNING
14 | elif logging_level == 'ERROR':
15 |     level = logging.ERROR
16 | elif logging_level == 'CRITICAL':
17 |     level = logging.CRITICAL
18 | else:
19 |     print(Fore.RED + "You've entered a wrong logging level in the config file. Please verify the supported modes and re-enter it" + Style.RESET_ALL)
20 |     print(Fore.RED + "Setting the logging level to DEBUG for this session" + Style.RESET_ALL)
21 |     level = logging.DEBUG
22 | 
23 | logging.basicConfig(filename="journal.log", level=level, format="%(asctime)s - %(levelname)s - %(message)s")
24 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 OSINT-TECHNOLOGIES
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/docker-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | 
4 | if [ ! -d service ]; then
5 |     mkdir -p service
6 | fi
7 | 
8 | if [ ! -f service/config.ini ] && [ -f /app/service/config.ini ]; then
9 |     cp /app/service/config.ini service/config.ini
10 | fi
11 | 
12 | if [ ! -d dorking ]; then
13 |     mkdir -p dorking
14 | fi
15 | 
16 | if ls /app/dorking/*.db >/dev/null 2>&1; then
17 |     for dbfile in /app/dorking/*.db; do
18 |         dest="dorking/$(basename "$dbfile")"
19 |         if [ ! -f "$dest" ]; then
20 |             cp "$dbfile" "$dest"
21 |         fi
22 |     done
23 | fi
24 | 
25 | if [ !
-d apis ]; then 26 | mkdir -p apis 27 | fi 28 | 29 | if ls /app/apis/*.db >/dev/null 2>&1; then 30 | for dbfile in /app/apis/*.db; do 31 | dest="apis/$(basename "$dbfile")" 32 | if [ ! -f "$dest" ]; then 33 | cp "$dbfile" "$dest" 34 | fi 35 | done 36 | fi 37 | 38 | if [ -d /app/service/pdf_report_templates ]; then 39 | if [ ! -d service/pdf_report_templates ]; then 40 | mkdir -p service/pdf_report_templates 41 | fi 42 | 43 | for tmpl in /app/service/pdf_report_templates/*; do 44 | dest="service/pdf_report_templates/$(basename "$tmpl")" 45 | if [ ! -f "$dest" ]; then 46 | cp "$tmpl" "$dest" 47 | fi 48 | done 49 | fi 50 | 51 | exec python /app/dpulse.py 52 | -------------------------------------------------------------------------------- /report_examples/html_report_example/ps_documents/extracted_About%20the%20HackThisSite%20Mirror.txt: -------------------------------------------------------------------------------- 1 | -=- What is the HackThisSite Mirror? -=- 2 | 3 | HackThisSite may from time to time mirror things we think are deserving of our 4 | bandwidth and maintenance, simply because of a belief in the cause or a general 5 | liking of the data and its producers. 6 | 7 | 8 | -=- Will you mirror my data? -=- 9 | 10 | You are more than welcome to request mirroring by emailing us at 11 | admin - at - hackthissite - dot - org, but we will NOT guarantee anything! 12 | Our bandwidth and time is precious, and just because you think your data is 13 | well-deserving of our attention, does not mean we will think the same. 14 | 15 | 16 | -=- What do you generally mirror? -=- 17 | 18 | HackThisSite will mirror projects we are involved with, or have a vested 19 | interest in. For example, we are affiliated with Hackbloc, which produces 20 | the HackThisZine periodical. Due to this, we mirror their publications. 21 | Other such examples could someday include FreeBSD, nginx, Asterisk, UnrealIRC, 22 | among others, since these are all projects whose benefits we reap. 23 | 24 | 25 | -=- What are your mirror specifications? -=- 26 | 27 | mirror.hackthissite.org is hosted on a dedicated server in Europe with 28 | redundant drives and a 1 Gbps connection. 29 | -------------------------------------------------------------------------------- /report_examples/xlsx_report_example/ps_documents/extracted_About%20the%20HackThisSite%20Mirror.txt: -------------------------------------------------------------------------------- 1 | -=- What is the HackThisSite Mirror? -=- 2 | 3 | HackThisSite may from time to time mirror things we think are deserving of our 4 | bandwidth and maintenance, simply because of a belief in the cause or a general 5 | liking of the data and its producers. 6 | 7 | 8 | -=- Will you mirror my data? -=- 9 | 10 | You are more than welcome to request mirroring by emailing us at 11 | admin - at - hackthissite - dot - org, but we will NOT guarantee anything! 12 | Our bandwidth and time is precious, and just because you think your data is 13 | well-deserving of our attention, does not mean we will think the same. 14 | 15 | 16 | -=- What do you generally mirror? -=- 17 | 18 | HackThisSite will mirror projects we are involved with, or have a vested 19 | interest in. For example, we are affiliated with Hackbloc, which produces 20 | the HackThisZine periodical. Due to this, we mirror their publications. 21 | Other such examples could someday include FreeBSD, nginx, Asterisk, UnrealIRC, 22 | among others, since these are all projects whose benefits we reap. 
23 | 
24 | 
25 | -=- What are your mirror specifications? -=-
26 | 
27 | mirror.hackthissite.org is hosted on a dedicated server in Europe with
28 | redundant drives and a 1 Gbps connection.
29 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/basic_scan.md:
--------------------------------------------------------------------------------
1 | # Basic scan
2 | 
3 | Basic scan is the default and most fundamental scanning mode, and it runs after all preparation steps. It always starts first, and you can't start scanning without it.
4 | 
5 | ## Basic scan results
6 | 
7 | Basic scan returns general open-source information about the target domain, such as:
8 | 
9 | 1. WHOIS information, which contains the domain name, full URL, IP address, registrar info, creation and expiration dates, organization name and contact e-mails.
10 | 2. Subdomains list
11 | 3. E-mail addresses list (gathered from subdomains)
12 | 4. IP addresses list (gathered from subdomains)
13 | 5. Social media links, posts and profiles. Supported social media are Facebook, Twitter (X.com), Instagram, Telegram, TikTok, LinkedIn, VKontakte, YouTube, Odnoklassniki, WeChat
14 | 6. DNS and SSL information. DNS information contains name servers and MX addresses. SSL certificate information contains issuer, subject, creation and expiration dates, certificate name and serial number.
15 | 7. Basic pre-pentest information such as possible vulnerabilities (CVEs), open ports and hostnames.
16 | 8. Development and deployment services and frameworks information, such as CMS, web servers, used programming languages and web frameworks, analytics services, tags and so on.
17 | 9. Downloaded copies of sitemap.xml and robots.txt files from the domain
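18 | 
19 | Under the hood, data like WHOIS records and name servers comes from the libraries listed in requirements.txt, such as python-whois and dnspython. As a rough, illustrative sketch of such lookups (not DPULSE's exact code):
20 | 
21 | ```python
22 | import whois          # python-whois
23 | import dns.resolver   # dnspython
24 | 
25 | domain = "example.com"
26 | info = whois.whois(domain)  # registrar, creation/expiration dates, contact e-mails
27 | print(info.registrar, info.creation_date)
28 | for record in dns.resolver.resolve(domain, "NS"):  # name servers
29 |     print(record.target)
30 | ```
31 | 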
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["poetry-core"]
3 | build-backend = "poetry.core.masonry.api"
4 | 
5 | [tool.poetry]
6 | name = "dpulse"
7 | version = "1.4"
8 | description = "Convenient, fast and user-friendly collector of domain information from Open-Sources"
9 | authors = ["OSINT-TECHNOLOGIES <osint.technologies@gmail.com>"]
10 | readme = "README.md"
11 | license = "MIT"
12 | homepage = "https://github.com/OSINT-TECHNOLOGIES/dpulse"
13 | repository = "https://github.com/OSINT-TECHNOLOGIES/dpulse"
14 | classifiers = [
15 |     "Development Status :: 5 - Production/Stable",
16 |     "Programming Language :: Python :: 3",
17 |     "Intended Audience :: Information Technology",
18 |     "Operating System :: OS Independent",
19 |     "License :: OSI Approved :: MIT License",
20 |     "Natural Language :: English"
21 | ]
22 | 
23 | [tool.poetry.dependencies]
24 | python = ">=3.10,<4.0"
25 | Jinja2 = "^3.1.6"
26 | beautifulsoup4 = "^4.12.2"
27 | requests = "^2.32.4"
28 | python-whois = "^0.9.4"
29 | colorama = "^0.4.6"
30 | pyfiglet = "^1.0.2"
31 | rich = "^13.7.1"
32 | MechanicalSoup = "^1.3.0"
33 | builtwith = "^1.3.4"
34 | dnspython = "^2.6.1"
35 | PyMuPDF = "^1.26.6"
36 | selenium = "^4.28.1"
37 | webdriver-manager = "^4.0.2"
38 | undetected_chromedriver = "^3.5.5"
39 | 
40 | [tool.poetry.scripts]
41 | dpulse = 'dpulse.dpulse:run'
42 | 
43 | [tool.poetry.group.dev.dependencies]
44 | deptry = "^0.23.0"
45 | 
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 | 
4 | name: Python package
5 | 
6 | on:
7 |   push:
8 |     branches: [ "main" ]
9 |   pull_request:
10 |     branches: [ "main" ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       fail-fast: false
18 |       matrix:
19 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v4
23 |     - name: Set up Python ${{ matrix.python-version }}
24 |       uses: actions/setup-python@v3
25 |       with:
26 |         python-version: ${{ matrix.python-version }}
27 |     - name: Install dependencies
28 |       run: |
29 |         python -m pip install --upgrade pip
30 |         python -m pip install flake8 pytest
31 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 |     - name: Lint with flake8
33 |       run: |
34 |         # stop the build if there are Python syntax errors or undefined names
35 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
36 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38 |     - name: Test with pytest
39 |       run: |
40 |         pytest
41 | 
--------------------------------------------------------------------------------
/service/files_processing.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | def get_blob(file):
4 |     with open(file, 'rb') as report_file:
5 |         blob_data = report_file.read()
6 |     return blob_data
7 | 
8 | def find_files(filename):
9 |     root_directory = os.getcwd()
10 |     for root, dirs, files in os.walk(root_directory):
11 |         if filename in files:
12 |             return os.path.join(root, filename)
13 |     return None
14 | 
15 | def get_db_columns(report_folder):
16 |     # each artifact file is optional: fall back to a placeholder if it is missing or unreadable
17 |     try:
18 |         with open(report_folder + "//" + '01-robots.txt', 'r') as robots_file:
19 |             robots_content = robots_file.read()
20 |     except OSError:
21 |         robots_content = 'robots.txt: empty'
22 |     try:
23 |         with open(report_folder + "//" + '02-sitemap.txt', 'r') as sitemap_xml:
24 |             sitemap_content = sitemap_xml.read()
25 |     except OSError:
26 |         sitemap_content = 'sitemap.xml: empty'
27 |     try:
28 |         with open(report_folder + "//" + '03-sitemap_links.txt', 'r') as sitemap_links:
29 |             sitemap_links_content = sitemap_links.read()
30 |     except OSError:
31 |         sitemap_links_content = 'Sitemap links: empty'
32 |     try:
33 |         with open(report_folder + "//" + '04-dorking_results.txt', 'r') as dorking_file:
34 |             dorking_content = dorking_file.read()
35 |     except OSError:
36 |         dorking_content = 'Dorking content: empty'
37 |     return robots_content, sitemap_content, sitemap_links_content, dorking_content
38 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/logging.md:
--------------------------------------------------------------------------------
1 | # About logging
2 | 
3 | Logging is a way to record events and messages that occur during the execution of a program, which helps in debugging and monitoring the application's behavior. Since DPULSE is written in Python, it uses the built-in ***logging*** module.
4 | 
5 | ## Levels of logging
6 | 
7 | There are five built-in log message levels supported in DPULSE:
8 | 
9 | - Debug (used to give detailed information, typically of interest only when diagnosing problems)
10 | - Info (used to confirm that things are working as expected)
11 | - Warning (used as an indication that something unexpected happened, or is indicative of some problem in the near future)
12 | - Error (tells that due to a more serious problem, the software has not been able to perform some function)
13 | - Critical (tells about a serious error, indicating that the program itself may be unable to continue running)
14 | 
15 | You can use these levels according to your needs by editing the configuration file. You can read more about changing configuration parameters in the "Configuration file" paragraph.
16 | 
17 | ## How it looks in practice
18 | 
19 | In DPULSE, the logging file (journal.log) is first created on the first DPULSE start, and the first strings in the log file appear with the first scan. A standard string in this file contains the date (YYYY-MM-DD format), time (HH:MM:SS,MS format), logging level, process name and its status (additionally, the string contains the full error if the status was bad). DPULSE also separates log info for each scan with STARTS HERE and ENDS HERE lines. The content of the log file looks like this:
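20 | 
21 | For illustration, here are a few hypothetical journal.log lines in the format configured in logs_processing.py ("%(asctime)s - %(levelname)s - %(message)s"); the exact process names and statuses depend on the scan:
22 | 
23 | ```
24 | 2024-12-28 09:33:02,153 - INFO - hackthissite.org SCAN STARTS HERE
25 | 2024-12-28 09:33:05,410 - INFO - WHOIS gathering: OK
26 | 2024-12-28 09:33:09,872 - ERROR - sitemap.xml extraction: FAIL. REASON: 404 Client Error
27 | 2024-12-28 09:33:41,006 - INFO - hackthissite.org SCAN ENDS HERE
28 | ```
29 | 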
30 | ![logging](https://github.com/user-attachments/assets/50acae24-f024-4793-8b45-9d7e284329a6)
31 | 
32 | ![logging2](https://github.com/user-attachments/assets/b27f8a93-115d-49ad-bf1b-c7f72613de9d)
33 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/snapshotting.md:
--------------------------------------------------------------------------------
1 | # Snapshotting and screenshotting
2 | 
3 | A website snapshot is a representation of a website at a specific point in time. Unlike a visual representation, a snapshot encapsulates the user interface elements, allowing you to open and navigate the website online or offline at a later date. Screenshots, on the other hand, lack this capacity for interactive navigation and are limited to visual inspection alone. In other words, it's a capture of a device's point of view at a specific moment. DPULSE supports both of these methods to provide full capability for capturing a target's contents. You will be prompted to select the snapshotting mode during the pre-scan interview:
4 | 
5 | ![snap](https://github.com/user-attachments/assets/c24d297d-d52e-45e1-9770-97229abcc2ce)
6 | 
7 | ## Screenshotting
8 | 
9 | Screenshotting, as the name says, is basically the process of taking a screenshot of the domain page. It uses the selenium library and its headless browser mode in order to take the screenshot. It is crucial to configure its parameters correctly (read the "Configuration file" paragraph, "Config file content" section). After the scan ends, you will find a screenshot of the domain's main page in the scan folder.
10 | 
11 | ## Snapshotting: Web copy and Wayback Archive
12 | 
13 | There are two ways to make a snapshot of a target domain using DPULSE. The first way is a common snapshot: it saves the web page's copy as an HTML file, so it is fully interactive and contains all web elements like HTML code, DOM structure and so on. The second way is a Wayback Archive snapshot. It uses the Wayback Machine API in order to get all copies of a website within a certain period of time specified by the user, as shown below:
14 | 
15 | ![snap1](https://github.com/user-attachments/assets/dd82a133-95a8-4fa4-9dc7-ed18d2768d16)
16 | 
17 | After the scan ends, you will find snapshots of the domain's main page in the scan folder (in the case of a Wayback snapshot, there'll be an additional folder to store all snapshots).
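18 | 
19 | For reference, the Wayback lookup boils down to a single request to the CDX API. A trimmed sketch of what archive_snapshotting.py does under the hood:
20 | 
21 | ```python
22 | import requests
23 | 
24 | CDX_API = "http://web.archive.org/cdx/search/cdx"
25 | 
26 | # list unique, successfully archived captures of a page for a given period
27 | params = {"url": "example.com", "from": "20240101", "to": "20241231",
28 |           "output": "json", "fl": "timestamp,original,mime",
29 |           "filter": "statuscode:200", "collapse": "digest"}
30 | snapshots = requests.get(CDX_API, params=params, timeout=15).json()[1:]  # row 0 is the header
31 | ```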
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/index.md:
--------------------------------------------------------------------------------
1 | # Welcome to DPULSE documentation
2 | 
3 | Convenient, fast and user-friendly collector of domain information from open-sources
4 | 
5 | ## What is DPULSE
6 | 
7 | DPULSE is a software solution for conducting OSINT research in relation to a certain domain. In general, it provides you with a certain set of functions, such as:
8 | 
9 | 1. ***Basic scan:*** extracts general information about the domain such as WHOIS information, subdomains, e-mail addresses, IP addresses, social media links/posts/profiles, SSL certificate info, possible vulnerabilities, open ports, CPEs, used web technologies and so on. It can also download sitemap.xml and robots.txt files from the domain
10 | 
11 | 2. ***PageSearch scan:*** extended deep-search function for subdomains, which runs in addition to the basic scan and which can find more e-mail addresses, API keys, exposed passwords, cookies, hidden forms of data and other web page elements, documents, config files, database files (and PageSearch can download them!), and user-specified words in PDF files
12 | 
13 | 3. ***Dorking scan:*** extended domain research function with prepared Google Dorking databases for different purposes, such as IoT dorking, files dorking, admin panels dorking, web elements dorking. Moreover, this mode allows you to create your own custom Google Dorking database
14 | 
15 | 4. ***API scan:*** extended domain research function with prepared functions for 3rd party API usage. Currently DPULSE supports the VirusTotal API (for brief domain information gathering), the SecurityTrails API (deep subdomains and DNS enumeration) and the keyless HudsonRock API (info-stealer compromise checks)
16 | 
17 | 5. ***Snapshotting:*** extended domain research function which copies the domain's home page content in various ways: by screenshotting it (screenshot snapshot), by HTML copying (HTML snapshot) and by finding and downloading previous versions (Wayback Machine snapshot)
18 | 
19 | Finally, DPULSE compiles all found data into an easy-to-read HTML or XLSX report by category. It also saves all information about the scan in a local report storage database, from which it can be restored later.
20 | 
--------------------------------------------------------------------------------
/dorking/proxies_rotator.py:
--------------------------------------------------------------------------------
1 | import random
2 | import sys
3 | sys.path.append('service')
4 | from config_processing import read_config
5 | from colorama import Fore, Style
6 | import requests
7 | from requests.exceptions import ProxyError, ConnectionError, Timeout
8 | 
9 | class ProxiesRotator:
10 |     def __init__(self):
11 |         config_values = read_config()
12 |         self.proxy_file_path = str(config_values['proxies_file_path'])
13 | 
14 |     def check_proxies(self, proxies_list):
15 |         working_proxies = []
16 |         print(Fore.GREEN + f'Checking {len(proxies_list)} proxies, please wait...' + Style.RESET_ALL)
17 |         for proxy in proxies_list:
18 |             proxies = {
19 |                 "http": proxy
20 |             }
21 |             try:
22 |                 response = requests.get('https://google.com', proxies=proxies, timeout=5)
23 |                 if response.status_code == 200:
24 |                     working_proxies.append(proxy)
25 |                     #print(Fore.GREEN + f"Proxy {proxy} is working" + Style.RESET_ALL)
26 |                 else:
27 |                     pass
28 |                     #print(Fore.GREEN + f"Proxy {proxy} returned status code {response.status_code}" + Style.RESET_ALL)
29 |             except (ProxyError, ConnectionError, Timeout):
30 |                 pass
31 |                 #print(Fore.GREEN + f"Proxy {proxy} is not working" + Style.RESET_ALL)
32 |         print(Fore.GREEN + f'Found {len(working_proxies)} working proxies' + Style.RESET_ALL)
33 |         return working_proxies
34 | 
35 |     def get_proxies(self):
36 |         if self.proxy_file_path == 'NONE':
37 |             print(Fore.RED + "Path to the proxies file was not set in the config file. Proxification of Google Dorking won't be applied\n" + Style.RESET_ALL)
38 |             return 0, ""
39 |         else:
40 |             with open(self.proxy_file_path, 'r') as f:
41 |                 print(Fore.GREEN + 'Found a path to get proxies from. Continuing' + Style.RESET_ALL)
42 |                 proxies_list = [proxy.strip() for proxy in f]
43 |             return 1, proxies_list
44 | 
45 |     def get_random_proxy(self, proxies_list):
46 |         proxy = random.choice(proxies_list)  # pick once so the printed and returned proxy match
47 |         print(Fore.GREEN + "Set proxy to " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{proxy}" + Style.RESET_ALL)
48 |         return proxy
49 | 
50 | proxies_rotator = ProxiesRotator()
51 | 
--------------------------------------------------------------------------------
/dorking/db_creator.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | from colorama import Fore
3 | import os
4 | 
5 | def manage_dorks(db_name):
6 |     db_prep_string = str(db_name) + '.db'
7 |     if os.path.exists('dorking//' + db_prep_string):
8 |         print(Fore.RED + f"Sorry, but the {db_prep_string} database already exists. Choose another name for your custom DB")
9 |     else:
10 |         conn = sqlite3.connect('dorking//' + str(db_prep_string))
11 |         cursor = conn.cursor()
12 | 
13 |         cursor.execute('''
14 |         CREATE TABLE IF NOT EXISTS dorks (
15 |             dork_id INTEGER PRIMARY KEY,
16 |             dork TEXT NOT NULL
17 |         )
18 |         ''')
19 |         conn.commit()
20 | 
21 |         def add_dork(dork_id, dork):
22 |             try:
23 |                 cursor.execute('INSERT INTO dorks (dork_id, dork) VALUES (?, ?)', (dork_id, dork))
24 |                 conn.commit()
25 |                 print(Fore.GREEN + "Successfully added new dork")
26 |             except sqlite3.IntegrityError:
27 |                 print(Fore.RED + "Attention, the dork_id variable must be unique")
28 | 
29 |         while True:
30 |             dork_id = input(Fore.YELLOW + "Enter dork_id (or 'q' to quit this mode and save changes) >> ")
31 |             if dork_id.lower() == 'q':
32 |                 break
33 |             dork = input(Fore.YELLOW + "Enter new dork >> ")
34 |             add_dork(int(dork_id), dork)
35 |         conn.close()
36 | 
37 | def get_dorking_query(short_domain, dorking_db_path, table):
38 |     print(Fore.GREEN + "Getting dorking query from database")
39 |     try:
40 |         conn = sqlite3.connect(dorking_db_path)
41 |         cursor = conn.cursor()
42 |         cursor.execute(f"SELECT dork FROM {table}")
43 |         rows = cursor.fetchall()
44 |         search_query = [row[0].format(short_domain) for row in rows]
45 |         conn.close()
46 |         return search_query
47 |     except Exception as e:
48 |         print(Fore.RED + f"Error getting dorking query: {e}")
49 |         return []
50 | 
51 | def get_columns_amount(dorking_db_path, table):
52 |     # returns the number of dork rows stored in the given table
53 |     conn = None
54 |     try:
55 |         conn = sqlite3.connect(dorking_db_path)
56 |         cursor = conn.cursor()
57 |         cursor.execute(f"SELECT COUNT(*) FROM {table}")
58 |         row_count = cursor.fetchone()[0]
59 |     except Exception as e:
60 |         print(f"Error getting row count: {e}")
61 |         return None
62 |     finally:
63 |         if conn:
64 |             conn.close()
65 |     return row_count
66 | 
--------------------------------------------------------------------------------
/snapshotting/screen_snapshotting.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.chrome.service import Service as ChromeService
3 | from selenium.webdriver.firefox.service import Service as FirefoxService
4 | from selenium.webdriver.edge.service import Service as EdgeService
5 | from webdriver_manager.chrome import ChromeDriverManager
6 | from webdriver_manager.firefox import GeckoDriverManager
7 | from webdriver_manager.microsoft import EdgeChromiumDriverManager
8 | from selenium.webdriver.chrome.options import Options as ChromeOptions
9 | from selenium.webdriver.firefox.options import Options as FirefoxOptions
10 | from selenium.webdriver.edge.options import Options as EdgeOptions
11 | from colorama import Fore, Style
12 | import sys
13 | sys.path.append('snapshotting')
14 | 
15 | def setup_driver(browser_name):
16 |     if browser_name == "chrome":
17 |         service = ChromeService(ChromeDriverManager().install())
18 |         options = ChromeOptions()
19 |         options.add_argument('--headless=new')
20 |         driver = webdriver.Chrome(service=service, options=options)
21 | 
22 |     elif browser_name == "firefox":
23 |         service = FirefoxService(GeckoDriverManager().install())
24 |         options = FirefoxOptions()
25 |         options.add_argument('-headless')
26 |         driver = webdriver.Firefox(service=service, options=options)
27 | 
28 |     elif browser_name == "edge":
29 |         service = EdgeService(EdgeChromiumDriverManager().install())
30 |         options = EdgeOptions()
31 |         options.add_argument('--headless=new')
32 |         driver = webdriver.Edge(service=service, options=options)
33 | 
34 |     elif browser_name == "safari":
35 |         options = webdriver.SafariOptions()
36 |         driver = webdriver.Safari(options=options)
37 | 
38 |     elif browser_name == "opera":
39 |         from config_processing import read_config
40 |         config_values = read_config()
41 |         service = ChromeService(ChromeDriverManager().install())
42 |         options = ChromeOptions()
43 |         options.add_argument('--headless=new')
44 |         options.binary_location = config_values['opera_browser_path']
45 |         driver = webdriver.Chrome(service=service, options=options)
46 |     else:
47 |         raise ValueError("Unsupported browser")
48 |     driver.set_window_size(1920, 1080)
49 |     return driver
50 | 
51 | def take_screenshot(browser_name, url, screenshot_path):
52 |     try:
53 |         print(Fore.GREEN + f"Starting {browser_name} browser in headless mode..." + Style.RESET_ALL)
54 |         driver = setup_driver(browser_name)
55 |         print(Fore.GREEN + f"Going to {url}" + Style.RESET_ALL)
56 |         driver.get(url)
57 |         print(Fore.GREEN + "Taking screenshot..." + Style.RESET_ALL)
58 |         driver.save_screenshot(screenshot_path)
59 |         driver.quit()
60 |         print(Fore.GREEN + "Screenshot successfully saved in report folder" + Style.RESET_ALL)
61 |     except Exception as e:
62 |         print(Fore.RED + f"Error appeared: {str(e)}" + Style.RESET_ALL)
63 |         if 'driver' in locals():
64 |             driver.quit()
65 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/api.md:
--------------------------------------------------------------------------------
1 | # Third-party API scan mode
2 | 
3 | Currently DPULSE supports three third-party APIs:
4 | 
5 | * SecurityTrails API (securitytrails.com) for deep subdomains and DNS enumeration (this API requires a key)
6 | * VirusTotal API (virustotal.com) for brief domain information gathering (this API requires a key)
7 | * HudsonRock API (hudsonrock.com) for querying a domain through a database of over 30,821,440 computers which were compromised through global info-stealer campaigns performed by threat actors (this API does not require a key)
8 | 
9 | ## SecurityTrails API (key required)
10 | 
11 | SecurityTrails API is used to gather information about a specified domain. It retrieves various types of DNS records, subdomains, and other details. SecurityTrails API in DPULSE returns these details about the target domain:
12 | 
13 | * Alexa rank
14 | * Apex domain
15 | * Hostname
16 | * A/MX/NS/SOA/TXT records
17 | * All subdomains list
18 | * Alive (pingable) subdomains list
19 | 
20 | ## VirusTotal API (key required)
21 | 
22 | VirusTotal API is used to interact with the VirusTotal service programmatically and analyze files and URLs using multiple antivirus engines and website scanners, providing insights into whether they are malicious. VirusTotal API in DPULSE returns these details about the target domain:
23 | 
24 | * Categories
25 | * Detected samples
26 | * Undetected samples
27 | * Detected URLs
43 | 
44 | ## HudsonRock API (no key required)
45 | 
46 | HudsonRock Cavalier API is based on forensic technologies and operational knowhow developed at the IDF's 8200 Unit to counter nation-state adversaries and professional threat-actors. It is a unique cybercrime intelligence data source composed of millions of machines compromised in global malware spreading campaigns.
47 | 
48 | ## API Keys database
49 | 
50 | In order to ensure the functioning of API services individually for each DPULSE user, an API keys storage database was created. Similar to the report storage database, it is a lightweight SQLite (.db) database with the simple structure shown below:
51 | 
52 | ![apistordb](https://github.com/user-attachments/assets/02233813-781e-4bf8-be7c-76ec7627be06)
53 | 
54 | Since every API key is individual for each user, you will see fillers instead of actual keys when you start DPULSE for the first time; until you replace a filler with a real API key, you can't start using that API in scans. You can enter your actual API keys using the DPULSE CLI. You can see the full process on the screenshot below:
55 | 
56 | ![apiproc](https://github.com/user-attachments/assets/effb27ab-dd4b-4470-b90c-34c6f9a43d8c)
57 | 
58 | At first you will see a red-colored API key field, which means that scanning is not available with this API. After changing the filler for an actual key, you will see that the color has changed, which indicates that you can use your API key for scanning. Be advised that every free API service comes with some limitations (you can see them in the DPULSE CLI for all supported APIs), so keep in mind that frequent usage of free API plans is not possible.
59 | 
60 | If you want to fully replace the API keys database, you can use the reference API keys database. You can see the menu point for this action on the screenshot above. This action will delete your actual API keys database, copy the reference database and rename it. This action is very optional because you can change your API keys by just using the first menu point in the API Keys DB Manager (according to the screenshot above)
61 | 
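62 | Under the hood this is a plain SQLite table (api_virustotal.py reads it with "SELECT api_name, api_key FROM api_keys"), so, assuming that schema, a key can also be updated manually with a few lines of Python:
63 | 
64 | ```python
65 | import sqlite3
66 | 
67 | # replace the VirusTotal filler with a real key (schema assumed from apis/api_virustotal.py)
68 | conn = sqlite3.connect('apis//api_keys.db')
69 | conn.execute("UPDATE api_keys SET api_key = ? WHERE api_name = ?", ("YOUR_REAL_KEY", "VirusTotal"))
70 | conn.commit()
71 | conn.close()
72 | ```
73 | 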
--------------------------------------------------------------------------------
/snapshotting/archive_snapshotting.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import os
3 | import time
4 | from colorama import Fore, Style
5 | import sys
6 | sys.path.append('service')  # must run before importing from the service package
7 | from config_processing import read_config
8 | 
9 | CDX_API = "http://web.archive.org/cdx/search/cdx"
10 | 
11 | def get_values_from_config():
12 |     config_values = read_config()
13 |     retries = int(config_values['wayback_retries_amount'])
14 |     pause_between_requests = int(config_values['wayback_requests_pause'])
15 |     return retries, pause_between_requests
16 | 
17 | def get_snapshots(url, from_date, to_date):
18 |     params = {
19 |         "url": url,
20 |         "from": from_date,
21 |         "to": to_date,
22 |         "output": "json",
23 |         "fl": "timestamp,original,mime",
24 |         "filter": "statuscode:200",
25 |         "collapse": "digest"
26 |     }
27 |     print(Fore.GREEN + f"Sending request to Wayback CDX API for {url}, period: {from_date} - {to_date}..." + Style.RESET_ALL)
28 |     response = requests.get(CDX_API, params=params, timeout=30)
29 |     response.raise_for_status()
30 |     data = response.json()
31 |     return data[1:]
32 | 
33 | def snapshot_enum(snapshot_storage_folder, timestamp, original_url, index):
34 |     retries, _ = get_values_from_config()
35 |     archive_url = f"https://web.archive.org/web/{timestamp}id_/{original_url}"
36 |     for attempt in range(1, retries + 1):
37 |         try:
38 |             response = requests.get(archive_url, timeout=15)
39 |             response.raise_for_status()
40 |             filename = f"{index}_{timestamp}.html"
41 |             filepath = os.path.join(snapshot_storage_folder, filename)
42 |             with open(filepath, "w", encoding="utf-8") as f:
43 |                 f.write(response.text)
44 |             print(Fore.GREEN + f"[{index}] Downloaded: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{archive_url}" + Style.RESET_ALL)
45 |             return True
46 |         except Exception as e:
47 |             print(Fore.RED + f"[{index}] Attempt {attempt}/{retries} failed for {archive_url}: {e}. Retrying..." + Style.RESET_ALL)
48 |             time.sleep(2)
49 |     print(Fore.RED + f"[{index}] Failed to download after {retries} attempts: {archive_url}" + Style.RESET_ALL)
50 |     return False
51 | 
52 | def download_snapshot(short_domain, from_date, end_date, report_folder):
53 |     _, pause_between_requests = get_values_from_config()
54 |     snapshot_storage_folder = report_folder + '//wayback_snapshots'
55 |     os.makedirs(snapshot_storage_folder, exist_ok=True)
56 |     snapshots = get_snapshots(short_domain, from_date, end_date)
57 |     print(Fore.GREEN + "Total snapshots found:" + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f" {len(snapshots)}" + Style.RESET_ALL)
58 |     html_snapshots = [
59 |         s for s in snapshots
60 |         if len(s) >= 2 and (
61 |             s[1].endswith(".html") or s[1].endswith("/") or s[1] == short_domain)
62 |     ]
63 |     print(Fore.GREEN + "HTML snapshots to download:" + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f" {len(html_snapshots)}\n" + Style.RESET_ALL)
64 |     if not html_snapshots:
65 |         print(Fore.RED + "No HTML snapshots available for download." + Style.RESET_ALL)
66 |         return
67 |     for i, (timestamp, original_url, *_) in enumerate(html_snapshots):
68 |         snapshot_enum(snapshot_storage_folder, timestamp, original_url, i+1)
69 |         time.sleep(pause_between_requests)
70 |     print(Fore.GREEN + "\nFinished downloading HTML snapshots" + Style.RESET_ALL)
71 | 
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: [ "main" ]
17 |   pull_request:
18 |     branches: [ "main" ]
19 |   schedule:
20 |     - cron: '21 1 * * 0'
21 | 
22 | jobs:
23 |   analyze:
24 |     name: Analyze (${{ matrix.language }})
25 |     # Runner size impacts CodeQL analysis time. To learn more, please see:
26 |     #   - https://gh.io/recommended-hardware-resources-for-running-codeql
27 |     #   - https://gh.io/supported-runners-and-hardware-resources
28 |     #   - https://gh.io/using-larger-runners (GitHub.com only)
29 |     # Consider using larger runners or machines with greater resources for possible analysis time improvements.
30 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 31 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 32 | permissions: 33 | # required for all workflows 34 | security-events: write 35 | 36 | # required to fetch internal or private CodeQL packs 37 | packages: read 38 | 39 | # only required for workflows in private repositories 40 | actions: read 41 | contents: read 42 | 43 | strategy: 44 | fail-fast: false 45 | matrix: 46 | include: 47 | - language: python 48 | build-mode: none 49 | # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' 50 | # Use `c-cpp` to analyze code written in C, C++ or both 51 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 52 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 53 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 54 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 55 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 56 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 57 | steps: 58 | - name: Checkout repository 59 | uses: actions/checkout@v4 60 | 61 | # Initializes the CodeQL tools for scanning. 62 | - name: Initialize CodeQL 63 | uses: github/codeql-action/init@v3 64 | with: 65 | languages: ${{ matrix.language }} 66 | build-mode: ${{ matrix.build-mode }} 67 | # If you wish to specify custom queries, you can do so here or in a config file. 68 | # By default, queries listed here will override any specified in a config file. 69 | # Prefix the list here with "+" to use these queries and those in the config file. 70 | 71 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 72 | # queries: security-extended,security-and-quality 73 | 74 | # If the analyze step fails for one of the languages you are analyzing with 75 | # "We were unable to automatically build your code", modify the matrix above 76 | # to set the build mode to "manual" for that language. Then modify this step 77 | # to build your code. 78 | # ℹ️ Command-line programs to run using the OS shell. 
79 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 80 | - if: matrix.build-mode == 'manual' 81 | shell: bash 82 | run: | 83 | echo 'If you are using a "manual" build mode for one or more of the' \ 84 | 'languages you are analyzing, replace this with the commands to build' \ 85 | 'your code, for example:' 86 | echo ' make bootstrap' 87 | echo ' make release' 88 | exit 1 89 | 90 | - name: Perform CodeQL Analysis 91 | uses: github/codeql-action/analyze@v3 92 | with: 93 | category: "/language:${{matrix.language}}" 94 | -------------------------------------------------------------------------------- /apis/api_virustotal.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import requests 3 | import sqlite3 4 | from colorama import Fore, Style 5 | import re 6 | 7 | def virustotal_html_prep(formatted_output): 8 | formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) 9 | start_marker = "=== VIRUSTOTAL API REPORT ===" 10 | end_marker = "[+] Domain Information:" 11 | start_index = formatted_output.find(start_marker) 12 | end_index = formatted_output.find(end_marker) 13 | if start_index != -1 and end_index != -1: 14 | formatted_output = formatted_output[:start_index] + formatted_output[end_index:] 15 | return formatted_output 16 | 17 | def check_domain(domain, api_key): 18 | api_key = api_key.strip() 19 | api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) 20 | 21 | url = f"https://www.virustotal.com/api/v3/domains/{domain}" 22 | headers = { 23 | "x-apikey": api_key 24 | } 25 | response = requests.get(url, headers=headers) 26 | 27 | try: 28 | result = response.json() 29 | formatted_output = Fore.LIGHTBLUE_EX + "\n=== VIRUSTOTAL API REPORT ===\n" + Style.RESET_ALL 30 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Domain Information:{Style.RESET_ALL}\n" 31 | formatted_output += f"{Fore.GREEN}Domain:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{domain}{Style.RESET_ALL}\n" 32 | formatted_output += f"{Fore.GREEN}Creation Date:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{datetime.fromtimestamp(result['data']['attributes']['creation_date']).strftime('%Y-%m-%d')}{Style.RESET_ALL}\n" 33 | formatted_output += f"{Fore.GREEN}Last Update:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{datetime.fromtimestamp(result['data']['attributes']['last_update_date']).strftime('%Y-%m-%d')}{Style.RESET_ALL}\n" 34 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] DNS Records:{Style.RESET_ALL}\n" 35 | for record in result['data']['attributes']['last_dns_records']: 36 | formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['type']:<6}{Style.RESET_ALL} " 37 | formatted_output += f"{Fore.GREEN}TTL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['ttl']:<6}{Style.RESET_ALL} " 38 | formatted_output += f"{Fore.GREEN}Value:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['value']}{Style.RESET_ALL}\n" 39 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Categories:{Style.RESET_ALL}\n" 40 | for vendor, category in result['data']['attributes']['categories'].items(): 41 | formatted_output += f"{Fore.GREEN}{vendor:<25}:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{category}{Style.RESET_ALL}\n" 42 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Analysis Stats:{Style.RESET_ALL}\n" 43 | stats = result['data']['attributes']['last_analysis_stats'] 44 | formatted_output += f"{Fore.GREEN}Harmless:{Style.RESET_ALL} 
{Fore.LIGHTCYAN_EX}{stats['harmless']}{Style.RESET_ALL}\n"
45 |         formatted_output += f"{Fore.GREEN}Malicious:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['malicious']}{Style.RESET_ALL}\n"
46 |         formatted_output += f"{Fore.GREEN}Suspicious:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['suspicious']}{Style.RESET_ALL}\n"
47 |         formatted_output += f"{Fore.GREEN}Undetected:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['undetected']}{Style.RESET_ALL}\n"
48 |         formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Detailed Analysis Results:{Style.RESET_ALL}\n"
49 |         results = result['data']['attributes']['last_analysis_results']
50 |         categories = {'harmless': [], 'malicious': [], 'suspicious': [], 'undetected': []}
51 |         for engine, data in results.items():
52 |             categories[data['category']].append(engine)
53 |         for category, engines in categories.items():
54 |             if engines:
55 |                 formatted_output += f"\n{Fore.GREEN}{category.title()} ({len(engines)}):{Style.RESET_ALL}\n"
56 |                 for engine in sorted(engines):
57 |                     formatted_output += f"{Fore.LIGHTCYAN_EX}- {engine}{Style.RESET_ALL}\n"
58 |         print(formatted_output)
59 |         return formatted_output
60 |     except Exception as e:
61 |         formatted_output = Fore.RED + f"Error while parsing JSON: {e}" + Style.RESET_ALL
62 |         print(formatted_output)
63 |         return None
64 | 
65 | def api_virustotal_check(domain):
66 |     conn = sqlite3.connect('apis//api_keys.db')
67 |     cursor = conn.cursor()
68 |     cursor.execute("SELECT api_name, api_key FROM api_keys")
69 |     rows = cursor.fetchall()
70 |     api_key = None
71 |     for row in rows:
72 |         api_name, key = row
73 |         if api_name == 'VirusTotal':
74 |             api_key = key
75 |             print(Fore.GREEN + 'Got VirusTotal API key. Starting VirusTotal scan...' + Style.RESET_ALL)
76 |             break
77 |     if not api_key:
78 |         print(Fore.RED + "VirusTotal API key was not found." + Style.RESET_ALL)
79 |         conn.close()
80 |         return None
81 |     conn.close()
82 |     formatted_output = check_domain(domain, api_key)
83 |     return formatted_output
84 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/reporting.md:
--------------------------------------------------------------------------------
1 | # About reporting system
2 | 
3 | Like every OSINT tool, DPULSE depends heavily on its reporting system. User-friendly reports are crucial for presenting results in detail and storing them for later use. DPULSE supports the most common report types: HTML and XLSX. Moreover, DPULSE provides a report storage database, which contains key information about the scan, the report and the extracted data, and gives you a way to keep reports long-term in one place. You can also move this database between different DPULSE versions, which makes for a slightly better user experience.
4 | 
5 | ## HTML report
6 | 
7 | The HTML report was the first supported report type. HTML is a widely supported format that can be opened in any web browser, allowing for visually appealing reports built from tables, charts, diagrams, and other elements. It supports links and hyperlinks that can be used for navigation within the report and for linking to external resources, and it enables dynamic content that can be updated in real time. HTML is often used in web applications, making it easy to integrate reports with existing web systems. In general, this report format is also more user-friendly, which makes it more convenient for sharing with investigation customers and OSINT teams and for use in presentations. 
Moreover, unlike PDF report generation, HTML is easier to handle both during development and delivery, as it doesn't require installing third-party applications (such as wkhtmltopdf). You can see an example of a DPULSE-generated HTML report [here](https://github.com/OSINT-TECHNOLOGIES/dpulse/tree/rolling/report_examples/html_report_example).
8 | 
9 | ## XLSX report
10 | 
11 | XLSX is a widely supported format that can be opened in most spreadsheet and office applications, including Microsoft Excel, Google Sheets, and LibreOffice Calc, making it easy to analyze and process data. It stores data in a structured format, supports formulas and functions that can automate calculations and data analysis, and enables charts and diagrams for visualizing data. Additionally, XLSX is often used in business applications, making it easy to integrate reports with existing systems. You can see an example of a DPULSE-generated XLSX report [here](https://github.com/OSINT-TECHNOLOGIES/dpulse/tree/rolling/report_examples/xlsx_report_example).
12 | 
13 | ## Side files
14 | 
15 | As you may have noticed in the report examples on the GitHub page, there are also some side files besides the report file itself. These files may include:
16 | 
17 | * the target's robots.txt file (if accessible)
18 | * the target's sitemap.xml file (if accessible)
19 | * a ps_documents folder with documents extracted from the domain and its subdomains (if PageSearch was selected for the scan)
20 | 
21 | ## Report storage database
22 | 
23 | As mentioned above, the report storage database contains key information about the scan, the report and the extracted data. DPULSE generates this database on its first launch, or whenever the database file is not found in the root directory, so users don't need to worry about creating it manually. The report storage database is a simple .db file (with the hard-coded name report_storage.db) whose structure is shown below:
24 | 
25 | ![rsdbstr](https://github.com/user-attachments/assets/491d1147-78ca-47a8-a405-5e351dc2730e)
26 | 
27 | Let's describe these fields in more detail:
28 | 
29 | * id - integer value that reflects how many reports have been generated and the order in which they were created
30 | * report_file_extension - string with the main report file's extension; in the current DPULSE version this value can be xlsx, pdf or html
31 | * report_content - BLOB or HTML data containing a copy of the main report file
32 | * comment - string with the case comment you can enter before each scan
33 | * target - string with the domain you scanned
34 | * creation_date - string showing when your report was generated (YYYYMMDD format)
35 | * dorks_results - text array containing a copy of the Google Dorking results (if this mode was selected before the scan)
36 | * robots_text - text array containing a copy of the robots.txt file from the scanned domain
37 | * sitemap_text - text array containing all links from the scanned domain's sitemap.xml file
38 | * sitemap_file - text array containing a copy of the sitemap.xml file from the scanned domain
39 | * api_scan - string indicating whether API scanning was activated and, if it was, which APIs were used
40 | 
41 | Interacting with the report storage database is a very simple process. Since it is a plain SQLite file, you can even query it directly, as the sketch below shows.
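For illustration, here is a minimal Python sketch for reading the storage file outside of DPULSE. The table name `report_storage` is only an assumption made for this example (the sketch lists the real table names first, so you can adjust the query); the column names are taken from the field descriptions above:

```python
import sqlite3

conn = sqlite3.connect("report_storage.db")
cursor = conn.cursor()

# The actual table name is not documented here, so discover it first
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print(cursor.fetchall())

# Then list basic metadata for every stored report (assumed table name)
cursor.execute("SELECT id, target, creation_date, report_file_extension FROM report_storage")
for row in cursor.fetchall():
    print(row)

conn.close()
```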
Within DPULSE itself, after each scan you will see several messages indicating that your report was successfully saved to the report storage database:
42 | 
43 | ![rsdb1](https://github.com/user-attachments/assets/db3b22f8-1e74-4095-8ab7-99fd5837aa0a)
44 | 
45 | There is also a separate menu item in the DPULSE CLI for working with the report storage database, named "Report storage DB manager":
46 | 
47 | ![rsdb2](https://github.com/user-attachments/assets/519682dc-5d01-4844-8dcd-67e1914bb765)
48 | 
49 | As you can see, there are menu items both for viewing the DB content and for recreating reports. Let's see what DPULSE returns if we select the first menu item:
50 | 
51 | ![rsdb3](https://github.com/user-attachments/assets/6778cf83-e9cf-4580-b46d-7c187cbdde9d)
52 | 
53 | The report recreation process is shown below:
54 | 
55 | ![rsdb4](https://github.com/user-attachments/assets/d7af9b03-703e-46b2-846b-05d99b33b900)
56 | 
57 | And this is what a recreated report looks like inside:
58 | 
59 | ![rsdb5](https://github.com/user-attachments/assets/799d45cb-bc51-43ca-8b06-14e236d21912)
60 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/getting_started.md:
--------------------------------------------------------------------------------
1 | # Installation and Quick Start
2 | 
3 | ## System Requirements
4 | 
5 | DPULSE is built on Python and designed to run across various environments. To ensure stability and full functionality, your system must meet the following criteria:
6 | 
7 | * **Operating System:** Linux (recommended), macOS, or Windows.
8 | * **Python Version:** Python **3.10**, **3.11**, or **3.12**.
9 |     * *Note:* Older versions (3.9 and below) are not supported due to dependency conflicts.
10 | * **Network:** A stable, high-speed internet connection is crucial. Modules like *Dorking Scan* and *PageSearch* rely on active scraping; unstable connections may lead to timeouts or incomplete results.
11 | * **Dependencies:**
12 |     * **Docker** (Recommended for isolation and ease of use).
13 |     * **Poetry** (Recommended for local Python installation).
14 |     * **Git** (Required for cloning the repository).
15 | 
16 | ---
17 | 
18 | ## Installation Methods
19 | 
20 | We provide three methods to install DPULSE. **Docker is the recommended method** as it eliminates environment conflicts.
21 | 
22 | ### Method 1: Docker (Recommended)
23 | 
24 | Using Docker ensures you have all necessary system libraries pre-installed without polluting your host machine.
25 | 
26 | 1. **Pull the official image:**
27 |     ```bash
28 |     docker pull osinttechnologies/dpulse:latest
29 |     ```
30 | 
31 | 2. **Run the container:**
32 |     * **Linux / macOS:**
33 |     ```bash
34 |     docker run --rm -it -v "$PWD":/data -w /data osinttechnologies/dpulse:latest
35 |     ```
36 |     * **Windows (PowerShell):**
37 |     ```powershell
38 |     docker run --rm -it -v "${PWD}:/data" -w /data osinttechnologies/dpulse:latest
39 |     ```
40 | 
41 | ---
42 | 
43 | ### Method 2: Poetry
44 | 
45 | If you prefer running DPULSE natively, use [Poetry](https://python-poetry.org/). It handles virtual environments and dependency locking automatically.
46 | 
47 | 1. **Clone the repository:**
48 |     * For the **Stable** version:
49 |     ```bash
50 |     git clone https://github.com/OSINT-TECHNOLOGIES/dpulse
51 |     cd dpulse
52 |     ```
53 |     * For the **Rolling** (Dev) version:
54 |     ```bash
55 |     git clone --branch rolling --single-branch https://github.com/OSINT-TECHNOLOGIES/dpulse.git
56 |     cd dpulse
57 |     ```
58 | 
59 | 2. 
**Install dependencies:** 60 | ```bash 61 | poetry install 62 | ``` 63 | 64 | 3. **Run DPULSE:** 65 | ```bash 66 | poetry run python dpulse.py 67 | ``` 68 | 69 | --- 70 | 71 | ### Method 3: Standard PIP (Legacy) 72 | 73 | This method is available but **not recommended** due to potential version conflicts with other Python packages on your system. 74 | 75 | 1. **Clone the repository and enter the directory:** 76 | ```bash 77 | git clone https://github.com/OSINT-TECHNOLOGIES/dpulse 78 | cd dpulse 79 | ``` 80 | 81 | 2. **Install requirements:** 82 | ```bash 83 | pip install -r requirements.txt 84 | ``` 85 | 86 | 3. **Run DPULSE:** 87 | ```bash 88 | python dpulse.py 89 | ``` 90 | 91 | > **Note:** The deprecated `.bat` and `.sh` installer scripts have been removed in favor of standard package managers to ensure security and reliability. 92 | 93 | --- 94 | 95 | ## Conducting Your First Scan 96 | 97 | Once DPULSE is running, follow this workflow to perform a reconnaissance task. 98 | 99 | ### 1. Main Menu 100 | Upon launch, the CLI interface will appear. To start a standard investigation, select **Option 1**. 101 | 102 | ![Main Menu](https://github.com/user-attachments/assets/5b45d4f0-9fad-4e17-8d74-96989037a66a) 103 | 104 | ### 2. Target Input 105 | DPULSE operates strictly with **domain names** (e.g., `example.com`), not full URLs (e.g., `https://www.example.com/page`). 106 | 107 | * **Input:** Enter the target domain when prompted. 108 | * **Correction:** If you accidentally enter a URL, DPULSE will attempt to extract the domain, but manual input is preferred for accuracy. 109 | 110 | ![Target Input](https://github.com/user-attachments/assets/cc5676d5-e11c-4aeb-b0b4-dd4c23fa228a) 111 | 112 | ### 3. Scan Configuration (Modifiers) 113 | You will be asked to configure the scan parameters. You can customize the depth and scope of the research: 114 | 115 | * **Case Comment:** A brief description for your internal records (e.g., "Investigation #42"). 116 | * **PageSearch:** Enables deep crawling of the domain to find sensitive files (PDFs, configs) and exposed secrets. 117 | * *Keywords:* If PageSearch is active, you can specify keywords to search for within downloaded documents. 118 | * **Dorking Mode:** Activates Google Dorking to find admin panels, IoT devices, or sensitive directories. 119 | * **API Usage:** Toggles third-party integrations (VirusTotal, SecurityTrails, HudsonRock). 120 | * **Snapshotting:** Enables capturing the target website's visual state via screenshots or Wayback Machine. 121 | 122 | ![Modifiers Selection](https://github.com/user-attachments/assets/9470350f-edf3-4692-b9bd-7c327cea2017) 123 | 124 | ### 4. Results 125 | Once the scan is complete, DPULSE will generate a report in the `./reports` directory and save the case metadata to the local database. 126 | 127 | ![Scan Complete](https://github.com/user-attachments/assets/4e16f1e6-df60-441c-b730-79ea69134bb7) 128 | 129 | You can now open the generated report file to view the gathered intelligence. 
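As a small convenience, you can also locate the newest report programmatically, for example to feed it into another tool. The sketch below is not part of DPULSE itself; it only assumes the default `./reports` output directory mentioned above:

```python
from pathlib import Path

reports_dir = Path("reports")
if reports_dir.is_dir():
    # Pick the most recently modified entry; default=None guards against an empty folder
    newest = max(reports_dir.iterdir(), key=lambda p: p.stat().st_mtime, default=None)
    print(f"Latest report: {newest}" if newest else "Reports folder is empty.")
else:
    print("No reports have been generated yet.")
```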
130 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | osint.technologies@gmail.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. 
Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # 🌐 DPULSE 4 | ### Advanced Domain OSINT & Reconnaissance Tool 5 | 6 | 7 | 8 |

9 | 10 | [![Stable Version](https://img.shields.io/badge/v1.3.4-STABLE-success?style=for-the-badge)](https://github.com/OSINT-TECHNOLOGIES/dpulse/releases) 11 | [![Rolling Version](https://img.shields.io/badge/v1.4-DEV_BUILD-orange?style=for-the-badge)](https://github.com/OSINT-TECHNOLOGIES/dpulse/tree/rolling) 12 | [![Python](https://img.shields.io/badge/Python-3.10+-blue?style=for-the-badge&logo=python&logoColor=white)](https://python.org) 13 | [![Docker](https://img.shields.io/badge/Docker-Ready-2496ED?style=for-the-badge&logo=docker&logoColor=white)](https://hub.docker.com/r/osinttechnologies/dpulse) 14 | [![Documentation](https://img.shields.io/badge/Docs-ReadTheDocs-informational?style=for-the-badge&logo=readthedocs&logoColor=white)](https://dpulse.readthedocs.io) 15 | 16 | **Convenient, fast, and user-friendly collector of domain information from open sources.** 17 | 18 | [Report Bug](https://github.com/OSINT-TECHNOLOGIES/dpulse/issues) • [Request Feature](https://github.com/OSINT-TECHNOLOGIES/dpulse/issues) • [Roadmap](https://github.com/users/OSINT-TECHNOLOGIES/projects/1) 19 | 20 |
21 | 22 | --- 23 | 24 | > ⚠️ **Disclaimer:** DPULSE is a research tool tailored for OSINT professionals. It is **not** intended for criminal activities. The developer is not responsible for any misuse of this tool. Use strictly on allowed domains and for legal purposes. 25 | 26 | --- 27 | 28 | ## 🚀 Key Features 29 | 30 | DPULSE automates the boring stuff in domain reconnaissance. It compiles data into clean **HTML/XLSX reports**. 31 | 32 | | Feature | Description | 33 | | :--- | :--- | 34 | | 🔍 **Basic Scan** | Automates WHOIS, subdomains, emails, IPs, social media, SSL info, open ports, and tech stack detection. | 35 | | 🕵️‍♂️ **PageSearch** | Deep dive into subdomains to find API keys, exposed passwords, cookies, hidden forms, and sensitive documents (PDF, config files). | 36 | | 🧩 **Dorking Mode** | Automated Google Dorking for IoT, admin panels, sensitive files, and custom user-defined dorks. | 37 | | 🔗 **API Integrations** | Native support for **VirusTotal**, **SecurityTrails**, and **HudsonRock** (compromised hosts check). | 38 | | 📸 **Snapshotting** | Captures target via Screenshots, HTML downloads, or Wayback Machine archiving. | 39 | 40 | --- 41 | 42 | ## ⚡ Quick Start 43 | 44 | ### Option 1: Docker (Recommended) 45 | The fastest way to run DPULSE without worrying about dependencies. 46 | 47 | ```bash 48 | # 1. Pull the image 49 | docker pull osinttechnologies/dpulse:latest 50 | 51 | # 2. Run DPULSE (Linux/macOS) 52 | docker run --rm -it -v "$PWD":/data -w /data osinttechnologies/dpulse:latest 53 | 54 | # 2. Run DPULSE (Windows PowerShell) 55 | docker run --rm -it -v "${PWD}:/data" -w /data osinttechnologies/dpulse:latest 56 | ``` 57 | 58 | ### Option 2: Source Code (Poetry) 59 | For developers or those who prefer a local environment. 60 | 61 | ```bash 62 | git clone https://github.com/OSINT-TECHNOLOGIES/dpulse 63 | cd dpulse 64 | poetry install 65 | poetry run python dpulse.py 66 | ``` 67 | 68 |
69 | <details><summary>Click to see Legacy Installation (pip)</summary>
70 | 
71 | If you don't use Poetry, you can use standard pip (might have conflicts):
72 | 
73 | ```bash
74 | git clone https://github.com/OSINT-TECHNOLOGIES/dpulse
75 | cd dpulse
76 | pip install -r requirements.txt
77 | python dpulse.py
78 | ```
79 | </details>
80 | 81 | --- 82 | 83 | ## 🖥️ Interface & Reports 84 | 85 | **Main Menu** 86 | Clean CLI interface for easy navigation. 87 | ![dpulse_start](https://github.com/user-attachments/assets/9ec0ab73-2206-4d38-bae6-e88656e17f95) 88 | 89 | **Scanning Process** 90 | Real-time feedback during the scan. 91 | ![dpulse_bs](https://github.com/user-attachments/assets/b0ad7827-6dac-4f82-a369-4447a0e1c878) 92 | 93 | **Output** 94 | Organized report folders with timestamps. 95 | ![Report Folder](https://github.com/OSINT-TECHNOLOGIES/dpulse/assets/77023667/7de73250-c9b6-4373-b21e-16bbb7a63882) 96 | 97 | --- 98 | 99 | ## 🏆 Community & Mentions 100 | 101 | We are proud to be mentioned by industry leaders and the cybersecurity community. 102 | 103 | * **HudsonRock:** [Featured in cybercrime intelligence update](https://www.linkedin.com/feed/update/urn:li:share:7294336938495385600/) 104 | * **DarkWebInformer:** [Tool for complex approach to domain OSINT](https://darkwebinformer.com/dpulse-tool-for-complex-approach-to-domain-osint/) 105 | * **Ethical Hackers Academy:** [Tool Review](https://ethicalhacksacademy.com/blogs/cyber-security-tools/dpulse) 106 | 107 |
108 | <details><summary>View all mentions (Social Media & Blogs)</summary>
109 | 
110 | ### X.com (Twitter)
111 | * [@DarkWebInformer](https://x.com/DarkWebInformer/status/1787583156775759915?t=Ak1W9ddUPpDvLAkVyQG8fQ&s=19)
112 | * [@OSINTech_](https://x.com/OSINTech_/status/1805902553885888649)
113 | * [@cyb_detective](https://x.com/cyb_detective/status/1821337404763959487?t=vbyRUeXM2C6gf47l7XvJnQ&s=19)
114 | * [@DailyOsint](https://x.com/DailyOsint/status/1823013991951523997?t=Fr-oDCZ2pFmFJpUT3BKl5A&s=19)
115 | * [@UndeadSec](https://x.com/UndeadSec/status/1827692406797689032)
116 | * [@0xtechrock](https://x.com/0xtechrock/status/1804470459741978974?t=us1EVJEECNZdSmSe5CQjQA&s=19)
117 | 
118 | ### LinkedIn
119 | * [Maory Schroder](https://fr.linkedin.com/posts/maory-schroder_osint-cybers%C3%A9curit%C3%A9-pentest-activity-7227562302009491456-sXoZ?trk=public_profile)
120 | * [Maxim Marshak](https://www.linkedin.com/pulse/bormaxi8080-osint-timeline-64-27062024-maxim-marshak-jojbf)
121 | * [DailyOSINT](https://www.linkedin.com/posts/daily-osint_osint-reconnaissance-infosec-activity-7228779678096850946-H-zC)
122 | 
123 | ### Telegram Channels
124 | * Cyber Detective
125 | * Hackers Factory
126 | * C.I.T Security
127 | * Реальний OSINT
128 | 
129 | </details>
130 | 131 | --- 132 | 133 |
134 | 135 | **Created by OSINT-TECHNOLOGIES** 136 | 137 | [Documentation](https://dpulse.readthedocs.io) • [Contact Developer](https://dpulse.readthedocs.io/en/latest/contact_dev/#) 138 | 139 |
140 | -------------------------------------------------------------------------------- /docs/dpulse-docs/docs/dorking.md: -------------------------------------------------------------------------------- 1 | # Automatic Google Dorking scan mode 2 | 3 | Automatic Google Dorking scan is an extended domain research function with prepared Google Dorking databases for different purposes. 4 | 5 | ## Prepared Dorking databases description 6 | 7 | At the moment DPULSE offers the following prepared databases for automatic Google Dorking: 8 | 9 | 1. IoT dorking 10 | 2. Files dorking 11 | 3. Admin panels dorking 12 | 4. Web elements dorking 13 | 14 | IoT dorking table contains following 20 dorks: 15 | ``` 16 | inurl:":8080" site:{} 17 | inurl:":1883" site:{} 18 | inurl:":8883" site:{} 19 | inurl:":554" site:{} 20 | inurl:":81" site:{} 21 | inurl:":5000" site:{} 22 | inurl:":9000" site:{} 23 | inurl:":10000" site:{} 24 | inurl:debug site:{} 25 | inurl:device site:{} 26 | inurl:control site:{} 27 | inurl:status site:{} 28 | inurl:service site:{} 29 | inurl:monitor site:{} 30 | inurl:stream site:{} 31 | inurl:video site:{} 32 | inurl:camera site:{} 33 | inurl:sensor site:{} 34 | inurl:api site:{} 35 | inurl:firmware site:{} 36 | ``` 37 | 38 | Files dorking table contains following 30 dorks: 39 | ``` 40 | filetype:pdf site:{} 41 | filetype:doc site:{} 42 | filetype:docx site:{} 43 | filetype:xlsx site:{} 44 | filetype:xls site:{} 45 | filetype:ppt site:{} 46 | filetype:pptx site:{} 47 | filetype:txt site:{} 48 | filetype:csv site:{} 49 | filetype:xml site:{} 50 | filetype:json site:{} 51 | filetype:html site:{} 52 | filetype:php site:{} 53 | filetype:asp site:{} 54 | filetype:aspx site:{} 55 | filetype:js site:{} 56 | filetype:css site:{} 57 | filetype:jpg site:{} 58 | filetype:jpeg site:{} 59 | filetype:png site:{} 60 | filetype:gif site:{} 61 | filetype:mp3 site:{} 62 | filetype:mp4 site:{} 63 | filetype:avi site:{} 64 | filetype:zip site:{} 65 | filetype:rar site:{} 66 | filetype:sql site:{} 67 | filetype:db site:{} 68 | filetype:conf site:{} 69 | filetype:ini site:{} 70 | ``` 71 | 72 | Admin panels dorking table contains following 72 dorks: 73 | ``` 74 | site:{} intitle:"WordPress Login" 75 | site:{} inurl:/wp-admin/ 76 | site:{} intext:"Войти в WordPress" 77 | site:{} intitle:"Dashboard" "WordPress" 78 | site:{} intitle:"Joomla! Administrator Login" 79 | site:{} inurl:/administrator/ 80 | site:{} intitle:"Joomla! 
3.x" "Login" 81 | site:{} intitle:"Drupal login" 82 | site:{} inurl:/user/login 83 | site:{} intitle:"Drupal 8" "Login" 84 | site:{} intitle:"phpMyAdmin" 85 | site:{} inurl:/phpmyadmin/ 86 | site:{} intitle:"phpMyAdmin 4.x" 87 | site:{} intitle:"Magento Admin" 88 | site:{} inurl:/admin/ 89 | site:{} intitle:"Magento 2" "Admin" 90 | site:{} intitle:"vBulletin Admin CP" 91 | site:{} inurl:/admincp/ 92 | site:{} intitle:"vBulletin 4.x" "Admin" 93 | site:{} intitle:"osCommerce Administration" 94 | site:{} intitle:"osCommerce 2.x" "Admin" 95 | site:{} intitle:"PrestaShop Back Office" 96 | site:{} inurl:/admin-dev/ 97 | site:{} intitle:"PrestaShop 1.7" "Back Office" 98 | site:{} intitle:"OpenCart Admin Panel" 99 | site:{} intitle:"OpenCart 3.x" "Admin" 100 | site:{} intitle:"Zen Cart Admin" 101 | site:{} intitle:"Zen Cart 1.5" "Admin" 102 | site:{} intitle:"MediaWiki" "Special:UserLogin" 103 | site:{} inurl:/mediawiki/index.php/Special:UserLogin 104 | site:{} intitle:"Moodle" "Log in to the site" 105 | site:{} inurl:/login/index.php 106 | site:{} intitle:"Concrete5" "Sign In" 107 | site:{} inurl:/index.php/dashboard/ 108 | site:{} intitle:"TYPO3" "Backend Login" 109 | site:{} inurl:/typo3/ 110 | site:{} intitle:"Plone" "Log in" 111 | site:{} inurl:/login_form 112 | site:{} intitle:"Django" "Site administration" 113 | site:{} inurl:/rails/admin/ 114 | site:{} intitle:"Ruby on Rails" "Admin" 115 | site:{} intitle:"Craft CMS" "Control Panel" 116 | site:{} inurl:/admin/ 117 | site:{} intitle:"ExpressionEngine" "Control Panel" 118 | site:{} inurl:/admin.php 119 | site:{} intitle:"Kentico" "CMS Desk" 120 | site:{} inurl:/cmsdesk/ 121 | site:{} intitle:"Umbraco" "Backoffice" 122 | site:{} inurl:/umbraco/ 123 | site:{} intitle:"Sitecore" "Launchpad" 124 | site:{} inurl:/sitecore/ 125 | site:{} intitle:"DotNetNuke" "Host" 126 | site:{} inurl:/host/ 127 | site:{} intitle:"SharePoint" "Sign In" 128 | site:{} inurl:/_layouts/15/ 129 | site:{} intitle:"Plesk" "Login" 130 | site:{} inurl:login.php?user=admin 131 | site:{} inurl:dashboard 132 | site:{} intitle:"admin login" 133 | site:{} intitle:"administrator login" 134 | site:{} "admin panel" 135 | site:{} inurl:panel 136 | site:{} inurl:cp 137 | site:{} inurl:controlpanel 138 | site:{} inurl:backend 139 | site:{} inurl:management 140 | site:{} inurl:administration 141 | site:{} intitle:"admin access" 142 | site:{} intitle:"control panel" 143 | site:{} "admin login" +directory 144 | site:{} "administrator login" +password 145 | site:{} inurl:/plesk-login/ 146 | ``` 147 | 148 | Web elements dorking table contains following 25 dorks: 149 | ``` 150 | site:{} intext:"index of" 151 | site:{} inurl:admin 152 | site:{} inurl:login 153 | site:{} inurl:dashboard 154 | site:{} inurl:wp-content 155 | site:{} inurl:backup 156 | site:{} inurl:old 157 | site:{} inurl:temp 158 | site:{} inurl:upload 159 | site:{} inurl:download 160 | site:{} inurl:config 161 | site:{} inurl:setup 162 | site:{} inurl:install 163 | site:{} inurl:database 164 | site:{} inurl:log 165 | site:{} inurl:debug 166 | site:{} inurl:api 167 | site:{} inurl:secret 168 | site:{} inurl:private 169 | site:{} inurl:secure 170 | site:{} inurl:password 171 | site:{} inurl:auth 172 | site:{} inurl:token 173 | site:{} inurl:session 174 | site:{} inurl:panel 175 | ``` 176 | 177 | ## Creating custom Dorking database 178 | 179 | DPULSE allows you to create your own custom Google Dorking database. 
The recommended way to create one, though, is through the DPULSE CLI, by selecting the menus shown below:
180 | 
181 | ![dorking_start](https://github.com/user-attachments/assets/fc8fe1ba-1845-46d1-a9b9-d09d3dc03ce6)
182 | 
183 | After you select this menu item, you will be greeted by the custom Dorking DB generator. It is very simple to use. First, enter your new custom Dorking DB name without any extension. Then you will be prompted to enter the id of your first dork (the first id in a custom DB is always 1, and each following dork increments the id by 1) and the dork itself. There is one rule DPULSE requires you to follow when entering dorks: wherever the domain should appear in a dork, put {} instead, so the program can replace these brackets with the actual domain you will enter later.
184 | 
185 | An example interaction with the custom Dorking DB generator is shown below:
186 | 
187 | ![customdork](https://github.com/user-attachments/assets/8f3e8ca5-feec-4bf5-add8-048f54931b67)
188 | 
189 | As a result, a new .db file will appear in the dorking folder, which can later be selected for use in a scan:
190 | 
191 | ![dorking_customdbresult](https://github.com/user-attachments/assets/0cd4facc-215b-4e56-ab56-aa23cb5136db)
192 | 
193 | And this is how it looks inside:
194 | 
195 | ![look_inside](https://github.com/user-attachments/assets/023467c2-008b-451f-8e14-88b7e54a8c3c)
196 | 
197 | 
198 | 
199 | 
--------------------------------------------------------------------------------
/service/config_processing.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | import os
3 | from colorama import Fore, Style
4 | 
5 | def create_config():
6 |     basic_user_agents = [
7 |         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
8 |         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
9 |         'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0',
10 |         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
11 |         'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
12 |         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
13 |         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
14 |         'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Mobile Safari/537.36',
15 |         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
16 |         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36',
17 |         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36',
18 |         'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
19 |         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
20 |         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
21 |         'Mozilla/5.0 (Linux; Android 8.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
22 |         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 
(KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36', 23 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36', 24 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36', 25 | 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36', 26 | 'Mozilla/5.0 (Linux; Android 7.1.2; SM-G955F Build/N2G48H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36' 27 | ] 28 | 29 | config = configparser.ConfigParser() 30 | config['HTML_REPORTING'] = {'template': 'modern', 'delete_txt_files': 'n'} 31 | config['LOGGING'] = {'log_level': 'info'} 32 | config['CLI VISUAL'] = {'preview_color': 'red', 'font': 'slant'} 33 | config['DORKING'] = {'dorking_delay (secs)': '2', 'delay_step': '5', 34 | 'full_path_to_browser': r'path\to\browser\for\dorking', 'browser_mode': 'nonheadless'} 35 | config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None', 'wayback_retries': '3', 36 | 'wayback_req_pause': '2'} 37 | config['USER-AGENTS'] = {} 38 | for i, agent in enumerate(basic_user_agents): 39 | config['USER-AGENTS'][f'agent_{i + 1}'] = agent 40 | config['PROXIES'] = {'proxies_file_path': 'NONE'} 41 | 42 | with open('service//config.ini', 'w') as configfile: 43 | config.write(configfile) 44 | 45 | def check_cfg_presence(): 46 | cfg_presence = os.path.isfile('service//config.ini') 47 | return cfg_presence 48 | 49 | def read_config(): 50 | if not check_cfg_presence(): 51 | create_config() 52 | 53 | config = configparser.ConfigParser() 54 | config.read('service//config.ini') 55 | 56 | if not config.has_section('LOGGING'): 57 | config.add_section('LOGGING') 58 | config.set('LOGGING', 'log_level', 'info') 59 | with open('service//config.ini', 'w') as configfile: 60 | config.write(configfile) 61 | 62 | log_level = config.get('LOGGING', 'log_level') 63 | cli_preview_color = config.get('CLI VISUAL', 'preview_color') 64 | wm_font = config.get('CLI VISUAL', 'font') 65 | dorking_delay = config.get('DORKING', 'dorking_delay (secs)') 66 | delay_step = config.get('DORKING', 'delay_step') 67 | user_agents = [value for key, value in config['USER-AGENTS'].items()] 68 | proxies_file_path = config.get('PROXIES', 'proxies_file_path') 69 | installed_browser = config.get('SNAPSHOTTING', 'installed_browser') 70 | opera_browser_path = config.get('SNAPSHOTTING', 'opera_browser_path') 71 | wayback_retries_amount = config.get('SNAPSHOTTING', 'wayback_retries') 72 | wayback_requests_pause = config.get('SNAPSHOTTING', 'wayback_req_pause') 73 | html_report_template = config.get('HTML_REPORTING', 'template') 74 | dorking_browser = config.get('DORKING', 'full_path_to_browser') 75 | dorking_browser_mode = config.get('DORKING', 'browser_mode') 76 | delete_txt_files = config.get('HTML_REPORTING', 'delete_txt_files') 77 | 78 | config_values = { 79 | 'logging_level': log_level, 80 | 'preview_color': cli_preview_color, 81 | 'wm_font': wm_font, 82 | 'dorking_delay (secs)': dorking_delay, 83 | 'delay_step': delay_step, 84 | 'user_agents': user_agents, 85 | 'proxies_file_path': proxies_file_path, 86 | 'installed_browser': installed_browser, 87 | 'opera_browser_path': opera_browser_path, 88 | 'wayback_retries_amount': wayback_retries_amount, 89 | 'wayback_requests_pause': wayback_requests_pause, 90 | 'template': html_report_template, 91 | 'dorking_browser': dorking_browser, 92 | 'dorking_browser_mode': 
dorking_browser_mode, 93 | 'delete_txt_files': delete_txt_files 94 | } 95 | 96 | return config_values 97 | 98 | def print_and_return_config(): 99 | if not check_cfg_presence(): 100 | create_config() 101 | config = configparser.ConfigParser() 102 | config.read('service//config.ini') 103 | print(Fore.LIGHTMAGENTA_EX + "\n[CURRENT CONFIG CONTENT START]" + Style.RESET_ALL) 104 | for section in config.sections(): 105 | print('\n') 106 | print(Fore.GREEN + f"[{section}]" + Style.RESET_ALL) 107 | for key in config[section]: 108 | print(Fore.GREEN + f"{key} = {config[section][key]}" + Style.RESET_ALL) 109 | print(Fore.LIGHTMAGENTA_EX + "\n\n[CURRENT CONFIG CONTENT END]" + Style.RESET_ALL) 110 | return config 111 | -------------------------------------------------------------------------------- /apis/api_securitytrails.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import sqlite3 3 | import re 4 | from colorama import Fore, Style 5 | 6 | def securitytrails_html_prep(formatted_output): 7 | formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) 8 | start_marker = "=== SECURITYTRAILS API REPORT ===" 9 | end_marker = "[+] Domain General Information:" 10 | start_index = formatted_output.find(start_marker) 11 | end_index = formatted_output.find(end_marker) 12 | if start_index != -1 and end_index != -1: 13 | formatted_output = formatted_output[:start_index] + formatted_output[end_index:] 14 | return formatted_output 15 | 16 | def check_domain_securitytrails(domain, api_key): 17 | api_key = api_key.strip() 18 | api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) 19 | 20 | subdomains_url = f"https://api.securitytrails.com/v1/domain/{domain}/subdomains?apikey={api_key}" 21 | general_url = f"https://api.securitytrails.com/v1/domain/{domain}?apikey={api_key}" 22 | 23 | try: 24 | general_response = requests.get(general_url) 25 | general_data = general_response.json() 26 | except Exception as e: 27 | return Fore.RED + f"Error while parsing JSON: {e}" + Style.RESET_ALL 28 | 29 | formatted_output = Fore.LIGHTBLUE_EX + "=== SECURITYTRAILS API REPORT ===\n" + Style.RESET_ALL 30 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Domain General Information:{Style.RESET_ALL}\n" 31 | formatted_output += ( 32 | f"{Fore.GREEN}Alexa Rank: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('alexa_rank')}{Style.RESET_ALL}\n" 33 | f"{Fore.GREEN}Apex Domain: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('apex_domain')}{Style.RESET_ALL}\n" 34 | f"{Fore.GREEN}Hostname: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('hostname')}{Style.RESET_ALL}\n" 35 | ) 36 | 37 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] DNS Records:{Style.RESET_ALL}\n" 38 | current_dns = general_data.get('current_dns', {}) 39 | for record_type, record_data in current_dns.items(): 40 | formatted_output += f"\n{Fore.GREEN}[{record_type.upper()} RECORDS]:{Style.RESET_ALL}\n" 41 | for value in record_data.get('values', []): 42 | if record_type == 'a': 43 | ip = value.get('ip', '') 44 | org = value.get('ip_organization', '') 45 | formatted_output += ( 46 | f"{Fore.GREEN}IP: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{ip}{Style.RESET_ALL} " 47 | f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" 48 | ) 49 | elif record_type == 'mx': 50 | hostname = value.get('hostname', '') 51 | priority = value.get('priority', '') 52 | org = value.get('hostname_organization', '') 53 | formatted_output += ( 54 | 
f"{Fore.GREEN}Hostname: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{hostname}{Style.RESET_ALL} " 55 | f"{Fore.GREEN}| Priority: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{priority}{Style.RESET_ALL} " 56 | f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" 57 | ) 58 | elif record_type == 'ns': 59 | nameserver = value.get('nameserver', '') 60 | org = value.get('nameserver_organization', '') 61 | formatted_output += ( 62 | f"{Fore.GREEN}Nameserver: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{nameserver}{Style.RESET_ALL} " 63 | f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" 64 | ) 65 | elif record_type == 'soa': 66 | email = value.get('email', '') 67 | ttl = value.get('ttl', '') 68 | formatted_output += ( 69 | f"{Fore.GREEN}Email: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{email}{Style.RESET_ALL} " 70 | f"{Fore.GREEN}| TTL: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{ttl}{Style.RESET_ALL}\n" 71 | ) 72 | elif record_type == 'txt': 73 | txt_value = value.get('value', '') 74 | formatted_output += ( 75 | f"{Fore.GREEN}Value: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{txt_value}{Style.RESET_ALL}\n" 76 | ) 77 | 78 | subdomains_response = requests.get(subdomains_url) 79 | if subdomains_response.status_code == 200: 80 | subdomains_data = subdomains_response.json() 81 | sub_count = subdomains_data.get('subdomain_count', 0) 82 | subdomains = subdomains_data.get('subdomains', []) 83 | 84 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Subdomains Deep Enumeration:{Style.RESET_ALL}\n" 85 | formatted_output += ( 86 | f"{Fore.GREEN}Found {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{sub_count}{Style.RESET_ALL}" 87 | f"{Fore.GREEN} subdomains.{Style.RESET_ALL}\n" 88 | ) 89 | 90 | if subdomains: 91 | formatted_output += f"{Fore.GREEN}Subdomains list:{Style.RESET_ALL}\n" 92 | alive_count = 0 93 | for i, subdomain in enumerate(subdomains, start=1): 94 | subdomain_url = f"http://{subdomain}.{domain}" 95 | try: 96 | r = requests.get(subdomain_url, timeout=5) 97 | if r.status_code == 200: 98 | alive_count += 1 99 | formatted_output += ( 100 | f"{Fore.GREEN}{i}. {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{subdomain_url}{Style.RESET_ALL}" 101 | f"{Fore.GREEN} is alive{Style.RESET_ALL}\n" 102 | ) 103 | except Exception: 104 | pass 105 | 106 | if alive_count == 0: 107 | formatted_output += (f"{Fore.RED}No alive subdomains found (by HTTP 200 check).{Style.RESET_ALL}\n") 108 | else: 109 | formatted_output += f"{Fore.RED}No subdomains found in SecurityTrails data.{Style.RESET_ALL}\n" 110 | else: 111 | formatted_output += (f"{Fore.RED}Error while gathering subdomains: {subdomains_response.status_code}{Style.RESET_ALL}\n") 112 | 113 | return formatted_output 114 | 115 | 116 | def api_securitytrails_check(domain): 117 | conn = sqlite3.connect('apis//api_keys.db') 118 | cursor = conn.cursor() 119 | cursor.execute("SELECT api_name, api_key FROM api_keys") 120 | rows = cursor.fetchall() 121 | 122 | api_key = None 123 | for row in rows: 124 | api_name, key = row 125 | if api_name == 'SecurityTrails': 126 | api_key = str(key) 127 | api_key = api_key.strip() 128 | api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) 129 | print(Fore.GREEN + 'Got SecurityTrails API key. Starting SecurityTrails scan...\n' + Style.RESET_ALL) 130 | break 131 | 132 | if not api_key: 133 | print(Fore.RED + "SecurityTrails API key not found." 
+ Style.RESET_ALL)
134 |         conn.close()
135 |         return None
136 | 
137 |     formatted_output = check_domain_securitytrails(domain, api_key)
138 |     conn.close()
139 |     print(formatted_output)
140 |     return formatted_output
141 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/config.md:
--------------------------------------------------------------------------------
1 | # Configuration file
2 | 
3 | The configuration file contains the parameters that certain DPULSE modules need in order to work. Let's see which parameters it contains and how to interact with it using the DPULSE CLI.
4 | 
5 | ## Config file content
6 | 
7 | The configuration file (config.ini) is located in the 'service' folder inside the DPULSE root folder. A default config.ini file is generated on your first DPULSE start, and it looks like this:
8 | ```
9 | [HTML_REPORTING]
10 | template = modern
11 | delete_txt_files = n
12 | 
13 | [LOGGING]
14 | log_level = info
15 | 
16 | [CLI VISUAL]
17 | preview_color = red
18 | font = slant
19 | 
20 | [DORKING]
21 | dorking_delay (secs) = 2
22 | delay_step = 5
23 | full_path_to_browser = path\to\browser\for\dorking
24 | browser_mode = nonheadless
25 | 
26 | [SNAPSHOTTING]
27 | installed_browser = firefox
28 | opera_browser_path = None
29 | wayback_retries = 3
30 | wayback_req_pause = 2
31 | 
32 | [USER-AGENTS]
33 | agent_1 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3
34 | agent_2 = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36
35 | agent_3 = Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0
36 | agent_4 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36
37 | agent_5 = Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
38 | agent_6 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
39 | agent_7 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36
40 | agent_8 = Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Mobile Safari/537.36
41 | agent_9 = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36
42 | agent_10 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36
43 | agent_11 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36
44 | agent_12 = Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
45 | agent_13 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
46 | agent_14 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
47 | agent_15 = Mozilla/5.0 (Linux; Android 8.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36
48 | agent_16 = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36
49 | agent_17 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36
50 | agent_18 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36
51 | agent_19 = Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
52 | agent_20 = Mozilla/5.0 (Linux; Android 7.1.2; SM-G955F Build/N2G48H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36
53 | 
54 | [PROXIES]
55 | proxies_file_path = NONE
56 | ```
57 | 
58 | As you can see, the config file is built from sections, each of which represents a separate DPULSE function. Let's describe these sections and parameters:
59 | 
60 | | SECTION | PARAMETER | POSSIBLE VALUES | COMMENT |
61 | | ------------- | ------------- | ------------- | ------------- |
62 | | [HTML_REPORTING] | template | modern / legacy | Determines which HTML report template is used when creating the report. Modern offers more features such as analytics, graphs and interactive elements, while legacy is no longer supported but may still be a convenient choice for some |
63 | | [HTML_REPORTING] | delete_txt_files | y / n | The modern HTML report template embeds the content of the robots.txt and sitemap.xml files in text boxes, so you may not need them as separate .txt files. 'y' makes DPULSE delete these files from the report folder, while 'n' leaves everything as is |
64 | | [LOGGING] | log_level | See [here](https://docs.python.org/3/library/logging.html#logging-levels) | Determines how much technical information about the program's execution is logged to the journal.log file |
65 | | [CLI VISUAL] | preview_color | See [here](https://pypi.org/project/colorama/) | Determines the color of the DPULSE ASCII art preview |
66 | | [CLI VISUAL] | font | Any ASCII art font name supported by the CLI banner renderer (default: slant) | Determines the font of the DPULSE ASCII art preview |
67 | | [DORKING] | dorking_delay (secs) | Any integer value >=0 | Determines how long the browser pauses between dorks |
68 | | [DORKING] | delay_step | Any integer value >0 | Determines how many dorks the browser handles before the delay is applied |
69 | | [DORKING] | full_path_to_browser | Full path to your browser's executable file with \ symbol as a separator | Determines which browser is used for Dorking |
70 | | [DORKING] | browser_mode | headless / nonheadless | Sets the browser mode used during the Dorking process (headless means the browser window is not opened; nonheadless opens a browser window for every new dork, which actually gives better results for bypassing TOS and Captcha checks) |
71 | | [SNAPSHOTTING] | installed_browser | Name of a browser installed on your system (firefox by default) | Determines which browser is used for screenshot snapshotting (see opera_browser_path below if you choose Opera) |
72 | | [SNAPSHOTTING] | opera_browser_path | Full path to your Opera.exe / None | Enter your Opera.exe path only if you decided to use Opera for screenshot snapshotting; in other cases leave it as None |
73 | | [SNAPSHOTTING] | wayback_retries | Any integer value >0 | Determines how many retries DPULSE makes before abandoning an inaccessible Wayback link |
74 | | [SNAPSHOTTING] | wayback_req_pause | Any integer value >0 | Determines how many seconds DPULSE waits between retries to an inaccessible Wayback link |
75 | | [USER-AGENTS] | agent_N | Any valid user-agent string | User-agents are used to try to bypass TOS and Captcha checks when Dorking a domain |
76 | | [PROXIES] | proxies_file_path | Full path to your proxies .txt file, with // symbols as the path separator | Determines the path to a .txt file that contains a list of proxies (one proxy per row) |
77 | 
78 | ## Editing configuration file
79 | 
80 | The first step in editing the configuration file is the DPULSE main menu. Here you should find the 2nd menu item and select it like this:
81 | 
82 | ![config1](https://github.com/user-attachments/assets/d4eda335-102c-4dc6-ab5d-206ac01202d8)
83 | 
84 | The Settings menu will then appear. Here you will find two menu items related to the config. The first is "Print current config file" and the second is "Edit config file". You can see them in the image below:
85 | 
86 | ![config2](https://github.com/user-attachments/assets/035e5a94-ca5f-43ca-89c7-8fca36048243)
87 | 
88 | If you select "Print current config file", you will see the config file's content in the DPULSE CLI, just like this:
89 | 
90 | ![config3](https://github.com/user-attachments/assets/a86ee852-0b2c-4c83-9a48-bca7499c4671)
91 | 
92 | If you select "Edit config file", you will see the current config file's content and be prompted to enter the section and parameter to update, and then a new value for it:
93 | 
94 | ![config4](https://github.com/user-attachments/assets/b522f2d8-e05e-43a5-968e-b0306ad1de2e)
95 | 
96 | 
97 | 
--------------------------------------------------------------------------------
/datagather_modules/networking_processor.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('service')
3 | from logs_processing import logging
4 | from colorama import Fore, Style  # imported outside the try block so Fore/Style exist in the except handler below
5 | 
6 | try:
7 |     import dns.resolver
8 |     import ssl
9 |     import socket
10 |     import requests
11 |     import xml.etree.ElementTree as ET
12 |     import builtwith
13 | except ImportError as e:
14 |     print(Fore.RED + "Import error appeared. Reason: {}".format(e) + Style.RESET_ALL)
15 |     sys.exit()
16 | 
17 | def get_dns_info(short_domain, report_file_extension):
18 |     try:
19 |         logging.info('DNS INFO GATHERING: OK')
20 |         mx_list = []
21 |         mx_records = dns.resolver.resolve(short_domain, 'MX')
22 |         for record in mx_records:
23 |             mx_list.append(record.exchange)
24 |         if not mx_list:
25 |             mx_list.append('MX records were not gathered')
26 |         # Both branches returned the same joined string, so they are merged here
27 |         if report_file_extension in ('xlsx', 'pdf'):
28 |             return ', '.join(map(str, mx_list))
29 | 
30 |     except dns.resolver.NoAnswer as error_noans:
31 |         print(Fore.RED + "No answer from domain about MX records. See journal for details")
32 |         logging.error(f'DNS INFO GATHERING: ERROR. REASON: {error_noans}')
33 |         return 'No information about MX records was gathered'
34 |     except dns.resolver.Timeout as error_timeout:
35 |         print(Fore.RED + "Timeout while getting MX records. See journal for details")
36 |         logging.error(f'DNS INFO GATHERING: ERROR. 
REASON: {error_timeout}') 37 | return 'No information about MX records was gathered' 38 | 39 | def get_ssl_certificate(short_domain, port=443): 40 | try: 41 | logging.info('SSL CERTIFICATE GATHERING: OK') 42 | context = ssl.create_default_context() 43 | context.minimum_version = ssl.TLSVersion.TLSv1_2 44 | conn = socket.create_connection((short_domain, port)) 45 | sock = context.wrap_socket(conn, server_hostname=short_domain) 46 | cert = sock.getpeercert() 47 | issuer = cert['issuer'][0][0][1] 48 | subject = cert['subject'][0][0][1] 49 | notBefore = cert['notBefore'] 50 | notAfter = cert['notAfter'] 51 | commonName = str(cert['issuer'][2][0][1]) + ', version: ' + str(cert['version']) 52 | serialNumber = cert['serialNumber'] 53 | return issuer, subject, notBefore, notAfter, commonName, serialNumber 54 | except Exception as e: 55 | print(Fore.RED + "Error while gathering info about SSL certificate. See journal for details") 56 | logging.error(f'SSL CERTIFICATE GATHERING: ERROR. REASON: {e}') 57 | issuer = subject = notBefore = notAfter = commonName = serialNumber = "No information about SSL certificate was gathered" 58 | return issuer, subject, notBefore, notAfter, commonName, serialNumber 59 | 60 | def query_internetdb(ip, report_file_extension): 61 | try: 62 | logging.info('INTERNETDB DATA GATHERING: OK') 63 | url = f"https://internetdb.shodan.io/{ip}" 64 | response = requests.get(url) 65 | if response.status_code == 200: 66 | data = response.json() 67 | ports = data.get("ports", []) 68 | hostnames = data.get("hostnames", []) 69 | cpes = data.get("cpes", []) 70 | tags = data.get("tags", []) 71 | vulns = data.get("vulns", []) 72 | if not ports: 73 | ports = ['Open ports were not found'] 74 | if not hostnames: 75 | hostnames = ['Hostnames were not found'] 76 | if not cpes: 77 | cpes = ['CPEs were not found'] 78 | if not tags: 79 | tags = ['Tags were not found'] 80 | if not vulns: 81 | vulns = ['Vulnerabilities were not found'] 82 | if report_file_extension == 'pdf' or report_file_extension == 'html': 83 | return ports, hostnames, cpes, tags, vulns 84 | elif report_file_extension == 'xlsx': 85 | return ports, hostnames, cpes, tags, vulns 86 | else: 87 | print(Fore.RED + "No information was found on InternetDB" + Style.RESET_ALL) 88 | ports = hostnames = cpes = tags = vulns = ["No info about this web resource on InternetDB"] 89 | return ports, hostnames, cpes, tags, vulns 90 | except Exception as e: 91 | print(Fore.RED + "No information was found on InternetDB due to some error. See journal for details" + Style.RESET_ALL) 92 | ports = hostnames = cpes = tags = vulns = ["No info about this web resource on InternetDB"] 93 | logging.error(f'INTERNETDB DATA GATHERING: ERROR. REASON: {e}') 94 | return ports, hostnames, cpes, tags, vulns 95 | 96 | 97 | def get_robots_txt(url, robots_path): 98 | try: 99 | logging.info('ROBOTS.TXT EXTRACTION: OK') 100 | if not url.startswith('http'): 101 | url = 'http://' + url 102 | robots_url = url + '/robots.txt' 103 | response = requests.get(robots_url) 104 | if response.status_code == 200: 105 | with open(robots_path, 'w') as f: 106 | f.write(response.text) 107 | return 'File "robots.txt" was extracted to text file in report folder' 108 | else: 109 | return 'File "robots.txt" was not found' 110 | except Exception as e: 111 | print(Fore.RED + 'robots.txt file was not extracted due to some error. See journal for details') 112 | logging.error(f'ROBOTS.TXT EXTRACTION: ERROR. 
REASON: {e}')
113 | return 'File "robots.txt" was not found'
114 |
115 | def get_sitemap_xml(url, sitemap_path):
116 | try:
117 | logging.info('SITEMAP.XML EXTRACTION: OK')
118 | if not url.startswith('http'):
119 | url = 'http://' + url
120 | sitemap_url = url + '/sitemap.xml'
121 | response = requests.get(sitemap_url)
122 | if len(response.text) > 0:
123 | if response.status_code == 200:
124 | with open(sitemap_path, 'w') as f:
125 | f.write(response.text)
126 | return 'File "sitemap.xml" was extracted to text file in report folder'
127 | else:
128 | return 'File "sitemap.xml" was not found'
129 | else:
130 | with open(sitemap_path, 'w') as f:
131 | f.write('0')
132 | print(Fore.RED + "Error while gathering sitemap.xml. Probably it's unreachable")
133 | return 'File "sitemap.xml" was not found'
134 | except Exception as e:
135 | print(Fore.RED + "Error while gathering sitemap.xml. See journal for details")
136 | logging.error(f'SITEMAP.XML EXTRACTION: ERROR. REASON: {e}')
137 | return 'Error occurred during sitemap.xml gathering'
138 |
139 | def extract_links_from_sitemap(sitemap_links_path, sitemap_path):
140 | try:
141 | logging.info('SITEMAP.XML LINKS EXTRACTION: OK')
142 | tree = ET.parse(sitemap_path)
143 | root = tree.getroot()
144 | links = [elem.text for elem in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc')]
145 | with open(sitemap_links_path, 'w') as f:
146 | for link in links:
147 | f.write(f"{link}\n")
148 | return 'Links from "sitemap.txt" were successfully parsed'
149 | except (ET.ParseError, FileNotFoundError) as e:
150 | print(Fore.RED + "Links from sitemap.txt were not parsed. See journal for details")
151 | logging.error(f'SITEMAP.XML LINKS EXTRACTION: ERROR. REASON: {e}')
152 | return 'Links from "sitemap.txt" were not parsed'
153 |
154 | def get_technologies(url):
155 | try:
156 | logging.info('WEB-TECHNOLOGIES GATHERING: OK')
157 | tech = builtwith.parse(url)
158 | web_servers = tech.get('web-servers', [])
159 | cms = tech.get('cms', [])
160 | programming_languages = tech.get('programming-languages', [])
161 | web_frameworks = tech.get('web-frameworks', [])
162 | analytics = tech.get('analytics', [])
163 | javascript_frameworks = tech.get('javascript-frameworks', [])
164 | if not web_servers:
165 | web_servers = ['Web-servers were not found']
166 | if not cms:
167 | cms = ['CMS were not found']
168 | if not programming_languages:
169 | programming_languages = ['Used programming languages were not determined']
170 | if not web_frameworks:
171 | web_frameworks = ['Used web frameworks were not determined']
172 | if not analytics:
173 | analytics = ['Used analytics services were not determined']
174 | if not javascript_frameworks:
175 | javascript_frameworks = ['Used JS frameworks were not determined']
176 | return web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks
177 | except Exception as e:
178 | web_servers = cms = programming_languages = web_frameworks = analytics = javascript_frameworks = ['Found nothing related to web-technologies due to some error']
179 | print(Fore.RED + "Error when gathering info about web technologies. See journal for details")
180 | logging.error(f'WEB-TECHNOLOGIES GATHERING: ERROR. 
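# get_technologies() above leans on builtwith.parse(), which returns a dict
# keyed by category ('web-servers', 'cms', 'javascript-frameworks', ...) with
# lists of detected names; absent categories are simply missing keys, which is
# why every .get() above defaults to []. A quick standalone probe (network
# access required; output depends on the target site):
import builtwith

tech = builtwith.parse('https://example.com')
print(tech.get('web-servers', []), tech.get('javascript-frameworks', []))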
REASON: {e}') 181 | return web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks 182 | -------------------------------------------------------------------------------- /reporting_modules/html_report_creation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from jinja2 import Environment, FileSystemLoader 4 | from colorama import Fore, Style 5 | 6 | sys.path.append('service') 7 | sys.path.append('service//pdf_report_templates') 8 | sys.path.append('apis') 9 | 10 | from logs_processing import logging 11 | import db_processing as db 12 | import files_processing as fp 13 | from api_hudsonrock import hudsonrock_html_prep 14 | from api_virustotal import virustotal_html_prep 15 | from api_securitytrails import securitytrails_html_prep 16 | from config_processing import read_config 17 | 18 | def generate_report(data, output_file, template_path): 19 | env = Environment(loader=FileSystemLoader('.')) 20 | template = env.get_template(template_path) 21 | html_output = template.render(data) 22 | with open(output_file, 'w', encoding='utf-8') as f: 23 | f.write(html_output) 24 | return True 25 | 26 | def report_assembling(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, end, snapshotting_ui_mark): 27 | try: 28 | ip = data_array[0] 29 | res = data_array[1] 30 | mails = data_array[2] 31 | subdomains = data_array[3] 32 | subdomains_amount = data_array[4] 33 | social_medias = data_array[5] 34 | subdomain_mails = data_array[6] 35 | subdomain_ip = data_array[8] 36 | issuer = data_array[9] 37 | subject = data_array[10] 38 | notBefore = data_array[11] 39 | notAfter = data_array[12] 40 | commonName = data_array[13] 41 | serialNumber = data_array[14] 42 | mx_records = data_array[15] 43 | robots_txt_result = data_array[16] 44 | sitemap_xml_result = data_array[17] 45 | sitemap_links_status = data_array[18] 46 | web_servers = data_array[19] 47 | cms = data_array[20] 48 | programming_languages = data_array[21] 49 | web_frameworks = data_array[22] 50 | analytics = data_array[23] 51 | javascript_frameworks = data_array[24] 52 | ports = data_array[25] 53 | hostnames = data_array[26] 54 | cpes = data_array[27] 55 | tags = data_array[28] 56 | vulns = data_array[29] 57 | common_socials = data_array[30] 58 | total_socials = data_array[31] 59 | ps_emails_return = data_array[32] 60 | accessible_subdomains = data_array[33] 61 | emails_amount = data_array[34] 62 | files_counter = data_array[35] 63 | cookies_counter = data_array[36] 64 | api_keys_counter = data_array[37] 65 | website_elements_counter = data_array[38] 66 | exposed_passwords_counter = data_array[39] 67 | total_links_counter = data_array[40] 68 | accessed_links_counter = data_array[41] 69 | keywords_messages_list = data_array[42] 70 | dorking_status = data_array[43] 71 | dorking_file_path = data_array[44] 72 | virustotal_output = data_array[45] 73 | securitytrails_output = data_array[46] 74 | hudsonrock_output = data_array[47] 75 | ps_string = data_array[48] 76 | total_ports = data_array[49] 77 | total_ips = data_array[50] 78 | total_vulns = data_array[51] 79 | casename = report_info_array[0] 80 | db_casename = report_info_array[1] 81 | db_creation_date = report_info_array[2] 82 | report_folder = report_info_array[3] 83 | report_ctime = report_info_array[6] 84 | api_scan_db = report_info_array[7] 85 | used_api_flag = report_info_array[8] 86 | 87 | hudsonrock_output = hudsonrock_html_prep(hudsonrock_output) 88 | virustotal_output 
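# generate_report() above is a thin Jinja2 wrapper: load a template from disk,
# render it with a context dict, write the HTML out. The sketch below shows the
# same render cycle with a throwaway template; 'demo_template.html' is a
# hypothetical file name, not the shipped modern_report_template.html.
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader('.'))
template = env.get_template('demo_template.html')      # assumed to exist in CWD
print(template.render({'sh_domain': 'example.com'}))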
= virustotal_html_prep(virustotal_output) 89 | securitytrails_output = securitytrails_html_prep(securitytrails_output) 90 | 91 | if len(ps_emails_return) > 0: 92 | subdomain_mails += ps_emails_return 93 | subdomain_mails = list(set(subdomain_mails)) 94 | subdomain_mails_cleaned = [] 95 | substrings = ['m=Base64', 'Ë','Á','Æ','Å','Ä','Ò','Á','ó','ð','É','ë','â'] 96 | for substring in substrings: 97 | if any(substring in s for s in subdomain_mails): 98 | subdomain_mails.remove(next(s for s in subdomain_mails if substring in s)) 99 | for email in subdomain_mails: 100 | new_emails = email.split(', ') 101 | subdomain_mails_cleaned.extend(new_emails) 102 | else: 103 | subdomain_mails = list(set(subdomain_mails)) 104 | subdomain_mails_cleaned = [] 105 | substrings = ['m=Base64', 'Ë','Á','Æ','Å','Ä','Ò','Á','ó','ð','É','ë','â'] 106 | for substring in substrings: 107 | if any(substring in s for s in subdomain_mails): 108 | subdomain_mails.remove(next(s for s in subdomain_mails if substring in s)) 109 | for email in subdomain_mails: 110 | new_emails = email.split(', ') 111 | subdomain_mails_cleaned.extend(new_emails) 112 | 113 | total_mails = len(subdomain_mails_cleaned) 114 | pdf_templates_path = 'service//pdf_report_templates' 115 | config_values = read_config() 116 | delete_txt_files = config_values['delete_txt_files'] 117 | template_path = pdf_templates_path + '//modern_report_template.html' 118 | dorking_results_path = report_folder + '//04-dorking_results.txt' 119 | if os.path.isfile(dorking_results_path): 120 | with open(dorking_results_path, 'r') as f: 121 | add_dsi = f.read() 122 | else: 123 | add_dsi = 'Dorking mode was not enabled so there is no results to see' 124 | 125 | robots_content, sitemap_content, sitemap_links_content, dorking_content = fp.get_db_columns(report_folder) 126 | 127 | context = {'sh_domain': short_domain, 'full_url': url, 'ip_address': ip, 'registrar': res['registrar'], 128 | 'creation_date': res['creation_date'], 'expiration_date': res['expiration_date'], 129 | 'name_servers': ', '.join(res['name_servers']), 'org': res['org'], 130 | 'mails': mails, 'subdomain_mails': subdomain_mails_cleaned, 'subdomain_socials': social_medias, 131 | 'subdomain_ip': subdomain_ip, 132 | 'subdomains': subdomains, 'fb_links': common_socials['Facebook'], 133 | 'tw_links': common_socials['Twitter'], 'inst_links': common_socials['Instagram'], 134 | 'tg_links': common_socials['Telegram'], 'tt_links': common_socials['TikTok'], 135 | 'li_links': common_socials['LinkedIn'], 'vk_links': common_socials['VKontakte'], 136 | 'yt_links': common_socials['YouTube'], 'wc_links': common_socials['WeChat'], 137 | 'ok_links': common_socials['Odnoklassniki'], 'xcom_links': common_socials['X.com'], 'robots_txt_result': robots_txt_result, 138 | 'sitemap_xml_result': sitemap_xml_result, 139 | 'sitemap_links': sitemap_links_status, 'web_servers': web_servers, 'cms': cms, 140 | 'programming_languages': programming_languages, 'web_frameworks': web_frameworks, 141 | 'analytics': analytics, 142 | 'javascript_frameworks': javascript_frameworks, 143 | 'ctime': report_ctime, 'a_tsf': subdomains_amount, 'mx_records': mx_records, 'issuer': issuer, 144 | 'subject': subject, 'notBefore': notBefore, 'notAfter': notAfter, 145 | 'commonName': commonName, 'serialNumber': serialNumber, 'ports': ports, 'hostnames': hostnames, 146 | 'cpes': cpes, 147 | 'tags': tags, 'vulns': vulns, 'a_tsm': total_socials, 'pagesearch_ui_mark': pagesearch_ui_mark, 148 | 'dorking_status': dorking_status, 149 | 'add_dsi': add_dsi, 'ps_s': 
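# The mojibake filter above drops at most one offending address per marker
# substring (each next() call removes a single match). An exhaustive variant
# can be written as one comprehension; this is a sketch of the alternative
# with toy data, not the shipped code:
substrings = ['m=Base64', 'Ë', 'Á', 'Æ']
subdomain_mails = ['ok@example.com', 'brokenÁ@example.com', 'alsoÁbad@example.com']
cleaned = [s for s in subdomain_mails if not any(m in s for m in substrings)]
print(cleaned)   # ['ok@example.com'], both corrupted entries removed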
accessible_subdomains, 'ps_e': emails_amount, 'ps_f': files_counter, 'ps_c': cookies_counter, 'ps_a': api_keys_counter, 150 | 'ps_w': website_elements_counter, 'ps_p': exposed_passwords_counter, 'ss_l': total_links_counter, 'ss_a': accessed_links_counter, 'hudsonrock_output': hudsonrock_output, "snapshotting_ui_mark": snapshotting_ui_mark, 151 | 'virustotal_output': virustotal_output, 'securitytrails_output': securitytrails_output, 'ps_string': ps_string, 'a_tops': total_ports, 152 | 'a_temails': total_mails, 'a_tips': total_ips, 'a_tpv': total_vulns, 'robots_content': robots_content, 'sitemap_xml_content': sitemap_content, 'sitemap_txt_content': sitemap_links_content} 153 | 154 | html_report_name = report_folder + '//' + casename 155 | if generate_report(context, html_report_name, template_path): 156 | print(Fore.GREEN + "HTML report for {} case was created at {}".format(short_domain, report_ctime) + Style.RESET_ALL) 157 | print(Fore.GREEN + f"Scan elapsed time: {end}" + Style.RESET_ALL) 158 | pdf_blob = fp.get_blob(html_report_name) 159 | db.insert_blob('HTML', pdf_blob, db_casename, db_creation_date, case_comment, robots_content, sitemap_content, sitemap_links_content, dorking_content, api_scan_db) 160 | 161 | if delete_txt_files.lower() == 'y': 162 | files_to_remove = [ 163 | '04-dorking_results.txt', 164 | '03-sitemap_links.txt', 165 | '02-sitemap.txt', 166 | '01-robots.txt' 167 | ] 168 | for file in files_to_remove: 169 | file_path = os.path.join(report_folder, file) 170 | if os.path.exists(file_path): 171 | os.remove(file_path) 172 | elif delete_txt_files.lower() == 'n': 173 | pass 174 | 175 | except Exception as e: 176 | print(Fore.RED + 'Unable to create HTML report. See journal for details') 177 | logging.error(f'HTML REPORT CREATION: ERROR. REASON: {e}') 178 | -------------------------------------------------------------------------------- /service/db_processing.py: -------------------------------------------------------------------------------- 1 | from colorama import Fore, Style 2 | import os 3 | import sqlite3 4 | import sys 5 | from rich import box 6 | from rich.table import Table 7 | from rich.console import Console 8 | 9 | sys.path.append('apis//api_keys.db') 10 | 11 | console = Console() 12 | 13 | def db_connect(): 14 | sqlite_connection = sqlite3.connect('report_storage.db') 15 | cursor = sqlite_connection.cursor() 16 | return cursor, sqlite_connection 17 | 18 | def check_rsdb_presence(db_path): 19 | if not os.path.exists(db_path): 20 | print(Fore.RED + "Report storage database was not found. 
DPULSE will create it in a second" + Style.RESET_ALL) 21 | return False 22 | else: 23 | return True 24 | 25 | def db_creation(db_path): 26 | cursor, sqlite_connection = db_connect() 27 | create_table_sql = """ 28 | CREATE TABLE "report_storage" ( 29 | "id" INTEGER NOT NULL UNIQUE, 30 | "report_file_extension" TEXT NOT NULL, 31 | "report_content" BLOB NOT NULL, 32 | "comment" TEXT NOT NULL, 33 | "target" TEXT NOT NULL, 34 | "creation_date" INTEGER NOT NULL, 35 | "dorks_results" TEXT, 36 | "robots_text" TEXT, 37 | "sitemap_text" TEXT, 38 | "sitemap_file" TEXT, 39 | "api_scan" TEXT, 40 | PRIMARY KEY("id" AUTOINCREMENT) 41 | ); 42 | """ 43 | cursor.execute(create_table_sql) 44 | sqlite_connection.commit() 45 | sqlite_connection.close() 46 | 47 | def db_select(): 48 | cursor, sqlite_connection = db_connect() 49 | if_rows = "SELECT * FROM report_storage" 50 | cursor.execute(if_rows) 51 | rows = cursor.fetchall() 52 | data_presence_flag = False 53 | if rows: 54 | try: 55 | select_query = "SELECT creation_date, report_file_extension, target, id, comment, dorks_results, robots_text, sitemap_text, sitemap_file, api_scan FROM report_storage;" 56 | cursor.execute(select_query) 57 | records = cursor.fetchall() 58 | table = Table(title="[white on magenta]DATABASE CONTENT[/white on magenta]", show_lines=True, border_style="magenta", box=box.ROUNDED) 59 | table.add_column("ID", style="cyan", justify="center") 60 | table.add_column("Target", style="white", justify="center") 61 | table.add_column("Extension", style="white", justify="center") 62 | table.add_column("Comment", style="white", justify="center") 63 | table.add_column("Created", style="white", justify="center") 64 | table.add_column("Dorking", style="white", justify="center") 65 | table.add_column("robots.txt", style="white", justify="center") 66 | table.add_column("sitemap.xml", style="white", justify="center") 67 | table.add_column("API scan", style="white", justify="center") 68 | 69 | for row in records: 70 | dorks_presence = "None" 71 | robots_presence = "None" 72 | sitemap_presence = "None" 73 | if row[5] and len(str(row[5])) > 1: 74 | dorks_presence = "In DB" 75 | if row[6] and len(str(row[6])) > 1: 76 | robots_presence = "In DB" 77 | if row[7] and len(str(row[7])) > 1: 78 | sitemap_presence = "In DB" 79 | table.add_row( 80 | str(row[3]), 81 | str(row[2]), 82 | str(row[1]), 83 | str(row[4]), 84 | str(row[0]), 85 | dorks_presence, 86 | robots_presence, 87 | sitemap_presence, 88 | str(row[9]) 89 | ) 90 | data_presence_flag = True 91 | console.print(table) 92 | except sqlite3.Error as e: 93 | print(Fore.RED + "Failed to see storage database's content. 
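# Typical bootstrap flow for the report storage database, combining the helpers
# above (a sketch, assuming the service folder is on sys.path so this module
# imports as db_processing and the working directory is the DPULSE root):
import db_processing as db

if not db.check_rsdb_presence('report_storage.db'):
    db.db_creation('report_storage.db')
cursor, conn, has_rows = db.db_select()   # prints the rich table when rows exist
conn.close()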
Reason: {}".format(e)) 94 | sqlite_connection.close() 95 | data_presence_flag = False 96 | else: 97 | print(Fore.RED + 'No data found in report storage database') 98 | sqlite_connection.close() 99 | data_presence_flag = False 100 | return cursor, sqlite_connection, data_presence_flag 101 | 102 | def db_select_silent(): 103 | cursor, sqlite_connection = db_connect() 104 | if_rows = "SELECT * FROM report_storage" 105 | cursor.execute(if_rows) 106 | rows = cursor.fetchall() 107 | if rows: 108 | try: 109 | select_query = "SELECT creation_date, report_file_extension, target, id, comment, dorks_results, robots_text, sitemap_text, sitemap_file, api_scan FROM report_storage;" 110 | cursor.execute(select_query) 111 | except sqlite3.Error as e: 112 | sqlite_connection.close() 113 | else: 114 | sqlite_connection.close() 115 | return cursor, sqlite_connection 116 | 117 | def db_report_recreate(extracted_folder_name, id_to_extract): 118 | cursor, sqlite_connection = db_select_silent() 119 | cursor.execute("SELECT report_content FROM report_storage WHERE id=?", (id_to_extract,)) 120 | try: 121 | blob = cursor.fetchone() 122 | if blob is not None: 123 | blob_data = blob[0] 124 | cursor.execute("SELECT report_file_extension FROM report_storage WHERE id=?", (id_to_extract,)) 125 | report_file_extension = (cursor.fetchone())[0] 126 | if str(report_file_extension).upper() == 'XLSX': 127 | with open(extracted_folder_name + '//report_extracted.xlsx', 'wb') as file: 128 | file.write(blob_data) 129 | elif str(report_file_extension).upper() == 'HTML': 130 | with open(extracted_folder_name + '//report_extracted.html', 'wb') as file: 131 | file.write(blob_data) 132 | cursor.execute("SELECT dorks_results FROM report_storage WHERE id=?", (id_to_extract,)) 133 | dorks_results = (cursor.fetchone())[0] 134 | with open(extracted_folder_name + '//dorks_extracted.txt', 'w') as file: 135 | file.write(dorks_results) 136 | cursor.execute("SELECT robots_text FROM report_storage WHERE id=?", (id_to_extract,)) 137 | robots_results = (cursor.fetchone())[0] 138 | with open(extracted_folder_name + '//robots_extracted.txt', 'w') as file: 139 | file.write(robots_results) 140 | cursor.execute("SELECT sitemap_file FROM report_storage WHERE id=?", (id_to_extract,)) 141 | sitemap_results = (cursor.fetchone())[0] 142 | with open(extracted_folder_name + '//sitemap_extracted.txt', 'w') as file: 143 | file.write(sitemap_results) 144 | cursor.execute("SELECT sitemap_text FROM report_storage WHERE id=?", (id_to_extract,)) 145 | sitemap_links_results = (cursor.fetchone())[0] 146 | with open(extracted_folder_name + '//sitemap_links_extracted.txt', 'w') as file: 147 | file.write(sitemap_links_results) 148 | print(Fore.GREEN + "\nReport was successfully recreated from report storage database and saved in {} folder".format(extracted_folder_name)) 149 | except Exception as e: 150 | print(Fore.RED + "Error appeared when recreating report from database. 
Reason: {}".format(e)) 151 | 152 | def insert_blob(report_file_type, pdf_blob, db_casename, creation_date, case_comment, robots, sitemap_xml, sitemap_links, dorking_results, api_scan_db): 153 | try: 154 | sqlite_connection = sqlite3.connect('report_storage.db') 155 | cursor = sqlite_connection.cursor() 156 | print(Fore.GREEN + "Connected to report storage database") 157 | apis = [api for api in ['VirusTotal', 'SecurityTrails', 'HudsonRock'] if api in api_scan_db] 158 | if len(apis) == 0: 159 | api_scan_insert = 'No' 160 | elif len(apis) == 1: 161 | api_scan_insert = apis[0] 162 | else: 163 | api_scan_insert = ', '.join(apis[:-1]) + ' and ' + apis[-1] 164 | 165 | sqlite_insert_blob_query = """INSERT INTO report_storage 166 | (report_file_extension, report_content, creation_date, target, comment, sitemap_file, robots_text, sitemap_text, dorks_results, api_scan) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" 167 | 168 | data_tuple = (report_file_type, pdf_blob, creation_date, db_casename, case_comment, sitemap_xml, robots, sitemap_links, dorking_results, api_scan_insert) 169 | cursor.execute(sqlite_insert_blob_query, data_tuple) 170 | sqlite_connection.commit() 171 | print(Fore.GREEN + "Scanning results are successfully saved in report storage database") 172 | cursor.close() 173 | except sqlite3.Error as e: 174 | print(Fore.RED + "Failed to insert scanning results in report storage database. Reason: {}".format(e)) 175 | finally: 176 | if sqlite_connection: 177 | sqlite_connection.close() 178 | print(Fore.GREEN + "Database connection is successfully closed") 179 | 180 | def check_api_keys(used_api_flag): 181 | for key in used_api_flag: 182 | conn = sqlite3.connect('apis//api_keys.db') 183 | cursor = conn.cursor() 184 | cursor.execute("SELECT api_key FROM api_keys WHERE id = ?", (key,)) 185 | result = cursor.fetchone() 186 | if result[0] == 'YOUR_API_KEY': 187 | return False 188 | return True 189 | 190 | def select_api_keys(mode): 191 | conn = sqlite3.connect('apis//api_keys.db') 192 | cursor = conn.cursor() 193 | cursor.execute("SELECT id, api_name, api_key, limitations FROM api_keys") 194 | rows = cursor.fetchall() 195 | console = Console() 196 | if rows: 197 | try: 198 | table = Table( 199 | title="[white on magenta]SUPPORTED API AND YOUR KEYS[/white on magenta]", 200 | show_lines=True, 201 | border_style="magenta", 202 | box=box.ROUNDED 203 | ) 204 | table.add_column("ID", style="cyan", justify="center") 205 | table.add_column("API Name", style="white", justify="center") 206 | table.add_column("API Key", style="white", justify="center") 207 | table.add_column("Limitations", style="white", justify="center") 208 | for row in rows: 209 | api_key = f"[red]{row[2]}[/red]" if row[2] == "YOUR_API_KEY" else str(row[2]) 210 | table.add_row( 211 | str(row[0]), 212 | str(row[1]), 213 | api_key, 214 | str(row[3]) 215 | ) 216 | console.print(table) 217 | except sqlite3.Error as e: 218 | print(Fore.RED + "Failed to see API keys database's content. 
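# Round-trip sketch for insert_blob() and db_report_recreate() above: store one
# HTML report blob, then rebuild the files by row id. The file paths, dates and
# the row id are illustrative, and the 'extracted' folder must already exist.
import db_processing as db

with open('report_extracted.html', 'rb') as f:    # any existing HTML report
    blob = f.read()
db.insert_blob('HTML', blob, 'example.com', '28-12-2024', 'demo case',
               'robots txt', 'sitemap xml', 'sitemap links', 'dork results',
               'VirusTotal')
db.db_report_recreate('extracted', 1)             # id 1 assumed to exist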
Reason: {}".format(e)) 219 | conn.close() 220 | else: 221 | print(Fore.RED + 'No data found in API keys database') 222 | conn.close() 223 | if mode == 'printing': 224 | conn.close() 225 | return None 226 | else: 227 | return cursor, conn 228 | -------------------------------------------------------------------------------- /dorking/dorking_handler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | import time 4 | import os 5 | import logging 6 | from colorama import Fore, Style 7 | import undetected_chromedriver as uc 8 | from selenium.webdriver.common.by import By 9 | from selenium.webdriver.common.keys import Keys 10 | 11 | sys.path.append('service') 12 | from logs_processing import logging 13 | from ua_rotator import user_agent_rotator 14 | from proxies_rotator import proxies_rotator 15 | from config_processing import read_config 16 | 17 | def proxy_transfer(): 18 | proxy_flag, proxies_list = proxies_rotator.get_proxies() 19 | if proxy_flag == 0: 20 | pass 21 | return proxy_flag, "" 22 | else: 23 | working_proxies = proxies_rotator.check_proxies(proxies_list) 24 | return proxy_flag, working_proxies 25 | 26 | def solid_google_dorking(query, proxy_flag, proxies_list, pages=1): 27 | result_query = [] 28 | request_count = 0 29 | try: 30 | config_values = read_config() 31 | options = uc.ChromeOptions() 32 | options.binary_location = r"{}".format(config_values['dorking_browser']) 33 | dorking_browser_mode = config_values['dorking_browser_mode'] 34 | if dorking_browser_mode.lower() == 'headless': 35 | options.add_argument("--headless=new") 36 | elif dorking_browser_mode.lower() == 'nonheadless': 37 | pass 38 | options.add_argument("--no-sandbox") 39 | options.add_argument("--disable-dev-shm-usage") 40 | options.add_argument("--disable-blink-features=AutomationControlled") 41 | options.add_argument("--disable-infobars") 42 | options.add_argument("--disable-extensions") 43 | options.add_argument(f"user-agent={user_agent_rotator.get_random_user_agent()}") 44 | if proxy_flag == 1: 45 | proxy = proxies_rotator.get_random_proxy(proxies_list) 46 | options.add_argument(f'--proxy-server={proxy["http"]}') 47 | driver = uc.Chrome(options=options) 48 | for page in range(pages): 49 | try: 50 | driver.get("https://www.google.com") 51 | time.sleep(random.uniform(2, 4)) 52 | try: 53 | accepted = False 54 | try: 55 | accept_btn = driver.find_element(By.XPATH, '//button[contains(text(), "Принять все") or contains(text(), "Accept all")]') 56 | driver.execute_script("arguments[0].click();", accept_btn) 57 | print(Fore.GREEN + 'Pressed "Accept all" button!' + Style.RESET_ALL) 58 | accepted = True 59 | time.sleep(random.uniform(2, 3)) 60 | except: 61 | pass 62 | if not accepted: 63 | iframes = driver.find_elements(By.TAG_NAME, "iframe") 64 | for iframe in iframes: 65 | driver.switch_to.frame(iframe) 66 | try: 67 | accept_btn = driver.find_element(By.XPATH, '//button[contains(text(), "Принять все") or contains(text(), "Accept all")]') 68 | driver.execute_script("arguments[0].click();", accept_btn) 69 | print(Fore.GREEN + 'Pressed "Accept all" button!' + Style.RESET_ALL) 70 | accepted = True 71 | driver.switch_to.default_content() 72 | time.sleep(random.uniform(2, 3)) 73 | break 74 | except: 75 | driver.switch_to.default_content() 76 | continue 77 | driver.switch_to.default_content() 78 | if not accepted: 79 | print(Fore.GREEN + "Google TOS button was not found. Seems good..." 
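# A stripped-down bootstrap of the browser used by solid_google_dorking()
# above (a sketch: it requires a local Chrome/Chromium install; in the real
# code binary_location comes from the DPULSE config, here it is left to
# undetected-chromedriver's autodetection):
import undetected_chromedriver as uc

options = uc.ChromeOptions()
options.add_argument("--headless=new")
options.add_argument("--no-sandbox")
driver = uc.Chrome(options=options)
driver.get("https://www.google.com")
print(driver.title)
driver.quit()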
+ Style.RESET_ALL) 80 | except Exception: 81 | print(Fore.RED + f'Error with pressing "Accept all" button. Closing...' + Style.RESET_ALL) 82 | driver.save_screenshot("consent_error.png") 83 | driver.switch_to.default_content() 84 | search_box = driver.find_element(By.NAME, "q") 85 | for char in query: 86 | search_box.send_keys(char) 87 | time.sleep(random.uniform(0.05, 0.2)) 88 | time.sleep(random.uniform(0.5, 1.2)) 89 | search_box.send_keys(Keys.RETURN) 90 | time.sleep(random.uniform(2.5, 4)) 91 | links = driver.find_elements(By.CSS_SELECTOR, 'a') 92 | for link in links: 93 | href = link.get_attribute('href') 94 | if href and href.startswith('http') and 'google.' not in href and 'webcache.googleusercontent.com' not in href: 95 | result_query.append(href) 96 | request_count += 1 97 | try: 98 | next_button = driver.find_element(By.ID, 'pnnext') 99 | next_button.click() 100 | time.sleep(random.uniform(2, 3)) 101 | except: 102 | break 103 | except Exception as e: 104 | logging.error(f'DORKING PROCESSING (SELENIUM): ERROR. REASON: {e}') 105 | continue 106 | driver.quit() 107 | if len(result_query) >= 2: 108 | del result_query[-2:] 109 | return result_query 110 | except Exception as e: 111 | logging.error(f'DORKING PROCESSING: ERROR. REASON: {e}') 112 | print(Fore.RED + "Error while running Selenium dorking. See journal for details." + Style.RESET_ALL) 113 | return [] 114 | 115 | def save_results_to_txt(folderpath, table, queries, pages=1): 116 | try: 117 | config_values = read_config() 118 | dorking_delay = int(config_values['dorking_delay (secs)']) 119 | delay_step = int(config_values['delay_step']) 120 | txt_writepath = folderpath + '//04-dorking_results.txt' 121 | total_results = [] 122 | total_dorks_amount = len(queries) 123 | with open(txt_writepath, 'w') as f: 124 | print(Fore.GREEN + "Started Google Dorking. Please, be patient, it may take some time") 125 | print(Fore.GREEN + f"{dorking_delay} seconds delay after each {delay_step} dorking requests was configured" + Style.RESET_ALL) 126 | proxy_flag, proxies_list = proxy_transfer() 127 | dorked_query_counter = 0 128 | for i, query in enumerate(queries, start=1): 129 | f.write(f"QUERY #{i}: {query}\n") 130 | try: 131 | results = solid_google_dorking(query, proxy_flag, proxies_list, pages) 132 | if not results: 133 | f.write("=> NO RESULT FOUND\n") 134 | total_results.append((query, 0)) 135 | else: 136 | total_results.append((query, len(results))) 137 | for result in results: 138 | f.write(f"=> {result}\n") 139 | except Exception as e: 140 | logging.error(f"DORKING PROCESSING: ERROR. REASON: {e}") 141 | total_results.append((query, 0)) 142 | f.write("\n") 143 | dorked_query_counter += 1 144 | print(Fore.GREEN + f" Dorking with " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{dorked_query_counter}/{total_dorks_amount}" + Style.RESET_ALL + Fore.GREEN + " dork" + Style.RESET_ALL, end="\r") 145 | print(Fore.GREEN + "\nGoogle Dorking end. 
Results successfully saved in HTML report\n" + Style.RESET_ALL)
146 | print(Fore.GREEN + f"During Google Dorking with {table.upper()}:")
147 | for query, count in total_results:
148 | if count == 0:
149 | count = 'no results'
150 | print(Fore.GREEN + f"[+] Found results for " + Fore.LIGHTCYAN_EX + f'{query}' + Fore.GREEN + ' query: ' + Fore.LIGHTRED_EX + f'{count}' + Style.RESET_ALL)
151 | else:
152 | print(Fore.GREEN + f"[+] Found results for " + Fore.LIGHTCYAN_EX + f'{query}' + Fore.GREEN + ' query: ' + Fore.LIGHTCYAN_EX + f'{count}' + Style.RESET_ALL)
153 | return f'Successfully dorked domain with {table.upper()} dorks table', txt_writepath
154 | except Exception as e:
155 | print(Fore.RED + 'Error appeared while trying to dork target. See journal for details')
156 | logging.error(f'DORKING PROCESSING: ERROR. REASON: {e}')
157 | return 'Domain dorking failed. See journal for details', txt_writepath
158 |
159 | def transfer_results_to_xlsx(table, queries, pages=10):
160 | config_values = read_config()
161 | dorking_delay = int(config_values['dorking_delay (secs)'])
162 | delay_step = int(config_values['delay_step'])
163 | print(Fore.GREEN + "Started Google Dorking. Please, be patient, it may take some time")
164 | print(Fore.GREEN + f"{dorking_delay} seconds delay after each {delay_step} dorking requests was configured" + Style.RESET_ALL)
165 | proxy_flag, proxies_list = proxy_transfer()
166 | dorked_query_counter = 0
167 | total_dorks_amount = len(queries)
168 | dorking_return_list = []
169 | for i, query in enumerate(queries, start=1):
170 | dorking_return_list.append(f"QUERY #{i}: {query}\n")
171 | results = solid_google_dorking(query, proxy_flag, proxies_list, pages) # arguments follow solid_google_dorking's (query, proxy_flag, proxies_list, pages) signature
172 | if not results:
173 | dorking_return_list.append("NO RESULT FOUND\n")
174 | else:
175 | for result in results:
176 | dorking_return_list.append(f"{result}\n")
177 | dorked_query_counter += 1
178 | dorking_return_list.append("\n")
179 | print(Fore.GREEN + f" Dorking with " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{dorked_query_counter}/{total_dorks_amount}" + Style.RESET_ALL + Fore.GREEN + " dork" + Style.RESET_ALL, end="\r")
180 | print(Fore.GREEN + "\nGoogle Dorking end. Results successfully saved in XLSX report\n" + Style.RESET_ALL)
181 | return f'Successfully dorked domain with {table.upper()} dorks table', dorking_return_list
182 |
183 | def dorks_files_check():
184 | dorks_path = 'dorking//'
185 | dorks_files = ['iot_dorking.db', 'files_dorking.db', 'basic_dorking.db', 'adminpanels_dorking.db', 'webstructure_dorking.db']
186 | dorks_files_counter = 0
187 | for dork_files in dorks_files:
188 | files_path = os.path.join(dorks_path, dork_files)
189 | if os.path.isfile(files_path):
190 | dorks_files_counter += 1
191 | else:
192 | pass
193 | if dorks_files_counter == 5:
194 | print(Fore.GREEN + "Dorks databases presence: OK" + Style.RESET_ALL)
195 | else:
196 | print(Fore.RED + "Dorks databases presence: NOT OK\nSome files may not be in folder. 
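# The dork queries themselves live in the SQLite files checked by
# dorks_files_check() above. A loader might look like the sketch below; the
# table and column names here are assumptions for illustration, not the
# shipped schema:
import sqlite3

conn = sqlite3.connect('dorking//basic_dorking.db')
rows = conn.execute("SELECT dork FROM dorks").fetchall()     # assumed schema
conn.close()
queries = [row[0].format('example.com') for row in rows]     # '{}' -> target domain
print(len(queries), 'dorks loaded')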
Please compare dorking folder with the same folder on the official repository\n" + Style.RESET_ALL) 197 | sys.exit() 198 | -------------------------------------------------------------------------------- /apis/api_hudsonrock.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from colorama import Fore, Style 3 | import re 4 | 5 | def hudsonrock_html_prep(formatted_output): 6 | formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) 7 | start_marker = "=== HUDSONROCK API REPORT ===" 8 | end_marker = "[+] Email Data:" 9 | start_index = formatted_output.find(start_marker) 10 | end_index = formatted_output.find(end_marker) 11 | if start_index != -1 and end_index != -1: 12 | formatted_output = formatted_output[:start_index] + formatted_output[end_index:] 13 | return formatted_output 14 | 15 | def api_hudsonrock_get(email=None, username=None, domain=None, ip=None): 16 | base_url = "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/" 17 | results = {} 18 | 19 | def make_request(url): 20 | try: 21 | response = requests.get(url) 22 | response.raise_for_status() 23 | return response.json() 24 | except requests.RequestException as e: 25 | return {'error': str(e)} 26 | 27 | if email: 28 | email_url = f"{base_url}search-by-email?email={email}" 29 | results['email'] = make_request(email_url) 30 | 31 | if username: 32 | username_url = f"{base_url}search-by-username?username={username}" 33 | results['username'] = make_request(username_url) 34 | 35 | if domain: 36 | domain_url = f"{base_url}search-by-domain?domain={domain}" 37 | results['domain'] = make_request(domain_url) 38 | 39 | urls_by_domain_url = f"{base_url}urls-by-domain?domain={domain}" 40 | results['urls_by_domain'] = make_request(urls_by_domain_url) 41 | 42 | if ip: 43 | ip_url = f"{base_url}search-by-ip?ip={ip}" 44 | results['ip'] = make_request(ip_url) 45 | 46 | return results 47 | 48 | 49 | def api_hudsonrock_check(domain, ip, email, username): 50 | results = api_hudsonrock_get(email, username, domain, ip) 51 | formatted_output = Fore.LIGHTBLUE_EX + "\n=== HUDSONROCK API REPORT ===\n" + Style.RESET_ALL 52 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Provided Data:{Style.RESET_ALL}\n" 53 | formatted_output += f"{Fore.GREEN}Domain:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{domain}{Style.RESET_ALL}\n" 54 | formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{ip}{Style.RESET_ALL}\n" 55 | formatted_output += f"{Fore.GREEN}E-mail:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{email}{Style.RESET_ALL}\n" 56 | formatted_output += f"{Fore.GREEN}Username:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{username}{Style.RESET_ALL}\n" 57 | 58 | def format_section(title, data): 59 | nonlocal formatted_output 60 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] {title}:{Style.RESET_ALL}\n" 61 | if 'error' in data: 62 | formatted_output += f"{Fore.RED}Error appeared when trying to get results for {title} requests. 
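# Direct usage sketch for api_hudsonrock_get() above (the cavalier.hudsonrock.com
# endpoints it wraps are public and keyless; the lookup values are illustrative,
# and the apis folder is assumed to be on sys.path):
from api_hudsonrock import api_hudsonrock_get

results = api_hudsonrock_get(email='user@example.com', domain='example.com')
print(results.get('domain', {}).get('total', 0), 'compromised entries')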
Probably given data is incorrect.{Style.RESET_ALL}\n" 63 | return 64 | 65 | if title == 'Email Data': 66 | formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" 67 | for i, stealer in enumerate(data.get('stealers', []), 1): 68 | formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" 69 | formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" 70 | formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" 71 | formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" 72 | formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" 73 | formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" 74 | formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" 75 | formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" 76 | 77 | elif title == 'Username Data': 78 | formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" 79 | for i, stealer in enumerate(data.get('stealers', []), 1): 80 | formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" 81 | formatted_output += f"{Fore.GREEN}Stealer Family:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('stealer_family', 'Not Found')}{Style.RESET_ALL}\n" 82 | formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" 83 | formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" 84 | formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" 85 | formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" 86 | formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" 87 | formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" 88 | formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" 89 | 90 | elif title == 'Domain Data': 91 | formatted_output += f"{Fore.GREEN}Total Entries:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('total', 0)}{Style.RESET_ALL}\n" 92 | formatted_output += f"{Fore.GREEN}Total Stealers:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('totalStealers', 0)}{Style.RESET_ALL}\n" 93 | formatted_output += f"\n{Fore.GREEN}Sample Employee URLs:{Style.RESET_ALL}\n" 94 | employee_urls = data.get('data', {}).get('employees_urls', []) 95 | if employee_urls: 96 | for url_data in employee_urls[:10]: 97 | formatted_output += 
f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" 98 | formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" 99 | formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" 100 | else: 101 | formatted_output += f"{Fore.RED}No employee URLs available.{Style.RESET_ALL}\n" 102 | 103 | elif title == 'Attack Surface Data': 104 | formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" 105 | formatted_output += f"\n{Fore.GREEN}Sample Employee URLs:{Style.RESET_ALL}\n" 106 | employees = data.get('data', {}).get('employees_urls', []) 107 | if employees: 108 | for url_data in employees[:10]: 109 | formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" 110 | formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" 111 | formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" 112 | else: 113 | formatted_output += f"{Fore.RED}No employee URLs available{Style.RESET_ALL}\n" 114 | formatted_output += f"\n{Fore.GREEN}Sample Client URLs:{Style.RESET_ALL}\n" 115 | clients = data.get('data', {}).get('clients_urls', []) 116 | if clients: 117 | for url_data in clients[:10]: 118 | formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" 119 | formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" 120 | formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" 121 | else: 122 | formatted_output += f"{Fore.RED}No client URLs available{Style.RESET_ALL}\n" 123 | 124 | elif title == 'IP Data': 125 | formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" 126 | if data.get('stealers'): 127 | for i, stealer in enumerate(data.get('stealers', []), 1): 128 | formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" 129 | formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" 130 | formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" 131 | formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" 132 | formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" 133 | formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" 134 | formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" 135 | formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" 136 | formatted_output += "\n" 137 | 138 | 
if 'email' in results:
139 | format_section('Email Data', results['email'])
140 | if 'username' in results:
141 | format_section('Username Data', results['username'])
142 | if 'domain' in results:
143 | format_section('Domain Data', results['domain'])
144 | if 'urls_by_domain' in results:
145 | format_section('Attack Surface Data', results['urls_by_domain'])
146 | if 'ip' in results:
147 | format_section('IP Data', results['ip'])
148 |
149 | print(formatted_output)
150 | return formatted_output
151 |
--------------------------------------------------------------------------------
/service/cli_init.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from config_processing import read_config
3 | from rich.panel import Panel
4 | from rich.table import Table
5 | from rich.layout import Layout
6 | from rich.text import Text
7 | from rich.prompt import Prompt
8 | from rich.progress import Progress, SpinnerColumn, TextColumn
9 | from rich import box
10 |
11 | try:
12 | from colorama import Fore, Back, Style
13 | from pyfiglet import Figlet
14 | from rich.console import Console
15 | except ImportError as e:
16 | print("Import error appeared. Reason: {}".format(e)) # plain print here: colorama itself may be the import that failed, so Fore is not guaranteed to exist
17 | sys.exit()
18 |
19 |
20 | class Menu:
21 | def __init__(self):
22 | self.console = Console()
23 |
24 | def welcome_menu(self):
25 | config_values = read_config()
26 | preview_style = (config_values['preview_color']).lower()
27 | wm_font = (config_values['wm_font']).lower()
28 | fig = Figlet(font=wm_font)
29 | print('\n')
30 | combined_panel = Panel(
31 | Text.assemble(
32 | (fig.renderText('DPULSE'), preview_style),
33 | ("\n", ""),
34 | ("DPULSE-CLI - v1.4 rolling - OSINT-TECHNOLOGIES\n\n", "magenta bold"),
35 | ("Visit our pages:\n", "white"),
36 | ("GitHub: ", "white"), ("https://github.com/OSINT-TECHNOLOGIES\n", "blue underline"),
37 | ("PyPi: ", "white"), ("https://pypi.org/project/dpulse/\n", "blue underline"),
38 | ("Docs: ", "white"), ("https://dpulse.readthedocs.io", "blue underline")
39 | ),
40 | title="Current version info",
41 | box=box.ROUNDED,
42 | border_style="magenta"
43 | )
44 |
45 | self.console.print(combined_panel)
46 |
47 | def print_main_menu(self):
48 | table = Table(
49 | show_header=False,
50 | box=box.ROUNDED,
51 | border_style="magenta",
52 | show_edge=False
53 | )
54 |
55 | table.add_column("Option", style="cyan", justify="right")
56 | table.add_column("Description", style="white")
57 | table.add_row("1.", "Target selection & scanning")
58 | table.add_row("2.", "General settings")
59 | table.add_row("3.", "Dorking module manager")
60 | table.add_row("4.", "Report storage DB manager")
61 | table.add_row("5.", "API modules manager")
62 | table.add_row("6.", "Help (browser will be opened!)")
63 | table.add_row("7.", "[red]Exit DPULSE[/red]")
64 |
65 | menu_panel = Panel(
66 | table,
67 | title="[white on magenta]MAIN MENU[/white on magenta]",
68 | border_style="magenta"
69 | )
70 |
71 | self.console.print("\n")
72 | self.console.print(menu_panel)
73 |
74 | def print_settings_menu(self):
75 | table = Table(
76 | show_header=False,
77 | box=box.ROUNDED,
78 | border_style="magenta",
79 | show_edge=False
80 | )
81 |
82 | table.add_column("Option", style="cyan", justify="right")
83 | table.add_column("Description", style="white")
84 |
85 | table.add_row("1.", "Print current config file")
86 | table.add_row("2.", "Edit config file")
87 | table.add_row("3.", "Clear journal content")
88 | table.add_row("4.", "[red]Return to main 
menu[/red]") 89 | 90 | menu_panel = Panel( 91 | table, 92 | title="[white on magenta]SETTINGS MENU[/white on magenta]", 93 | border_style="magenta" 94 | ) 95 | 96 | self.console.print("\n") 97 | self.console.print(menu_panel) 98 | 99 | def print_db_menu(self): 100 | table = Table( 101 | show_header=False, 102 | box=box.ROUNDED, 103 | border_style="magenta", 104 | show_edge=False 105 | ) 106 | 107 | table.add_column("Option", style="cyan", justify="right") 108 | table.add_column("Description", style="white") 109 | 110 | table.add_row("1.", "Show database content") 111 | table.add_row("2.", "Recreate report from database") 112 | table.add_row("3.", "[red]Return to main menu[/red]") 113 | 114 | menu_panel = Panel( 115 | table, 116 | title="[white on magenta]REPORTS DATABASE MANAGER[/white on magenta]", 117 | border_style="magenta" 118 | ) 119 | 120 | self.console.print("\n") 121 | self.console.print(menu_panel) 122 | 123 | def dorking_db_manager(self): 124 | table = Table( 125 | show_header=False, 126 | box=box.ROUNDED, 127 | border_style="magenta", 128 | show_edge=False 129 | ) 130 | 131 | table.add_column("Option", style="cyan", justify="right") 132 | table.add_column("Description", style="white") 133 | 134 | table.add_row("1.", "Generate custom Dorking DB") 135 | table.add_row("2.", "[red]Return to main menu[/red]") 136 | 137 | menu_panel = Panel( 138 | table, 139 | title="[white on magenta]DORKING DB MANAGER[/white on magenta]", 140 | border_style="magenta" 141 | ) 142 | 143 | self.console.print("\n") 144 | self.console.print(menu_panel) 145 | 146 | def api_manager(self): 147 | table = Table( 148 | show_header=False, 149 | box=box.ROUNDED, 150 | border_style="magenta", 151 | show_edge=False 152 | ) 153 | 154 | table.add_column("Option", style="cyan", justify="right") 155 | table.add_column("Description", style="white") 156 | 157 | table.add_row("1.", "Add API key") 158 | table.add_row("2.", "Restore reference API Keys DB") 159 | table.add_row("3.", "[red]Return to main menu[/red]") 160 | 161 | menu_panel = Panel( 162 | table, 163 | title="[white on magenta]API KEYS DB MANAGER[/white on magenta]", 164 | border_style="magenta" 165 | ) 166 | 167 | self.console.print("\n") 168 | self.console.print(menu_panel) 169 | 170 | 171 | def print_prescan_summary(short_domain, report_filetype, pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment, snapshotting_ui_mark): 172 | table = Table( 173 | show_header=False, 174 | box=box.ROUNDED, 175 | border_style="magenta" 176 | ) 177 | 178 | table.add_column("Parameter", style="green") 179 | table.add_column("Value", style="cyan bold") 180 | 181 | table.add_row("Determined target:", short_domain) 182 | table.add_row("Report type:", report_filetype.lower()) 183 | table.add_row("PageSearch conduction:", pagesearch_ui_mark) 184 | table.add_row("Dorking conduction:", dorking_ui_mark) 185 | table.add_row("APIs scan:", used_api_ui) 186 | table.add_row("Snapshotting conduction:", snapshotting_ui_mark) 187 | table.add_row("Case comment:", case_comment) 188 | 189 | summary_panel = Panel( 190 | table, 191 | title="[magenta]PRE-SCAN SUMMARY[/magenta]", 192 | border_style="magenta" 193 | ) 194 | 195 | Console().print("\n") 196 | Console().print(summary_panel) 197 | 198 | def print_api_db_msg(): 199 | print(Fore.GREEN + "\nYou've entered custom Dorking DB generator!\n" + Style.RESET_ALL) 200 | print(Fore.GREEN + "Remember some rules in order to successfully create your custom Dorking DB:" + Style.RESET_ALL) 201 | print(Fore.GREEN + "[1] - dork_id variable must be 
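# Rendering sketch for the Menu class above (assumes a readable DPULSE config
# providing the preview_color and wm_font keys consumed by welcome_menu(), and
# the service folder on sys.path so this module imports as cli_init):
from cli_init import Menu

menu = Menu()
menu.welcome_menu()
menu.print_main_menu()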
unique, starting with 1 and then +1 every new dork" + Style.RESET_ALL) 202 | print(Fore.GREEN + "[2] - When it comes to define domain in dork, put {} in it\n" + Style.RESET_ALL) 203 | print(Fore.GREEN + "Examples: related:{}, site:{} inurl:login and so on\n" + Style.RESET_ALL) 204 | 205 | def print_ps_cli_report(subdomains_list, accessible_subdomains, ps_emails_return, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter): 206 | if len(subdomains_list) == 0: 207 | print(Fore.GREEN + "\nDuring subdomains analysis:\n[+] Total " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{len(subdomains_list)}" + Style.RESET_ALL + Fore.GREEN + " subdomains were checked" + Style.RESET_ALL) 208 | else: 209 | print(Fore.GREEN + "\nDuring subdomains analysis:\n[+] Total " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{len(subdomains_list)}" + Style.RESET_ALL + Fore.GREEN + " subdomains were checked" + Style.RESET_ALL) 210 | if accessible_subdomains == 0: 211 | print(Fore.GREEN + "[+] Among them " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{accessible_subdomains}" + Style.RESET_ALL + Fore.GREEN + " subdomains were accessible" + Style.RESET_ALL) 212 | else: 213 | print(Fore.GREEN + "[+] Among them " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{accessible_subdomains}" + Style.RESET_ALL + Fore.GREEN + " subdomains were accessible" + Style.RESET_ALL) 214 | if len(ps_emails_return) == 0: 215 | print(Fore.GREEN + "[+] In result, " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{len(ps_emails_return)}" + Style.RESET_ALL + Fore.GREEN + " unique e-mail addresses were found" + Style.RESET_ALL) 216 | else: 217 | print(Fore.GREEN + "[+] In result, " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{len(ps_emails_return)}" + Style.RESET_ALL + Fore.GREEN + " unique e-mail addresses were found" + Style.RESET_ALL) 218 | if files_counter == 0: 219 | print(Fore.GREEN + "[+] Also, " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{files_counter}" + Style.RESET_ALL + Fore.GREEN + " files were extracted" + Style.RESET_ALL) 220 | else: 221 | print(Fore.GREEN + "[+] Also, " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{files_counter}" + Style.RESET_ALL + Fore.GREEN + " files were extracted" + Style.RESET_ALL) 222 | if cookies_counter == 0: 223 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{cookies_counter}" + Style.RESET_ALL + Fore.GREEN + " cookies with values" + Style.RESET_ALL) 224 | else: 225 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{cookies_counter}" + Style.RESET_ALL + Fore.GREEN + " cookies with values" + Style.RESET_ALL) 226 | if api_keys_counter == 0: 227 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{api_keys_counter}" + Style.RESET_ALL + Fore.GREEN + " API keys" + Style.RESET_ALL) 228 | else: 229 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{api_keys_counter}" + Style.RESET_ALL + Fore.GREEN + " API keys" + Style.RESET_ALL) 230 | if website_elements_counter == 0: 231 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{website_elements_counter}" + Style.RESET_ALL + Fore.GREEN + " different web page elements" + Style.RESET_ALL) 232 | else: 233 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{website_elements_counter}" + Style.RESET_ALL + Fore.GREEN + " different web page elements" + Style.RESET_ALL) 234 | if exposed_passwords_counter == 0: 235 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{exposed_passwords_counter}" + Style.RESET_ALL + Fore.GREEN + " 
exposed passwords" + Style.RESET_ALL) 236 | else: 237 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{exposed_passwords_counter}" + Style.RESET_ALL + Fore.GREEN + " exposed passwords" + Style.RESET_ALL) 238 | -------------------------------------------------------------------------------- /datagather_modules/crawl_processor.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import socket 3 | import re 4 | import urllib 5 | from collections import defaultdict 6 | from urllib.parse import urlparse, unquote 7 | import whois 8 | import requests 9 | from bs4 import BeautifulSoup 10 | from colorama import Fore, Style 11 | 12 | sys.path.append('service') 13 | from logs_processing import logging 14 | 15 | def ip_gather(short_domain): 16 | ip_address = socket.gethostbyname(short_domain) 17 | return ip_address 18 | 19 | def whois_gather(short_domain): 20 | try: 21 | logging.info('WHOIS INFO GATHERING: OK') 22 | w = whois.whois(short_domain) 23 | if w.org is None: 24 | w['org'] = 'Organization name was not extracted' 25 | logging.info('WHOIS INFO GATHERING: OK') 26 | return w 27 | except Exception as e: 28 | print(Fore.RED + "Error while gathering WHOIS information. See journal for details") 29 | logging.error(f'WHOIS GATHERING: ERROR. REASON: {e}') 30 | w = { 31 | 'registrar': 'N/A', 32 | 'creation_date': 'N/A', 33 | 'expiration_date': 'N/A', 34 | 'name_servers': ['N/A'], 35 | 'org': 'N/A' 36 | } 37 | return w 38 | pass 39 | 40 | def contact_mail_gather(url): 41 | try: 42 | logging.info('CONTACT MAIL GATHERING: OK') 43 | r = requests.get(url) 44 | data = r.text 45 | soup = BeautifulSoup(data, "html.parser") 46 | mails = [] 47 | for i in soup.find_all(href=re.compile("mailto")): 48 | i.encode().decode() 49 | mails.append(i.string) 50 | mails = [mail for mail in mails if mail is not None] 51 | if (not mails) or (mails is None): 52 | logging.info('CONTACT MAIL GATHERING: OK (BUT NO MAILS WERE FOUND)') 53 | return 'No contact e-mails were found' 54 | else: 55 | logging.info('CONTACT MAIL GATHERING: OK') 56 | return ', '.join(map(str, mails)) 57 | except requests.RequestException as e: 58 | print(Fore.RED + "Error while gathering e-mails. See journal for details") 59 | logging.error(f'CONTACT MAIL GATHERING: ERROR. REASON: {e}') 60 | pass 61 | 62 | def subdomains_mail_gather(url): 63 | try: 64 | logging.info('SUBDOMAINS MAIL GATHERING: OK') 65 | r = requests.get(url) 66 | data = r.text 67 | soup = BeautifulSoup(data, "html.parser") 68 | mails_uncleaned = [] 69 | for i in soup.find_all(href=re.compile("mailto")): 70 | i.encode().decode() 71 | mails_uncleaned.append(i.string) 72 | mails_cleaned = [item for item in mails_uncleaned if item is not None] 73 | mails = [''.join(sublist) for sublist in mails_cleaned] 74 | return mails 75 | except requests.RequestException as e: 76 | print(Fore.RED + "Error while gathering e-mails. See journal for details") 77 | logging.error(f'SUBDOMAINS MAIL GATHERING: ERROR. 
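# Usage sketch for the crawl helpers above (network access required; the target
# is a stand-in, and datagather_modules is assumed to be on sys.path so this
# file imports as crawl_processor):
import crawl_processor as cp

ip = cp.ip_gather('example.com')
w = cp.whois_gather('example.com')                 # whois entry, or an 'N/A' dict on failure
mails = cp.contact_mail_gather('http://example.com/')
print(ip, w['registrar'], mails)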
REASON: {e}') 78 | pass 79 | 80 | def subdomains_gather(url, short_domain): 81 | try: 82 | logging.info('SUBDOMAINS GATHERING: OK') 83 | response = requests.get(url) 84 | soup = BeautifulSoup(response.text, 'html.parser') 85 | linked_domains = set() 86 | for link in soup.find_all('a', href=True): 87 | domain = urlparse(link['href']).netloc 88 | if domain and domain != urlparse(url).netloc: 89 | linked_domains.add(domain) 90 | finder = short_domain 91 | subdomains = [urllib.parse.unquote(i) for i in linked_domains if finder in i] 92 | subdomains_amount = len(subdomains) 93 | if not subdomains: 94 | subdomains = ['No subdomains were found'] 95 | logging.info('SUBDOMAINS GATHERING: OK') 96 | return subdomains, subdomains_amount 97 | except Exception as e: 98 | print(Fore.RED + f"Cannot gather subdomains due to error. See journal for details" + Style.RESET_ALL) 99 | logging.error(f'SUBDOMAINS GATHERING: ERROR. REASON: {e}') 100 | pass 101 | return ['No subdomains were found'], 0 102 | 103 | def sm_gather(url): 104 | social_domains = { 105 | 'Facebook': ('facebook.com',), 106 | 'Twitter': ('twitter.com',), 107 | 'Instagram': ('instagram.com',), 108 | 'Telegram': ('t.me',), 109 | 'TikTok': ('tiktok.com',), 110 | 'LinkedIn': ('linkedin.com',), 111 | 'VKontakte': ('vk.com',), 112 | 'YouTube': ('youtube.com', 'youtu.be'), 113 | 'Odnoklassniki': ('ok.ru',), 114 | 'WeChat': ('wechat.com',), 115 | 'X.com': ('x.com',), 116 | } 117 | 118 | categorized_links = {name: [] for name in social_domains.keys()} 119 | parsed_input = urlparse(url) 120 | host_input = (parsed_input.hostname or parsed_input.netloc or '').lower() 121 | 122 | if host_input.startswith('www.'): 123 | host_input = host_input[4:] 124 | 125 | for name, domains in social_domains.items(): 126 | if any(host_input == d or host_input.endswith('.' + d) for d in domains): 127 | categorized_links[name].append(unquote(url)) 128 | break 129 | 130 | response = requests.get(url, timeout=10) 131 | response.raise_for_status() 132 | soup = BeautifulSoup(response.text, 'html.parser') 133 | for a in soup.find_all('a', href=True): 134 | href = a['href'] 135 | parsed = urlparse(href) 136 | host = parsed.hostname or parsed.netloc 137 | if not host: 138 | continue 139 | 140 | host = host.lower() 141 | if host.startswith('www.'): 142 | host = host[4:] 143 | 144 | for name, domains in social_domains.items(): 145 | if any(host == d or host.endswith('.' + d) for d in domains): 146 | categorized_links[name].append(unquote(href)) 147 | break 148 | 149 | for name, links in categorized_links.items(): 150 | if not links: 151 | links.append(f'{name} links were not found') 152 | 153 | return categorized_links 154 | 155 | def domains_reverse_research(subdomains, report_file_type): 156 | subdomain_urls = [] 157 | subdomain_mails = [] 158 | subdomain_socials = [] 159 | subdomain_ip = [] 160 | 161 | try: 162 | for subdomain in subdomains: 163 | subdomain_url = "http://" + subdomain + "/" 164 | subdomain_urls.append(subdomain_url) 165 | except Exception as e: 166 | print(Fore.RED + "Some URL seems unreachable! DPULSE will continue to work, but the URL causing the error won't be included in report. See journal for details" + Style.RESET_ALL) 167 | logging.error(f'SUBDOMAINS URL FORMING: ERROR. REASON: {e}') 168 | pass 169 | 170 | try: 171 | for subdomain in subdomains: 172 | subdomains_ip = ip_gather(subdomain) 173 | subdomain_ip.append(subdomains_ip) 174 | subdomain_ip = list(set(subdomain_ip)) 175 | except Exception as e: 176 | print(Fore.RED + "Some URL seems unreachable! 
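# sm_gather() above returns a dict of platform name -> list of links, padding
# every empty category with a "<name> links were not found" placeholder, e.g.
# {'Facebook': ['https://facebook.com/acme'], 'Twitter': ['Twitter links were not found'], ...}.
# A quick probe (the page must be reachable; target is illustrative):
from crawl_processor import sm_gather

links = sm_gather('http://example.com/')
print(links['YouTube'])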
def domains_reverse_research(subdomains, report_file_type):
    subdomain_urls = []
    subdomain_mails = []
    subdomain_socials = []
    subdomain_ip = []

    for subdomain in subdomains:
        subdomain_urls.append("http://" + subdomain + "/")

    for subdomain in subdomains:
        # Resolve each subdomain independently so a single unreachable host
        # doesn't abort the remaining lookups
        try:
            subdomain_ip.append(ip_gather(subdomain))
        except Exception as e:
            print(Fore.RED + "Some URL seems unreachable! DPULSE will continue to work, but the URL causing the error won't be included in report. See journal for details" + Style.RESET_ALL)
            logging.error(f'SUBDOMAINS IP GATHERING: ERROR. REASON: {e}')
    subdomain_ip = list(set(subdomain_ip))

    for subdomain_url in subdomain_urls:
        try:
            subdomain_mails.append(subdomains_mail_gather(subdomain_url))
            subdomain_socials.append(sm_gather(subdomain_url))
        except Exception as e:
            print(Fore.RED + "Some URL seems unreachable! DPULSE will continue to work, but the URL causing the error won't be included in report. See journal for details" + Style.RESET_ALL)
            logging.error(f'SUBDOMAINS MAIL/SOCIALS GATHERING: ERROR. REASON: {e}')

    subdomain_mails = [sublist for sublist in subdomain_mails if sublist and sublist != [None]]
    # Join each subdomain's mails with a separator (the original ''.join mashed addresses together)
    subdomain_mails = [', '.join(sublist) for sublist in subdomain_mails]
    subdomain_socials = [{k: v for k, v in d.items() if v} for d in subdomain_socials]
    subdomain_socials = [d for d in subdomain_socials if d]
    subdomain_socials_grouped = defaultdict(list)

    for d in subdomain_socials:
        for key, value in d.items():
            subdomain_socials_grouped[key].extend(value)

    social_hosts = {
        'Facebook': ('facebook.com',),
        'Twitter': ('twitter.com',),
        'Instagram': ('instagram.com',),
        'Telegram': ('t.me',),
        'TikTok': ('tiktok.com',),
        'LinkedIn': ('linkedin.com',),
        'VKontakte': ('vk.com',),
        'YouTube': ('youtube.com', 'youtu.be'),
        'Odnoklassniki': ('ok.ru',),
        'WeChat': ('wechat.com',),
        'X.com': ('x.com',),
    }
    sd_socials = {name: [] for name in social_hosts}

    # A mapping-driven pass replaces the long elif chain this block used to be;
    # it also fixes the chain's copy-paste bug that filed x.com links under
    # 'Odnoklassniki' instead of 'X.com'
    for links in subdomain_socials_grouped.values():
        for link in links:
            hostname = urlparse(link).hostname
            if not hostname:
                continue
            for name, domains in social_hosts.items():
                if any(hostname == d or hostname.endswith('.' + d) for d in domains):
                    sd_socials[name].append(urllib.parse.unquote(link))
                    break

    sd_socials = {k: list(set(v)) for k, v in sd_socials.items()}

    if not subdomain_mails:
        subdomain_mails = ['No subdomains mails were found']
    if not subdomain_ip:
        subdomain_ip = ["No subdomains IP's were found"]

    if report_file_type == 'html':
        return subdomain_mails, sd_socials, subdomain_ip
    elif report_file_type == 'xlsx':
        return subdomain_urls, subdomain_mails, subdomain_ip, sd_socials
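# --- Editor's note: return-order sketch (illustrative). ---
# The two report paths unpack differently shaped tuples, so callers must match the order:
#
#   mails, socials, ips = domains_reverse_research(subs, 'html')
#   urls, mails, ips, socials = domains_reverse_research(subs, 'xlsx')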
--------------------------------------------------------------------------------
/pagesearch/pagesearch_parsers.py:
--------------------------------------------------------------------------------
import os
import re
import sys
import fitz
import requests

from bs4 import BeautifulSoup
from typing import List, Tuple
from urllib.parse import urljoin  # used when assembling document URLs below
from colorama import Fore, Style

sys.path.append('service')
from logs_processing import logging
from cli_init import print_ps_cli_report

ansi_re = re.compile(r'\x1b\[[0-9;]*[mK]')

def make_recorder(storage: List[str]):
    # Returns a print-like function that also stores an ANSI-stripped copy
    # of every message for the final report listing
    def _rec(*parts, sep=" ", end="\n"):
        msg = sep.join(str(p) for p in parts) + end
        print(msg, end="")
        storage.append(ansi_re.sub("", msg))
    return _rec

def extract_text_from_pdf(filename: str) -> str:
    try:
        doc = fitz.open(filename=filename)
        text = ""
        for page in doc:
            text += page.get_text()
        doc.close()
        logging.info('TEXT EXTRACTION FROM PDF (PAGESEARCH): OK')
        return text
    except Exception as e:
        print(Fore.RED + "Can't open some PDF file. See journal for details" + Style.RESET_ALL)
        logging.error(f'TEXT EXTRACTION FROM PDF (PAGESEARCH): ERROR. REASON: {e}')
        return ""

def find_keywords_in_pdfs(ps_docs_path, keywords: List[str]) -> Tuple[dict, int]:
    try:
        pdf_files = [f for f in os.listdir(ps_docs_path) if f.lower().endswith(".pdf")]
        results, pdf_with_keywords = {}, 0
        for pdf_file in pdf_files:
            pdf_path = os.path.join(ps_docs_path, pdf_file)
            extracted_text = extract_text_from_pdf(pdf_path)
            for keyword in keywords:
                if keyword.lower() in extracted_text.lower():
                    if pdf_file not in results:
                        results[pdf_file] = []
                    results[pdf_file].append(keyword)
                    pdf_with_keywords += 1
        logging.info('KEYWORDS SEARCH IN PDF (PAGESEARCH): OK')
        return results, pdf_with_keywords
    except Exception as e:
        print(Fore.RED + "Can't find keywords. See journal for details" + Style.RESET_ALL)
        logging.error(f'KEYWORDS SEARCH IN PDF (PAGESEARCH): ERROR. REASON: {e}')
        return {}, 0

def clean_bad_pdfs(ps_docs_path):
    # Removes PDFs that PyMuPDF cannot open so later parsing doesn't crash
    pdf_files = [f for f in os.listdir(ps_docs_path) if f.lower().endswith(".pdf")]
    for pdf_file in pdf_files:
        try:
            fitz.open(filename=os.path.join(ps_docs_path, pdf_file)).close()
        except Exception:
            os.remove(os.path.join(ps_docs_path, pdf_file))
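# --- Editor's note: result-shape sketch for find_keywords_in_pdfs (illustrative). ---
# results maps each PDF filename to the keywords matched inside it; the counter
# tallies individual (file, keyword) hits. All values below are hypothetical.
#
#   results, hits = find_keywords_in_pdfs('report_folder/ps_documents', ['password', 'admin'])
#   # results -> {'extracted_notes.pdf': ['password']}, hits -> 1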
def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
    report_lines: List[str] = []
    p = make_recorder(report_lines)
    #print(Fore.GREEN + "Conducting PageSearch. Please, be patient, it may take a long time\n" + Style.RESET_ALL)
    ps_docs_path = os.path.join(report_folder, 'ps_documents')
    if not os.path.exists(ps_docs_path):
        os.makedirs(ps_docs_path)

    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    total_emails, keywords_messages_list = [], []
    accessible_subdomains = files_counter = website_elements_counter = 0
    exposed_passwords_counter = api_keys_counter = cookies_counter = 0
    tried_subdomains_counter = 0
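    # --- Editor's note: what email_pattern matches (illustrative). ---
    # Standard "local@domain.tld" shapes; the sample text below is hypothetical.
    #
    #   re.findall(email_pattern, "mail admin@example.com, x@y")
    #   # -> ['admin@example.com']   ('x@y' lacks a dotted TLD and is skipped)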
p(Fore.GREEN + "Found cookie: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{cookie_name}. " + Style.RESET_ALL + Fore.GREEN + "Value: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{cookie_value}" + Style.RESET_ALL) 134 | cookies_counter += 1 135 | except Exception as e: 136 | print(Fore.RED + "Error while getting detailed info on web resource. See journal for details" + Style.RESET_ALL) 137 | logging.error(f'WEB RESOURCE ADDITIONAL INFO GATHERING (PAGESEARCH): ERROR. REASON: {e}') 138 | 139 | try: 140 | logging.info('FILES EXTRACTION (PAGESEARCH): OK') 141 | links = soup.find_all('a') 142 | for link in links: 143 | href = link.get('href') 144 | if href and href.lower().endswith(('.docx', '.xlsx', '.csv', '.pdf', '.pptx', '.doc', '.ppt', '.xls', '.rtf', '.conf', '.config', '.db', '.sql', '.json', '.txt')): 145 | document_url = 'http://' + url + href 146 | p(Fore.GREEN + "Found document: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{document_url}" + Style.RESET_ALL) 147 | response_doc = requests.get(document_url) 148 | file_extensions = { 149 | '.docx': 'extracted_{}.docx', 150 | '.xlsx': 'extracted_{}.xlsx', 151 | '.pdf': 'extracted_{}.pdf', 152 | '.csv': 'extracted_{}.csv', 153 | '.pptx': 'extracted_{}.pptx', 154 | '.doc': 'extracted_{}.doc', 155 | '.ppt': 'extracted_{}.ppt', 156 | '.xls': 'extracted_{}.xls', 157 | '.json': 'extracted_{}.json', 158 | '.txt': 'extracted_{}.txt', 159 | '.sql': 'extracted_{}.sql', 160 | '.db': 'extracted_{}.db', 161 | '.config': 'extracted_{}.config', 162 | '.conf': 'extracted_{}.conf' 163 | } 164 | if response_doc.status_code == 200: 165 | file_extension = os.path.splitext(href.lower())[1] 166 | if file_extension in file_extensions: 167 | filename = os.path.basename(href) 168 | extracted_path = os.path.join(ps_docs_path, file_extensions[file_extension].format(os.path.splitext(filename)[0])) 169 | with open(extracted_path, 'wb') as file: 170 | file.write(response_doc.content) 171 | files_counter += 1 172 | p(Fore.GREEN + "File was successfully saved" + Style.RESET_ALL) 173 | except Exception as e: 174 | print(Fore.RED + "This file can't be accessed to extract it. See journal for details" + Style.RESET_ALL) 175 | logging.error(f'FILES EXTRACTION (PAGESEARCH): ERROR. REASON: {e}') 176 | 177 | p(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL) 178 | ps_emails_list = [x for x in total_emails if x] 179 | ps_emails_return = [', '.join(sublist) for sublist in ps_emails_list] 180 | 181 | clean_bad_pdfs(ps_docs_path) 182 | 183 | pdf_with_keywords = 0 184 | if keywords_flag == 1: 185 | print(Fore.GREEN + "Searching keywords in PDF files..." 
    p(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)
    ps_emails_list = [x for x in total_emails if x]
    ps_emails_return = [', '.join(sublist) for sublist in ps_emails_list]

    clean_bad_pdfs(ps_docs_path)

    pdf_with_keywords = 0
    if keywords_flag == 1:
        print(Fore.GREEN + "Searching keywords in PDF files..." + Style.RESET_ALL)
        pdf_results, pdf_with_keywords = find_keywords_in_pdfs(ps_docs_path, keywords)
        for pdf_file, found_keywords in pdf_results.items():
            p(Fore.GREEN + "Keywords " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{', '.join(found_keywords)}" + Style.RESET_ALL + Fore.GREEN + f" found in '{pdf_file}'" + Style.RESET_ALL)
            keywords_messages_list.append(f"Keywords {', '.join(found_keywords)} found in '{pdf_file}'")

    print_ps_cli_report(subdomains_list, accessible_subdomains, ps_emails_return, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter)

    if keywords_flag == 0:
        print(Fore.RED + "[+] Keywords were not gathered because no keywords were provided" + Style.RESET_ALL)
        keywords_messages_list = ['No keywords were found because no keywords were provided']
    else:
        print(Fore.GREEN + f"[+] Total {pdf_with_keywords} keyword hits were found in PDF files" + Style.RESET_ALL)
    p(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)

    data_tuple = (
        ps_emails_return,
        accessible_subdomains,
        len(ps_emails_return),
        files_counter,
        cookies_counter,
        api_keys_counter,
        website_elements_counter,
        exposed_passwords_counter,
        keywords_messages_list
    )

    exclude = ("Conducting PageSearch", "Searching keywords", "Keywords were not gathered", "Total ")
    pagesearch_query = "\n".join(line for line in report_lines if not line.startswith(exclude))
    return data_tuple, pagesearch_query
--------------------------------------------------------------------------------
/datagather_modules/data_assembler.py:
--------------------------------------------------------------------------------
import sys
from datetime import datetime
import os
from colorama import Fore, Style
from urllib.parse import urlparse

sys.path.extend(['service', 'pagesearch', 'dorking', 'snapshotting'])

from logs_processing import logging
from config_processing import read_config
from db_creator import get_dorking_query
import crawl_processor as cp
import dorking_handler as dp
import networking_processor as np
from pagesearch_parsers import subdomains_parser
from api_virustotal import api_virustotal_check
from api_securitytrails import api_securitytrails_check
from api_hudsonrock import api_hudsonrock_check
from screen_snapshotting import take_screenshot
from html_snapshotting import save_page_as_html
from archive_snapshotting import download_snapshot


SOCIAL_KEYS = [
    'Facebook',
    'Twitter',
    'Instagram',
    'Telegram',
    'TikTok',
    'LinkedIn',
    'VKontakte',
    'YouTube',
    'Odnoklassniki',
    'WeChat',
    'X.com',
]

def make_socials_dict(with_not_found: bool = False):
    if with_not_found:
        return {name: [f'{name} links were not found'] for name in SOCIAL_KEYS}
    return {name: [] for name in SOCIAL_KEYS}

def ensure_list(value):
    if isinstance(value, list):
        return value
    if value is None:
        return []
    return [value]

def is_real_url(value: str) -> bool:
    if not isinstance(value, str):
        return False
    parsed = urlparse(value)
    return parsed.scheme in ('http', 'https') and bool(parsed.netloc)
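# --- Editor's note: is_real_url in action (illustrative). ---
# Placeholder strings such as 'Facebook links were not found' parse without a
# scheme and netloc, so they are filtered out before links are counted:
#
#   is_real_url('https://vk.com/team')               # True
#   is_real_url('Facebook links were not found')     # False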
def establishing_dork_db_connection(dorking_flag):
    dorking_db_paths = {
        'basic': 'dorking//basic_dorking.db',
        'iot': 'dorking//iot_dorking.db',
        'files': 'dorking//files_dorking.db',
        'admins': 'dorking//adminpanels_dorking.db',
        'web': 'dorking//webstructure_dorking.db',
    }
    dorking_tables = {
        'basic': 'basic_dorks',
        'iot': 'iot_dorks',
        'files': 'files_dorks',
        'admins': 'admins_dorks',
        'web': 'web_dorks',
    }
    if dorking_flag in dorking_db_paths:
        dorking_db_path = dorking_db_paths[dorking_flag]
        table = dorking_tables[dorking_flag]
    elif dorking_flag.startswith('custom'):
        lst = dorking_flag.split('+')
        if len(lst) < 2 or not lst[1]:
            raise ValueError(f"Invalid custom dorking flag (expected 'custom+<db_name>'): {dorking_flag}")
        dorking_db_name = lst[1]
        dorking_db_path = 'dorking//' + dorking_db_name
        table = 'dorks'
    else:
        raise ValueError(f"Invalid dorking flag: {dorking_flag}")
    return dorking_db_path, table
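# --- Editor's note: flag resolution sketch (illustrative; 'mydorks.db' is a placeholder). ---
#
#   establishing_dork_db_connection('iot')
#   # -> ('dorking//iot_dorking.db', 'iot_dorks')
#   establishing_dork_db_connection('custom+mydorks.db')
#   # -> ('dorking//mydorks.db', 'dorks')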
class DataProcessing:
    def report_preprocessing(self, short_domain, report_file_type):
        report_ctime = datetime.now().strftime('%d-%m-%Y, %H:%M:%S')
        files_ctime = datetime.now().strftime('(%d-%m-%Y, %Hh%Mm%Ss)')
        files_body = short_domain.replace(".", "") + '_' + files_ctime
        casename = f"{files_body}.{report_file_type}"
        foldername = files_body
        db_casename = short_domain.replace(".", "")
        now = datetime.now()
        db_creation_date = str(now.year) + str(now.month) + str(now.day)
        report_folder = f"report_{foldername}"
        robots_filepath = os.path.join(report_folder, '01-robots.txt')
        sitemap_filepath = os.path.join(report_folder, '02-sitemap.txt')
        sitemap_links_filepath = os.path.join(report_folder, '03-sitemap_links.txt')
        os.makedirs(report_folder, exist_ok=True)
        return (casename, db_casename, db_creation_date, robots_filepath,
                sitemap_filepath, sitemap_links_filepath, report_file_type,
                report_folder, files_ctime, report_ctime)

    def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag,
                       keywords, keywords_flag, dorking_flag, used_api_flag,
                       snapshotting_flag, username, from_date, end_date):

        (casename, db_casename, db_creation_date, robots_filepath,
         sitemap_filepath, sitemap_links_filepath, report_file_type,
         report_folder, ctime, report_ctime) = self.report_preprocessing(short_domain, report_file_type)

        logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} STARTS HERE')
        print(Fore.LIGHTMAGENTA_EX + "\n[STARTED BASIC DOMAIN SCAN]" + Style.RESET_ALL)
        print(Fore.GREEN + "[1/11] Getting domain IP address" + Style.RESET_ALL)
        ip = cp.ip_gather(short_domain)
        print(Fore.GREEN + '[2/11] Gathering WHOIS information' + Style.RESET_ALL)
        res = cp.whois_gather(short_domain)
        print(Fore.GREEN + '[3/11] Processing e-mails gathering' + Style.RESET_ALL)
        mails = cp.contact_mail_gather(url)
        print(Fore.GREEN + '[4/11] Processing subdomain gathering' + Style.RESET_ALL)
        subdomains, subdomains_amount = cp.subdomains_gather(url, short_domain)
        print(Fore.GREEN + '[5/11] Processing social medias gathering' + Style.RESET_ALL)
        try:
            social_medias = cp.sm_gather(url)
        except Exception:
            print(Fore.RED + "Social medias were not gathered because of error" + Style.RESET_ALL)
            logging.exception("Error during social medias gathering")
            social_medias = make_socials_dict(with_not_found=True)

        print(Fore.GREEN + '[6/11] Processing subdomain analysis' + Style.RESET_ALL)
        if report_file_type == 'xlsx':
            subdomain_urls, subdomain_mails, subdomain_ip, sd_socials = cp.domains_reverse_research(
                subdomains, report_file_type
            )
        elif report_file_type == 'html':
            subdomain_mails, sd_socials, subdomain_ip = cp.domains_reverse_research(
                subdomains, report_file_type
            )
        else:
            subdomain_urls = []
            subdomain_mails = []
            subdomain_ip = []
            sd_socials = make_socials_dict()

        print(Fore.GREEN + '[7/11] Processing SSL certificate gathering' + Style.RESET_ALL)
        issuer, subject, notBefore, notAfter, commonName, serialNumber = np.get_ssl_certificate(short_domain)

        print(Fore.GREEN + '[8/11] Processing DNS records gathering' + Style.RESET_ALL)
        mx_records = np.get_dns_info(short_domain, report_file_type)

        print(Fore.GREEN + '[9/11] Extracting robots.txt and sitemap.xml' + Style.RESET_ALL)
        robots_txt_result = np.get_robots_txt(short_domain, robots_filepath)
        sitemap_xml_result = np.get_sitemap_xml(short_domain, sitemap_filepath)
        try:
            sitemap_links_status = np.extract_links_from_sitemap(sitemap_links_filepath, sitemap_filepath)
        except Exception:
            sitemap_links_status = 'Sitemap links were not parsed'

        print(Fore.GREEN + '[10/11] Gathering info about website technologies' + Style.RESET_ALL)
        (web_servers, cms, programming_languages,
         web_frameworks, analytics, javascript_frameworks) = np.get_technologies(url)

        print(Fore.GREEN + '[11/11] Processing Shodan InternetDB search' + Style.RESET_ALL)
        ports, hostnames, cpes, tags, vulns = np.query_internetdb(ip, report_file_type)

        if not isinstance(social_medias, dict):
            logging.warning(f'social_medias is {type(social_medias)}, expected dict; replacing with empty socials dict')
            social_medias = make_socials_dict()

        if not isinstance(sd_socials, dict):
            logging.warning(f'sd_socials is {type(sd_socials)}, expected dict; replacing with empty socials dict')
            sd_socials = make_socials_dict()

        all_social_keys = set(SOCIAL_KEYS) | set(social_medias.keys()) | set(sd_socials.keys())

        common_socials_raw = {}
        for key in all_social_keys:
            main_vals = ensure_list(social_medias.get(key, []))
            sd_vals = ensure_list(sd_socials.get(key, []))
            common_socials_raw[key] = main_vals + sd_vals

        common_socials = {}
        total_socials = 0

        for key, values in common_socials_raw.items():
            # Order-preserving dedup, then keep only real http(s) links
            seen = set()
            deduped = []
            for v in values:
                if v not in seen:
                    seen.add(v)
                    deduped.append(v)

            real_links = [v for v in deduped if is_real_url(v)]

            if real_links:
                common_socials[key] = real_links
                total_socials += len(real_links)
            else:
                common_socials[key] = [f'{key} links were not found']

        total_ports = len(ports)
        total_ips = len(subdomain_ip) + 1  # +1 accounts for the main domain's IP
        total_vulns = len(vulns)

        print(Fore.LIGHTMAGENTA_EX + "[ENDED BASIC DOMAIN SCAN]\n" + Style.RESET_ALL)
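        # --- Editor's note: merge behaviour sketch (illustrative; sample values are hypothetical). ---
        # Main-page and subdomain links are merged per network, deduplicated in
        # first-seen order, and placeholders survive only when no real URL remains:
        #
        #   social_medias = {'VKontakte': ['https://vk.com/team']}
        #   sd_socials    = {'VKontakte': ['https://vk.com/team', 'VKontakte links were not found']}
        #   # -> common_socials['VKontakte'] == ['https://vk.com/team'], total_socials == 1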
        if report_file_type == 'html':
            if pagesearch_flag.lower() == 'y':
                if subdomains and subdomains[0] != 'No subdomains were found':
                    print(Fore.LIGHTMAGENTA_EX + "[STARTED EXTENDED DOMAIN SCAN WITH PAGESEARCH]" + Style.RESET_ALL)
                    (
                        ps_emails_return,
                        accessible_subdomains,
                        emails_amount,
                        files_counter,
                        cookies_counter,
                        api_keys_counter,
                        website_elements_counter,
                        exposed_passwords_counter,
                        keywords_messages_list
                    ), ps_string = subdomains_parser(
                        subdomains, report_folder, keywords, keywords_flag
                    )
                    total_links_counter = accessed_links_counter = "No results because PageSearch does not gather these categories"
                    if len(keywords_messages_list) == 0:
                        keywords_messages_list = ['No keywords were found']
                    print(Fore.LIGHTMAGENTA_EX + "[ENDED EXTENDED DOMAIN SCAN WITH PAGESEARCH]\n" + Style.RESET_ALL)
                else:
                    print(Fore.RED + "Can't start PageSearch because no subdomains were detected\n" + Style.RESET_ALL)
                    ps_emails_return = ""
                    accessible_subdomains = files_counter = cookies_counter = api_keys_counter = \
                        website_elements_counter = exposed_passwords_counter = total_links_counter = \
                        accessed_links_counter = emails_amount = 'No results because no subdomains were found'
                    ps_string = 'No PageSearch listing provided because no subdomains were found'
                    keywords_messages_list = ['No data was gathered because no subdomains were found']
            elif pagesearch_flag.lower() == 'n':
                accessible_subdomains = files_counter = cookies_counter = api_keys_counter = \
                    website_elements_counter = exposed_passwords_counter = total_links_counter = \
                    accessed_links_counter = emails_amount = keywords_messages_list = \
                    "No results because user did not select PageSearch for this scan"
                ps_emails_return = ""
                ps_string = 'No PageSearch listing provided because user did not select PageSearch mode for this scan'

        if dorking_flag == 'n':
            dorking_status = 'Google Dorking mode was not selected for this scan'
            dorking_file_path = 'Google Dorking mode was not selected for this scan'
        else:
            dorking_db_path, table = establishing_dork_db_connection(dorking_flag.lower())
            print(Fore.LIGHTMAGENTA_EX + f"[STARTED EXTENDED DOMAIN SCAN WITH {dorking_flag.upper()} DORKING TABLE]" + Style.RESET_ALL)
            dorking_status, dorking_file_path = dp.save_results_to_txt(
                report_folder, table, get_dorking_query(short_domain, dorking_db_path, table)
            )
            print(Fore.LIGHTMAGENTA_EX + f"[ENDED EXTENDED DOMAIN SCAN WITH {dorking_flag.upper()} DORKING TABLE]\n" + Style.RESET_ALL)

        api_scan_db = []
        if used_api_flag != ['Empty']:
            print(Fore.LIGHTMAGENTA_EX + "[STARTED EXTENDED DOMAIN SCAN WITH 3RD PARTY API]" + Style.RESET_ALL)
            if '1' in used_api_flag:
                virustotal_output = api_virustotal_check(short_domain)
                api_scan_db.append('VirusTotal')
            else:
                virustotal_output = 'No results because user did not select VirusTotal API scan'

            if '2' in used_api_flag:
                securitytrails_output = api_securitytrails_check(short_domain)
                api_scan_db.append('SecurityTrails')
            else:
                securitytrails_output = 'No results because user did not select SecurityTrails API scan'

            if '3' in used_api_flag:
                if username is None or (isinstance(username, str) and username.lower() == 'n'):
                    username = None
                hudsonrock_output = api_hudsonrock_check(short_domain, ip, mails, username)
                api_scan_db.append('HudsonRock')
            else:
                hudsonrock_output = 'No results because user did not select HudsonRock API scan'

            print(Fore.LIGHTMAGENTA_EX + "[ENDED EXTENDED DOMAIN SCAN WITH 3RD PARTY API]\n" + Style.RESET_ALL)
        else:
            virustotal_output = 'No results because user did not select VirusTotal API scan'
            securitytrails_output = 'No results because user did not select SecurityTrails API scan'
            hudsonrock_output = 'No results because user did not select HudsonRock API scan'
            api_scan_db.append('No')
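        # --- Editor's note: used_api_flag semantics as read above (illustrative values). ---
        # '1' -> VirusTotal, '2' -> SecurityTrails, '3' -> HudsonRock; ['Empty'] skips them all.
        #
        #   used_api_flag = ['1', '3']   # would run VirusTotal and HudsonRock only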
        if snapshotting_flag.lower() in ['s', 'p', 'w']:
            config_values = read_config()
            installed_browser = config_values['installed_browser']
            print(Fore.LIGHTMAGENTA_EX + "[STARTED DOMAIN SNAPSHOTTING]" + Style.RESET_ALL)
            if snapshotting_flag.lower() == 's':
                take_screenshot(installed_browser, url, os.path.join(report_folder, 'screensnapshot.png'))
            elif snapshotting_flag.lower() == 'p':
                save_page_as_html(url, os.path.join(report_folder, 'domain_html_copy.html'))
            elif snapshotting_flag.lower() == 'w':
                download_snapshot(short_domain, from_date, end_date, report_folder)
            print(Fore.LIGHTMAGENTA_EX + "[ENDED DOMAIN SNAPSHOTTING]\n" + Style.RESET_ALL)

        data_array = [
            ip, res, mails, subdomains, subdomains_amount, social_medias,
            subdomain_mails, sd_socials, subdomain_ip, issuer, subject,
            notBefore, notAfter, commonName, serialNumber, mx_records,
            robots_txt_result, sitemap_xml_result, sitemap_links_status,
            web_servers, cms, programming_languages, web_frameworks,
            analytics, javascript_frameworks, ports, hostnames, cpes, tags,
            vulns, common_socials, total_socials, ps_emails_return,
            accessible_subdomains, emails_amount, files_counter,
            cookies_counter, api_keys_counter, website_elements_counter,
            exposed_passwords_counter, total_links_counter,
            accessed_links_counter, keywords_messages_list, dorking_status,
            dorking_file_path, virustotal_output, securitytrails_output,
            hudsonrock_output, ps_string, total_ports, total_ips, total_vulns
        ]

        report_info_array = [
            casename, db_casename, db_creation_date, report_folder,
            ctime, report_file_type, report_ctime, api_scan_db, used_api_flag
        ]
        logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} ENDS HERE')
        return data_array, report_info_array
--------------------------------------------------------------------------------
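# --- Editor's note: end-to-end usage sketch (illustrative, not part of the repository). ---
# All argument values below are hypothetical placeholders; the real CLI wires these
# in from user prompts (see service/cli_init.py).
#
#   from datagather_modules.data_assembler import DataProcessing
#
#   processing = DataProcessing()
#   data_array, report_info = processing.data_gathering(
#       short_domain='example.com',
#       url='http://example.com/',
#       report_file_type='html',
#       pagesearch_flag='n',
#       keywords=[], keywords_flag=0,
#       dorking_flag='n',
#       used_api_flag=['Empty'],
#       snapshotting_flag='n',
#       username=None, from_date=None, end_date=None,
#   )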