├── __init__.py ├── apis ├── __init__.py ├── api_keys.db ├── api_keys_reference.db ├── api_virustotal.py ├── api_securitytrails.py └── api_hudsonrock.py ├── docs ├── __init__.py └── dpulse-docs │ ├── docs │ ├── contact_dev.md │ ├── demo.md │ ├── pagesearch.md │ ├── basic_scan.md │ ├── logging.md │ ├── snapshotting.md │ ├── index.md │ ├── api.md │ ├── reporting.md │ ├── getting_started.md │ ├── dorking.md │ └── config.md │ └── mkdocs.yml ├── pagesearch ├── __init__.py └── pagesearch_parsers.py ├── dorking ├── __init__.py ├── iot_dorking.db ├── basic_dorking.db ├── files_dorking.db ├── adminpanels_dorking.db ├── webstructure_dorking.db ├── ua_rotator.py ├── proxies_rotator.py ├── db_creator.py └── dorking_handler.py ├── service ├── __init__.py ├── misc.py ├── logs_processing.py ├── files_processing.py ├── config_processing.py ├── db_processing.py └── cli_init.py ├── reporting_modules ├── __init__.py └── html_report_creation.py ├── snapshotting ├── __init__.py ├── html_snapshotting.py ├── screen_snapshotting.py └── archive_snapshotting.py ├── datagather_modules ├── __init__.py ├── networking_processor.py ├── crawl_processor.py └── data_assembler.py ├── report_examples ├── html_report_example │ ├── 01-robots.txt │ └── ps_documents │ │ └── extracted_About%20the%20HackThisSite%20Mirror.txt └── xlsx_report_example │ ├── 01-robots.txt │ ├── hackthissiteorg_(28-12-2024, 09h33m02s).csv │ └── ps_documents │ └── extracted_About%20the%20HackThisSite%20Mirror.txt ├── .dockerignore ├── .github ├── ISSUE_TEMPLATE │ ├── custom.md │ ├── feature_request.md │ └── bug_report.md └── workflows │ ├── python-package.yml │ └── codeql.yml ├── .readthedocs.yaml ├── requirements.txt ├── SECURITY.md ├── Dockerfile ├── LICENSE ├── docker-entrypoint.sh ├── pyproject.toml ├── CODE_OF_CONDUCT.md └── README.md /__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /apis/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /docs/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /pagesearch/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /dorking/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /service/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /reporting_modules/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /snapshotting/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- /datagather_modules/__init__.py: -------------------------------------------------------------------------------- 1 | 2 | -------------------------------------------------------------------------------- 
/apis/api_keys.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/apis/api_keys.db -------------------------------------------------------------------------------- /dorking/iot_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/iot_dorking.db -------------------------------------------------------------------------------- /apis/api_keys_reference.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/apis/api_keys_reference.db -------------------------------------------------------------------------------- /dorking/basic_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/basic_dorking.db -------------------------------------------------------------------------------- /dorking/files_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/files_dorking.db -------------------------------------------------------------------------------- /dorking/adminpanels_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/adminpanels_dorking.db -------------------------------------------------------------------------------- /dorking/webstructure_dorking.db: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/dorking/webstructure_dorking.db -------------------------------------------------------------------------------- /report_examples/html_report_example/01-robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /missions/ 3 | Disallow: /killing/all/humans/ 4 | -------------------------------------------------------------------------------- /report_examples/xlsx_report_example/01-robots.txt: -------------------------------------------------------------------------------- 1 | User-agent: * 2 | Disallow: /missions/ 3 | Disallow: /killing/all/humans/ 4 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | .github 3 | __pycache__/ 4 | *.pyc 5 | *.pyo 6 | *.pyd 7 | .venv/ 8 | .env 9 | dist/ 10 | build/ 11 | *.log 12 | docs/ 13 | report_examples/ 14 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/custom.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Custom issue template 3 | about: Describe this issue template's purpose here. 
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | 
11 | 
--------------------------------------------------------------------------------
/report_examples/xlsx_report_example/hackthissiteorg_(28-12-2024, 09h33m02s).csv:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/OSINT-TECHNOLOGIES/dpulse/HEAD/report_examples/xlsx_report_example/hackthissiteorg_(28-12-2024, 09h33m02s).csv
--------------------------------------------------------------------------------
/.readthedocs.yaml:
--------------------------------------------------------------------------------
1 | version: 2
2 | 
3 | build:
4 |   os: ubuntu-20.04
5 |   tools:
6 |     python: "3.10"
7 |   jobs:
8 |     pre_build:
9 |       - pip install mkdocs mkdocs-material
10 | 
11 | python:
12 |   install:
13 |     - requirements: requirements.txt
14 | 
15 | mkdocs:
16 |   configuration: docs/dpulse-docs/mkdocs.yml
17 | 
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | Jinja2>=3.1.6
2 | beautifulsoup4==4.12.2
3 | requests==2.32.4
4 | python-whois==0.9.4
5 | colorama==0.4.6
6 | pyfiglet==1.0.2
7 | rich==13.7.1
8 | MechanicalSoup==1.3.0
9 | builtwith==1.3.4
10 | dnspython==2.6.1
11 | openpyxl==3.1.2
12 | PyMuPDF==1.26.6
13 | selenium==4.28.1
14 | webdriver-manager==4.0.2
15 | undetected_chromedriver==3.5.5
16 | setuptools==80.9.0
17 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/contact_dev.md:
--------------------------------------------------------------------------------
1 | ## Contact developer
2 | 
3 | The DPULSE developer will be glad to receive your feedback, questions and suggestions.
4 | Feel free to contact the developer in whichever way is most convenient for you:
5 | 
6 | * OSINT-TECHNOLOGIES e-mail: osint.technologies@gmail.com
7 | * [Make an issue page on DPULSE GitHub repository](https://github.com/OSINT-TECHNOLOGIES/dpulse/issues/new/choose)
8 | 
9 | 
10 | 
11 | 
12 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/mkdocs.yml:
--------------------------------------------------------------------------------
1 | site_name: DPULSE Docs
2 | theme:
3 |   name: readthedocs
4 | nav:
5 |   - Home: index.md
6 |   - Getting started: getting_started.md
7 |   - Basic scan: basic_scan.md
8 |   - PageSearch: pagesearch.md
9 |   - Built-in automatic Dorking: dorking.md
10 |   - Built-in API scanning: api.md
11 |   - Snapshotting: snapshotting.md
12 |   - Reporting and report types: reporting.md
13 |   - Configuration file: config.md
14 |   - Logging: logging.md
15 |   - Demo and use-cases: demo.md
16 |   - Contact developer: contact_dev.md
17 | 
--------------------------------------------------------------------------------
/snapshotting/html_snapshotting.py:
--------------------------------------------------------------------------------
1 | import requests
2 | from colorama import Fore, Style
3 | 
4 | def save_page_as_html(url, filename):
5 |     try:
6 |         print(Fore.GREEN + "Getting web page's content" + Style.RESET_ALL)
7 |         response = requests.get(url, timeout=30)  # bounded timeout so a dead host can't hang the scan
8 |         print(Fore.GREEN + "Creating .HTML file" + Style.RESET_ALL)
9 |         with open(filename, 'w', encoding='utf-8') as file:
10 |             file.write(response.text)
11 |         print(Fore.GREEN + ".HTML snapshot was successfully created" + Style.RESET_ALL)
12 |     except Exception as e:
13 |         print(Fore.RED + f"Error: {e}" + Style.RESET_ALL)
14 | 
--------------------------------------------------------------------------------
/SECURITY.md:
--------------------------------------------------------------------------------
1 | # Security Policy
2 | 
3 | ## Supported Versions
4 | 
5 | The table below lists the versions that will receive security updates and vulnerability fixes as issues are found
6 | 
7 | | Version | Supported          |
8 | | ------- | ------------------ |
9 | | >= 1.2  | :white_check_mark: |
10 | | < 1.2   | :x:                |
11 | 
12 | ## Reporting a Vulnerability
13 | 
14 | You can report vulnerabilities through the "Report a security vulnerability" issue template. Please describe the problem in as much detail as you can. Security reports are our top priority, and we will investigate them as soon as they arrive.
15 | 
--------------------------------------------------------------------------------
/dorking/ua_rotator.py:
--------------------------------------------------------------------------------
1 | import random
2 | import sys
3 | sys.path.append('service')
4 | from config_processing import read_config
5 | from colorama import Fore, Style
6 | 
7 | class UserAgentRotator:
8 |     def __init__(self):
9 |         config_values = read_config()
10 |         self.user_agents = config_values['user_agents']
11 | 
12 |     def get_random_user_agent(self):
13 |         user_agent = random.choice(self.user_agents)  # pick once so the printed and returned User-Agent match
14 |         print(Fore.GREEN + "Changed User-Agent to " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{user_agent}" + Style.RESET_ALL)
15 |         return user_agent
16 | 
17 | user_agent_rotator = UserAgentRotator()
18 | 
--------------------------------------------------------------------------------
/service/misc.py:
--------------------------------------------------------------------------------
1 | import socket
2 | 
3 | def time_processing(end):
4 |     if end < 60:
5 |         endtime = round(end)
6 |         endtime_string = f'approximately {endtime} seconds'
7 |     else:
8 |         time_minutes = round(end / 60)
9 |         if time_minutes == 1:
10 |             endtime_string = f'approximately {time_minutes} minute'
11 |         else:
12 |             endtime_string = f'approximately {time_minutes} minutes'
13 |     return endtime_string
14 | 
15 | def domain_precheck(domain):
16 |     try:
17 |         socket.create_connection((domain, 80), timeout=5)
18 |         return True
19 |     except OSError:
20 |         return False
21 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | FROM python:3.11-slim
2 | 
3 | ENV PYTHONUNBUFFERED=1 \
4 |     PIP_NO_CACHE_DIR=1 \
5 |     POETRY_NO_INTERACTION=1 \
6 |     POETRY_VIRTUALENVS_CREATE=false \
7 |     POETRY_VERSION=1.8.3
8 | 
9 | WORKDIR /app
10 | 
11 | ENV PYTHONPATH=/app:/app/service:/app/apis:/app/datagather_modules:/app/dorking:/app/pagesearch:/app/reporting_modules:/app/snapshotting:$PYTHONPATH
12 | 
13 | RUN pip install "poetry==${POETRY_VERSION}"
14 | 
15 | COPY pyproject.toml poetry.lock* ./
16 | RUN poetry install --no-root
17 | 
18 | COPY . .
19 | 
20 | COPY docker-entrypoint.sh /app/docker-entrypoint.sh
21 | RUN chmod +x /app/docker-entrypoint.sh
22 | 
23 | ENTRYPOINT ["/app/docker-entrypoint.sh"]
24 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/feature_request.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Feature request
3 | about: Suggest an idea for this project
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Is your feature request related to a problem? Please describe.**
11 | A clear and concise description of what the problem is. Ex. I'm always frustrated when [...]
12 | 
13 | **Describe the solution you'd like**
14 | A clear and concise description of what you want to happen.
15 | 
16 | **Describe alternatives you've considered**
17 | A clear and concise description of any alternative solutions or features you've considered.
18 | 
19 | **Additional context**
20 | Add any other context or screenshots about the feature request here.
21 | 
--------------------------------------------------------------------------------
/.github/ISSUE_TEMPLATE/bug_report.md:
--------------------------------------------------------------------------------
1 | ---
2 | name: Bug report
3 | about: Create a report to help us improve
4 | title: ''
5 | labels: ''
6 | assignees: ''
7 | 
8 | ---
9 | 
10 | **Describe the bug**
11 | A clear and concise description of what the bug is.
12 | 
13 | **To Reproduce**
14 | Steps to reproduce the behavior:
15 | 1. Go to '...'
16 | 2. Click on '....'
17 | 3. Scroll down to '....'
18 | 4. See error
19 | 
20 | **Expected behavior**
21 | A clear and concise description of what you expected to happen.
22 | 
23 | **Screenshots**
24 | If applicable, add screenshots to help explain your problem.
25 | 
26 | **Desktop (please complete the following information):**
27 |  - OS: [e.g. iOS]
28 |  - Version [e.g. 22]
29 | 
30 | **Additional context**
31 | Add any other context about the problem here.
32 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/demo.md:
--------------------------------------------------------------------------------
1 | # DPULSE demos & use-cases
2 | 
3 | On this page you can see actual DPULSE demos and use-cases in common cybersecurity and OSINT scenarios
4 | 
5 | ## Demo №1. Starting DPULSE
6 | 
7 | Yes, starting DPULSE is as simple as that. Don't forget to install all requirements before starting DPULSE
8 | 
9 | ![dpulse_start](https://github.com/user-attachments/assets/9ec0ab73-2206-4d38-bae6-e88656e17f95)
10 | 
11 | ## Demo №2. DPULSE basic scan workflow
12 | 
13 | Here you can see the complete process of a DPULSE basic scan from beginning to end. Remember that sometimes your scan might not be ideal and errors may appear. However, they will not interrupt the scanning process; they will only affect the number and variety of results in the final report.
14 | 
15 | ![dpulse_bs](https://github.com/user-attachments/assets/b0ad7827-6dac-4f82-a369-4447a0e1c878)
16 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/pagesearch.md:
--------------------------------------------------------------------------------
1 | # PageSearch mode
2 | 
3 | PageSearch is an extended deep-search function for subdomains which runs in addition to the basic scan. The user can choose whether to use PageSearch during the pre-scan preparation steps.
4 | 
5 | ## PageSearch results
6 | 
7 | PageSearch returns extended information about the subdomains found during the basic scan. The extended information includes the following:
8 | 
9 | 1. More e-mail addresses
10 | 2. API keys
11 | 3. Exposed passwords
12 | 4. Cookies
13 | 5. Hidden forms of data and other web page elements
14 | 6. Documents, config files, database files (and PageSearch can download them!)
15 | 7. User-specified words in PDF files
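16 | 
17 | To give a sense of what this extraction involves, below is a minimal, illustrative sketch of pulling e-mail addresses out of a page with requests and a regular expression. This is not DPULSE's exact implementation (the real logic lives in pagesearch_parsers.py and covers far more artifact types), just the general idea:
18 | 
19 | ```python
20 | import re
21 | import requests
22 | 
23 | EMAIL_RE = re.compile(r"[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}")
24 | 
25 | def extract_emails(url):
26 |     # fetch the page and scan its raw HTML for e-mail-like strings
27 |     response = requests.get(url, timeout=10)
28 |     return sorted(set(EMAIL_RE.findall(response.text)))
29 | 
30 | print(extract_emails("https://example.com"))
31 | ```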
32 | 
33 | PageSearch scan example (this example is not very representative because the scanned site is not a typical real-world domain):
34 | 
35 | ![pagesearch1](https://github.com/user-attachments/assets/ed91f37f-578f-462b-a464-5281dd06ba0c)
36 | 
--------------------------------------------------------------------------------
/service/logs_processing.py:
--------------------------------------------------------------------------------
1 | import logging
2 | from config_processing import read_config
3 | from colorama import Fore, Style
4 | 
5 | config_values = read_config()
6 | logging_level = (config_values['logging_level']).upper()
7 | 
8 | if logging_level == 'DEBUG':
9 |     level = logging.DEBUG
10 | elif logging_level == 'INFO':
11 |     level = logging.INFO
12 | elif logging_level == 'WARNING':
13 |     level = logging.WARNING
14 | elif logging_level == 'ERROR':
15 |     level = logging.ERROR
16 | elif logging_level == 'CRITICAL':
17 |     level = logging.CRITICAL
18 | else:
19 |     print(Fore.RED + "You've entered a wrong logging level in the config file. Please verify the supported modes and re-enter it" + Style.RESET_ALL)
20 |     print(Fore.RED + "Setting the logging level to DEBUG for this session" + Style.RESET_ALL)
21 |     level = logging.DEBUG
22 | 
23 | logging.basicConfig(filename="journal.log", level=level, format="%(asctime)s - %(levelname)s - %(message)s")
24 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2025 OSINT-TECHNOLOGIES
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/docker-entrypoint.sh:
--------------------------------------------------------------------------------
1 | #!/bin/sh
2 | set -e
3 | 
4 | if [ ! -d service ]; then
5 |     mkdir -p service
6 | fi
7 | 
8 | if [ ! -f service/config.ini ] && [ -f /app/service/config.ini ]; then
9 |     cp /app/service/config.ini service/config.ini
10 | fi
11 | 
12 | if [ ! -d dorking ]; then
13 |     mkdir -p dorking
14 | fi
15 | 
16 | if ls /app/dorking/*.db >/dev/null 2>&1; then
17 |     for dbfile in /app/dorking/*.db; do
18 |         dest="dorking/$(basename "$dbfile")"
19 |         if [ ! -f "$dest" ]; then
20 |             cp "$dbfile" "$dest"
21 |         fi
22 |     done
23 | fi
24 | 
25 | if [ !
-d apis ]; then 26 | mkdir -p apis 27 | fi 28 | 29 | if ls /app/apis/*.db >/dev/null 2>&1; then 30 | for dbfile in /app/apis/*.db; do 31 | dest="apis/$(basename "$dbfile")" 32 | if [ ! -f "$dest" ]; then 33 | cp "$dbfile" "$dest" 34 | fi 35 | done 36 | fi 37 | 38 | if [ -d /app/service/pdf_report_templates ]; then 39 | if [ ! -d service/pdf_report_templates ]; then 40 | mkdir -p service/pdf_report_templates 41 | fi 42 | 43 | for tmpl in /app/service/pdf_report_templates/*; do 44 | dest="service/pdf_report_templates/$(basename "$tmpl")" 45 | if [ ! -f "$dest" ]; then 46 | cp "$tmpl" "$dest" 47 | fi 48 | done 49 | fi 50 | 51 | exec python /app/dpulse.py 52 | -------------------------------------------------------------------------------- /report_examples/html_report_example/ps_documents/extracted_About%20the%20HackThisSite%20Mirror.txt: -------------------------------------------------------------------------------- 1 | -=- What is the HackThisSite Mirror? -=- 2 | 3 | HackThisSite may from time to time mirror things we think are deserving of our 4 | bandwidth and maintenance, simply because of a belief in the cause or a general 5 | liking of the data and its producers. 6 | 7 | 8 | -=- Will you mirror my data? -=- 9 | 10 | You are more than welcome to request mirroring by emailing us at 11 | admin - at - hackthissite - dot - org, but we will NOT guarantee anything! 12 | Our bandwidth and time is precious, and just because you think your data is 13 | well-deserving of our attention, does not mean we will think the same. 14 | 15 | 16 | -=- What do you generally mirror? -=- 17 | 18 | HackThisSite will mirror projects we are involved with, or have a vested 19 | interest in. For example, we are affiliated with Hackbloc, which produces 20 | the HackThisZine periodical. Due to this, we mirror their publications. 21 | Other such examples could someday include FreeBSD, nginx, Asterisk, UnrealIRC, 22 | among others, since these are all projects whose benefits we reap. 23 | 24 | 25 | -=- What are your mirror specifications? -=- 26 | 27 | mirror.hackthissite.org is hosted on a dedicated server in Europe with 28 | redundant drives and a 1 Gbps connection. 29 | -------------------------------------------------------------------------------- /report_examples/xlsx_report_example/ps_documents/extracted_About%20the%20HackThisSite%20Mirror.txt: -------------------------------------------------------------------------------- 1 | -=- What is the HackThisSite Mirror? -=- 2 | 3 | HackThisSite may from time to time mirror things we think are deserving of our 4 | bandwidth and maintenance, simply because of a belief in the cause or a general 5 | liking of the data and its producers. 6 | 7 | 8 | -=- Will you mirror my data? -=- 9 | 10 | You are more than welcome to request mirroring by emailing us at 11 | admin - at - hackthissite - dot - org, but we will NOT guarantee anything! 12 | Our bandwidth and time is precious, and just because you think your data is 13 | well-deserving of our attention, does not mean we will think the same. 14 | 15 | 16 | -=- What do you generally mirror? -=- 17 | 18 | HackThisSite will mirror projects we are involved with, or have a vested 19 | interest in. For example, we are affiliated with Hackbloc, which produces 20 | the HackThisZine periodical. Due to this, we mirror their publications. 21 | Other such examples could someday include FreeBSD, nginx, Asterisk, UnrealIRC, 22 | among others, since these are all projects whose benefits we reap. 
23 | 
24 | 
25 | -=- What are your mirror specifications? -=-
26 | 
27 | mirror.hackthissite.org is hosted on a dedicated server in Europe with
28 | redundant drives and a 1 Gbps connection.
29 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/basic_scan.md:
--------------------------------------------------------------------------------
1 | # Basic scan
2 | 
3 | Basic scan is the default and most fundamental scanning mode, and it runs after all preparation steps. It always starts first, and you can't start scanning without it.
4 | 
5 | ## Basic scan results
6 | 
7 | Basic scan returns general open-source information about the target domain, such as:
8 | 
9 | 1. WHOIS information, which contains the domain name, full URL, IP address, registrar info, creation and expiration dates, organization name and contact e-mails.
10 | 2. Subdomains list
11 | 3. E-mail addresses list (gathered from subdomains)
12 | 4. IP addresses list (gathered from subdomains)
13 | 5. Social media links, posts and profiles. Supported social media are Facebook, Twitter (X.com), Instagram, Telegram, TikTok, LinkedIn, VKontakte, YouTube, Odnoklassniki, WeChat
14 | 6. DNS and SSL information. DNS information contains name servers and MX addresses. SSL certificate information contains issuer, subject, creation and expiration dates, certificate name and serial number.
15 | 7. Basic pre-pentest information such as possible vulnerabilities (CVEs), open ports and hostnames.
16 | 8. Development and deployment services and frameworks information, such as CMS, web servers, used programming languages and web frameworks, analytics services, tags and so on.
17 | 9. Downloaded copies of sitemap.xml and robots.txt files from the domain
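18 | 
19 | Under the hood, data like WHOIS records and name servers comes from the libraries listed in requirements.txt, such as python-whois and dnspython. As a rough, illustrative sketch of such lookups (not DPULSE's exact code):
20 | 
21 | ```python
22 | import whois          # python-whois
23 | import dns.resolver   # dnspython
24 | 
25 | domain = "example.com"
26 | info = whois.whois(domain)  # registrar, creation/expiration dates, contact e-mails
27 | print(info.registrar, info.creation_date)
28 | for record in dns.resolver.resolve(domain, "NS"):  # name servers
29 |     print(record.target)
30 | ```
31 | 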
--------------------------------------------------------------------------------
/pyproject.toml:
--------------------------------------------------------------------------------
1 | [build-system]
2 | requires = ["poetry-core"]
3 | build-backend = "poetry.core.masonry.api"
4 | 
5 | [tool.poetry]
6 | name = "dpulse"
7 | version = "1.4"
8 | description = "Convenient, fast and user-friendly collector of domain information from Open-Sources"
9 | authors = ["OSINT-TECHNOLOGIES <osint.technologies@gmail.com>"]
10 | readme = "README.md"
11 | license = "MIT"
12 | homepage = "https://github.com/OSINT-TECHNOLOGIES/dpulse"
13 | repository = "https://github.com/OSINT-TECHNOLOGIES/dpulse"
14 | classifiers = [
15 |     "Development Status :: 5 - Production/Stable",
16 |     "Programming Language :: Python :: 3",
17 |     "Intended Audience :: Information Technology",
18 |     "Operating System :: OS Independent",
19 |     "License :: OSI Approved :: MIT License",
20 |     "Natural Language :: English"
21 | ]
22 | 
23 | [tool.poetry.dependencies]
24 | python = ">=3.10,<4.0"
25 | Jinja2 = "^3.1.6"
26 | beautifulsoup4 = "^4.12.2"
27 | requests = "^2.32.4"
28 | python-whois = "^0.9.4"
29 | colorama = "^0.4.6"
30 | pyfiglet = "^1.0.2"
31 | rich = "^13.7.1"
32 | MechanicalSoup = "^1.3.0"
33 | builtwith = "^1.3.4"
34 | dnspython = "^2.6.1"
35 | PyMuPDF = "^1.26.6"
36 | selenium = "^4.28.1"
37 | webdriver-manager = "^4.0.2"
38 | undetected_chromedriver = "^3.5.5"
39 | 
40 | [tool.poetry.scripts]
41 | dpulse = 'dpulse.dpulse:run'
42 | 
43 | [tool.poetry.group.dev.dependencies]
44 | deptry = "^0.23.0"
45 | 
--------------------------------------------------------------------------------
/.github/workflows/python-package.yml:
--------------------------------------------------------------------------------
1 | # This workflow will install Python dependencies, run tests and lint with a variety of Python versions
2 | # For more information see: https://docs.github.com/en/actions/automating-builds-and-tests/building-and-testing-python
3 | 
4 | name: Python package
5 | 
6 | on:
7 |   push:
8 |     branches: [ "main" ]
9 |   pull_request:
10 |     branches: [ "main" ]
11 | 
12 | jobs:
13 |   build:
14 | 
15 |     runs-on: ubuntu-latest
16 |     strategy:
17 |       fail-fast: false
18 |       matrix:
19 |         python-version: ["3.9", "3.10", "3.11", "3.12"]
20 | 
21 |     steps:
22 |     - uses: actions/checkout@v4
23 |     - name: Set up Python ${{ matrix.python-version }}
24 |       uses: actions/setup-python@v3
25 |       with:
26 |         python-version: ${{ matrix.python-version }}
27 |     - name: Install dependencies
28 |       run: |
29 |         python -m pip install --upgrade pip
30 |         python -m pip install flake8 pytest
31 |         if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
32 |     - name: Lint with flake8
33 |       run: |
34 |         # stop the build if there are Python syntax errors or undefined names
35 |         flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
36 |         # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
37 |         flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
38 |     - name: Test with pytest
39 |       run: |
40 |         pytest
41 | 
--------------------------------------------------------------------------------
/service/files_processing.py:
--------------------------------------------------------------------------------
1 | import os
2 | 
3 | def get_blob(file):
4 |     with open(file, 'rb') as report_file:
5 |         blob_data = report_file.read()
6 |     return blob_data
7 | 
8 | def find_files(filename):
9 |     root_directory = os.getcwd()
10 |     for root, dirs, files in os.walk(root_directory):
11 |         if filename in files:
12 |             return os.path.join(root, filename)
13 |     return None
14 | 
15 | def get_db_columns(report_folder):
16 |     # each artifact file is optional: fall back to a placeholder if it is missing or unreadable
17 |     try:
18 |         with open(report_folder + "//" + '01-robots.txt', 'r') as robots_file:
19 |             robots_content = robots_file.read()
20 |     except OSError:
21 |         robots_content = 'robots.txt: empty'
22 |     try:
23 |         with open(report_folder + "//" + '02-sitemap.txt', 'r') as sitemap_xml:
24 |             sitemap_content = sitemap_xml.read()
25 |     except OSError:
26 |         sitemap_content = 'sitemap.xml: empty'
27 |     try:
28 |         with open(report_folder + "//" + '03-sitemap_links.txt', 'r') as sitemap_links:
29 |             sitemap_links_content = sitemap_links.read()
30 |     except OSError:
31 |         sitemap_links_content = 'Sitemap links: empty'
32 |     try:
33 |         with open(report_folder + "//" + '04-dorking_results.txt', 'r') as dorking_file:
34 |             dorking_content = dorking_file.read()
35 |     except OSError:
36 |         dorking_content = 'Dorking content: empty'
37 |     return robots_content, sitemap_content, sitemap_links_content, dorking_content
38 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/logging.md:
--------------------------------------------------------------------------------
1 | # About logging
2 | 
3 | Logging is a way to record events and messages that occur during the execution of a program, which helps in debugging and monitoring the application's behavior. Since DPULSE is written in Python, it uses the built-in ***logging*** module.
4 | 
5 | ## Levels of logging
6 | 
7 | There are five built-in log message levels supported in DPULSE:
8 | 
9 | - Debug (used to give detailed information, typically of interest only when diagnosing problems)
10 | - Info (used to confirm that things are working as expected)
11 | - Warning (used as an indication that something unexpected happened, or is indicative of some problem in the near future)
12 | - Error (tells that due to a more serious problem, the software has not been able to perform some function)
13 | - Critical (tells about a serious error, indicating that the program itself may be unable to continue running)
14 | 
15 | You can use these levels according to your needs by editing the configuration file. You can read more about changing configuration parameters in the "Configuration file" paragraph.
16 | 
17 | ## How it looks in practice
18 | 
19 | In DPULSE, the logging file (journal.log) is first created on the first DPULSE start, and the first strings in the log file appear with the first scan. A standard string in this file contains the date (YYYY-MM-DD format), time (HH:MM:SS,MS format), logging level, process name and its status (additionally, the string contains the full error if the status was bad). DPULSE also separates log info for each scan with STARTS HERE and ENDS HERE lines. The content of the log file looks like this:
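20 | 
21 | For illustration, here are a few hypothetical journal.log lines in the format configured in logs_processing.py ("%(asctime)s - %(levelname)s - %(message)s"); the exact process names and statuses depend on the scan:
22 | 
23 | ```
24 | 2024-12-28 09:33:02,153 - INFO - hackthissite.org SCAN STARTS HERE
25 | 2024-12-28 09:33:05,410 - INFO - WHOIS gathering: OK
26 | 2024-12-28 09:33:09,872 - ERROR - sitemap.xml extraction: FAIL. REASON: 404 Client Error
27 | 2024-12-28 09:33:41,006 - INFO - hackthissite.org SCAN ENDS HERE
28 | ```
29 | 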
30 | ![logging](https://github.com/user-attachments/assets/50acae24-f024-4793-8b45-9d7e284329a6)
31 | 
32 | ![logging2](https://github.com/user-attachments/assets/b27f8a93-115d-49ad-bf1b-c7f72613de9d)
33 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/snapshotting.md:
--------------------------------------------------------------------------------
1 | # Snapshotting and screenshotting
2 | 
3 | A website snapshot is a representation of a website at a specific point in time. Unlike a visual representation, a snapshot encapsulates the user interface elements, allowing you to open and navigate the website online or offline at a later date. Screenshots, on the other hand, lack this capacity for interactive navigation and are limited to visual inspection alone. In other words, it's a capture of a device's point of view at a specific moment. DPULSE supports both of these methods to provide full capability for capturing a target's contents. You will be prompted to select the snapshotting mode during the pre-scan interview:
4 | 
5 | ![snap](https://github.com/user-attachments/assets/c24d297d-d52e-45e1-9770-97229abcc2ce)
6 | 
7 | ## Screenshotting
8 | 
9 | Screenshotting, as the name says, is basically the process of taking a screenshot of the domain page. It uses the selenium library and its headless browser mode in order to take the screenshot. It is crucial to configure its parameters correctly (read the "Configuration file" paragraph, "Config file content" section). After the scan ends, you will find a screenshot of the domain's main page in the scan folder.
10 | 
11 | ## Snapshotting: Web copy and Wayback Archive
12 | 
13 | There are two ways to make a snapshot of a target domain using DPULSE. The first way is a common snapshot: it saves the web page's copy as an HTML file, so it is fully interactive and contains all web elements like HTML code, DOM structure and so on. The second way is a Wayback Archive snapshot. It uses the Wayback Machine API in order to get all copies of a website within a certain period of time specified by the user, as shown below:
14 | 
15 | ![snap1](https://github.com/user-attachments/assets/dd82a133-95a8-4fa4-9dc7-ed18d2768d16)
16 | 
17 | After the scan ends, you will find snapshots of the domain's main page in the scan folder (in the case of a Wayback snapshot, there'll be an additional folder to store all snapshots).
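18 | 
19 | For reference, the Wayback lookup boils down to a single request to the CDX API. A trimmed sketch of what archive_snapshotting.py does under the hood:
20 | 
21 | ```python
22 | import requests
23 | 
24 | CDX_API = "http://web.archive.org/cdx/search/cdx"
25 | 
26 | # list unique, successfully archived captures of a page for a given period
27 | params = {"url": "example.com", "from": "20240101", "to": "20241231",
28 |           "output": "json", "fl": "timestamp,original,mime",
29 |           "filter": "statuscode:200", "collapse": "digest"}
30 | snapshots = requests.get(CDX_API, params=params, timeout=15).json()[1:]  # row 0 is the header
31 | ```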
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/index.md:
--------------------------------------------------------------------------------
1 | # Welcome to DPULSE documentation
2 | 
3 | Convenient, fast and user-friendly collector of domain information from open-sources
4 | 
5 | ## What is DPULSE
6 | 
7 | DPULSE is a software solution for conducting OSINT research in relation to a certain domain. In general, it provides you with a certain set of functions, such as:
8 | 
9 | 1. ***Basic scan:*** extracts general information about the domain such as WHOIS information, subdomains, e-mail addresses, IP addresses, social media links/posts/profiles, SSL certificate info, possible vulnerabilities, open ports, CPEs, used web technologies and so on. It can also download sitemap.xml and robots.txt files from the domain
10 | 
11 | 2. ***PageSearch scan:*** extended deep-search function for subdomains, which runs in addition to the basic scan and which can find more e-mail addresses, API keys, exposed passwords, cookies, hidden forms of data and other web page elements, documents, config files, database files (and PageSearch can download them!), and user-specified words in PDF files
12 | 
13 | 3. ***Dorking scan:*** extended domain research function with prepared Google Dorking databases for different purposes, such as IoT dorking, files dorking, admin panels dorking, web elements dorking. Moreover, this mode allows you to create your own custom Google Dorking database
14 | 
15 | 4. ***API scan:*** extended domain research function with prepared functions for 3rd party API usage. Currently DPULSE supports the VirusTotal API (for brief domain information gathering), the SecurityTrails API (deep subdomains and DNS enumeration) and the keyless HudsonRock API (info-stealer compromise checks)
16 | 
17 | 5. ***Snapshotting:*** extended domain research function which copies the domain's home page content in various ways: by screenshotting it (screenshot snapshot), by HTML copying (HTML snapshot) and by finding and downloading previous versions (Wayback Machine snapshot)
18 | 
19 | Finally, DPULSE compiles all found data into an easy-to-read HTML or XLSX report by category. It also saves all information about the scan in a local report storage database, from which it can be restored later.
20 | 
--------------------------------------------------------------------------------
/dorking/proxies_rotator.py:
--------------------------------------------------------------------------------
1 | import random
2 | import sys
3 | sys.path.append('service')
4 | from config_processing import read_config
5 | from colorama import Fore, Style
6 | import requests
7 | from requests.exceptions import ProxyError, ConnectionError, Timeout
8 | 
9 | class ProxiesRotator:
10 |     def __init__(self):
11 |         config_values = read_config()
12 |         self.proxy_file_path = str(config_values['proxies_file_path'])
13 | 
14 |     def check_proxies(self, proxies_list):
15 |         working_proxies = []
16 |         print(Fore.GREEN + f'Checking {len(proxies_list)} proxies, please wait...' + Style.RESET_ALL)
17 |         for proxy in proxies_list:
18 |             proxies = {
19 |                 "http": proxy
20 |             }
21 |             try:
22 |                 response = requests.get('https://google.com', proxies=proxies, timeout=5)
23 |                 if response.status_code == 200:
24 |                     working_proxies.append(proxy)
25 |                     #print(Fore.GREEN + f"Proxy {proxy} is working" + Style.RESET_ALL)
26 |                 else:
27 |                     pass
28 |                     #print(Fore.GREEN + f"Proxy {proxy} returned status code {response.status_code}" + Style.RESET_ALL)
29 |             except (ProxyError, ConnectionError, Timeout):
30 |                 pass
31 |                 #print(Fore.GREEN + f"Proxy {proxy} is not working" + Style.RESET_ALL)
32 |         print(Fore.GREEN + f'Found {len(working_proxies)} working proxies' + Style.RESET_ALL)
33 |         return working_proxies
34 | 
35 |     def get_proxies(self):
36 |         if self.proxy_file_path == 'NONE':
37 |             print(Fore.RED + "Path to the proxies file was not set in the config file. Proxification of Google Dorking won't be applied\n" + Style.RESET_ALL)
38 |             return 0, ""
39 |         else:
40 |             with open(self.proxy_file_path, 'r') as f:
41 |                 print(Fore.GREEN + 'Found a path to get proxies from. Continuing' + Style.RESET_ALL)
42 |                 proxies_list = [proxy.strip() for proxy in f]
43 |             return 1, proxies_list
44 | 
45 |     def get_random_proxy(self, proxies_list):
46 |         proxy = random.choice(proxies_list)  # pick once so the printed and returned proxy match
47 |         print(Fore.GREEN + "Set proxy to " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{proxy}" + Style.RESET_ALL)
48 |         return proxy
49 | 
50 | proxies_rotator = ProxiesRotator()
51 | 
--------------------------------------------------------------------------------
/dorking/db_creator.py:
--------------------------------------------------------------------------------
1 | import sqlite3
2 | from colorama import Fore
3 | import os
4 | 
5 | def manage_dorks(db_name):
6 |     db_prep_string = str(db_name) + '.db'
7 |     if os.path.exists('dorking//' + db_prep_string):
8 |         print(Fore.RED + f"Sorry, but the {db_prep_string} database already exists. Choose another name for your custom DB")
9 |     else:
10 |         conn = sqlite3.connect('dorking//' + str(db_prep_string))
11 |         cursor = conn.cursor()
12 | 
13 |         cursor.execute('''
14 |         CREATE TABLE IF NOT EXISTS dorks (
15 |             dork_id INTEGER PRIMARY KEY,
16 |             dork TEXT NOT NULL
17 |         )
18 |         ''')
19 |         conn.commit()
20 | 
21 |         def add_dork(dork_id, dork):
22 |             try:
23 |                 cursor.execute('INSERT INTO dorks (dork_id, dork) VALUES (?, ?)', (dork_id, dork))
24 |                 conn.commit()
25 |                 print(Fore.GREEN + "Successfully added new dork")
26 |             except sqlite3.IntegrityError:
27 |                 print(Fore.RED + "Attention, the dork_id variable must be unique")
28 | 
29 |         while True:
30 |             dork_id = input(Fore.YELLOW + "Enter dork_id (or 'q' to quit this mode and save changes) >> ")
31 |             if dork_id.lower() == 'q':
32 |                 break
33 |             dork = input(Fore.YELLOW + "Enter new dork >> ")
34 |             add_dork(int(dork_id), dork)
35 |         conn.close()
36 | 
37 | def get_dorking_query(short_domain, dorking_db_path, table):
38 |     print(Fore.GREEN + "Getting dorking query from database")
39 |     try:
40 |         conn = sqlite3.connect(dorking_db_path)
41 |         cursor = conn.cursor()
42 |         cursor.execute(f"SELECT dork FROM {table}")
43 |         rows = cursor.fetchall()
44 |         search_query = [row[0].format(short_domain) for row in rows]
45 |         conn.close()
46 |         return search_query
47 |     except Exception as e:
48 |         print(Fore.RED + f"Error getting dorking query: {e}")
49 |         return []
50 | 
51 | def get_columns_amount(dorking_db_path, table):
52 |     # returns the number of dork rows stored in the given table
53 |     conn = None
54 |     try:
55 |         conn = sqlite3.connect(dorking_db_path)
56 |         cursor = conn.cursor()
57 |         cursor.execute(f"SELECT COUNT(*) FROM {table}")
58 |         row_count = cursor.fetchone()[0]
59 |     except Exception as e:
60 |         print(f"Error getting row count: {e}")
61 |         return None
62 |     finally:
63 |         if conn:
64 |             conn.close()
65 |     return row_count
66 | 
--------------------------------------------------------------------------------
/snapshotting/screen_snapshotting.py:
--------------------------------------------------------------------------------
1 | from selenium import webdriver
2 | from selenium.webdriver.chrome.service import Service as ChromeService
3 | from selenium.webdriver.firefox.service import Service as FirefoxService
4 | from selenium.webdriver.edge.service import Service as EdgeService
5 | from webdriver_manager.chrome import ChromeDriverManager
6 | from webdriver_manager.firefox import GeckoDriverManager
7 | from webdriver_manager.microsoft import EdgeChromiumDriverManager
8 | from selenium.webdriver.chrome.options import Options as ChromeOptions
9 | from selenium.webdriver.firefox.options import Options as FirefoxOptions
10 | from selenium.webdriver.edge.options import Options as EdgeOptions
11 | from colorama import Fore, Style
12 | import sys
13 | sys.path.append('snapshotting')
14 | 
15 | def setup_driver(browser_name):
16 |     if browser_name == "chrome":
17 |         service = ChromeService(ChromeDriverManager().install())
18 |         options = ChromeOptions()
19 |         options.add_argument('--headless=new')
20 |         driver = webdriver.Chrome(service=service, options=options)
21 | 
22 |     elif browser_name == "firefox":
23 |         service = FirefoxService(GeckoDriverManager().install())
24 |         options = FirefoxOptions()
25 |         options.add_argument('-headless')
26 |         driver = webdriver.Firefox(service=service, options=options)
27 | 
28 |     elif browser_name == "edge":
29 |         service = EdgeService(EdgeChromiumDriverManager().install())
30 |         options = EdgeOptions()
31 |         options.add_argument('--headless=new')
32 |         driver = webdriver.Edge(service=service, options=options)
33 | 
34 |     elif browser_name == "safari":
35 |         options = webdriver.SafariOptions()
36 |         driver = webdriver.Safari(options=options)
37 | 
38 |     elif browser_name == "opera":
39 |         from config_processing import read_config
40 |         config_values = read_config()
41 |         service = ChromeService(ChromeDriverManager().install())
42 |         options = ChromeOptions()
43 |         options.add_argument('--headless=new')
44 |         options.binary_location = config_values['opera_browser_path']
45 |         driver = webdriver.Chrome(service=service, options=options)
46 |     else:
47 |         raise ValueError("Unsupported browser")
48 |     driver.set_window_size(1920, 1080)
49 |     return driver
50 | 
51 | def take_screenshot(browser_name, url, screenshot_path):
52 |     try:
53 |         print(Fore.GREEN + f"Starting {browser_name} browser in headless mode..." + Style.RESET_ALL)
54 |         driver = setup_driver(browser_name)
55 |         print(Fore.GREEN + f"Going to {url}" + Style.RESET_ALL)
56 |         driver.get(url)
57 |         print(Fore.GREEN + "Taking screenshot..." + Style.RESET_ALL)
58 |         driver.save_screenshot(screenshot_path)
59 |         driver.quit()
60 |         print(Fore.GREEN + "Screenshot successfully saved in report folder" + Style.RESET_ALL)
61 |     except Exception as e:
62 |         print(Fore.RED + f"Error appeared: {str(e)}" + Style.RESET_ALL)
63 |         if 'driver' in locals():
64 |             driver.quit()
65 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/api.md:
--------------------------------------------------------------------------------
1 | # Third-party API scan mode
2 | 
3 | Currently DPULSE supports three third-party APIs:
4 | 
5 | * SecurityTrails API (securitytrails.com) for deep subdomains and DNS enumeration (this API requires a key)
6 | * VirusTotal API (virustotal.com) for brief domain information gathering (this API requires a key)
7 | * HudsonRock API (hudsonrock.com) for querying a domain through a database of over 30,821,440 computers which were compromised through global info-stealer campaigns performed by threat actors (this API does not require a key)
8 | 
9 | ## SecurityTrails API (key required)
10 | 
11 | SecurityTrails API is used to gather information about a specified domain. It retrieves various types of DNS records, subdomains, and other details. SecurityTrails API in DPULSE returns these details about the target domain:
12 | 
13 | * Alexa rank
14 | * Apex domain
15 | * Hostname
16 | * A/MX/NS/SOA/TXT records
17 | * All subdomains list
18 | * Alive (pingable) subdomains list
19 | 
20 | ## VirusTotal API (key required)
21 | 
22 | VirusTotal API is used to interact with the VirusTotal service programmatically and analyze files and URLs using multiple antivirus engines and website scanners, providing insights into whether they are malicious. VirusTotal API in DPULSE returns these details about the target domain:
23 | 
24 | * Categories
25 | * Detected samples
26 | * Undetected samples
27 | * Detected URLs
43 | 
44 | ## HudsonRock API (no key required)
45 | 
46 | HudsonRock Cavalier API is based on forensic technologies and operational knowhow developed at the IDF's 8200 Unit to counter nation-state adversaries and professional threat-actors. It is a unique cybercrime intelligence data source composed of millions of machines compromised in global malware spreading campaigns.
47 | 
48 | ## API Keys database
49 | 
50 | In order to ensure the functioning of API services individually for each DPULSE user, an API keys storage database was created. Similar to the report storage database, it is a lightweight SQLite (.db) database with the simple structure shown below:
51 | 
52 | ![apistordb](https://github.com/user-attachments/assets/02233813-781e-4bf8-be7c-76ec7627be06)
53 | 
54 | Since every API key is individual for each user, you will see fillers instead of actual keys when you start DPULSE for the first time; until you replace a filler with a real API key, you can't start using that API in scans. You can enter your actual API keys using the DPULSE CLI. You can see the full process on the screenshot below:
55 | 
56 | ![apiproc](https://github.com/user-attachments/assets/effb27ab-dd4b-4470-b90c-34c6f9a43d8c)
57 | 
58 | At first you will see a red-colored API key field, which means that scanning is not available with this API. After changing the filler for an actual key, you will see that the color has changed, which indicates that you can use your API key for scanning. Be advised that every free API service comes with some limitations (you can see them in the DPULSE CLI for all supported APIs), so keep in mind that frequent usage of free API plans is not possible.
59 | 
60 | If you want to fully replace the API keys database, you can use the reference API keys database. You can see the menu point for this action on the screenshot above. This action will delete your actual API keys database, copy the reference database and rename it. This action is very optional because you can change your API keys by just using the first menu point in the API Keys DB Manager (according to the screenshot above)
61 | 
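62 | Under the hood this is a plain SQLite table (api_virustotal.py reads it with "SELECT api_name, api_key FROM api_keys"), so, assuming that schema, a key can also be updated manually with a few lines of Python:
63 | 
64 | ```python
65 | import sqlite3
66 | 
67 | # replace the VirusTotal filler with a real key (schema assumed from apis/api_virustotal.py)
68 | conn = sqlite3.connect('apis//api_keys.db')
69 | conn.execute("UPDATE api_keys SET api_key = ? WHERE api_name = ?", ("YOUR_REAL_KEY", "VirusTotal"))
70 | conn.commit()
71 | conn.close()
72 | ```
73 | 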
--------------------------------------------------------------------------------
/snapshotting/archive_snapshotting.py:
--------------------------------------------------------------------------------
1 | import requests
2 | import os
3 | import time
4 | from colorama import Fore, Style
5 | import sys
6 | sys.path.append('service')  # must run before importing from the service package
7 | from config_processing import read_config
8 | 
9 | CDX_API = "http://web.archive.org/cdx/search/cdx"
10 | 
11 | def get_values_from_config():
12 |     config_values = read_config()
13 |     retries = int(config_values['wayback_retries_amount'])
14 |     pause_between_requests = int(config_values['wayback_requests_pause'])
15 |     return retries, pause_between_requests
16 | 
17 | def get_snapshots(url, from_date, to_date):
18 |     params = {
19 |         "url": url,
20 |         "from": from_date,
21 |         "to": to_date,
22 |         "output": "json",
23 |         "fl": "timestamp,original,mime",
24 |         "filter": "statuscode:200",
25 |         "collapse": "digest"
26 |     }
27 |     print(Fore.GREEN + f"Sending request to Wayback CDX API for {url}, period: {from_date} - {to_date}..." + Style.RESET_ALL)
28 |     response = requests.get(CDX_API, params=params, timeout=30)
29 |     response.raise_for_status()
30 |     data = response.json()
31 |     return data[1:]
32 | 
33 | def snapshot_enum(snapshot_storage_folder, timestamp, original_url, index):
34 |     retries, _ = get_values_from_config()
35 |     archive_url = f"https://web.archive.org/web/{timestamp}id_/{original_url}"
36 |     for attempt in range(1, retries + 1):
37 |         try:
38 |             response = requests.get(archive_url, timeout=15)
39 |             response.raise_for_status()
40 |             filename = f"{index}_{timestamp}.html"
41 |             filepath = os.path.join(snapshot_storage_folder, filename)
42 |             with open(filepath, "w", encoding="utf-8") as f:
43 |                 f.write(response.text)
44 |             print(Fore.GREEN + f"[{index}] Downloaded: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{archive_url}" + Style.RESET_ALL)
45 |             return True
46 |         except Exception as e:
47 |             print(Fore.RED + f"[{index}] Attempt {attempt}/{retries} failed for {archive_url}: {e}. Retrying..." + Style.RESET_ALL)
48 |             time.sleep(2)
49 |     print(Fore.RED + f"[{index}] Failed to download after {retries} attempts: {archive_url}" + Style.RESET_ALL)
50 |     return False
51 | 
52 | def download_snapshot(short_domain, from_date, end_date, report_folder):
53 |     _, pause_between_requests = get_values_from_config()
54 |     snapshot_storage_folder = report_folder + '//wayback_snapshots'
55 |     os.makedirs(snapshot_storage_folder, exist_ok=True)
56 |     snapshots = get_snapshots(short_domain, from_date, end_date)
57 |     print(Fore.GREEN + "Total snapshots found:" + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f" {len(snapshots)}" + Style.RESET_ALL)
58 |     html_snapshots = [
59 |         s for s in snapshots
60 |         if len(s) >= 2 and (
61 |             s[1].endswith(".html") or s[1].endswith("/") or s[1] == short_domain)
62 |     ]
63 |     print(Fore.GREEN + "HTML snapshots to download:" + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f" {len(html_snapshots)}\n" + Style.RESET_ALL)
64 |     if not html_snapshots:
65 |         print(Fore.RED + "No HTML snapshots available for download." + Style.RESET_ALL)
66 |         return
67 |     for i, (timestamp, original_url, *_) in enumerate(html_snapshots):
68 |         snapshot_enum(snapshot_storage_folder, timestamp, original_url, i+1)
69 |         time.sleep(pause_between_requests)
70 |     print(Fore.GREEN + "\nFinished downloading HTML snapshots" + Style.RESET_ALL)
71 | 
--------------------------------------------------------------------------------
/.github/workflows/codeql.yml:
--------------------------------------------------------------------------------
1 | # For most projects, this workflow file will not need changing; you simply need
2 | # to commit it to your repository.
3 | #
4 | # You may wish to alter this file to override the set of languages analyzed,
5 | # or to provide custom queries or build logic.
6 | #
7 | # ******** NOTE ********
8 | # We have attempted to detect the languages in your repository. Please check
9 | # the `language` matrix defined below to confirm you have the correct set of
10 | # supported CodeQL languages.
11 | #
12 | name: "CodeQL"
13 | 
14 | on:
15 |   push:
16 |     branches: [ "main" ]
17 |   pull_request:
18 |     branches: [ "main" ]
19 |   schedule:
20 |     - cron: '21 1 * * 0'
21 | 
22 | jobs:
23 |   analyze:
24 |     name: Analyze (${{ matrix.language }})
25 |     # Runner size impacts CodeQL analysis time. To learn more, please see:
26 |     #   - https://gh.io/recommended-hardware-resources-for-running-codeql
27 |     #   - https://gh.io/supported-runners-and-hardware-resources
28 |     #   - https://gh.io/using-larger-runners (GitHub.com only)
29 |     # Consider using larger runners or machines with greater resources for possible analysis time improvements.
30 | runs-on: ${{ (matrix.language == 'swift' && 'macos-latest') || 'ubuntu-latest' }} 31 | timeout-minutes: ${{ (matrix.language == 'swift' && 120) || 360 }} 32 | permissions: 33 | # required for all workflows 34 | security-events: write 35 | 36 | # required to fetch internal or private CodeQL packs 37 | packages: read 38 | 39 | # only required for workflows in private repositories 40 | actions: read 41 | contents: read 42 | 43 | strategy: 44 | fail-fast: false 45 | matrix: 46 | include: 47 | - language: python 48 | build-mode: none 49 | # CodeQL supports the following values keywords for 'language': 'c-cpp', 'csharp', 'go', 'java-kotlin', 'javascript-typescript', 'python', 'ruby', 'swift' 50 | # Use `c-cpp` to analyze code written in C, C++ or both 51 | # Use 'java-kotlin' to analyze code written in Java, Kotlin or both 52 | # Use 'javascript-typescript' to analyze code written in JavaScript, TypeScript or both 53 | # To learn more about changing the languages that are analyzed or customizing the build mode for your analysis, 54 | # see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/customizing-your-advanced-setup-for-code-scanning. 55 | # If you are analyzing a compiled language, you can modify the 'build-mode' for that language to customize how 56 | # your codebase is analyzed, see https://docs.github.com/en/code-security/code-scanning/creating-an-advanced-setup-for-code-scanning/codeql-code-scanning-for-compiled-languages 57 | steps: 58 | - name: Checkout repository 59 | uses: actions/checkout@v4 60 | 61 | # Initializes the CodeQL tools for scanning. 62 | - name: Initialize CodeQL 63 | uses: github/codeql-action/init@v3 64 | with: 65 | languages: ${{ matrix.language }} 66 | build-mode: ${{ matrix.build-mode }} 67 | # If you wish to specify custom queries, you can do so here or in a config file. 68 | # By default, queries listed here will override any specified in a config file. 69 | # Prefix the list here with "+" to use these queries and those in the config file. 70 | 71 | # For more details on CodeQL's query packs, refer to: https://docs.github.com/en/code-security/code-scanning/automatically-scanning-your-code-for-vulnerabilities-and-errors/configuring-code-scanning#using-queries-in-ql-packs 72 | # queries: security-extended,security-and-quality 73 | 74 | # If the analyze step fails for one of the languages you are analyzing with 75 | # "We were unable to automatically build your code", modify the matrix above 76 | # to set the build mode to "manual" for that language. Then modify this step 77 | # to build your code. 78 | # ℹ️ Command-line programs to run using the OS shell. 
79 | # 📚 See https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#jobsjob_idstepsrun 80 | - if: matrix.build-mode == 'manual' 81 | shell: bash 82 | run: | 83 | echo 'If you are using a "manual" build mode for one or more of the' \ 84 | 'languages you are analyzing, replace this with the commands to build' \ 85 | 'your code, for example:' 86 | echo ' make bootstrap' 87 | echo ' make release' 88 | exit 1 89 | 90 | - name: Perform CodeQL Analysis 91 | uses: github/codeql-action/analyze@v3 92 | with: 93 | category: "/language:${{matrix.language}}" 94 | -------------------------------------------------------------------------------- /apis/api_virustotal.py: -------------------------------------------------------------------------------- 1 | from datetime import datetime 2 | import requests 3 | import sqlite3 4 | from colorama import Fore, Style 5 | import re 6 | 7 | def virustotal_html_prep(formatted_output): 8 | formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) 9 | start_marker = "=== VIRUSTOTAL API REPORT ===" 10 | end_marker = "[+] Domain Information:" 11 | start_index = formatted_output.find(start_marker) 12 | end_index = formatted_output.find(end_marker) 13 | if start_index != -1 and end_index != -1: 14 | formatted_output = formatted_output[:start_index] + formatted_output[end_index:] 15 | return formatted_output 16 | 17 | def check_domain(domain, api_key): 18 | api_key = api_key.strip() 19 | api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) 20 | 21 | url = f"https://www.virustotal.com/api/v3/domains/{domain}" 22 | headers = { 23 | "x-apikey": api_key 24 | } 25 | response = requests.get(url, headers=headers) 26 | 27 | try: 28 | result = response.json() 29 | formatted_output = Fore.LIGHTBLUE_EX + "\n=== VIRUSTOTAL API REPORT ===\n" + Style.RESET_ALL 30 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Domain Information:{Style.RESET_ALL}\n" 31 | formatted_output += f"{Fore.GREEN}Domain:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{domain}{Style.RESET_ALL}\n" 32 | formatted_output += f"{Fore.GREEN}Creation Date:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{datetime.fromtimestamp(result['data']['attributes']['creation_date']).strftime('%Y-%m-%d')}{Style.RESET_ALL}\n" 33 | formatted_output += f"{Fore.GREEN}Last Update:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{datetime.fromtimestamp(result['data']['attributes']['last_update_date']).strftime('%Y-%m-%d')}{Style.RESET_ALL}\n" 34 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] DNS Records:{Style.RESET_ALL}\n" 35 | for record in result['data']['attributes']['last_dns_records']: 36 | formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['type']:<6}{Style.RESET_ALL} " 37 | formatted_output += f"{Fore.GREEN}TTL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['ttl']:<6}{Style.RESET_ALL} " 38 | formatted_output += f"{Fore.GREEN}Value:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{record['value']}{Style.RESET_ALL}\n" 39 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Categories:{Style.RESET_ALL}\n" 40 | for vendor, category in result['data']['attributes']['categories'].items(): 41 | formatted_output += f"{Fore.GREEN}{vendor:<25}:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{category}{Style.RESET_ALL}\n" 42 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Analysis Stats:{Style.RESET_ALL}\n" 43 | stats = result['data']['attributes']['last_analysis_stats'] 44 | formatted_output += f"{Fore.GREEN}Harmless:{Style.RESET_ALL} 
{Fore.LIGHTCYAN_EX}{stats['harmless']}{Style.RESET_ALL}\n"
45 |         formatted_output += f"{Fore.GREEN}Malicious:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['malicious']}{Style.RESET_ALL}\n"
46 |         formatted_output += f"{Fore.GREEN}Suspicious:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['suspicious']}{Style.RESET_ALL}\n"
47 |         formatted_output += f"{Fore.GREEN}Undetected:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stats['undetected']}{Style.RESET_ALL}\n"
48 |         formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Detailed Analysis Results:{Style.RESET_ALL}\n"
49 |         results = result['data']['attributes']['last_analysis_results']
50 |         categories = {'harmless': [], 'malicious': [], 'suspicious': [], 'undetected': []}
51 |         for engine, data in results.items():
52 |             categories[data['category']].append(engine)
53 |         for category, engines in categories.items():
54 |             if engines:
55 |                 formatted_output += f"\n{Fore.GREEN}{category.title()} ({len(engines)}):{Style.RESET_ALL}\n"
56 |                 for engine in sorted(engines):
57 |                     formatted_output += f"{Fore.LIGHTCYAN_EX}- {engine}{Style.RESET_ALL}\n"
58 |         print(formatted_output)
59 |         return formatted_output
60 |     except Exception as e:
61 |         formatted_output = Fore.RED + f"Error while parsing JSON: {e}" + Style.RESET_ALL
62 |         print(formatted_output)
63 |         return None
64 | 
65 | def api_virustotal_check(domain):
66 |     conn = sqlite3.connect('apis//api_keys.db')
67 |     cursor = conn.cursor()
68 |     cursor.execute("SELECT api_name, api_key FROM api_keys")
69 |     rows = cursor.fetchall()
70 |     api_key = None
71 |     for row in rows:
72 |         api_name, key = row
73 |         if api_name == 'VirusTotal':
74 |             api_key = key
75 |             print(Fore.GREEN + 'Got VirusTotal API key. Starting VirusTotal scan...' + Style.RESET_ALL)
76 |             break
77 |     if not api_key:
78 |         print(Fore.RED + "VirusTotal API key was not found." + Style.RESET_ALL)
79 |         conn.close()
80 |         return None
81 |     conn.close()
82 |     formatted_output = check_domain(domain, api_key)
83 |     return formatted_output
84 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/reporting.md:
--------------------------------------------------------------------------------
1 | # About reporting system
2 | 
3 | Like every OSINT tool, DPULSE depends heavily on its reporting system. User-friendly reports are crucial for presenting results in detail and storing them for later use. DPULSE supports the most common report types: HTML and XLSX. Moreover, DPULSE provides a report storage database, which contains key information about the scan, the report and the extracted data, and gives you a way to keep reports long-term in one place. You can also move this database between different DPULSE versions, which makes for a slightly better user experience.
4 | 
5 | ## HTML report
6 | 
7 | The HTML report was the first supported report type. HTML is a widely supported format that can be opened in any web browser, allowing for visually appealing reports built from tables, charts, diagrams, and other elements. It supports links and hyperlinks that can be used for navigation within the report and for linking to external resources, and it enables dynamic content that can be updated in real time. HTML is often used in web applications, making it easy to integrate reports with existing web systems. In general, this report format is also more user-friendly, which makes it more convenient for sharing with investigation customers and OSINT teams and for use in presentations. 
Moreover, unlike PDF report generation, HTML is easier to handle both during development and delivery, as it doesn't require installing third-party applications (such as wkhtmltopdf). You can see an example of a DPULSE-generated HTML report [here](https://github.com/OSINT-TECHNOLOGIES/dpulse/tree/rolling/report_examples/html_report_example).
8 | 
9 | ## XLSX report
10 | 
11 | XLSX is a widely supported format that can be opened in most spreadsheet and office applications, including Microsoft Excel, Google Sheets, and LibreOffice Calc, making it easy to analyze and process data. It stores data in a structured format, supports formulas and functions that can automate calculations and data analysis, and enables charts and diagrams for visualizing data. Additionally, XLSX is often used in business applications, making it easy to integrate reports with existing systems. You can see an example of a DPULSE-generated XLSX report [here](https://github.com/OSINT-TECHNOLOGIES/dpulse/tree/rolling/report_examples/xlsx_report_example).
12 | 
13 | ## Side files
14 | 
15 | As you may have noticed in the report examples on the GitHub page, there are also some side files besides the report file itself. These files may include:
16 | 
17 | * the target's robots.txt file (if accessible)
18 | * the target's sitemap.xml file (if accessible)
19 | * a ps_documents folder with documents extracted from the domain and its subdomains (if PageSearch was selected for the scan)
20 | 
21 | ## Report storage database
22 | 
23 | As mentioned above, the report storage database contains key information about the scan, the report and the extracted data. DPULSE generates this database on its first launch, or whenever the database file is not found in the root directory, so users don't need to worry about creating it manually. The report storage database is a simple .db file (with the hard-coded name report_storage.db) whose structure is shown below:
24 | 
25 | ![rsdbstr](https://github.com/user-attachments/assets/491d1147-78ca-47a8-a405-5e351dc2730e)
26 | 
27 | Let's describe these fields in more detail:
28 | 
29 | * id - integer value that reflects how many reports have been generated and the order in which they were created
30 | * report_file_extension - string with the main report file's extension; in the current DPULSE version this value can be xlsx, pdf or html
31 | * report_content - BLOB or HTML data containing a copy of the main report file
32 | * comment - string with the case comment you can enter before each scan
33 | * target - string with the domain you scanned
34 | * creation_date - string showing when your report was generated (YYYYMMDD format)
35 | * dorks_results - text array containing a copy of the Google Dorking results (if this mode was selected before the scan)
36 | * robots_text - text array containing a copy of the robots.txt file from the scanned domain
37 | * sitemap_text - text array containing all links from the scanned domain's sitemap.xml file
38 | * sitemap_file - text array containing a copy of the sitemap.xml file from the scanned domain
39 | * api_scan - string indicating whether API scanning was activated and, if it was, which APIs were used
40 | 
41 | Interacting with the report storage database is a very simple process. Since it is a plain SQLite file, you can even query it directly, as the sketch below shows.
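For illustration, here is a minimal Python sketch for reading the storage file outside of DPULSE. The table name `report_storage` is only an assumption made for this example (the sketch lists the real table names first, so you can adjust the query); the column names are taken from the field descriptions above:

```python
import sqlite3

conn = sqlite3.connect("report_storage.db")
cursor = conn.cursor()

# The actual table name is not documented here, so discover it first
cursor.execute("SELECT name FROM sqlite_master WHERE type='table'")
print(cursor.fetchall())

# Then list basic metadata for every stored report (assumed table name)
cursor.execute("SELECT id, target, creation_date, report_file_extension FROM report_storage")
for row in cursor.fetchall():
    print(row)

conn.close()
```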
Within DPULSE itself, after each scan you will see several messages indicating that your report was successfully saved to the report storage database:
42 | 
43 | ![rsdb1](https://github.com/user-attachments/assets/db3b22f8-1e74-4095-8ab7-99fd5837aa0a)
44 | 
45 | There is also a separate menu item in the DPULSE CLI for working with the report storage database, named "Report storage DB manager":
46 | 
47 | ![rsdb2](https://github.com/user-attachments/assets/519682dc-5d01-4844-8dcd-67e1914bb765)
48 | 
49 | As you can see, there are menu items both for viewing the DB content and for recreating reports. Let's see what DPULSE returns if we select the first menu item:
50 | 
51 | ![rsdb3](https://github.com/user-attachments/assets/6778cf83-e9cf-4580-b46d-7c187cbdde9d)
52 | 
53 | The report recreation process is shown below:
54 | 
55 | ![rsdb4](https://github.com/user-attachments/assets/d7af9b03-703e-46b2-846b-05d99b33b900)
56 | 
57 | And this is what a recreated report looks like inside:
58 | 
59 | ![rsdb5](https://github.com/user-attachments/assets/799d45cb-bc51-43ca-8b06-14e236d21912)
60 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/getting_started.md:
--------------------------------------------------------------------------------
1 | # Installation and Quick Start
2 | 
3 | ## System Requirements
4 | 
5 | DPULSE is built on Python and designed to run across various environments. To ensure stability and full functionality, your system must meet the following criteria:
6 | 
7 | * **Operating System:** Linux (recommended), macOS, or Windows.
8 | * **Python Version:** Python **3.10**, **3.11**, or **3.12**.
9 |     * *Note:* Older versions (3.9 and below) are not supported due to dependency conflicts.
10 | * **Network:** A stable, high-speed internet connection is crucial. Modules like *Dorking Scan* and *PageSearch* rely on active scraping; unstable connections may lead to timeouts or incomplete results.
11 | * **Dependencies:**
12 |     * **Docker** (Recommended for isolation and ease of use).
13 |     * **Poetry** (Recommended for local Python installation).
14 |     * **Git** (Required for cloning the repository).
15 | 
16 | ---
17 | 
18 | ## Installation Methods
19 | 
20 | We provide three methods to install DPULSE. **Docker is the recommended method** as it eliminates environment conflicts.
21 | 
22 | ### Method 1: Docker (Recommended)
23 | 
24 | Using Docker ensures you have all necessary system libraries pre-installed without polluting your host machine.
25 | 
26 | 1. **Pull the official image:**
27 |     ```bash
28 |     docker pull osinttechnologies/dpulse:latest
29 |     ```
30 | 
31 | 2. **Run the container:**
32 |     * **Linux / macOS:**
33 |     ```bash
34 |     docker run --rm -it -v "$PWD":/data -w /data osinttechnologies/dpulse:latest
35 |     ```
36 |     * **Windows (PowerShell):**
37 |     ```powershell
38 |     docker run --rm -it -v "${PWD}:/data" -w /data osinttechnologies/dpulse:latest
39 |     ```
40 | 
41 | ---
42 | 
43 | ### Method 2: Poetry
44 | 
45 | If you prefer running DPULSE natively, use [Poetry](https://python-poetry.org/). It handles virtual environments and dependency locking automatically.
46 | 
47 | 1. **Clone the repository:**
48 |     * For the **Stable** version:
49 |     ```bash
50 |     git clone https://github.com/OSINT-TECHNOLOGIES/dpulse
51 |     cd dpulse
52 |     ```
53 |     * For the **Rolling** (Dev) version:
54 |     ```bash
55 |     git clone --branch rolling --single-branch https://github.com/OSINT-TECHNOLOGIES/dpulse.git
56 |     cd dpulse
57 |     ```
58 | 
59 | 2. 
**Install dependencies:** 60 | ```bash 61 | poetry install 62 | ``` 63 | 64 | 3. **Run DPULSE:** 65 | ```bash 66 | poetry run python dpulse.py 67 | ``` 68 | 69 | --- 70 | 71 | ### Method 3: Standard PIP (Legacy) 72 | 73 | This method is available but **not recommended** due to potential version conflicts with other Python packages on your system. 74 | 75 | 1. **Clone the repository and enter the directory:** 76 | ```bash 77 | git clone https://github.com/OSINT-TECHNOLOGIES/dpulse 78 | cd dpulse 79 | ``` 80 | 81 | 2. **Install requirements:** 82 | ```bash 83 | pip install -r requirements.txt 84 | ``` 85 | 86 | 3. **Run DPULSE:** 87 | ```bash 88 | python dpulse.py 89 | ``` 90 | 91 | > **Note:** The deprecated `.bat` and `.sh` installer scripts have been removed in favor of standard package managers to ensure security and reliability. 92 | 93 | --- 94 | 95 | ## Conducting Your First Scan 96 | 97 | Once DPULSE is running, follow this workflow to perform a reconnaissance task. 98 | 99 | ### 1. Main Menu 100 | Upon launch, the CLI interface will appear. To start a standard investigation, select **Option 1**. 101 | 102 | ![Main Menu](https://github.com/user-attachments/assets/5b45d4f0-9fad-4e17-8d74-96989037a66a) 103 | 104 | ### 2. Target Input 105 | DPULSE operates strictly with **domain names** (e.g., `example.com`), not full URLs (e.g., `https://www.example.com/page`). 106 | 107 | * **Input:** Enter the target domain when prompted. 108 | * **Correction:** If you accidentally enter a URL, DPULSE will attempt to extract the domain, but manual input is preferred for accuracy. 109 | 110 | ![Target Input](https://github.com/user-attachments/assets/cc5676d5-e11c-4aeb-b0b4-dd4c23fa228a) 111 | 112 | ### 3. Scan Configuration (Modifiers) 113 | You will be asked to configure the scan parameters. You can customize the depth and scope of the research: 114 | 115 | * **Case Comment:** A brief description for your internal records (e.g., "Investigation #42"). 116 | * **PageSearch:** Enables deep crawling of the domain to find sensitive files (PDFs, configs) and exposed secrets. 117 | * *Keywords:* If PageSearch is active, you can specify keywords to search for within downloaded documents. 118 | * **Dorking Mode:** Activates Google Dorking to find admin panels, IoT devices, or sensitive directories. 119 | * **API Usage:** Toggles third-party integrations (VirusTotal, SecurityTrails, HudsonRock). 120 | * **Snapshotting:** Enables capturing the target website's visual state via screenshots or Wayback Machine. 121 | 122 | ![Modifiers Selection](https://github.com/user-attachments/assets/9470350f-edf3-4692-b9bd-7c327cea2017) 123 | 124 | ### 4. Results 125 | Once the scan is complete, DPULSE will generate a report in the `./reports` directory and save the case metadata to the local database. 126 | 127 | ![Scan Complete](https://github.com/user-attachments/assets/4e16f1e6-df60-441c-b730-79ea69134bb7) 128 | 129 | You can now open the generated report file to view the gathered intelligence. 
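As a small convenience, you can also locate the newest report programmatically, for example to feed it into another tool. The sketch below is not part of DPULSE itself; it only assumes the default `./reports` output directory mentioned above:

```python
from pathlib import Path

reports_dir = Path("reports")
if reports_dir.is_dir():
    # Pick the most recently modified entry; default=None guards against an empty folder
    newest = max(reports_dir.iterdir(), key=lambda p: p.stat().st_mtime, default=None)
    print(f"Latest report: {newest}" if newest else "Reports folder is empty.")
else:
    print("No reports have been generated yet.")
```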
130 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | # Contributor Covenant Code of Conduct 2 | 3 | ## Our Pledge 4 | 5 | We as members, contributors, and leaders pledge to make participation in our 6 | community a harassment-free experience for everyone, regardless of age, body 7 | size, visible or invisible disability, ethnicity, sex characteristics, gender 8 | identity and expression, level of experience, education, socio-economic status, 9 | nationality, personal appearance, race, religion, or sexual identity 10 | and orientation. 11 | 12 | We pledge to act and interact in ways that contribute to an open, welcoming, 13 | diverse, inclusive, and healthy community. 14 | 15 | ## Our Standards 16 | 17 | Examples of behavior that contributes to a positive environment for our 18 | community include: 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the 26 | overall community 27 | 28 | Examples of unacceptable behavior include: 29 | 30 | * The use of sexualized language or imagery, and sexual attention or 31 | advances of any kind 32 | * Trolling, insulting or derogatory comments, and personal or political attacks 33 | * Public or private harassment 34 | * Publishing others' private information, such as a physical or email 35 | address, without their explicit permission 36 | * Other conduct which could reasonably be considered inappropriate in a 37 | professional setting 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official e-mail address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | osint.technologies@gmail.com. 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Community leaders will follow these Community Impact Guidelines in determining 72 | the consequences for any action they deem in violation of this Code of Conduct: 73 | 74 | ### 1. 
Correction 75 | 76 | **Community Impact**: Use of inappropriate language or other behavior deemed 77 | unprofessional or unwelcome in the community. 78 | 79 | **Consequence**: A private, written warning from community leaders, providing 80 | clarity around the nature of the violation and an explanation of why the 81 | behavior was inappropriate. A public apology may be requested. 82 | 83 | ### 2. Warning 84 | 85 | **Community Impact**: A violation through a single incident or series 86 | of actions. 87 | 88 | **Consequence**: A warning with consequences for continued behavior. No 89 | interaction with the people involved, including unsolicited interaction with 90 | those enforcing the Code of Conduct, for a specified period of time. This 91 | includes avoiding interactions in community spaces as well as external channels 92 | like social media. Violating these terms may lead to a temporary or 93 | permanent ban. 94 | 95 | ### 3. Temporary Ban 96 | 97 | **Community Impact**: A serious violation of community standards, including 98 | sustained inappropriate behavior. 99 | 100 | **Consequence**: A temporary ban from any sort of interaction or public 101 | communication with the community for a specified period of time. No public or 102 | private interaction with the people involved, including unsolicited interaction 103 | with those enforcing the Code of Conduct, is allowed during this period. 104 | Violating these terms may lead to a permanent ban. 105 | 106 | ### 4. Permanent Ban 107 | 108 | **Community Impact**: Demonstrating a pattern of violation of community 109 | standards, including sustained inappropriate behavior, harassment of an 110 | individual, or aggression toward or disparagement of classes of individuals. 111 | 112 | **Consequence**: A permanent ban from any sort of public interaction within 113 | the community. 114 | 115 | ## Attribution 116 | 117 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 118 | version 2.0, available at 119 | https://www.contributor-covenant.org/version/2/0/code_of_conduct.html. 120 | 121 | Community Impact Guidelines were inspired by [Mozilla's code of conduct 122 | enforcement ladder](https://github.com/mozilla/diversity). 123 | 124 | [homepage]: https://www.contributor-covenant.org 125 | 126 | For answers to common questions about this code of conduct, see the FAQ at 127 | https://www.contributor-covenant.org/faq. Translations are available at 128 | https://www.contributor-covenant.org/translations. 129 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # 🌐 DPULSE 4 | ### Advanced Domain OSINT & Reconnaissance Tool 5 | 6 | 7 | 8 |

9 | 10 | [![Stable Version](https://img.shields.io/badge/v1.3.4-STABLE-success?style=for-the-badge)](https://github.com/OSINT-TECHNOLOGIES/dpulse/releases) 11 | [![Rolling Version](https://img.shields.io/badge/v1.4-DEV_BUILD-orange?style=for-the-badge)](https://github.com/OSINT-TECHNOLOGIES/dpulse/tree/rolling) 12 | [![Python](https://img.shields.io/badge/Python-3.10+-blue?style=for-the-badge&logo=python&logoColor=white)](https://python.org) 13 | [![Docker](https://img.shields.io/badge/Docker-Ready-2496ED?style=for-the-badge&logo=docker&logoColor=white)](https://hub.docker.com/r/osinttechnologies/dpulse) 14 | [![Documentation](https://img.shields.io/badge/Docs-ReadTheDocs-informational?style=for-the-badge&logo=readthedocs&logoColor=white)](https://dpulse.readthedocs.io) 15 | 16 | **Convenient, fast, and user-friendly collector of domain information from open sources.** 17 | 18 | [Report Bug](https://github.com/OSINT-TECHNOLOGIES/dpulse/issues) • [Request Feature](https://github.com/OSINT-TECHNOLOGIES/dpulse/issues) • [Roadmap](https://github.com/users/OSINT-TECHNOLOGIES/projects/1) 19 | 20 |
21 | 22 | --- 23 | 24 | > ⚠️ **Disclaimer:** DPULSE is a research tool tailored for OSINT professionals. It is **not** intended for criminal activities. The developer is not responsible for any misuse of this tool. Use strictly on allowed domains and for legal purposes. 25 | 26 | --- 27 | 28 | ## 🚀 Key Features 29 | 30 | DPULSE automates the boring stuff in domain reconnaissance. It compiles data into clean **HTML/XLSX reports**. 31 | 32 | | Feature | Description | 33 | | :--- | :--- | 34 | | 🔍 **Basic Scan** | Automates WHOIS, subdomains, emails, IPs, social media, SSL info, open ports, and tech stack detection. | 35 | | 🕵️‍♂️ **PageSearch** | Deep dive into subdomains to find API keys, exposed passwords, cookies, hidden forms, and sensitive documents (PDF, config files). | 36 | | 🧩 **Dorking Mode** | Automated Google Dorking for IoT, admin panels, sensitive files, and custom user-defined dorks. | 37 | | 🔗 **API Integrations** | Native support for **VirusTotal**, **SecurityTrails**, and **HudsonRock** (compromised hosts check). | 38 | | 📸 **Snapshotting** | Captures target via Screenshots, HTML downloads, or Wayback Machine archiving. | 39 | 40 | --- 41 | 42 | ## ⚡ Quick Start 43 | 44 | ### Option 1: Docker (Recommended) 45 | The fastest way to run DPULSE without worrying about dependencies. 46 | 47 | ```bash 48 | # 1. Pull the image 49 | docker pull osinttechnologies/dpulse:latest 50 | 51 | # 2. Run DPULSE (Linux/macOS) 52 | docker run --rm -it -v "$PWD":/data -w /data osinttechnologies/dpulse:latest 53 | 54 | # 2. Run DPULSE (Windows PowerShell) 55 | docker run --rm -it -v "${PWD}:/data" -w /data osinttechnologies/dpulse:latest 56 | ``` 57 | 58 | ### Option 2: Source Code (Poetry) 59 | For developers or those who prefer a local environment. 60 | 61 | ```bash 62 | git clone https://github.com/OSINT-TECHNOLOGIES/dpulse 63 | cd dpulse 64 | poetry install 65 | poetry run python dpulse.py 66 | ``` 67 | 68 |
69 | <details><summary>Click to see Legacy Installation (pip)</summary>
70 | 
71 | If you don't use Poetry, you can use standard pip (might have conflicts):
72 | 
73 | ```bash
74 | git clone https://github.com/OSINT-TECHNOLOGIES/dpulse
75 | cd dpulse
76 | pip install -r requirements.txt
77 | python dpulse.py
78 | ```
79 | </details>
80 | 81 | --- 82 | 83 | ## 🖥️ Interface & Reports 84 | 85 | **Main Menu** 86 | Clean CLI interface for easy navigation. 87 | ![dpulse_start](https://github.com/user-attachments/assets/9ec0ab73-2206-4d38-bae6-e88656e17f95) 88 | 89 | **Scanning Process** 90 | Real-time feedback during the scan. 91 | ![dpulse_bs](https://github.com/user-attachments/assets/b0ad7827-6dac-4f82-a369-4447a0e1c878) 92 | 93 | **Output** 94 | Organized report folders with timestamps. 95 | ![Report Folder](https://github.com/OSINT-TECHNOLOGIES/dpulse/assets/77023667/7de73250-c9b6-4373-b21e-16bbb7a63882) 96 | 97 | --- 98 | 99 | ## 🏆 Community & Mentions 100 | 101 | We are proud to be mentioned by industry leaders and the cybersecurity community. 102 | 103 | * **HudsonRock:** [Featured in cybercrime intelligence update](https://www.linkedin.com/feed/update/urn:li:share:7294336938495385600/) 104 | * **DarkWebInformer:** [Tool for complex approach to domain OSINT](https://darkwebinformer.com/dpulse-tool-for-complex-approach-to-domain-osint/) 105 | * **Ethical Hackers Academy:** [Tool Review](https://ethicalhacksacademy.com/blogs/cyber-security-tools/dpulse) 106 | 107 |
108 | <details><summary>View all mentions (Social Media & Blogs)</summary>
109 | 
110 | ### X.com (Twitter)
111 | * [@DarkWebInformer](https://x.com/DarkWebInformer/status/1787583156775759915?t=Ak1W9ddUPpDvLAkVyQG8fQ&s=19)
112 | * [@OSINTech_](https://x.com/OSINTech_/status/1805902553885888649)
113 | * [@cyb_detective](https://x.com/cyb_detective/status/1821337404763959487?t=vbyRUeXM2C6gf47l7XvJnQ&s=19)
114 | * [@DailyOsint](https://x.com/DailyOsint/status/1823013991951523997?t=Fr-oDCZ2pFmFJpUT3BKl5A&s=19)
115 | * [@UndeadSec](https://x.com/UndeadSec/status/1827692406797689032)
116 | * [@0xtechrock](https://x.com/0xtechrock/status/1804470459741978974?t=us1EVJEECNZdSmSe5CQjQA&s=19)
117 | 
118 | ### LinkedIn
119 | * [Maory Schroder](https://fr.linkedin.com/posts/maory-schroder_osint-cybers%C3%A9curit%C3%A9-pentest-activity-7227562302009491456-sXoZ?trk=public_profile)
120 | * [Maxim Marshak](https://www.linkedin.com/pulse/bormaxi8080-osint-timeline-64-27062024-maxim-marshak-jojbf)
121 | * [DailyOSINT](https://www.linkedin.com/posts/daily-osint_osint-reconnaissance-infosec-activity-7228779678096850946-H-zC)
122 | 
123 | ### Telegram Channels
124 | * Cyber Detective
125 | * Hackers Factory
126 | * C.I.T Security
127 | * Реальний OSINT
128 | 
129 | </details>
130 | 131 | --- 132 | 133 |
134 | 135 | **Created by OSINT-TECHNOLOGIES** 136 | 137 | [Documentation](https://dpulse.readthedocs.io) • [Contact Developer](https://dpulse.readthedocs.io/en/latest/contact_dev/#) 138 | 139 |
140 | -------------------------------------------------------------------------------- /docs/dpulse-docs/docs/dorking.md: -------------------------------------------------------------------------------- 1 | # Automatic Google Dorking scan mode 2 | 3 | Automatic Google Dorking scan is an extended domain research function with prepared Google Dorking databases for different purposes. 4 | 5 | ## Prepared Dorking databases description 6 | 7 | At the moment DPULSE offers the following prepared databases for automatic Google Dorking: 8 | 9 | 1. IoT dorking 10 | 2. Files dorking 11 | 3. Admin panels dorking 12 | 4. Web elements dorking 13 | 14 | IoT dorking table contains following 20 dorks: 15 | ``` 16 | inurl:":8080" site:{} 17 | inurl:":1883" site:{} 18 | inurl:":8883" site:{} 19 | inurl:":554" site:{} 20 | inurl:":81" site:{} 21 | inurl:":5000" site:{} 22 | inurl:":9000" site:{} 23 | inurl:":10000" site:{} 24 | inurl:debug site:{} 25 | inurl:device site:{} 26 | inurl:control site:{} 27 | inurl:status site:{} 28 | inurl:service site:{} 29 | inurl:monitor site:{} 30 | inurl:stream site:{} 31 | inurl:video site:{} 32 | inurl:camera site:{} 33 | inurl:sensor site:{} 34 | inurl:api site:{} 35 | inurl:firmware site:{} 36 | ``` 37 | 38 | Files dorking table contains following 30 dorks: 39 | ``` 40 | filetype:pdf site:{} 41 | filetype:doc site:{} 42 | filetype:docx site:{} 43 | filetype:xlsx site:{} 44 | filetype:xls site:{} 45 | filetype:ppt site:{} 46 | filetype:pptx site:{} 47 | filetype:txt site:{} 48 | filetype:csv site:{} 49 | filetype:xml site:{} 50 | filetype:json site:{} 51 | filetype:html site:{} 52 | filetype:php site:{} 53 | filetype:asp site:{} 54 | filetype:aspx site:{} 55 | filetype:js site:{} 56 | filetype:css site:{} 57 | filetype:jpg site:{} 58 | filetype:jpeg site:{} 59 | filetype:png site:{} 60 | filetype:gif site:{} 61 | filetype:mp3 site:{} 62 | filetype:mp4 site:{} 63 | filetype:avi site:{} 64 | filetype:zip site:{} 65 | filetype:rar site:{} 66 | filetype:sql site:{} 67 | filetype:db site:{} 68 | filetype:conf site:{} 69 | filetype:ini site:{} 70 | ``` 71 | 72 | Admin panels dorking table contains following 72 dorks: 73 | ``` 74 | site:{} intitle:"WordPress Login" 75 | site:{} inurl:/wp-admin/ 76 | site:{} intext:"Войти в WordPress" 77 | site:{} intitle:"Dashboard" "WordPress" 78 | site:{} intitle:"Joomla! Administrator Login" 79 | site:{} inurl:/administrator/ 80 | site:{} intitle:"Joomla! 
3.x" "Login" 81 | site:{} intitle:"Drupal login" 82 | site:{} inurl:/user/login 83 | site:{} intitle:"Drupal 8" "Login" 84 | site:{} intitle:"phpMyAdmin" 85 | site:{} inurl:/phpmyadmin/ 86 | site:{} intitle:"phpMyAdmin 4.x" 87 | site:{} intitle:"Magento Admin" 88 | site:{} inurl:/admin/ 89 | site:{} intitle:"Magento 2" "Admin" 90 | site:{} intitle:"vBulletin Admin CP" 91 | site:{} inurl:/admincp/ 92 | site:{} intitle:"vBulletin 4.x" "Admin" 93 | site:{} intitle:"osCommerce Administration" 94 | site:{} intitle:"osCommerce 2.x" "Admin" 95 | site:{} intitle:"PrestaShop Back Office" 96 | site:{} inurl:/admin-dev/ 97 | site:{} intitle:"PrestaShop 1.7" "Back Office" 98 | site:{} intitle:"OpenCart Admin Panel" 99 | site:{} intitle:"OpenCart 3.x" "Admin" 100 | site:{} intitle:"Zen Cart Admin" 101 | site:{} intitle:"Zen Cart 1.5" "Admin" 102 | site:{} intitle:"MediaWiki" "Special:UserLogin" 103 | site:{} inurl:/mediawiki/index.php/Special:UserLogin 104 | site:{} intitle:"Moodle" "Log in to the site" 105 | site:{} inurl:/login/index.php 106 | site:{} intitle:"Concrete5" "Sign In" 107 | site:{} inurl:/index.php/dashboard/ 108 | site:{} intitle:"TYPO3" "Backend Login" 109 | site:{} inurl:/typo3/ 110 | site:{} intitle:"Plone" "Log in" 111 | site:{} inurl:/login_form 112 | site:{} intitle:"Django" "Site administration" 113 | site:{} inurl:/rails/admin/ 114 | site:{} intitle:"Ruby on Rails" "Admin" 115 | site:{} intitle:"Craft CMS" "Control Panel" 116 | site:{} inurl:/admin/ 117 | site:{} intitle:"ExpressionEngine" "Control Panel" 118 | site:{} inurl:/admin.php 119 | site:{} intitle:"Kentico" "CMS Desk" 120 | site:{} inurl:/cmsdesk/ 121 | site:{} intitle:"Umbraco" "Backoffice" 122 | site:{} inurl:/umbraco/ 123 | site:{} intitle:"Sitecore" "Launchpad" 124 | site:{} inurl:/sitecore/ 125 | site:{} intitle:"DotNetNuke" "Host" 126 | site:{} inurl:/host/ 127 | site:{} intitle:"SharePoint" "Sign In" 128 | site:{} inurl:/_layouts/15/ 129 | site:{} intitle:"Plesk" "Login" 130 | site:{} inurl:login.php?user=admin 131 | site:{} inurl:dashboard 132 | site:{} intitle:"admin login" 133 | site:{} intitle:"administrator login" 134 | site:{} "admin panel" 135 | site:{} inurl:panel 136 | site:{} inurl:cp 137 | site:{} inurl:controlpanel 138 | site:{} inurl:backend 139 | site:{} inurl:management 140 | site:{} inurl:administration 141 | site:{} intitle:"admin access" 142 | site:{} intitle:"control panel" 143 | site:{} "admin login" +directory 144 | site:{} "administrator login" +password 145 | site:{} inurl:/plesk-login/ 146 | ``` 147 | 148 | Web elements dorking table contains following 25 dorks: 149 | ``` 150 | site:{} intext:"index of" 151 | site:{} inurl:admin 152 | site:{} inurl:login 153 | site:{} inurl:dashboard 154 | site:{} inurl:wp-content 155 | site:{} inurl:backup 156 | site:{} inurl:old 157 | site:{} inurl:temp 158 | site:{} inurl:upload 159 | site:{} inurl:download 160 | site:{} inurl:config 161 | site:{} inurl:setup 162 | site:{} inurl:install 163 | site:{} inurl:database 164 | site:{} inurl:log 165 | site:{} inurl:debug 166 | site:{} inurl:api 167 | site:{} inurl:secret 168 | site:{} inurl:private 169 | site:{} inurl:secure 170 | site:{} inurl:password 171 | site:{} inurl:auth 172 | site:{} inurl:token 173 | site:{} inurl:session 174 | site:{} inurl:panel 175 | ``` 176 | 177 | ## Creating custom Dorking database 178 | 179 | DPULSE allows you to create your own custom Google Dorking database. 
The recommended way to create one, though, is through the DPULSE CLI, by selecting the menus shown below:
180 | 
181 | ![dorking_start](https://github.com/user-attachments/assets/fc8fe1ba-1845-46d1-a9b9-d09d3dc03ce6)
182 | 
183 | After you select this menu item, you will be greeted by the custom Dorking DB generator. It is very simple to use. First, enter your new custom Dorking DB name without any extension. Then you will be prompted to enter the id of your first dork (the first id in a custom DB is always 1, and each following dork increments the id by 1) and the dork itself. There is one rule DPULSE requires you to follow when entering dorks: wherever the domain should appear in a dork, put {} instead, so the program can replace these brackets with the actual domain you will enter later.
184 | 
185 | An example interaction with the custom Dorking DB generator is shown below:
186 | 
187 | ![customdork](https://github.com/user-attachments/assets/8f3e8ca5-feec-4bf5-add8-048f54931b67)
188 | 
189 | As a result, a new .db file will appear in the dorking folder, which can later be selected for use in a scan:
190 | 
191 | ![dorking_customdbresult](https://github.com/user-attachments/assets/0cd4facc-215b-4e56-ab56-aa23cb5136db)
192 | 
193 | And this is how it looks inside:
194 | 
195 | ![look_inside](https://github.com/user-attachments/assets/023467c2-008b-451f-8e14-88b7e54a8c3c)
196 | 
197 | 
198 | 
199 | 
--------------------------------------------------------------------------------
/service/config_processing.py:
--------------------------------------------------------------------------------
1 | import configparser
2 | import os
3 | from colorama import Fore, Style
4 | 
5 | def create_config():
6 |     basic_user_agents = [
7 |         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
8 |         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36',
9 |         'Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0',
10 |         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36',
11 |         'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
12 |         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
13 |         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36',
14 |         'Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Mobile Safari/537.36',
15 |         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36',
16 |         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36',
17 |         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36',
18 |         'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36',
19 |         'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
20 |         'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36',
21 |         'Mozilla/5.0 (Linux; Android 8.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36',
22 |         'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 
(KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36', 23 | 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36', 24 | 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36', 25 | 'Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36', 26 | 'Mozilla/5.0 (Linux; Android 7.1.2; SM-G955F Build/N2G48H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36' 27 | ] 28 | 29 | config = configparser.ConfigParser() 30 | config['HTML_REPORTING'] = {'template': 'modern', 'delete_txt_files': 'n'} 31 | config['LOGGING'] = {'log_level': 'info'} 32 | config['CLI VISUAL'] = {'preview_color': 'red', 'font': 'slant'} 33 | config['DORKING'] = {'dorking_delay (secs)': '2', 'delay_step': '5', 34 | 'full_path_to_browser': r'path\to\browser\for\dorking', 'browser_mode': 'nonheadless'} 35 | config['SNAPSHOTTING'] = {'installed_browser': 'firefox', 'opera_browser_path': 'None', 'wayback_retries': '3', 36 | 'wayback_req_pause': '2'} 37 | config['USER-AGENTS'] = {} 38 | for i, agent in enumerate(basic_user_agents): 39 | config['USER-AGENTS'][f'agent_{i + 1}'] = agent 40 | config['PROXIES'] = {'proxies_file_path': 'NONE'} 41 | 42 | with open('service//config.ini', 'w') as configfile: 43 | config.write(configfile) 44 | 45 | def check_cfg_presence(): 46 | cfg_presence = os.path.isfile('service//config.ini') 47 | return cfg_presence 48 | 49 | def read_config(): 50 | if not check_cfg_presence(): 51 | create_config() 52 | 53 | config = configparser.ConfigParser() 54 | config.read('service//config.ini') 55 | 56 | if not config.has_section('LOGGING'): 57 | config.add_section('LOGGING') 58 | config.set('LOGGING', 'log_level', 'info') 59 | with open('service//config.ini', 'w') as configfile: 60 | config.write(configfile) 61 | 62 | log_level = config.get('LOGGING', 'log_level') 63 | cli_preview_color = config.get('CLI VISUAL', 'preview_color') 64 | wm_font = config.get('CLI VISUAL', 'font') 65 | dorking_delay = config.get('DORKING', 'dorking_delay (secs)') 66 | delay_step = config.get('DORKING', 'delay_step') 67 | user_agents = [value for key, value in config['USER-AGENTS'].items()] 68 | proxies_file_path = config.get('PROXIES', 'proxies_file_path') 69 | installed_browser = config.get('SNAPSHOTTING', 'installed_browser') 70 | opera_browser_path = config.get('SNAPSHOTTING', 'opera_browser_path') 71 | wayback_retries_amount = config.get('SNAPSHOTTING', 'wayback_retries') 72 | wayback_requests_pause = config.get('SNAPSHOTTING', 'wayback_req_pause') 73 | html_report_template = config.get('HTML_REPORTING', 'template') 74 | dorking_browser = config.get('DORKING', 'full_path_to_browser') 75 | dorking_browser_mode = config.get('DORKING', 'browser_mode') 76 | delete_txt_files = config.get('HTML_REPORTING', 'delete_txt_files') 77 | 78 | config_values = { 79 | 'logging_level': log_level, 80 | 'preview_color': cli_preview_color, 81 | 'wm_font': wm_font, 82 | 'dorking_delay (secs)': dorking_delay, 83 | 'delay_step': delay_step, 84 | 'user_agents': user_agents, 85 | 'proxies_file_path': proxies_file_path, 86 | 'installed_browser': installed_browser, 87 | 'opera_browser_path': opera_browser_path, 88 | 'wayback_retries_amount': wayback_retries_amount, 89 | 'wayback_requests_pause': wayback_requests_pause, 90 | 'template': html_report_template, 91 | 'dorking_browser': dorking_browser, 92 | 'dorking_browser_mode': 
dorking_browser_mode, 93 | 'delete_txt_files': delete_txt_files 94 | } 95 | 96 | return config_values 97 | 98 | def print_and_return_config(): 99 | if not check_cfg_presence(): 100 | create_config() 101 | config = configparser.ConfigParser() 102 | config.read('service//config.ini') 103 | print(Fore.LIGHTMAGENTA_EX + "\n[CURRENT CONFIG CONTENT START]" + Style.RESET_ALL) 104 | for section in config.sections(): 105 | print('\n') 106 | print(Fore.GREEN + f"[{section}]" + Style.RESET_ALL) 107 | for key in config[section]: 108 | print(Fore.GREEN + f"{key} = {config[section][key]}" + Style.RESET_ALL) 109 | print(Fore.LIGHTMAGENTA_EX + "\n\n[CURRENT CONFIG CONTENT END]" + Style.RESET_ALL) 110 | return config 111 | -------------------------------------------------------------------------------- /apis/api_securitytrails.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import sqlite3 3 | import re 4 | from colorama import Fore, Style 5 | 6 | def securitytrails_html_prep(formatted_output): 7 | formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) 8 | start_marker = "=== SECURITYTRAILS API REPORT ===" 9 | end_marker = "[+] Domain General Information:" 10 | start_index = formatted_output.find(start_marker) 11 | end_index = formatted_output.find(end_marker) 12 | if start_index != -1 and end_index != -1: 13 | formatted_output = formatted_output[:start_index] + formatted_output[end_index:] 14 | return formatted_output 15 | 16 | def check_domain_securitytrails(domain, api_key): 17 | api_key = api_key.strip() 18 | api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) 19 | 20 | subdomains_url = f"https://api.securitytrails.com/v1/domain/{domain}/subdomains?apikey={api_key}" 21 | general_url = f"https://api.securitytrails.com/v1/domain/{domain}?apikey={api_key}" 22 | 23 | try: 24 | general_response = requests.get(general_url) 25 | general_data = general_response.json() 26 | except Exception as e: 27 | return Fore.RED + f"Error while parsing JSON: {e}" + Style.RESET_ALL 28 | 29 | formatted_output = Fore.LIGHTBLUE_EX + "=== SECURITYTRAILS API REPORT ===\n" + Style.RESET_ALL 30 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Domain General Information:{Style.RESET_ALL}\n" 31 | formatted_output += ( 32 | f"{Fore.GREEN}Alexa Rank: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('alexa_rank')}{Style.RESET_ALL}\n" 33 | f"{Fore.GREEN}Apex Domain: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('apex_domain')}{Style.RESET_ALL}\n" 34 | f"{Fore.GREEN}Hostname: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{general_data.get('hostname')}{Style.RESET_ALL}\n" 35 | ) 36 | 37 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] DNS Records:{Style.RESET_ALL}\n" 38 | current_dns = general_data.get('current_dns', {}) 39 | for record_type, record_data in current_dns.items(): 40 | formatted_output += f"\n{Fore.GREEN}[{record_type.upper()} RECORDS]:{Style.RESET_ALL}\n" 41 | for value in record_data.get('values', []): 42 | if record_type == 'a': 43 | ip = value.get('ip', '') 44 | org = value.get('ip_organization', '') 45 | formatted_output += ( 46 | f"{Fore.GREEN}IP: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{ip}{Style.RESET_ALL} " 47 | f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" 48 | ) 49 | elif record_type == 'mx': 50 | hostname = value.get('hostname', '') 51 | priority = value.get('priority', '') 52 | org = value.get('hostname_organization', '') 53 | formatted_output += ( 54 | 
f"{Fore.GREEN}Hostname: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{hostname}{Style.RESET_ALL} " 55 | f"{Fore.GREEN}| Priority: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{priority}{Style.RESET_ALL} " 56 | f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" 57 | ) 58 | elif record_type == 'ns': 59 | nameserver = value.get('nameserver', '') 60 | org = value.get('nameserver_organization', '') 61 | formatted_output += ( 62 | f"{Fore.GREEN}Nameserver: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{nameserver}{Style.RESET_ALL} " 63 | f"{Fore.GREEN}| Organization: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{org}{Style.RESET_ALL}\n" 64 | ) 65 | elif record_type == 'soa': 66 | email = value.get('email', '') 67 | ttl = value.get('ttl', '') 68 | formatted_output += ( 69 | f"{Fore.GREEN}Email: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{email}{Style.RESET_ALL} " 70 | f"{Fore.GREEN}| TTL: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{ttl}{Style.RESET_ALL}\n" 71 | ) 72 | elif record_type == 'txt': 73 | txt_value = value.get('value', '') 74 | formatted_output += ( 75 | f"{Fore.GREEN}Value: {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{txt_value}{Style.RESET_ALL}\n" 76 | ) 77 | 78 | subdomains_response = requests.get(subdomains_url) 79 | if subdomains_response.status_code == 200: 80 | subdomains_data = subdomains_response.json() 81 | sub_count = subdomains_data.get('subdomain_count', 0) 82 | subdomains = subdomains_data.get('subdomains', []) 83 | 84 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Subdomains Deep Enumeration:{Style.RESET_ALL}\n" 85 | formatted_output += ( 86 | f"{Fore.GREEN}Found {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{sub_count}{Style.RESET_ALL}" 87 | f"{Fore.GREEN} subdomains.{Style.RESET_ALL}\n" 88 | ) 89 | 90 | if subdomains: 91 | formatted_output += f"{Fore.GREEN}Subdomains list:{Style.RESET_ALL}\n" 92 | alive_count = 0 93 | for i, subdomain in enumerate(subdomains, start=1): 94 | subdomain_url = f"http://{subdomain}.{domain}" 95 | try: 96 | r = requests.get(subdomain_url, timeout=5) 97 | if r.status_code == 200: 98 | alive_count += 1 99 | formatted_output += ( 100 | f"{Fore.GREEN}{i}. {Style.RESET_ALL}{Fore.LIGHTCYAN_EX}{subdomain_url}{Style.RESET_ALL}" 101 | f"{Fore.GREEN} is alive{Style.RESET_ALL}\n" 102 | ) 103 | except Exception: 104 | pass 105 | 106 | if alive_count == 0: 107 | formatted_output += (f"{Fore.RED}No alive subdomains found (by HTTP 200 check).{Style.RESET_ALL}\n") 108 | else: 109 | formatted_output += f"{Fore.RED}No subdomains found in SecurityTrails data.{Style.RESET_ALL}\n" 110 | else: 111 | formatted_output += (f"{Fore.RED}Error while gathering subdomains: {subdomains_response.status_code}{Style.RESET_ALL}\n") 112 | 113 | return formatted_output 114 | 115 | 116 | def api_securitytrails_check(domain): 117 | conn = sqlite3.connect('apis//api_keys.db') 118 | cursor = conn.cursor() 119 | cursor.execute("SELECT api_name, api_key FROM api_keys") 120 | rows = cursor.fetchall() 121 | 122 | api_key = None 123 | for row in rows: 124 | api_name, key = row 125 | if api_name == 'SecurityTrails': 126 | api_key = str(key) 127 | api_key = api_key.strip() 128 | api_key = re.sub(r'[\s\u200B\uFEFF]+', '', api_key) 129 | print(Fore.GREEN + 'Got SecurityTrails API key. Starting SecurityTrails scan...\n' + Style.RESET_ALL) 130 | break 131 | 132 | if not api_key: 133 | print(Fore.RED + "SecurityTrails API key not found." 
+ Style.RESET_ALL)
134 |         conn.close()
135 |         return None
136 | 
137 |     formatted_output = check_domain_securitytrails(domain, api_key)
138 |     conn.close()
139 |     print(formatted_output)
140 |     return formatted_output
141 | 
--------------------------------------------------------------------------------
/docs/dpulse-docs/docs/config.md:
--------------------------------------------------------------------------------
1 | # Configuration file
2 | 
3 | The configuration file contains the parameters that certain DPULSE modules need in order to work. Let's see which parameters it contains and how to interact with it using the DPULSE CLI.
4 | 
5 | ## Config file content
6 | 
7 | The configuration file (config.ini) is located in the 'service' folder inside the DPULSE root folder. A default config.ini file is generated on your first DPULSE start, and it looks like this:
8 | ```
9 | [HTML_REPORTING]
10 | template = modern
11 | delete_txt_files = n
12 | 
13 | [LOGGING]
14 | log_level = info
15 | 
16 | [CLI VISUAL]
17 | preview_color = red
18 | font = slant
19 | 
20 | [DORKING]
21 | dorking_delay (secs) = 2
22 | delay_step = 5
23 | full_path_to_browser = path\to\browser\for\dorking
24 | browser_mode = nonheadless
25 | 
26 | [SNAPSHOTTING]
27 | installed_browser = firefox
28 | opera_browser_path = None
29 | wayback_retries = 3
30 | wayback_req_pause = 2
31 | 
32 | [USER-AGENTS]
33 | agent_1 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3
34 | agent_2 = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/51.0.2704.103 Safari/537.36
35 | agent_3 = Mozilla/5.0 (Windows NT 6.1; WOW64; rv:46.0) Gecko/20100101 Firefox/46.0
36 | agent_4 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.115 Safari/537.36
37 | agent_5 = Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
38 | agent_6 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
39 | agent_7 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36
40 | agent_8 = Mozilla/5.0 (Linux; Android 7.0; SM-G930F Build/NRD90M) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Mobile Safari/537.36
41 | agent_9 = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/60.0.3112.90 Safari/537.36
42 | agent_10 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36
43 | agent_11 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36
44 | agent_12 = Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.36
45 | agent_13 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
46 | agent_14 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
47 | agent_15 = Mozilla/5.0 (Linux; Android 8.0; SM-G960F Build/R16NW) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Mobile Safari/537.36
48 | agent_16 = Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.84 Safari/537.36
49 | agent_17 = Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36
50 | agent_18 = Mozilla/5.0 (Macintosh; Intel Mac OS X 10_12_6) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36
51 | agent_19 = Mozilla/5.0 (Windows NT 6.3; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/61.0.3163.100 Safari/537.36
52 | agent_20 = Mozilla/5.0 (Linux; Android 7.1.2; SM-G955F Build/N2G48H) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/59.0.3071.125 Mobile Safari/537.36
53 | 
54 | [PROXIES]
55 | proxies_file_path = NONE
56 | ```
57 | 
58 | As you can see, the config file is built from sections, each of which represents a separate DPULSE function. Let's describe these sections and parameters:
59 | 
60 | | SECTION | PARAMETER | POSSIBLE VALUES | COMMENT |
61 | | ------------- | ------------- | ------------- | ------------- |
62 | | [HTML_REPORTING] | template | modern / legacy | Determines which HTML report template is used when creating the report. Modern offers more features such as analytics, graphs and interactive elements, while legacy is no longer supported but may still be a convenient choice for some |
63 | | [HTML_REPORTING] | delete_txt_files | y / n | The modern HTML report template embeds the content of the robots.txt and sitemap.xml files in text boxes, so you may not need them as separate .txt files. 'y' makes DPULSE delete these files from the report folder, while 'n' leaves everything as is |
64 | | [LOGGING] | log_level | See [here](https://docs.python.org/3/library/logging.html#logging-levels) | Determines how much technical information about the program's execution is logged to the journal.log file |
65 | | [CLI VISUAL] | preview_color | See [here](https://pypi.org/project/colorama/) | Determines the color of the DPULSE ASCII art preview |
66 | | [CLI VISUAL] | font | Any ASCII art font name supported by the CLI banner renderer (default: slant) | Determines the font of the DPULSE ASCII art preview |
67 | | [DORKING] | dorking_delay (secs) | Any integer value >=0 | Determines how long the browser pauses between dorks |
68 | | [DORKING] | delay_step | Any integer value >0 | Determines how many dorks the browser handles before the delay is applied |
69 | | [DORKING] | full_path_to_browser | Full path to your browser's executable file with \ symbol as a separator | Determines which browser is used for Dorking |
70 | | [DORKING] | browser_mode | headless / nonheadless | Sets the browser mode used during the Dorking process (headless means the browser window is not opened; nonheadless opens a browser window for every new dork, which actually gives better results for bypassing TOS and Captcha checks) |
71 | | [SNAPSHOTTING] | installed_browser | Name of a browser installed on your system (firefox by default) | Determines which browser is used for screenshot snapshotting (see opera_browser_path below if you choose Opera) |
72 | | [SNAPSHOTTING] | opera_browser_path | Full path to your Opera.exe / None | Enter your Opera.exe path only if you decided to use Opera for screenshot snapshotting; in other cases leave it as None |
73 | | [SNAPSHOTTING] | wayback_retries | Any integer value >0 | Determines how many retries DPULSE makes before abandoning an inaccessible Wayback link |
74 | | [SNAPSHOTTING] | wayback_req_pause | Any integer value >0 | Determines how many seconds DPULSE waits between retries to an inaccessible Wayback link |
75 | | [USER-AGENTS] | agent_N | Any valid user-agent string | User-agents are used to try to bypass TOS and Captcha checks when Dorking a domain |
76 | | [PROXIES] | proxies_file_path | Full path to your proxies .txt file, with // symbols as the path separator | Determines the path to a .txt file that contains a list of proxies (one proxy per row) |
77 | 
78 | ## Editing configuration file
79 | 
80 | The first step in editing the configuration file is the DPULSE main menu. Here you should find the 2nd menu item and select it like this:
81 | 
82 | ![config1](https://github.com/user-attachments/assets/d4eda335-102c-4dc6-ab5d-206ac01202d8)
83 | 
84 | The Settings menu will then appear. Here you will find two menu items related to the config. The first is "Print current config file" and the second is "Edit config file". You can see them in the image below:
85 | 
86 | ![config2](https://github.com/user-attachments/assets/035e5a94-ca5f-43ca-89c7-8fca36048243)
87 | 
88 | If you select "Print current config file", you will see the config file's content in the DPULSE CLI, just like this:
89 | 
90 | ![config3](https://github.com/user-attachments/assets/a86ee852-0b2c-4c83-9a48-bca7499c4671)
91 | 
92 | If you select "Edit config file", you will see the current config file's content and be prompted to enter the section and parameter to update, and then a new value for it:
93 | 
94 | ![config4](https://github.com/user-attachments/assets/b522f2d8-e05e-43a5-968e-b0306ad1de2e)
95 | 
96 | 
97 | 
--------------------------------------------------------------------------------
/datagather_modules/networking_processor.py:
--------------------------------------------------------------------------------
1 | import sys
2 | sys.path.append('service')
3 | from logs_processing import logging
4 | from colorama import Fore, Style  # imported outside the try block so Fore/Style exist in the except handler below
5 | 
6 | try:
7 |     import dns.resolver
8 |     import ssl
9 |     import socket
10 |     import requests
11 |     import xml.etree.ElementTree as ET
12 |     import builtwith
13 | except ImportError as e:
14 |     print(Fore.RED + "Import error appeared. Reason: {}".format(e) + Style.RESET_ALL)
15 |     sys.exit()
16 | 
17 | def get_dns_info(short_domain, report_file_extension):
18 |     try:
19 |         logging.info('DNS INFO GATHERING: OK')
20 |         mx_list = []
21 |         mx_records = dns.resolver.resolve(short_domain, 'MX')
22 |         for record in mx_records:
23 |             mx_list.append(record.exchange)
24 |         if not mx_list:
25 |             mx_list.append('MX records were not gathered')
26 |         # Both branches returned the same joined string, so they are merged here
27 |         if report_file_extension in ('xlsx', 'pdf'):
28 |             return ', '.join(map(str, mx_list))
29 | 
30 |     except dns.resolver.NoAnswer as error_noans:
31 |         print(Fore.RED + "No answer from domain about MX records. See journal for details")
32 |         logging.error(f'DNS INFO GATHERING: ERROR. REASON: {error_noans}')
33 |         return 'No information about MX records was gathered'
34 |     except dns.resolver.Timeout as error_timeout:
35 |         print(Fore.RED + "Timeout while getting MX records. See journal for details")
36 |         logging.error(f'DNS INFO GATHERING: ERROR. 
REASON: {error_timeout}') 37 | return 'No information about MX records was gathered' 38 | 39 | def get_ssl_certificate(short_domain, port=443): 40 | try: 41 | logging.info('SSL CERTIFICATE GATHERING: OK') 42 | context = ssl.create_default_context() 43 | context.minimum_version = ssl.TLSVersion.TLSv1_2 44 | conn = socket.create_connection((short_domain, port)) 45 | sock = context.wrap_socket(conn, server_hostname=short_domain) 46 | cert = sock.getpeercert() 47 | issuer = cert['issuer'][0][0][1] 48 | subject = cert['subject'][0][0][1] 49 | notBefore = cert['notBefore'] 50 | notAfter = cert['notAfter'] 51 | commonName = str(cert['issuer'][2][0][1]) + ', version: ' + str(cert['version']) 52 | serialNumber = cert['serialNumber'] 53 | return issuer, subject, notBefore, notAfter, commonName, serialNumber 54 | except Exception as e: 55 | print(Fore.RED + "Error while gathering info about SSL certificate. See journal for details") 56 | logging.error(f'SSL CERTIFICATE GATHERING: ERROR. REASON: {e}') 57 | issuer = subject = notBefore = notAfter = commonName = serialNumber = "No information about SSL certificate was gathered" 58 | return issuer, subject, notBefore, notAfter, commonName, serialNumber 59 | 60 | def query_internetdb(ip, report_file_extension): 61 | try: 62 | logging.info('INTERNETDB DATA GATHERING: OK') 63 | url = f"https://internetdb.shodan.io/{ip}" 64 | response = requests.get(url) 65 | if response.status_code == 200: 66 | data = response.json() 67 | ports = data.get("ports", []) 68 | hostnames = data.get("hostnames", []) 69 | cpes = data.get("cpes", []) 70 | tags = data.get("tags", []) 71 | vulns = data.get("vulns", []) 72 | if not ports: 73 | ports = ['Open ports were not found'] 74 | if not hostnames: 75 | hostnames = ['Hostnames were not found'] 76 | if not cpes: 77 | cpes = ['CPEs were not found'] 78 | if not tags: 79 | tags = ['Tags were not found'] 80 | if not vulns: 81 | vulns = ['Vulnerabilities were not found'] 82 | if report_file_extension == 'pdf' or report_file_extension == 'html': 83 | return ports, hostnames, cpes, tags, vulns 84 | elif report_file_extension == 'xlsx': 85 | return ports, hostnames, cpes, tags, vulns 86 | else: 87 | print(Fore.RED + "No information was found on InternetDB" + Style.RESET_ALL) 88 | ports = hostnames = cpes = tags = vulns = ["No info about this web resource on InternetDB"] 89 | return ports, hostnames, cpes, tags, vulns 90 | except Exception as e: 91 | print(Fore.RED + "No information was found on InternetDB due to some error. See journal for details" + Style.RESET_ALL) 92 | ports = hostnames = cpes = tags = vulns = ["No info about this web resource on InternetDB"] 93 | logging.error(f'INTERNETDB DATA GATHERING: ERROR. REASON: {e}') 94 | return ports, hostnames, cpes, tags, vulns 95 | 96 | 97 | def get_robots_txt(url, robots_path): 98 | try: 99 | logging.info('ROBOTS.TXT EXTRACTION: OK') 100 | if not url.startswith('http'): 101 | url = 'http://' + url 102 | robots_url = url + '/robots.txt' 103 | response = requests.get(robots_url) 104 | if response.status_code == 200: 105 | with open(robots_path, 'w') as f: 106 | f.write(response.text) 107 | return 'File "robots.txt" was extracted to text file in report folder' 108 | else: 109 | return 'File "robots.txt" was not found' 110 | except Exception as e: 111 | print(Fore.RED + 'robots.txt file was not extracted due to some error. See journal for details') 112 | logging.error(f'ROBOTS.TXT EXTRACTION: ERROR. 
REASON: {e}')
113 | return 'File "robots.txt" was not found'
114 |
115 | def get_sitemap_xml(url, sitemap_path):
116 | try:
117 | logging.info('SITEMAP.XML EXTRACTION: OK')
118 | if not url.startswith('http'):
119 | url = 'http://' + url
120 | sitemap_url = url + '/sitemap.xml'
121 | response = requests.get(sitemap_url)
122 | if len(response.text) > 0:
123 | if response.status_code == 200:
124 | with open(sitemap_path, 'w') as f:
125 | f.write(response.text)
126 | return 'File "sitemap.xml" was extracted to text file in report folder'
127 | else:
128 | return 'File "sitemap.xml" was not found'
129 | else:
130 | with open(sitemap_path, 'w') as f:
131 | f.write('0')
132 | print(Fore.RED + "Error while gathering sitemap.xml. Probably it's unreachable")
133 | return 'File "sitemap.xml" was not found'
134 | except Exception as e:
135 | print(Fore.RED + "Error while gathering sitemap.xml. See journal for details")
136 | logging.error(f'SITEMAP.XML EXTRACTION: ERROR. REASON: {e}')
137 | return 'Error occurred during sitemap.xml gathering'
138 |
139 | def extract_links_from_sitemap(sitemap_links_path, sitemap_path):
140 | try:
141 | logging.info('SITEMAP.XML LINKS EXTRACTION: OK')
142 | tree = ET.parse(sitemap_path)
143 | root = tree.getroot()
144 | links = [elem.text for elem in root.iter('{http://www.sitemaps.org/schemas/sitemap/0.9}loc')]
145 | with open(sitemap_links_path, 'w') as f:
146 | for link in links:
147 | f.write(f"{link}\n")
148 | return 'Links from "sitemap.txt" were successfully parsed'
149 | except (ET.ParseError, FileNotFoundError) as e:
150 | print(Fore.RED + "Links from sitemap.txt were not parsed. See journal for details")
151 | logging.error(f'SITEMAP.XML LINKS EXTRACTION: ERROR. REASON: {e}')
152 | return 'Links from "sitemap.txt" were not parsed'
153 |
154 | def get_technologies(url):
155 | try:
156 | logging.info('WEB-TECHNOLOGIES GATHERING: OK')
157 | tech = builtwith.parse(url)
158 | web_servers = tech.get('web-servers', [])
159 | cms = tech.get('cms', [])
160 | programming_languages = tech.get('programming-languages', [])
161 | web_frameworks = tech.get('web-frameworks', [])
162 | analytics = tech.get('analytics', [])
163 | javascript_frameworks = tech.get('javascript-frameworks', [])
164 | if not web_servers:
165 | web_servers = ['Web-servers were not found']
166 | if not cms:
167 | cms = ['CMS were not found']
168 | if not programming_languages:
169 | programming_languages = ['Used programming languages were not determined']
170 | if not web_frameworks:
171 | web_frameworks = ['Used web frameworks were not determined']
172 | if not analytics:
173 | analytics = ['Used analytics services were not determined']
174 | if not javascript_frameworks:
175 | javascript_frameworks = ['Used JS frameworks were not determined']
176 | return web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks
177 | except Exception as e:
178 | web_servers = cms = programming_languages = web_frameworks = analytics = javascript_frameworks = ['Found nothing related to web-technologies due to some error']
179 | print(Fore.RED + "Error when gathering info about web technologies. See journal for details")
180 | logging.error(f'WEB-TECHNOLOGIES GATHERING: ERROR. 
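# get_technologies() above leans on builtwith.parse(), which returns a dict
# keyed by category ('web-servers', 'cms', 'javascript-frameworks', ...) with
# lists of detected names; absent categories are simply missing keys, which is
# why every .get() above defaults to []. A quick standalone probe (network
# access required; output depends on the target site):
import builtwith

tech = builtwith.parse('https://example.com')
print(tech.get('web-servers', []), tech.get('javascript-frameworks', []))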
REASON: {e}') 181 | return web_servers, cms, programming_languages, web_frameworks, analytics, javascript_frameworks 182 | -------------------------------------------------------------------------------- /reporting_modules/html_report_creation.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import os 3 | from jinja2 import Environment, FileSystemLoader 4 | from colorama import Fore, Style 5 | 6 | sys.path.append('service') 7 | sys.path.append('service//pdf_report_templates') 8 | sys.path.append('apis') 9 | 10 | from logs_processing import logging 11 | import db_processing as db 12 | import files_processing as fp 13 | from api_hudsonrock import hudsonrock_html_prep 14 | from api_virustotal import virustotal_html_prep 15 | from api_securitytrails import securitytrails_html_prep 16 | from config_processing import read_config 17 | 18 | def generate_report(data, output_file, template_path): 19 | env = Environment(loader=FileSystemLoader('.')) 20 | template = env.get_template(template_path) 21 | html_output = template.render(data) 22 | with open(output_file, 'w', encoding='utf-8') as f: 23 | f.write(html_output) 24 | return True 25 | 26 | def report_assembling(short_domain, url, case_comment, data_array, report_info_array, pagesearch_ui_mark, end, snapshotting_ui_mark): 27 | try: 28 | ip = data_array[0] 29 | res = data_array[1] 30 | mails = data_array[2] 31 | subdomains = data_array[3] 32 | subdomains_amount = data_array[4] 33 | social_medias = data_array[5] 34 | subdomain_mails = data_array[6] 35 | subdomain_ip = data_array[8] 36 | issuer = data_array[9] 37 | subject = data_array[10] 38 | notBefore = data_array[11] 39 | notAfter = data_array[12] 40 | commonName = data_array[13] 41 | serialNumber = data_array[14] 42 | mx_records = data_array[15] 43 | robots_txt_result = data_array[16] 44 | sitemap_xml_result = data_array[17] 45 | sitemap_links_status = data_array[18] 46 | web_servers = data_array[19] 47 | cms = data_array[20] 48 | programming_languages = data_array[21] 49 | web_frameworks = data_array[22] 50 | analytics = data_array[23] 51 | javascript_frameworks = data_array[24] 52 | ports = data_array[25] 53 | hostnames = data_array[26] 54 | cpes = data_array[27] 55 | tags = data_array[28] 56 | vulns = data_array[29] 57 | common_socials = data_array[30] 58 | total_socials = data_array[31] 59 | ps_emails_return = data_array[32] 60 | accessible_subdomains = data_array[33] 61 | emails_amount = data_array[34] 62 | files_counter = data_array[35] 63 | cookies_counter = data_array[36] 64 | api_keys_counter = data_array[37] 65 | website_elements_counter = data_array[38] 66 | exposed_passwords_counter = data_array[39] 67 | total_links_counter = data_array[40] 68 | accessed_links_counter = data_array[41] 69 | keywords_messages_list = data_array[42] 70 | dorking_status = data_array[43] 71 | dorking_file_path = data_array[44] 72 | virustotal_output = data_array[45] 73 | securitytrails_output = data_array[46] 74 | hudsonrock_output = data_array[47] 75 | ps_string = data_array[48] 76 | total_ports = data_array[49] 77 | total_ips = data_array[50] 78 | total_vulns = data_array[51] 79 | casename = report_info_array[0] 80 | db_casename = report_info_array[1] 81 | db_creation_date = report_info_array[2] 82 | report_folder = report_info_array[3] 83 | report_ctime = report_info_array[6] 84 | api_scan_db = report_info_array[7] 85 | used_api_flag = report_info_array[8] 86 | 87 | hudsonrock_output = hudsonrock_html_prep(hudsonrock_output) 88 | virustotal_output 
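# generate_report() above is a thin Jinja2 wrapper: load a template from disk,
# render it with a context dict, write the HTML out. The sketch below shows the
# same render cycle with a throwaway template; 'demo_template.html' is a
# hypothetical file name, not the shipped modern_report_template.html.
from jinja2 import Environment, FileSystemLoader

env = Environment(loader=FileSystemLoader('.'))
template = env.get_template('demo_template.html')      # assumed to exist in CWD
print(template.render({'sh_domain': 'example.com'}))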
= virustotal_html_prep(virustotal_output) 89 | securitytrails_output = securitytrails_html_prep(securitytrails_output) 90 | 91 | if len(ps_emails_return) > 0: 92 | subdomain_mails += ps_emails_return 93 | subdomain_mails = list(set(subdomain_mails)) 94 | subdomain_mails_cleaned = [] 95 | substrings = ['m=Base64', 'Ë','Á','Æ','Å','Ä','Ò','Á','ó','ð','É','ë','â'] 96 | for substring in substrings: 97 | if any(substring in s for s in subdomain_mails): 98 | subdomain_mails.remove(next(s for s in subdomain_mails if substring in s)) 99 | for email in subdomain_mails: 100 | new_emails = email.split(', ') 101 | subdomain_mails_cleaned.extend(new_emails) 102 | else: 103 | subdomain_mails = list(set(subdomain_mails)) 104 | subdomain_mails_cleaned = [] 105 | substrings = ['m=Base64', 'Ë','Á','Æ','Å','Ä','Ò','Á','ó','ð','É','ë','â'] 106 | for substring in substrings: 107 | if any(substring in s for s in subdomain_mails): 108 | subdomain_mails.remove(next(s for s in subdomain_mails if substring in s)) 109 | for email in subdomain_mails: 110 | new_emails = email.split(', ') 111 | subdomain_mails_cleaned.extend(new_emails) 112 | 113 | total_mails = len(subdomain_mails_cleaned) 114 | pdf_templates_path = 'service//pdf_report_templates' 115 | config_values = read_config() 116 | delete_txt_files = config_values['delete_txt_files'] 117 | template_path = pdf_templates_path + '//modern_report_template.html' 118 | dorking_results_path = report_folder + '//04-dorking_results.txt' 119 | if os.path.isfile(dorking_results_path): 120 | with open(dorking_results_path, 'r') as f: 121 | add_dsi = f.read() 122 | else: 123 | add_dsi = 'Dorking mode was not enabled so there is no results to see' 124 | 125 | robots_content, sitemap_content, sitemap_links_content, dorking_content = fp.get_db_columns(report_folder) 126 | 127 | context = {'sh_domain': short_domain, 'full_url': url, 'ip_address': ip, 'registrar': res['registrar'], 128 | 'creation_date': res['creation_date'], 'expiration_date': res['expiration_date'], 129 | 'name_servers': ', '.join(res['name_servers']), 'org': res['org'], 130 | 'mails': mails, 'subdomain_mails': subdomain_mails_cleaned, 'subdomain_socials': social_medias, 131 | 'subdomain_ip': subdomain_ip, 132 | 'subdomains': subdomains, 'fb_links': common_socials['Facebook'], 133 | 'tw_links': common_socials['Twitter'], 'inst_links': common_socials['Instagram'], 134 | 'tg_links': common_socials['Telegram'], 'tt_links': common_socials['TikTok'], 135 | 'li_links': common_socials['LinkedIn'], 'vk_links': common_socials['VKontakte'], 136 | 'yt_links': common_socials['YouTube'], 'wc_links': common_socials['WeChat'], 137 | 'ok_links': common_socials['Odnoklassniki'], 'xcom_links': common_socials['X.com'], 'robots_txt_result': robots_txt_result, 138 | 'sitemap_xml_result': sitemap_xml_result, 139 | 'sitemap_links': sitemap_links_status, 'web_servers': web_servers, 'cms': cms, 140 | 'programming_languages': programming_languages, 'web_frameworks': web_frameworks, 141 | 'analytics': analytics, 142 | 'javascript_frameworks': javascript_frameworks, 143 | 'ctime': report_ctime, 'a_tsf': subdomains_amount, 'mx_records': mx_records, 'issuer': issuer, 144 | 'subject': subject, 'notBefore': notBefore, 'notAfter': notAfter, 145 | 'commonName': commonName, 'serialNumber': serialNumber, 'ports': ports, 'hostnames': hostnames, 146 | 'cpes': cpes, 147 | 'tags': tags, 'vulns': vulns, 'a_tsm': total_socials, 'pagesearch_ui_mark': pagesearch_ui_mark, 148 | 'dorking_status': dorking_status, 149 | 'add_dsi': add_dsi, 'ps_s': 
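# The mojibake filter above drops at most one offending address per marker
# substring (each next() call removes a single match). An exhaustive variant
# can be written as one comprehension; this is a sketch of the alternative
# with toy data, not the shipped code:
substrings = ['m=Base64', 'Ë', 'Á', 'Æ']
subdomain_mails = ['ok@example.com', 'brokenÁ@example.com', 'alsoÁbad@example.com']
cleaned = [s for s in subdomain_mails if not any(m in s for m in substrings)]
print(cleaned)   # ['ok@example.com'], both corrupted entries removed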
accessible_subdomains, 'ps_e': emails_amount, 'ps_f': files_counter, 'ps_c': cookies_counter, 'ps_a': api_keys_counter, 150 | 'ps_w': website_elements_counter, 'ps_p': exposed_passwords_counter, 'ss_l': total_links_counter, 'ss_a': accessed_links_counter, 'hudsonrock_output': hudsonrock_output, "snapshotting_ui_mark": snapshotting_ui_mark, 151 | 'virustotal_output': virustotal_output, 'securitytrails_output': securitytrails_output, 'ps_string': ps_string, 'a_tops': total_ports, 152 | 'a_temails': total_mails, 'a_tips': total_ips, 'a_tpv': total_vulns, 'robots_content': robots_content, 'sitemap_xml_content': sitemap_content, 'sitemap_txt_content': sitemap_links_content} 153 | 154 | html_report_name = report_folder + '//' + casename 155 | if generate_report(context, html_report_name, template_path): 156 | print(Fore.GREEN + "HTML report for {} case was created at {}".format(short_domain, report_ctime) + Style.RESET_ALL) 157 | print(Fore.GREEN + f"Scan elapsed time: {end}" + Style.RESET_ALL) 158 | pdf_blob = fp.get_blob(html_report_name) 159 | db.insert_blob('HTML', pdf_blob, db_casename, db_creation_date, case_comment, robots_content, sitemap_content, sitemap_links_content, dorking_content, api_scan_db) 160 | 161 | if delete_txt_files.lower() == 'y': 162 | files_to_remove = [ 163 | '04-dorking_results.txt', 164 | '03-sitemap_links.txt', 165 | '02-sitemap.txt', 166 | '01-robots.txt' 167 | ] 168 | for file in files_to_remove: 169 | file_path = os.path.join(report_folder, file) 170 | if os.path.exists(file_path): 171 | os.remove(file_path) 172 | elif delete_txt_files.lower() == 'n': 173 | pass 174 | 175 | except Exception as e: 176 | print(Fore.RED + 'Unable to create HTML report. See journal for details') 177 | logging.error(f'HTML REPORT CREATION: ERROR. REASON: {e}') 178 | -------------------------------------------------------------------------------- /service/db_processing.py: -------------------------------------------------------------------------------- 1 | from colorama import Fore, Style 2 | import os 3 | import sqlite3 4 | import sys 5 | from rich import box 6 | from rich.table import Table 7 | from rich.console import Console 8 | 9 | sys.path.append('apis//api_keys.db') 10 | 11 | console = Console() 12 | 13 | def db_connect(): 14 | sqlite_connection = sqlite3.connect('report_storage.db') 15 | cursor = sqlite_connection.cursor() 16 | return cursor, sqlite_connection 17 | 18 | def check_rsdb_presence(db_path): 19 | if not os.path.exists(db_path): 20 | print(Fore.RED + "Report storage database was not found. 
DPULSE will create it in a second" + Style.RESET_ALL) 21 | return False 22 | else: 23 | return True 24 | 25 | def db_creation(db_path): 26 | cursor, sqlite_connection = db_connect() 27 | create_table_sql = """ 28 | CREATE TABLE "report_storage" ( 29 | "id" INTEGER NOT NULL UNIQUE, 30 | "report_file_extension" TEXT NOT NULL, 31 | "report_content" BLOB NOT NULL, 32 | "comment" TEXT NOT NULL, 33 | "target" TEXT NOT NULL, 34 | "creation_date" INTEGER NOT NULL, 35 | "dorks_results" TEXT, 36 | "robots_text" TEXT, 37 | "sitemap_text" TEXT, 38 | "sitemap_file" TEXT, 39 | "api_scan" TEXT, 40 | PRIMARY KEY("id" AUTOINCREMENT) 41 | ); 42 | """ 43 | cursor.execute(create_table_sql) 44 | sqlite_connection.commit() 45 | sqlite_connection.close() 46 | 47 | def db_select(): 48 | cursor, sqlite_connection = db_connect() 49 | if_rows = "SELECT * FROM report_storage" 50 | cursor.execute(if_rows) 51 | rows = cursor.fetchall() 52 | data_presence_flag = False 53 | if rows: 54 | try: 55 | select_query = "SELECT creation_date, report_file_extension, target, id, comment, dorks_results, robots_text, sitemap_text, sitemap_file, api_scan FROM report_storage;" 56 | cursor.execute(select_query) 57 | records = cursor.fetchall() 58 | table = Table(title="[white on magenta]DATABASE CONTENT[/white on magenta]", show_lines=True, border_style="magenta", box=box.ROUNDED) 59 | table.add_column("ID", style="cyan", justify="center") 60 | table.add_column("Target", style="white", justify="center") 61 | table.add_column("Extension", style="white", justify="center") 62 | table.add_column("Comment", style="white", justify="center") 63 | table.add_column("Created", style="white", justify="center") 64 | table.add_column("Dorking", style="white", justify="center") 65 | table.add_column("robots.txt", style="white", justify="center") 66 | table.add_column("sitemap.xml", style="white", justify="center") 67 | table.add_column("API scan", style="white", justify="center") 68 | 69 | for row in records: 70 | dorks_presence = "None" 71 | robots_presence = "None" 72 | sitemap_presence = "None" 73 | if row[5] and len(str(row[5])) > 1: 74 | dorks_presence = "In DB" 75 | if row[6] and len(str(row[6])) > 1: 76 | robots_presence = "In DB" 77 | if row[7] and len(str(row[7])) > 1: 78 | sitemap_presence = "In DB" 79 | table.add_row( 80 | str(row[3]), 81 | str(row[2]), 82 | str(row[1]), 83 | str(row[4]), 84 | str(row[0]), 85 | dorks_presence, 86 | robots_presence, 87 | sitemap_presence, 88 | str(row[9]) 89 | ) 90 | data_presence_flag = True 91 | console.print(table) 92 | except sqlite3.Error as e: 93 | print(Fore.RED + "Failed to see storage database's content. 
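# Typical bootstrap flow for the report storage database, combining the helpers
# above (a sketch, assuming the service folder is on sys.path so this module
# imports as db_processing and the working directory is the DPULSE root):
import db_processing as db

if not db.check_rsdb_presence('report_storage.db'):
    db.db_creation('report_storage.db')
cursor, conn, has_rows = db.db_select()   # prints the rich table when rows exist
conn.close()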
Reason: {}".format(e)) 94 | sqlite_connection.close() 95 | data_presence_flag = False 96 | else: 97 | print(Fore.RED + 'No data found in report storage database') 98 | sqlite_connection.close() 99 | data_presence_flag = False 100 | return cursor, sqlite_connection, data_presence_flag 101 | 102 | def db_select_silent(): 103 | cursor, sqlite_connection = db_connect() 104 | if_rows = "SELECT * FROM report_storage" 105 | cursor.execute(if_rows) 106 | rows = cursor.fetchall() 107 | if rows: 108 | try: 109 | select_query = "SELECT creation_date, report_file_extension, target, id, comment, dorks_results, robots_text, sitemap_text, sitemap_file, api_scan FROM report_storage;" 110 | cursor.execute(select_query) 111 | except sqlite3.Error as e: 112 | sqlite_connection.close() 113 | else: 114 | sqlite_connection.close() 115 | return cursor, sqlite_connection 116 | 117 | def db_report_recreate(extracted_folder_name, id_to_extract): 118 | cursor, sqlite_connection = db_select_silent() 119 | cursor.execute("SELECT report_content FROM report_storage WHERE id=?", (id_to_extract,)) 120 | try: 121 | blob = cursor.fetchone() 122 | if blob is not None: 123 | blob_data = blob[0] 124 | cursor.execute("SELECT report_file_extension FROM report_storage WHERE id=?", (id_to_extract,)) 125 | report_file_extension = (cursor.fetchone())[0] 126 | if str(report_file_extension).upper() == 'XLSX': 127 | with open(extracted_folder_name + '//report_extracted.xlsx', 'wb') as file: 128 | file.write(blob_data) 129 | elif str(report_file_extension).upper() == 'HTML': 130 | with open(extracted_folder_name + '//report_extracted.html', 'wb') as file: 131 | file.write(blob_data) 132 | cursor.execute("SELECT dorks_results FROM report_storage WHERE id=?", (id_to_extract,)) 133 | dorks_results = (cursor.fetchone())[0] 134 | with open(extracted_folder_name + '//dorks_extracted.txt', 'w') as file: 135 | file.write(dorks_results) 136 | cursor.execute("SELECT robots_text FROM report_storage WHERE id=?", (id_to_extract,)) 137 | robots_results = (cursor.fetchone())[0] 138 | with open(extracted_folder_name + '//robots_extracted.txt', 'w') as file: 139 | file.write(robots_results) 140 | cursor.execute("SELECT sitemap_file FROM report_storage WHERE id=?", (id_to_extract,)) 141 | sitemap_results = (cursor.fetchone())[0] 142 | with open(extracted_folder_name + '//sitemap_extracted.txt', 'w') as file: 143 | file.write(sitemap_results) 144 | cursor.execute("SELECT sitemap_text FROM report_storage WHERE id=?", (id_to_extract,)) 145 | sitemap_links_results = (cursor.fetchone())[0] 146 | with open(extracted_folder_name + '//sitemap_links_extracted.txt', 'w') as file: 147 | file.write(sitemap_links_results) 148 | print(Fore.GREEN + "\nReport was successfully recreated from report storage database and saved in {} folder".format(extracted_folder_name)) 149 | except Exception as e: 150 | print(Fore.RED + "Error appeared when recreating report from database. 
Reason: {}".format(e)) 151 | 152 | def insert_blob(report_file_type, pdf_blob, db_casename, creation_date, case_comment, robots, sitemap_xml, sitemap_links, dorking_results, api_scan_db): 153 | try: 154 | sqlite_connection = sqlite3.connect('report_storage.db') 155 | cursor = sqlite_connection.cursor() 156 | print(Fore.GREEN + "Connected to report storage database") 157 | apis = [api for api in ['VirusTotal', 'SecurityTrails', 'HudsonRock'] if api in api_scan_db] 158 | if len(apis) == 0: 159 | api_scan_insert = 'No' 160 | elif len(apis) == 1: 161 | api_scan_insert = apis[0] 162 | else: 163 | api_scan_insert = ', '.join(apis[:-1]) + ' and ' + apis[-1] 164 | 165 | sqlite_insert_blob_query = """INSERT INTO report_storage 166 | (report_file_extension, report_content, creation_date, target, comment, sitemap_file, robots_text, sitemap_text, dorks_results, api_scan) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)""" 167 | 168 | data_tuple = (report_file_type, pdf_blob, creation_date, db_casename, case_comment, sitemap_xml, robots, sitemap_links, dorking_results, api_scan_insert) 169 | cursor.execute(sqlite_insert_blob_query, data_tuple) 170 | sqlite_connection.commit() 171 | print(Fore.GREEN + "Scanning results are successfully saved in report storage database") 172 | cursor.close() 173 | except sqlite3.Error as e: 174 | print(Fore.RED + "Failed to insert scanning results in report storage database. Reason: {}".format(e)) 175 | finally: 176 | if sqlite_connection: 177 | sqlite_connection.close() 178 | print(Fore.GREEN + "Database connection is successfully closed") 179 | 180 | def check_api_keys(used_api_flag): 181 | for key in used_api_flag: 182 | conn = sqlite3.connect('apis//api_keys.db') 183 | cursor = conn.cursor() 184 | cursor.execute("SELECT api_key FROM api_keys WHERE id = ?", (key,)) 185 | result = cursor.fetchone() 186 | if result[0] == 'YOUR_API_KEY': 187 | return False 188 | return True 189 | 190 | def select_api_keys(mode): 191 | conn = sqlite3.connect('apis//api_keys.db') 192 | cursor = conn.cursor() 193 | cursor.execute("SELECT id, api_name, api_key, limitations FROM api_keys") 194 | rows = cursor.fetchall() 195 | console = Console() 196 | if rows: 197 | try: 198 | table = Table( 199 | title="[white on magenta]SUPPORTED API AND YOUR KEYS[/white on magenta]", 200 | show_lines=True, 201 | border_style="magenta", 202 | box=box.ROUNDED 203 | ) 204 | table.add_column("ID", style="cyan", justify="center") 205 | table.add_column("API Name", style="white", justify="center") 206 | table.add_column("API Key", style="white", justify="center") 207 | table.add_column("Limitations", style="white", justify="center") 208 | for row in rows: 209 | api_key = f"[red]{row[2]}[/red]" if row[2] == "YOUR_API_KEY" else str(row[2]) 210 | table.add_row( 211 | str(row[0]), 212 | str(row[1]), 213 | api_key, 214 | str(row[3]) 215 | ) 216 | console.print(table) 217 | except sqlite3.Error as e: 218 | print(Fore.RED + "Failed to see API keys database's content. 
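# Round-trip sketch for insert_blob() and db_report_recreate() above: store one
# HTML report blob, then rebuild the files by row id. The file paths, dates and
# the row id are illustrative, and the 'extracted' folder must already exist.
import db_processing as db

with open('report_extracted.html', 'rb') as f:    # any existing HTML report
    blob = f.read()
db.insert_blob('HTML', blob, 'example.com', '28-12-2024', 'demo case',
               'robots txt', 'sitemap xml', 'sitemap links', 'dork results',
               'VirusTotal')
db.db_report_recreate('extracted', 1)             # id 1 assumed to exist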
Reason: {}".format(e)) 219 | conn.close() 220 | else: 221 | print(Fore.RED + 'No data found in API keys database') 222 | conn.close() 223 | if mode == 'printing': 224 | conn.close() 225 | return None 226 | else: 227 | return cursor, conn 228 | -------------------------------------------------------------------------------- /dorking/dorking_handler.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import random 3 | import time 4 | import os 5 | import logging 6 | from colorama import Fore, Style 7 | import undetected_chromedriver as uc 8 | from selenium.webdriver.common.by import By 9 | from selenium.webdriver.common.keys import Keys 10 | 11 | sys.path.append('service') 12 | from logs_processing import logging 13 | from ua_rotator import user_agent_rotator 14 | from proxies_rotator import proxies_rotator 15 | from config_processing import read_config 16 | 17 | def proxy_transfer(): 18 | proxy_flag, proxies_list = proxies_rotator.get_proxies() 19 | if proxy_flag == 0: 20 | pass 21 | return proxy_flag, "" 22 | else: 23 | working_proxies = proxies_rotator.check_proxies(proxies_list) 24 | return proxy_flag, working_proxies 25 | 26 | def solid_google_dorking(query, proxy_flag, proxies_list, pages=1): 27 | result_query = [] 28 | request_count = 0 29 | try: 30 | config_values = read_config() 31 | options = uc.ChromeOptions() 32 | options.binary_location = r"{}".format(config_values['dorking_browser']) 33 | dorking_browser_mode = config_values['dorking_browser_mode'] 34 | if dorking_browser_mode.lower() == 'headless': 35 | options.add_argument("--headless=new") 36 | elif dorking_browser_mode.lower() == 'nonheadless': 37 | pass 38 | options.add_argument("--no-sandbox") 39 | options.add_argument("--disable-dev-shm-usage") 40 | options.add_argument("--disable-blink-features=AutomationControlled") 41 | options.add_argument("--disable-infobars") 42 | options.add_argument("--disable-extensions") 43 | options.add_argument(f"user-agent={user_agent_rotator.get_random_user_agent()}") 44 | if proxy_flag == 1: 45 | proxy = proxies_rotator.get_random_proxy(proxies_list) 46 | options.add_argument(f'--proxy-server={proxy["http"]}') 47 | driver = uc.Chrome(options=options) 48 | for page in range(pages): 49 | try: 50 | driver.get("https://www.google.com") 51 | time.sleep(random.uniform(2, 4)) 52 | try: 53 | accepted = False 54 | try: 55 | accept_btn = driver.find_element(By.XPATH, '//button[contains(text(), "Принять все") or contains(text(), "Accept all")]') 56 | driver.execute_script("arguments[0].click();", accept_btn) 57 | print(Fore.GREEN + 'Pressed "Accept all" button!' + Style.RESET_ALL) 58 | accepted = True 59 | time.sleep(random.uniform(2, 3)) 60 | except: 61 | pass 62 | if not accepted: 63 | iframes = driver.find_elements(By.TAG_NAME, "iframe") 64 | for iframe in iframes: 65 | driver.switch_to.frame(iframe) 66 | try: 67 | accept_btn = driver.find_element(By.XPATH, '//button[contains(text(), "Принять все") or contains(text(), "Accept all")]') 68 | driver.execute_script("arguments[0].click();", accept_btn) 69 | print(Fore.GREEN + 'Pressed "Accept all" button!' + Style.RESET_ALL) 70 | accepted = True 71 | driver.switch_to.default_content() 72 | time.sleep(random.uniform(2, 3)) 73 | break 74 | except: 75 | driver.switch_to.default_content() 76 | continue 77 | driver.switch_to.default_content() 78 | if not accepted: 79 | print(Fore.GREEN + "Google TOS button was not found. Seems good..." 
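# A stripped-down bootstrap of the browser used by solid_google_dorking()
# above (a sketch: it requires a local Chrome/Chromium install; in the real
# code binary_location comes from the DPULSE config, here it is left to
# undetected-chromedriver's autodetection):
import undetected_chromedriver as uc

options = uc.ChromeOptions()
options.add_argument("--headless=new")
options.add_argument("--no-sandbox")
driver = uc.Chrome(options=options)
driver.get("https://www.google.com")
print(driver.title)
driver.quit()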
+ Style.RESET_ALL) 80 | except Exception: 81 | print(Fore.RED + f'Error with pressing "Accept all" button. Closing...' + Style.RESET_ALL) 82 | driver.save_screenshot("consent_error.png") 83 | driver.switch_to.default_content() 84 | search_box = driver.find_element(By.NAME, "q") 85 | for char in query: 86 | search_box.send_keys(char) 87 | time.sleep(random.uniform(0.05, 0.2)) 88 | time.sleep(random.uniform(0.5, 1.2)) 89 | search_box.send_keys(Keys.RETURN) 90 | time.sleep(random.uniform(2.5, 4)) 91 | links = driver.find_elements(By.CSS_SELECTOR, 'a') 92 | for link in links: 93 | href = link.get_attribute('href') 94 | if href and href.startswith('http') and 'google.' not in href and 'webcache.googleusercontent.com' not in href: 95 | result_query.append(href) 96 | request_count += 1 97 | try: 98 | next_button = driver.find_element(By.ID, 'pnnext') 99 | next_button.click() 100 | time.sleep(random.uniform(2, 3)) 101 | except: 102 | break 103 | except Exception as e: 104 | logging.error(f'DORKING PROCESSING (SELENIUM): ERROR. REASON: {e}') 105 | continue 106 | driver.quit() 107 | if len(result_query) >= 2: 108 | del result_query[-2:] 109 | return result_query 110 | except Exception as e: 111 | logging.error(f'DORKING PROCESSING: ERROR. REASON: {e}') 112 | print(Fore.RED + "Error while running Selenium dorking. See journal for details." + Style.RESET_ALL) 113 | return [] 114 | 115 | def save_results_to_txt(folderpath, table, queries, pages=1): 116 | try: 117 | config_values = read_config() 118 | dorking_delay = int(config_values['dorking_delay (secs)']) 119 | delay_step = int(config_values['delay_step']) 120 | txt_writepath = folderpath + '//04-dorking_results.txt' 121 | total_results = [] 122 | total_dorks_amount = len(queries) 123 | with open(txt_writepath, 'w') as f: 124 | print(Fore.GREEN + "Started Google Dorking. Please, be patient, it may take some time") 125 | print(Fore.GREEN + f"{dorking_delay} seconds delay after each {delay_step} dorking requests was configured" + Style.RESET_ALL) 126 | proxy_flag, proxies_list = proxy_transfer() 127 | dorked_query_counter = 0 128 | for i, query in enumerate(queries, start=1): 129 | f.write(f"QUERY #{i}: {query}\n") 130 | try: 131 | results = solid_google_dorking(query, proxy_flag, proxies_list, pages) 132 | if not results: 133 | f.write("=> NO RESULT FOUND\n") 134 | total_results.append((query, 0)) 135 | else: 136 | total_results.append((query, len(results))) 137 | for result in results: 138 | f.write(f"=> {result}\n") 139 | except Exception as e: 140 | logging.error(f"DORKING PROCESSING: ERROR. REASON: {e}") 141 | total_results.append((query, 0)) 142 | f.write("\n") 143 | dorked_query_counter += 1 144 | print(Fore.GREEN + f" Dorking with " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{dorked_query_counter}/{total_dorks_amount}" + Style.RESET_ALL + Fore.GREEN + " dork" + Style.RESET_ALL, end="\r") 145 | print(Fore.GREEN + "\nGoogle Dorking end. 
Results successfully saved in HTML report\n" + Style.RESET_ALL)
146 | print(Fore.GREEN + f"During Google Dorking with {table.upper()}:")
147 | for query, count in total_results:
148 | if count == 0:
149 | count = 'no results'
150 | print(Fore.GREEN + f"[+] Found results for " + Fore.LIGHTCYAN_EX + f'{query}' + Fore.GREEN + ' query: ' + Fore.LIGHTRED_EX + f'{count}' + Style.RESET_ALL)
151 | else:
152 | print(Fore.GREEN + f"[+] Found results for " + Fore.LIGHTCYAN_EX + f'{query}' + Fore.GREEN + ' query: ' + Fore.LIGHTCYAN_EX + f'{count}' + Style.RESET_ALL)
153 | return f'Successfully dorked domain with {table.upper()} dorks table', txt_writepath
154 | except Exception as e:
155 | print(Fore.RED + 'Error appeared while trying to dork target. See journal for details')
156 | logging.error(f'DORKING PROCESSING: ERROR. REASON: {e}')
157 | return 'Domain dorking failed. See journal for details', txt_writepath
158 |
159 | def transfer_results_to_xlsx(table, queries, pages=10):
160 | config_values = read_config()
161 | dorking_delay = int(config_values['dorking_delay (secs)'])
162 | delay_step = int(config_values['delay_step'])
163 | print(Fore.GREEN + "Started Google Dorking. Please, be patient, it may take some time")
164 | print(Fore.GREEN + f"{dorking_delay} seconds delay after each {delay_step} dorking requests was configured" + Style.RESET_ALL)
165 | proxy_flag, proxies_list = proxy_transfer()
166 | dorked_query_counter = 0
167 | total_dorks_amount = len(queries)
168 | dorking_return_list = []
169 | for i, query in enumerate(queries, start=1):
170 | dorking_return_list.append(f"QUERY #{i}: {query}\n")
171 | results = solid_google_dorking(query, proxy_flag, proxies_list, pages) # arguments follow solid_google_dorking's (query, proxy_flag, proxies_list, pages) signature
172 | if not results:
173 | dorking_return_list.append("NO RESULT FOUND\n")
174 | else:
175 | for result in results:
176 | dorking_return_list.append(f"{result}\n")
177 | dorked_query_counter += 1
178 | dorking_return_list.append("\n")
179 | print(Fore.GREEN + f" Dorking with " + Style.RESET_ALL + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{dorked_query_counter}/{total_dorks_amount}" + Style.RESET_ALL + Fore.GREEN + " dork" + Style.RESET_ALL, end="\r")
180 | print(Fore.GREEN + "\nGoogle Dorking end. Results successfully saved in XLSX report\n" + Style.RESET_ALL)
181 | return f'Successfully dorked domain with {table.upper()} dorks table', dorking_return_list
182 |
183 | def dorks_files_check():
184 | dorks_path = 'dorking//'
185 | dorks_files = ['iot_dorking.db', 'files_dorking.db', 'basic_dorking.db', 'adminpanels_dorking.db', 'webstructure_dorking.db']
186 | dorks_files_counter = 0
187 | for dork_files in dorks_files:
188 | files_path = os.path.join(dorks_path, dork_files)
189 | if os.path.isfile(files_path):
190 | dorks_files_counter += 1
191 | else:
192 | pass
193 | if dorks_files_counter == 5:
194 | print(Fore.GREEN + "Dorks databases presence: OK" + Style.RESET_ALL)
195 | else:
196 | print(Fore.RED + "Dorks databases presence: NOT OK\nSome files may not be in folder. 
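# The dork queries themselves live in the SQLite files checked by
# dorks_files_check() above. A loader might look like the sketch below; the
# table and column names here are assumptions for illustration, not the
# shipped schema:
import sqlite3

conn = sqlite3.connect('dorking//basic_dorking.db')
rows = conn.execute("SELECT dork FROM dorks").fetchall()     # assumed schema
conn.close()
queries = [row[0].format('example.com') for row in rows]     # '{}' -> target domain
print(len(queries), 'dorks loaded')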
Please compare dorking folder with the same folder on the official repository\n" + Style.RESET_ALL) 197 | sys.exit() 198 | -------------------------------------------------------------------------------- /apis/api_hudsonrock.py: -------------------------------------------------------------------------------- 1 | import requests 2 | from colorama import Fore, Style 3 | import re 4 | 5 | def hudsonrock_html_prep(formatted_output): 6 | formatted_output = re.sub(r'\x1b\[([0-9,A-Z]{1,2}(;[0-9]{1,2})?(;[0-9]{3})?)?[m|K]?', '', formatted_output) 7 | start_marker = "=== HUDSONROCK API REPORT ===" 8 | end_marker = "[+] Email Data:" 9 | start_index = formatted_output.find(start_marker) 10 | end_index = formatted_output.find(end_marker) 11 | if start_index != -1 and end_index != -1: 12 | formatted_output = formatted_output[:start_index] + formatted_output[end_index:] 13 | return formatted_output 14 | 15 | def api_hudsonrock_get(email=None, username=None, domain=None, ip=None): 16 | base_url = "https://cavalier.hudsonrock.com/api/json/v2/osint-tools/" 17 | results = {} 18 | 19 | def make_request(url): 20 | try: 21 | response = requests.get(url) 22 | response.raise_for_status() 23 | return response.json() 24 | except requests.RequestException as e: 25 | return {'error': str(e)} 26 | 27 | if email: 28 | email_url = f"{base_url}search-by-email?email={email}" 29 | results['email'] = make_request(email_url) 30 | 31 | if username: 32 | username_url = f"{base_url}search-by-username?username={username}" 33 | results['username'] = make_request(username_url) 34 | 35 | if domain: 36 | domain_url = f"{base_url}search-by-domain?domain={domain}" 37 | results['domain'] = make_request(domain_url) 38 | 39 | urls_by_domain_url = f"{base_url}urls-by-domain?domain={domain}" 40 | results['urls_by_domain'] = make_request(urls_by_domain_url) 41 | 42 | if ip: 43 | ip_url = f"{base_url}search-by-ip?ip={ip}" 44 | results['ip'] = make_request(ip_url) 45 | 46 | return results 47 | 48 | 49 | def api_hudsonrock_check(domain, ip, email, username): 50 | results = api_hudsonrock_get(email, username, domain, ip) 51 | formatted_output = Fore.LIGHTBLUE_EX + "\n=== HUDSONROCK API REPORT ===\n" + Style.RESET_ALL 52 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] Provided Data:{Style.RESET_ALL}\n" 53 | formatted_output += f"{Fore.GREEN}Domain:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{domain}{Style.RESET_ALL}\n" 54 | formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{ip}{Style.RESET_ALL}\n" 55 | formatted_output += f"{Fore.GREEN}E-mail:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{email}{Style.RESET_ALL}\n" 56 | formatted_output += f"{Fore.GREEN}Username:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{username}{Style.RESET_ALL}\n" 57 | 58 | def format_section(title, data): 59 | nonlocal formatted_output 60 | formatted_output += f"\n{Fore.LIGHTBLUE_EX}[+] {title}:{Style.RESET_ALL}\n" 61 | if 'error' in data: 62 | formatted_output += f"{Fore.RED}Error appeared when trying to get results for {title} requests. 
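# Direct usage sketch for api_hudsonrock_get() above (the cavalier.hudsonrock.com
# endpoints it wraps are public and keyless; the lookup values are illustrative,
# and the apis folder is assumed to be on sys.path):
from api_hudsonrock import api_hudsonrock_get

results = api_hudsonrock_get(email='user@example.com', domain='example.com')
print(results.get('domain', {}).get('total', 0), 'compromised entries')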
Probably given data is incorrect.{Style.RESET_ALL}\n" 63 | return 64 | 65 | if title == 'Email Data': 66 | formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" 67 | for i, stealer in enumerate(data.get('stealers', []), 1): 68 | formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" 69 | formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" 70 | formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" 71 | formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" 72 | formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" 73 | formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" 74 | formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" 75 | formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" 76 | 77 | elif title == 'Username Data': 78 | formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" 79 | for i, stealer in enumerate(data.get('stealers', []), 1): 80 | formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" 81 | formatted_output += f"{Fore.GREEN}Stealer Family:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('stealer_family', 'Not Found')}{Style.RESET_ALL}\n" 82 | formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" 83 | formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" 84 | formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" 85 | formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" 86 | formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" 87 | formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" 88 | formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" 89 | 90 | elif title == 'Domain Data': 91 | formatted_output += f"{Fore.GREEN}Total Entries:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('total', 0)}{Style.RESET_ALL}\n" 92 | formatted_output += f"{Fore.GREEN}Total Stealers:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('totalStealers', 0)}{Style.RESET_ALL}\n" 93 | formatted_output += f"\n{Fore.GREEN}Sample Employee URLs:{Style.RESET_ALL}\n" 94 | employee_urls = data.get('data', {}).get('employees_urls', []) 95 | if employee_urls: 96 | for url_data in employee_urls[:10]: 97 | formatted_output += 
f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" 98 | formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" 99 | formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" 100 | else: 101 | formatted_output += f"{Fore.RED}No employee URLs available.{Style.RESET_ALL}\n" 102 | 103 | elif title == 'Attack Surface Data': 104 | formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" 105 | formatted_output += f"\n{Fore.GREEN}Sample Employee URLs:{Style.RESET_ALL}\n" 106 | employees = data.get('data', {}).get('employees_urls', []) 107 | if employees: 108 | for url_data in employees[:10]: 109 | formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" 110 | formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" 111 | formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" 112 | else: 113 | formatted_output += f"{Fore.RED}No employee URLs available{Style.RESET_ALL}\n" 114 | formatted_output += f"\n{Fore.GREEN}Sample Client URLs:{Style.RESET_ALL}\n" 115 | clients = data.get('data', {}).get('clients_urls', []) 116 | if clients: 117 | for url_data in clients[:10]: 118 | formatted_output += f"{Fore.GREEN}Type:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('type', 'N/A')}{Style.RESET_ALL}" 119 | formatted_output += f" {Fore.GREEN}| URL:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('url', 'N/A')}{Style.RESET_ALL}" 120 | formatted_output += f" {Fore.GREEN}| Occurrence:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{url_data.get('occurrence', 'N/A')}{Style.RESET_ALL}\n" 121 | else: 122 | formatted_output += f"{Fore.RED}No client URLs available{Style.RESET_ALL}\n" 123 | 124 | elif title == 'IP Data': 125 | formatted_output += f"{Fore.GREEN}Message:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{data.get('message', 'No message available')}{Style.RESET_ALL}\n" 126 | if data.get('stealers'): 127 | for i, stealer in enumerate(data.get('stealers', []), 1): 128 | formatted_output += f"\n{Fore.GREEN}--- STEALER {i} ---{Style.RESET_ALL}\n" 129 | formatted_output += f"{Fore.GREEN}Computer Name:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('computer_name', 'Not Found')}{Style.RESET_ALL}\n" 130 | formatted_output += f"{Fore.GREEN}OS:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('operating_system', 'Not Found')}{Style.RESET_ALL}\n" 131 | formatted_output += f"{Fore.GREEN}Date Compromised:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('date_compromised', 'Not Found')}{Style.RESET_ALL}\n" 132 | formatted_output += f"{Fore.GREEN}Malware Path:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('malware_path', 'Not Found')}{Style.RESET_ALL}\n" 133 | formatted_output += f"{Fore.GREEN}IP:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{stealer.get('ip', 'Not Found')}{Style.RESET_ALL}\n" 134 | formatted_output += f"{Fore.GREEN}Top Passwords:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_passwords', []))}{Style.RESET_ALL}\n" 135 | formatted_output += f"{Fore.GREEN}Top Logins:{Style.RESET_ALL} {Fore.LIGHTCYAN_EX}{', '.join(stealer.get('top_logins', []))}{Style.RESET_ALL}\n" 136 | formatted_output += "\n" 137 | 138 | 
if 'email' in results:
139 | format_section('Email Data', results['email'])
140 | if 'username' in results:
141 | format_section('Username Data', results['username'])
142 | if 'domain' in results:
143 | format_section('Domain Data', results['domain'])
144 | if 'urls_by_domain' in results:
145 | format_section('Attack Surface Data', results['urls_by_domain'])
146 | if 'ip' in results:
147 | format_section('IP Data', results['ip'])
148 |
149 | print(formatted_output)
150 | return formatted_output
151 |
--------------------------------------------------------------------------------
/service/cli_init.py:
--------------------------------------------------------------------------------
1 | import sys
2 | from config_processing import read_config
3 | from rich.panel import Panel
4 | from rich.table import Table
5 | from rich.layout import Layout
6 | from rich.text import Text
7 | from rich.prompt import Prompt
8 | from rich.progress import Progress, SpinnerColumn, TextColumn
9 | from rich import box
10 |
11 | try:
12 | from colorama import Fore, Back, Style
13 | from pyfiglet import Figlet
14 | from rich.console import Console
15 | except ImportError as e:
16 | print("Import error appeared. Reason: {}".format(e)) # plain print here: colorama itself may be the import that failed, so Fore is not guaranteed to exist
17 | sys.exit()
18 |
19 |
20 | class Menu:
21 | def __init__(self):
22 | self.console = Console()
23 |
24 | def welcome_menu(self):
25 | config_values = read_config()
26 | preview_style = (config_values['preview_color']).lower()
27 | wm_font = (config_values['wm_font']).lower()
28 | fig = Figlet(font=wm_font)
29 | print('\n')
30 | combined_panel = Panel(
31 | Text.assemble(
32 | (fig.renderText('DPULSE'), preview_style),
33 | ("\n", ""),
34 | ("DPULSE-CLI - v1.4 rolling - OSINT-TECHNOLOGIES\n\n", "magenta bold"),
35 | ("Visit our pages:\n", "white"),
36 | ("GitHub: ", "white"), ("https://github.com/OSINT-TECHNOLOGIES\n", "blue underline"),
37 | ("PyPi: ", "white"), ("https://pypi.org/project/dpulse/\n", "blue underline"),
38 | ("Docs: ", "white"), ("https://dpulse.readthedocs.io", "blue underline")
39 | ),
40 | title="Current version info",
41 | box=box.ROUNDED,
42 | border_style="magenta"
43 | )
44 |
45 | self.console.print(combined_panel)
46 |
47 | def print_main_menu(self):
48 | table = Table(
49 | show_header=False,
50 | box=box.ROUNDED,
51 | border_style="magenta",
52 | show_edge=False
53 | )
54 |
55 | table.add_column("Option", style="cyan", justify="right")
56 | table.add_column("Description", style="white")
57 | table.add_row("1.", "Target selection & scanning")
58 | table.add_row("2.", "General settings")
59 | table.add_row("3.", "Dorking module manager")
60 | table.add_row("4.", "Report storage DB manager")
61 | table.add_row("5.", "API modules manager")
62 | table.add_row("6.", "Help (browser will be opened!)")
63 | table.add_row("7.", "[red]Exit DPULSE[/red]")
64 |
65 | menu_panel = Panel(
66 | table,
67 | title="[white on magenta]MAIN MENU[/white on magenta]",
68 | border_style="magenta"
69 | )
70 |
71 | self.console.print("\n")
72 | self.console.print(menu_panel)
73 |
74 | def print_settings_menu(self):
75 | table = Table(
76 | show_header=False,
77 | box=box.ROUNDED,
78 | border_style="magenta",
79 | show_edge=False
80 | )
81 |
82 | table.add_column("Option", style="cyan", justify="right")
83 | table.add_column("Description", style="white")
84 |
85 | table.add_row("1.", "Print current config file")
86 | table.add_row("2.", "Edit config file")
87 | table.add_row("3.", "Clear journal content")
88 | table.add_row("4.", "[red]Return to main 
menu[/red]") 89 | 90 | menu_panel = Panel( 91 | table, 92 | title="[white on magenta]SETTINGS MENU[/white on magenta]", 93 | border_style="magenta" 94 | ) 95 | 96 | self.console.print("\n") 97 | self.console.print(menu_panel) 98 | 99 | def print_db_menu(self): 100 | table = Table( 101 | show_header=False, 102 | box=box.ROUNDED, 103 | border_style="magenta", 104 | show_edge=False 105 | ) 106 | 107 | table.add_column("Option", style="cyan", justify="right") 108 | table.add_column("Description", style="white") 109 | 110 | table.add_row("1.", "Show database content") 111 | table.add_row("2.", "Recreate report from database") 112 | table.add_row("3.", "[red]Return to main menu[/red]") 113 | 114 | menu_panel = Panel( 115 | table, 116 | title="[white on magenta]REPORTS DATABASE MANAGER[/white on magenta]", 117 | border_style="magenta" 118 | ) 119 | 120 | self.console.print("\n") 121 | self.console.print(menu_panel) 122 | 123 | def dorking_db_manager(self): 124 | table = Table( 125 | show_header=False, 126 | box=box.ROUNDED, 127 | border_style="magenta", 128 | show_edge=False 129 | ) 130 | 131 | table.add_column("Option", style="cyan", justify="right") 132 | table.add_column("Description", style="white") 133 | 134 | table.add_row("1.", "Generate custom Dorking DB") 135 | table.add_row("2.", "[red]Return to main menu[/red]") 136 | 137 | menu_panel = Panel( 138 | table, 139 | title="[white on magenta]DORKING DB MANAGER[/white on magenta]", 140 | border_style="magenta" 141 | ) 142 | 143 | self.console.print("\n") 144 | self.console.print(menu_panel) 145 | 146 | def api_manager(self): 147 | table = Table( 148 | show_header=False, 149 | box=box.ROUNDED, 150 | border_style="magenta", 151 | show_edge=False 152 | ) 153 | 154 | table.add_column("Option", style="cyan", justify="right") 155 | table.add_column("Description", style="white") 156 | 157 | table.add_row("1.", "Add API key") 158 | table.add_row("2.", "Restore reference API Keys DB") 159 | table.add_row("3.", "[red]Return to main menu[/red]") 160 | 161 | menu_panel = Panel( 162 | table, 163 | title="[white on magenta]API KEYS DB MANAGER[/white on magenta]", 164 | border_style="magenta" 165 | ) 166 | 167 | self.console.print("\n") 168 | self.console.print(menu_panel) 169 | 170 | 171 | def print_prescan_summary(short_domain, report_filetype, pagesearch_ui_mark, dorking_ui_mark, used_api_ui, case_comment, snapshotting_ui_mark): 172 | table = Table( 173 | show_header=False, 174 | box=box.ROUNDED, 175 | border_style="magenta" 176 | ) 177 | 178 | table.add_column("Parameter", style="green") 179 | table.add_column("Value", style="cyan bold") 180 | 181 | table.add_row("Determined target:", short_domain) 182 | table.add_row("Report type:", report_filetype.lower()) 183 | table.add_row("PageSearch conduction:", pagesearch_ui_mark) 184 | table.add_row("Dorking conduction:", dorking_ui_mark) 185 | table.add_row("APIs scan:", used_api_ui) 186 | table.add_row("Snapshotting conduction:", snapshotting_ui_mark) 187 | table.add_row("Case comment:", case_comment) 188 | 189 | summary_panel = Panel( 190 | table, 191 | title="[magenta]PRE-SCAN SUMMARY[/magenta]", 192 | border_style="magenta" 193 | ) 194 | 195 | Console().print("\n") 196 | Console().print(summary_panel) 197 | 198 | def print_api_db_msg(): 199 | print(Fore.GREEN + "\nYou've entered custom Dorking DB generator!\n" + Style.RESET_ALL) 200 | print(Fore.GREEN + "Remember some rules in order to successfully create your custom Dorking DB:" + Style.RESET_ALL) 201 | print(Fore.GREEN + "[1] - dork_id variable must be 
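# Rendering sketch for the Menu class above (assumes a readable DPULSE config
# providing the preview_color and wm_font keys consumed by welcome_menu(), and
# the service folder on sys.path so this module imports as cli_init):
from cli_init import Menu

menu = Menu()
menu.welcome_menu()
menu.print_main_menu()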
unique, starting with 1 and then +1 every new dork" + Style.RESET_ALL) 202 | print(Fore.GREEN + "[2] - When it comes to define domain in dork, put {} in it\n" + Style.RESET_ALL) 203 | print(Fore.GREEN + "Examples: related:{}, site:{} inurl:login and so on\n" + Style.RESET_ALL) 204 | 205 | def print_ps_cli_report(subdomains_list, accessible_subdomains, ps_emails_return, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter): 206 | if len(subdomains_list) == 0: 207 | print(Fore.GREEN + "\nDuring subdomains analysis:\n[+] Total " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{len(subdomains_list)}" + Style.RESET_ALL + Fore.GREEN + " subdomains were checked" + Style.RESET_ALL) 208 | else: 209 | print(Fore.GREEN + "\nDuring subdomains analysis:\n[+] Total " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{len(subdomains_list)}" + Style.RESET_ALL + Fore.GREEN + " subdomains were checked" + Style.RESET_ALL) 210 | if accessible_subdomains == 0: 211 | print(Fore.GREEN + "[+] Among them " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{accessible_subdomains}" + Style.RESET_ALL + Fore.GREEN + " subdomains were accessible" + Style.RESET_ALL) 212 | else: 213 | print(Fore.GREEN + "[+] Among them " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{accessible_subdomains}" + Style.RESET_ALL + Fore.GREEN + " subdomains were accessible" + Style.RESET_ALL) 214 | if len(ps_emails_return) == 0: 215 | print(Fore.GREEN + "[+] In result, " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{len(ps_emails_return)}" + Style.RESET_ALL + Fore.GREEN + " unique e-mail addresses were found" + Style.RESET_ALL) 216 | else: 217 | print(Fore.GREEN + "[+] In result, " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{len(ps_emails_return)}" + Style.RESET_ALL + Fore.GREEN + " unique e-mail addresses were found" + Style.RESET_ALL) 218 | if files_counter == 0: 219 | print(Fore.GREEN + "[+] Also, " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{files_counter}" + Style.RESET_ALL + Fore.GREEN + " files were extracted" + Style.RESET_ALL) 220 | else: 221 | print(Fore.GREEN + "[+] Also, " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{files_counter}" + Style.RESET_ALL + Fore.GREEN + " files were extracted" + Style.RESET_ALL) 222 | if cookies_counter == 0: 223 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{cookies_counter}" + Style.RESET_ALL + Fore.GREEN + " cookies with values" + Style.RESET_ALL) 224 | else: 225 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{cookies_counter}" + Style.RESET_ALL + Fore.GREEN + " cookies with values" + Style.RESET_ALL) 226 | if api_keys_counter == 0: 227 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{api_keys_counter}" + Style.RESET_ALL + Fore.GREEN + " API keys" + Style.RESET_ALL) 228 | else: 229 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{api_keys_counter}" + Style.RESET_ALL + Fore.GREEN + " API keys" + Style.RESET_ALL) 230 | if website_elements_counter == 0: 231 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{website_elements_counter}" + Style.RESET_ALL + Fore.GREEN + " different web page elements" + Style.RESET_ALL) 232 | else: 233 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{website_elements_counter}" + Style.RESET_ALL + Fore.GREEN + " different web page elements" + Style.RESET_ALL) 234 | if exposed_passwords_counter == 0: 235 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTRED_EX + Style.BRIGHT + f"{exposed_passwords_counter}" + Style.RESET_ALL + Fore.GREEN + " 
exposed passwords" + Style.RESET_ALL) 236 | else: 237 | print(Fore.GREEN + "[+] Found " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{exposed_passwords_counter}" + Style.RESET_ALL + Fore.GREEN + " exposed passwords" + Style.RESET_ALL) 238 | -------------------------------------------------------------------------------- /datagather_modules/crawl_processor.py: -------------------------------------------------------------------------------- 1 | import sys 2 | import socket 3 | import re 4 | import urllib 5 | from collections import defaultdict 6 | from urllib.parse import urlparse, unquote 7 | import whois 8 | import requests 9 | from bs4 import BeautifulSoup 10 | from colorama import Fore, Style 11 | 12 | sys.path.append('service') 13 | from logs_processing import logging 14 | 15 | def ip_gather(short_domain): 16 | ip_address = socket.gethostbyname(short_domain) 17 | return ip_address 18 | 19 | def whois_gather(short_domain): 20 | try: 21 | logging.info('WHOIS INFO GATHERING: OK') 22 | w = whois.whois(short_domain) 23 | if w.org is None: 24 | w['org'] = 'Organization name was not extracted' 25 | logging.info('WHOIS INFO GATHERING: OK') 26 | return w 27 | except Exception as e: 28 | print(Fore.RED + "Error while gathering WHOIS information. See journal for details") 29 | logging.error(f'WHOIS GATHERING: ERROR. REASON: {e}') 30 | w = { 31 | 'registrar': 'N/A', 32 | 'creation_date': 'N/A', 33 | 'expiration_date': 'N/A', 34 | 'name_servers': ['N/A'], 35 | 'org': 'N/A' 36 | } 37 | return w 38 | pass 39 | 40 | def contact_mail_gather(url): 41 | try: 42 | logging.info('CONTACT MAIL GATHERING: OK') 43 | r = requests.get(url) 44 | data = r.text 45 | soup = BeautifulSoup(data, "html.parser") 46 | mails = [] 47 | for i in soup.find_all(href=re.compile("mailto")): 48 | i.encode().decode() 49 | mails.append(i.string) 50 | mails = [mail for mail in mails if mail is not None] 51 | if (not mails) or (mails is None): 52 | logging.info('CONTACT MAIL GATHERING: OK (BUT NO MAILS WERE FOUND)') 53 | return 'No contact e-mails were found' 54 | else: 55 | logging.info('CONTACT MAIL GATHERING: OK') 56 | return ', '.join(map(str, mails)) 57 | except requests.RequestException as e: 58 | print(Fore.RED + "Error while gathering e-mails. See journal for details") 59 | logging.error(f'CONTACT MAIL GATHERING: ERROR. REASON: {e}') 60 | pass 61 | 62 | def subdomains_mail_gather(url): 63 | try: 64 | logging.info('SUBDOMAINS MAIL GATHERING: OK') 65 | r = requests.get(url) 66 | data = r.text 67 | soup = BeautifulSoup(data, "html.parser") 68 | mails_uncleaned = [] 69 | for i in soup.find_all(href=re.compile("mailto")): 70 | i.encode().decode() 71 | mails_uncleaned.append(i.string) 72 | mails_cleaned = [item for item in mails_uncleaned if item is not None] 73 | mails = [''.join(sublist) for sublist in mails_cleaned] 74 | return mails 75 | except requests.RequestException as e: 76 | print(Fore.RED + "Error while gathering e-mails. See journal for details") 77 | logging.error(f'SUBDOMAINS MAIL GATHERING: ERROR. 
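# Usage sketch for the crawl helpers above (network access required; the target
# is a stand-in, and datagather_modules is assumed to be on sys.path so this
# file imports as crawl_processor):
import crawl_processor as cp

ip = cp.ip_gather('example.com')
w = cp.whois_gather('example.com')                 # whois entry, or an 'N/A' dict on failure
mails = cp.contact_mail_gather('http://example.com/')
print(ip, w['registrar'], mails)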
REASON: {e}') 78 | pass 79 | 80 | def subdomains_gather(url, short_domain): 81 | try: 82 | logging.info('SUBDOMAINS GATHERING: OK') 83 | response = requests.get(url) 84 | soup = BeautifulSoup(response.text, 'html.parser') 85 | linked_domains = set() 86 | for link in soup.find_all('a', href=True): 87 | domain = urlparse(link['href']).netloc 88 | if domain and domain != urlparse(url).netloc: 89 | linked_domains.add(domain) 90 | finder = short_domain 91 | subdomains = [urllib.parse.unquote(i) for i in linked_domains if finder in i] 92 | subdomains_amount = len(subdomains) 93 | if not subdomains: 94 | subdomains = ['No subdomains were found'] 95 | logging.info('SUBDOMAINS GATHERING: OK') 96 | return subdomains, subdomains_amount 97 | except Exception as e: 98 | print(Fore.RED + f"Cannot gather subdomains due to error. See journal for details" + Style.RESET_ALL) 99 | logging.error(f'SUBDOMAINS GATHERING: ERROR. REASON: {e}') 100 | pass 101 | return ['No subdomains were found'], 0 102 | 103 | def sm_gather(url): 104 | social_domains = { 105 | 'Facebook': ('facebook.com',), 106 | 'Twitter': ('twitter.com',), 107 | 'Instagram': ('instagram.com',), 108 | 'Telegram': ('t.me',), 109 | 'TikTok': ('tiktok.com',), 110 | 'LinkedIn': ('linkedin.com',), 111 | 'VKontakte': ('vk.com',), 112 | 'YouTube': ('youtube.com', 'youtu.be'), 113 | 'Odnoklassniki': ('ok.ru',), 114 | 'WeChat': ('wechat.com',), 115 | 'X.com': ('x.com',), 116 | } 117 | 118 | categorized_links = {name: [] for name in social_domains.keys()} 119 | parsed_input = urlparse(url) 120 | host_input = (parsed_input.hostname or parsed_input.netloc or '').lower() 121 | 122 | if host_input.startswith('www.'): 123 | host_input = host_input[4:] 124 | 125 | for name, domains in social_domains.items(): 126 | if any(host_input == d or host_input.endswith('.' + d) for d in domains): 127 | categorized_links[name].append(unquote(url)) 128 | break 129 | 130 | response = requests.get(url, timeout=10) 131 | response.raise_for_status() 132 | soup = BeautifulSoup(response.text, 'html.parser') 133 | for a in soup.find_all('a', href=True): 134 | href = a['href'] 135 | parsed = urlparse(href) 136 | host = parsed.hostname or parsed.netloc 137 | if not host: 138 | continue 139 | 140 | host = host.lower() 141 | if host.startswith('www.'): 142 | host = host[4:] 143 | 144 | for name, domains in social_domains.items(): 145 | if any(host == d or host.endswith('.' + d) for d in domains): 146 | categorized_links[name].append(unquote(href)) 147 | break 148 | 149 | for name, links in categorized_links.items(): 150 | if not links: 151 | links.append(f'{name} links were not found') 152 | 153 | return categorized_links 154 | 155 | def domains_reverse_research(subdomains, report_file_type): 156 | subdomain_urls = [] 157 | subdomain_mails = [] 158 | subdomain_socials = [] 159 | subdomain_ip = [] 160 | 161 | try: 162 | for subdomain in subdomains: 163 | subdomain_url = "http://" + subdomain + "/" 164 | subdomain_urls.append(subdomain_url) 165 | except Exception as e: 166 | print(Fore.RED + "Some URL seems unreachable! DPULSE will continue to work, but the URL causing the error won't be included in report. See journal for details" + Style.RESET_ALL) 167 | logging.error(f'SUBDOMAINS URL FORMING: ERROR. REASON: {e}') 168 | pass 169 | 170 | try: 171 | for subdomain in subdomains: 172 | subdomains_ip = ip_gather(subdomain) 173 | subdomain_ip.append(subdomains_ip) 174 | subdomain_ip = list(set(subdomain_ip)) 175 | except Exception as e: 176 | print(Fore.RED + "Some URL seems unreachable! 
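# sm_gather() above returns a dict of platform name -> list of links, padding
# every empty category with a "<name> links were not found" placeholder, e.g.
# {'Facebook': ['https://facebook.com/acme'], 'Twitter': ['Twitter links were not found'], ...}.
# A quick probe (the page must be reachable; target is illustrative):
from crawl_processor import sm_gather

links = sm_gather('http://example.com/')
print(links['YouTube'])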
def domains_reverse_research(subdomains, report_file_type):
    subdomain_urls = []
    subdomain_mails = []
    subdomain_socials = []
    subdomain_ip = []

    for subdomain in subdomains:
        subdomain_urls.append("http://" + subdomain + "/")

    for subdomain in subdomains:
        # Resolve each subdomain independently so a single unreachable host
        # doesn't abort the remaining lookups
        try:
            subdomain_ip.append(ip_gather(subdomain))
        except Exception as e:
            print(Fore.RED + "Some URL seems unreachable! DPULSE will continue to work, but the URL causing the error won't be included in report. See journal for details" + Style.RESET_ALL)
            logging.error(f'SUBDOMAINS IP GATHERING: ERROR. REASON: {e}')
    subdomain_ip = list(set(subdomain_ip))

    for subdomain_url in subdomain_urls:
        try:
            subdomain_mails.append(subdomains_mail_gather(subdomain_url))
            subdomain_socials.append(sm_gather(subdomain_url))
        except Exception as e:
            print(Fore.RED + "Some URL seems unreachable! DPULSE will continue to work, but the URL causing the error won't be included in report. See journal for details" + Style.RESET_ALL)
            logging.error(f'SUBDOMAINS MAIL/SOCIALS GATHERING: ERROR. REASON: {e}')

    subdomain_mails = [sublist for sublist in subdomain_mails if sublist and sublist != [None]]
    # Join each subdomain's mails with a separator (the original ''.join mashed addresses together)
    subdomain_mails = [', '.join(sublist) for sublist in subdomain_mails]
    subdomain_socials = [{k: v for k, v in d.items() if v} for d in subdomain_socials]
    subdomain_socials = [d for d in subdomain_socials if d]
    subdomain_socials_grouped = defaultdict(list)

    for d in subdomain_socials:
        for key, value in d.items():
            subdomain_socials_grouped[key].extend(value)

    social_hosts = {
        'Facebook': ('facebook.com',),
        'Twitter': ('twitter.com',),
        'Instagram': ('instagram.com',),
        'Telegram': ('t.me',),
        'TikTok': ('tiktok.com',),
        'LinkedIn': ('linkedin.com',),
        'VKontakte': ('vk.com',),
        'YouTube': ('youtube.com', 'youtu.be'),
        'Odnoklassniki': ('ok.ru',),
        'WeChat': ('wechat.com',),
        'X.com': ('x.com',),
    }
    sd_socials = {name: [] for name in social_hosts}

    # A mapping-driven pass replaces the long elif chain this block used to be;
    # it also fixes the chain's copy-paste bug that filed x.com links under
    # 'Odnoklassniki' instead of 'X.com'
    for links in subdomain_socials_grouped.values():
        for link in links:
            hostname = urlparse(link).hostname
            if not hostname:
                continue
            for name, domains in social_hosts.items():
                if any(hostname == d or hostname.endswith('.' + d) for d in domains):
                    sd_socials[name].append(urllib.parse.unquote(link))
                    break

    sd_socials = {k: list(set(v)) for k, v in sd_socials.items()}

    if not subdomain_mails:
        subdomain_mails = ['No subdomains mails were found']
    if not subdomain_ip:
        subdomain_ip = ["No subdomains IP's were found"]

    if report_file_type == 'html':
        return subdomain_mails, sd_socials, subdomain_ip
    elif report_file_type == 'xlsx':
        return subdomain_urls, subdomain_mails, subdomain_ip, sd_socials
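# --- Editor's note: return-order sketch (illustrative). ---
# The two report paths unpack differently shaped tuples, so callers must match the order:
#
#   mails, socials, ips = domains_reverse_research(subs, 'html')
#   urls, mails, ips, socials = domains_reverse_research(subs, 'xlsx')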
--------------------------------------------------------------------------------
/pagesearch/pagesearch_parsers.py:
--------------------------------------------------------------------------------
import os
import re
import sys
import fitz
import requests

from bs4 import BeautifulSoup
from typing import List, Tuple
from urllib.parse import urljoin  # used when assembling document URLs below
from colorama import Fore, Style

sys.path.append('service')
from logs_processing import logging
from cli_init import print_ps_cli_report

ansi_re = re.compile(r'\x1b\[[0-9;]*[mK]')

def make_recorder(storage: List[str]):
    # Returns a print-like function that also stores an ANSI-stripped copy
    # of every message for the final report listing
    def _rec(*parts, sep=" ", end="\n"):
        msg = sep.join(str(p) for p in parts) + end
        print(msg, end="")
        storage.append(ansi_re.sub("", msg))
    return _rec

def extract_text_from_pdf(filename: str) -> str:
    try:
        doc = fitz.open(filename=filename)
        text = ""
        for page in doc:
            text += page.get_text()
        doc.close()
        logging.info('TEXT EXTRACTION FROM PDF (PAGESEARCH): OK')
        return text
    except Exception as e:
        print(Fore.RED + "Can't open some PDF file. See journal for details" + Style.RESET_ALL)
        logging.error(f'TEXT EXTRACTION FROM PDF (PAGESEARCH): ERROR. REASON: {e}')
        return ""

def find_keywords_in_pdfs(ps_docs_path, keywords: List[str]) -> Tuple[dict, int]:
    try:
        pdf_files = [f for f in os.listdir(ps_docs_path) if f.lower().endswith(".pdf")]
        results, pdf_with_keywords = {}, 0
        for pdf_file in pdf_files:
            pdf_path = os.path.join(ps_docs_path, pdf_file)
            extracted_text = extract_text_from_pdf(pdf_path)
            for keyword in keywords:
                if keyword.lower() in extracted_text.lower():
                    if pdf_file not in results:
                        results[pdf_file] = []
                    results[pdf_file].append(keyword)
                    pdf_with_keywords += 1
        logging.info('KEYWORDS SEARCH IN PDF (PAGESEARCH): OK')
        return results, pdf_with_keywords
    except Exception as e:
        print(Fore.RED + "Can't find keywords. See journal for details" + Style.RESET_ALL)
        logging.error(f'KEYWORDS SEARCH IN PDF (PAGESEARCH): ERROR. REASON: {e}')
        return {}, 0

def clean_bad_pdfs(ps_docs_path):
    # Removes PDFs that PyMuPDF cannot open so later parsing doesn't crash
    pdf_files = [f for f in os.listdir(ps_docs_path) if f.lower().endswith(".pdf")]
    for pdf_file in pdf_files:
        try:
            fitz.open(filename=os.path.join(ps_docs_path, pdf_file)).close()
        except Exception:
            os.remove(os.path.join(ps_docs_path, pdf_file))
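# --- Editor's note: result-shape sketch for find_keywords_in_pdfs (illustrative). ---
# results maps each PDF filename to the keywords matched inside it; the counter
# tallies individual (file, keyword) hits. All values below are hypothetical.
#
#   results, hits = find_keywords_in_pdfs('report_folder/ps_documents', ['password', 'admin'])
#   # results -> {'extracted_notes.pdf': ['password']}, hits -> 1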
def subdomains_parser(subdomains_list, report_folder, keywords, keywords_flag):
    report_lines: List[str] = []
    p = make_recorder(report_lines)
    #print(Fore.GREEN + "Conducting PageSearch. Please, be patient, it may take a long time\n" + Style.RESET_ALL)
    ps_docs_path = os.path.join(report_folder, 'ps_documents')
    if not os.path.exists(ps_docs_path):
        os.makedirs(ps_docs_path)

    email_pattern = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    total_emails, keywords_messages_list = [], []
    accessible_subdomains = files_counter = website_elements_counter = 0
    exposed_passwords_counter = api_keys_counter = cookies_counter = 0
    tried_subdomains_counter = 0
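    # --- Editor's note: what email_pattern matches (illustrative). ---
    # Standard "local@domain.tld" shapes; the sample text below is hypothetical.
    #
    #   re.findall(email_pattern, "mail admin@example.com, x@y")
    #   # -> ['admin@example.com']   ('x@y' lacks a dotted TLD and is skipped)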
p(Fore.GREEN + "Found cookie: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{cookie_name}. " + Style.RESET_ALL + Fore.GREEN + "Value: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{cookie_value}" + Style.RESET_ALL) 134 | cookies_counter += 1 135 | except Exception as e: 136 | print(Fore.RED + "Error while getting detailed info on web resource. See journal for details" + Style.RESET_ALL) 137 | logging.error(f'WEB RESOURCE ADDITIONAL INFO GATHERING (PAGESEARCH): ERROR. REASON: {e}') 138 | 139 | try: 140 | logging.info('FILES EXTRACTION (PAGESEARCH): OK') 141 | links = soup.find_all('a') 142 | for link in links: 143 | href = link.get('href') 144 | if href and href.lower().endswith(('.docx', '.xlsx', '.csv', '.pdf', '.pptx', '.doc', '.ppt', '.xls', '.rtf', '.conf', '.config', '.db', '.sql', '.json', '.txt')): 145 | document_url = 'http://' + url + href 146 | p(Fore.GREEN + "Found document: " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{document_url}" + Style.RESET_ALL) 147 | response_doc = requests.get(document_url) 148 | file_extensions = { 149 | '.docx': 'extracted_{}.docx', 150 | '.xlsx': 'extracted_{}.xlsx', 151 | '.pdf': 'extracted_{}.pdf', 152 | '.csv': 'extracted_{}.csv', 153 | '.pptx': 'extracted_{}.pptx', 154 | '.doc': 'extracted_{}.doc', 155 | '.ppt': 'extracted_{}.ppt', 156 | '.xls': 'extracted_{}.xls', 157 | '.json': 'extracted_{}.json', 158 | '.txt': 'extracted_{}.txt', 159 | '.sql': 'extracted_{}.sql', 160 | '.db': 'extracted_{}.db', 161 | '.config': 'extracted_{}.config', 162 | '.conf': 'extracted_{}.conf' 163 | } 164 | if response_doc.status_code == 200: 165 | file_extension = os.path.splitext(href.lower())[1] 166 | if file_extension in file_extensions: 167 | filename = os.path.basename(href) 168 | extracted_path = os.path.join(ps_docs_path, file_extensions[file_extension].format(os.path.splitext(filename)[0])) 169 | with open(extracted_path, 'wb') as file: 170 | file.write(response_doc.content) 171 | files_counter += 1 172 | p(Fore.GREEN + "File was successfully saved" + Style.RESET_ALL) 173 | except Exception as e: 174 | print(Fore.RED + "This file can't be accessed to extract it. See journal for details" + Style.RESET_ALL) 175 | logging.error(f'FILES EXTRACTION (PAGESEARCH): ERROR. REASON: {e}') 176 | 177 | p(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL) 178 | ps_emails_list = [x for x in total_emails if x] 179 | ps_emails_return = [', '.join(sublist) for sublist in ps_emails_list] 180 | 181 | clean_bad_pdfs(ps_docs_path) 182 | 183 | pdf_with_keywords = 0 184 | if keywords_flag == 1: 185 | print(Fore.GREEN + "Searching keywords in PDF files..." 
    p(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)
    ps_emails_list = [x for x in total_emails if x]
    ps_emails_return = [', '.join(sublist) for sublist in ps_emails_list]

    clean_bad_pdfs(ps_docs_path)

    pdf_with_keywords = 0
    if keywords_flag == 1:
        print(Fore.GREEN + "Searching keywords in PDF files..." + Style.RESET_ALL)
        pdf_results, pdf_with_keywords = find_keywords_in_pdfs(ps_docs_path, keywords)
        for pdf_file, found_keywords in pdf_results.items():
            p(Fore.GREEN + "Keywords " + Fore.LIGHTCYAN_EX + Style.BRIGHT + f"{', '.join(found_keywords)}" + Style.RESET_ALL + Fore.GREEN + f" found in '{pdf_file}'" + Style.RESET_ALL)
            keywords_messages_list.append(f"Keywords {', '.join(found_keywords)} found in '{pdf_file}'")

    print_ps_cli_report(subdomains_list, accessible_subdomains, ps_emails_return, files_counter, cookies_counter, api_keys_counter, website_elements_counter, exposed_passwords_counter)

    if keywords_flag == 0:
        print(Fore.RED + "[+] Keywords were not gathered because no keywords were provided" + Style.RESET_ALL)
        keywords_messages_list = ['No keywords were found because no keywords were provided']
    else:
        print(Fore.GREEN + f"[+] Total {pdf_with_keywords} keyword hits were found in PDF files" + Style.RESET_ALL)
    p(Fore.LIGHTGREEN_EX + "-------------------------------------------------" + Style.RESET_ALL)

    data_tuple = (
        ps_emails_return,
        accessible_subdomains,
        len(ps_emails_return),
        files_counter,
        cookies_counter,
        api_keys_counter,
        website_elements_counter,
        exposed_passwords_counter,
        keywords_messages_list
    )

    exclude = ("Conducting PageSearch", "Searching keywords", "Keywords were not gathered", "Total ")
    pagesearch_query = "\n".join(line for line in report_lines if not line.startswith(exclude))
    return data_tuple, pagesearch_query
--------------------------------------------------------------------------------
/datagather_modules/data_assembler.py:
--------------------------------------------------------------------------------
import sys
from datetime import datetime
import os
from colorama import Fore, Style
from urllib.parse import urlparse

sys.path.extend(['service', 'pagesearch', 'dorking', 'snapshotting'])

from logs_processing import logging
from config_processing import read_config
from db_creator import get_dorking_query
import crawl_processor as cp
import dorking_handler as dp
import networking_processor as np
from pagesearch_parsers import subdomains_parser
from api_virustotal import api_virustotal_check
from api_securitytrails import api_securitytrails_check
from api_hudsonrock import api_hudsonrock_check
from screen_snapshotting import take_screenshot
from html_snapshotting import save_page_as_html
from archive_snapshotting import download_snapshot


SOCIAL_KEYS = [
    'Facebook',
    'Twitter',
    'Instagram',
    'Telegram',
    'TikTok',
    'LinkedIn',
    'VKontakte',
    'YouTube',
    'Odnoklassniki',
    'WeChat',
    'X.com',
]

def make_socials_dict(with_not_found: bool = False):
    if with_not_found:
        return {name: [f'{name} links were not found'] for name in SOCIAL_KEYS}
    return {name: [] for name in SOCIAL_KEYS}

def ensure_list(value):
    if isinstance(value, list):
        return value
    if value is None:
        return []
    return [value]

def is_real_url(value: str) -> bool:
    if not isinstance(value, str):
        return False
    parsed = urlparse(value)
    return parsed.scheme in ('http', 'https') and bool(parsed.netloc)
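# --- Editor's note: is_real_url in action (illustrative). ---
# Placeholder strings such as 'Facebook links were not found' parse without a
# scheme and netloc, so they are filtered out before links are counted:
#
#   is_real_url('https://vk.com/team')               # True
#   is_real_url('Facebook links were not found')     # False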
def establishing_dork_db_connection(dorking_flag):
    dorking_db_paths = {
        'basic': 'dorking//basic_dorking.db',
        'iot': 'dorking//iot_dorking.db',
        'files': 'dorking//files_dorking.db',
        'admins': 'dorking//adminpanels_dorking.db',
        'web': 'dorking//webstructure_dorking.db',
    }
    dorking_tables = {
        'basic': 'basic_dorks',
        'iot': 'iot_dorks',
        'files': 'files_dorks',
        'admins': 'admins_dorks',
        'web': 'web_dorks',
    }
    if dorking_flag in dorking_db_paths:
        dorking_db_path = dorking_db_paths[dorking_flag]
        table = dorking_tables[dorking_flag]
    elif dorking_flag.startswith('custom'):
        lst = dorking_flag.split('+')
        if len(lst) < 2 or not lst[1]:
            raise ValueError(f"Invalid custom dorking flag (expected 'custom+<db_name>'): {dorking_flag}")
        dorking_db_name = lst[1]
        dorking_db_path = 'dorking//' + dorking_db_name
        table = 'dorks'
    else:
        raise ValueError(f"Invalid dorking flag: {dorking_flag}")
    return dorking_db_path, table
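# --- Editor's note: flag resolution sketch (illustrative; 'mydorks.db' is a placeholder). ---
#
#   establishing_dork_db_connection('iot')
#   # -> ('dorking//iot_dorking.db', 'iot_dorks')
#   establishing_dork_db_connection('custom+mydorks.db')
#   # -> ('dorking//mydorks.db', 'dorks')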
class DataProcessing:
    def report_preprocessing(self, short_domain, report_file_type):
        report_ctime = datetime.now().strftime('%d-%m-%Y, %H:%M:%S')
        files_ctime = datetime.now().strftime('(%d-%m-%Y, %Hh%Mm%Ss)')
        files_body = short_domain.replace(".", "") + '_' + files_ctime
        casename = f"{files_body}.{report_file_type}"
        foldername = files_body
        db_casename = short_domain.replace(".", "")
        now = datetime.now()
        db_creation_date = str(now.year) + str(now.month) + str(now.day)
        report_folder = f"report_{foldername}"
        robots_filepath = os.path.join(report_folder, '01-robots.txt')
        sitemap_filepath = os.path.join(report_folder, '02-sitemap.txt')
        sitemap_links_filepath = os.path.join(report_folder, '03-sitemap_links.txt')
        os.makedirs(report_folder, exist_ok=True)
        return (casename, db_casename, db_creation_date, robots_filepath,
                sitemap_filepath, sitemap_links_filepath, report_file_type,
                report_folder, files_ctime, report_ctime)

    def data_gathering(self, short_domain, url, report_file_type, pagesearch_flag,
                       keywords, keywords_flag, dorking_flag, used_api_flag,
                       snapshotting_flag, username, from_date, end_date):

        (casename, db_casename, db_creation_date, robots_filepath,
         sitemap_filepath, sitemap_links_filepath, report_file_type,
         report_folder, ctime, report_ctime) = self.report_preprocessing(short_domain, report_file_type)

        logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} STARTS HERE')
        print(Fore.LIGHTMAGENTA_EX + "\n[STARTED BASIC DOMAIN SCAN]" + Style.RESET_ALL)
        print(Fore.GREEN + "[1/11] Getting domain IP address" + Style.RESET_ALL)
        ip = cp.ip_gather(short_domain)
        print(Fore.GREEN + '[2/11] Gathering WHOIS information' + Style.RESET_ALL)
        res = cp.whois_gather(short_domain)
        print(Fore.GREEN + '[3/11] Processing e-mails gathering' + Style.RESET_ALL)
        mails = cp.contact_mail_gather(url)
        print(Fore.GREEN + '[4/11] Processing subdomain gathering' + Style.RESET_ALL)
        subdomains, subdomains_amount = cp.subdomains_gather(url, short_domain)
        print(Fore.GREEN + '[5/11] Processing social medias gathering' + Style.RESET_ALL)
        try:
            social_medias = cp.sm_gather(url)
        except Exception:
            print(Fore.RED + "Social medias were not gathered because of error" + Style.RESET_ALL)
            logging.exception("Error during social medias gathering")
            social_medias = make_socials_dict(with_not_found=True)

        print(Fore.GREEN + '[6/11] Processing subdomain analysis' + Style.RESET_ALL)
        if report_file_type == 'xlsx':
            subdomain_urls, subdomain_mails, subdomain_ip, sd_socials = cp.domains_reverse_research(
                subdomains, report_file_type
            )
        elif report_file_type == 'html':
            subdomain_mails, sd_socials, subdomain_ip = cp.domains_reverse_research(
                subdomains, report_file_type
            )
        else:
            subdomain_urls = []
            subdomain_mails = []
            subdomain_ip = []
            sd_socials = make_socials_dict()

        print(Fore.GREEN + '[7/11] Processing SSL certificate gathering' + Style.RESET_ALL)
        issuer, subject, notBefore, notAfter, commonName, serialNumber = np.get_ssl_certificate(short_domain)

        print(Fore.GREEN + '[8/11] Processing DNS records gathering' + Style.RESET_ALL)
        mx_records = np.get_dns_info(short_domain, report_file_type)

        print(Fore.GREEN + '[9/11] Extracting robots.txt and sitemap.xml' + Style.RESET_ALL)
        robots_txt_result = np.get_robots_txt(short_domain, robots_filepath)
        sitemap_xml_result = np.get_sitemap_xml(short_domain, sitemap_filepath)
        try:
            sitemap_links_status = np.extract_links_from_sitemap(sitemap_links_filepath, sitemap_filepath)
        except Exception:
            sitemap_links_status = 'Sitemap links were not parsed'

        print(Fore.GREEN + '[10/11] Gathering info about website technologies' + Style.RESET_ALL)
        (web_servers, cms, programming_languages,
         web_frameworks, analytics, javascript_frameworks) = np.get_technologies(url)

        print(Fore.GREEN + '[11/11] Processing Shodan InternetDB search' + Style.RESET_ALL)
        ports, hostnames, cpes, tags, vulns = np.query_internetdb(ip, report_file_type)

        if not isinstance(social_medias, dict):
            logging.warning(f'social_medias is {type(social_medias)}, expected dict; replacing with empty socials dict')
            social_medias = make_socials_dict()

        if not isinstance(sd_socials, dict):
            logging.warning(f'sd_socials is {type(sd_socials)}, expected dict; replacing with empty socials dict')
            sd_socials = make_socials_dict()

        all_social_keys = set(SOCIAL_KEYS) | set(social_medias.keys()) | set(sd_socials.keys())

        common_socials_raw = {}
        for key in all_social_keys:
            main_vals = ensure_list(social_medias.get(key, []))
            sd_vals = ensure_list(sd_socials.get(key, []))
            common_socials_raw[key] = main_vals + sd_vals

        common_socials = {}
        total_socials = 0

        for key, values in common_socials_raw.items():
            # Order-preserving dedup, then keep only real http(s) links
            seen = set()
            deduped = []
            for v in values:
                if v not in seen:
                    seen.add(v)
                    deduped.append(v)

            real_links = [v for v in deduped if is_real_url(v)]

            if real_links:
                common_socials[key] = real_links
                total_socials += len(real_links)
            else:
                common_socials[key] = [f'{key} links were not found']

        total_ports = len(ports)
        total_ips = len(subdomain_ip) + 1  # +1 accounts for the main domain's IP
        total_vulns = len(vulns)

        print(Fore.LIGHTMAGENTA_EX + "[ENDED BASIC DOMAIN SCAN]\n" + Style.RESET_ALL)
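        # --- Editor's note: merge behaviour sketch (illustrative; sample values are hypothetical). ---
        # Main-page and subdomain links are merged per network, deduplicated in
        # first-seen order, and placeholders survive only when no real URL remains:
        #
        #   social_medias = {'VKontakte': ['https://vk.com/team']}
        #   sd_socials    = {'VKontakte': ['https://vk.com/team', 'VKontakte links were not found']}
        #   # -> common_socials['VKontakte'] == ['https://vk.com/team'], total_socials == 1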
        if report_file_type == 'html':
            if pagesearch_flag.lower() == 'y':
                if subdomains and subdomains[0] != 'No subdomains were found':
                    print(Fore.LIGHTMAGENTA_EX + "[STARTED EXTENDED DOMAIN SCAN WITH PAGESEARCH]" + Style.RESET_ALL)
                    (
                        ps_emails_return,
                        accessible_subdomains,
                        emails_amount,
                        files_counter,
                        cookies_counter,
                        api_keys_counter,
                        website_elements_counter,
                        exposed_passwords_counter,
                        keywords_messages_list
                    ), ps_string = subdomains_parser(
                        subdomains, report_folder, keywords, keywords_flag
                    )
                    total_links_counter = accessed_links_counter = "No results because PageSearch does not gather these categories"
                    if len(keywords_messages_list) == 0:
                        keywords_messages_list = ['No keywords were found']
                    print(Fore.LIGHTMAGENTA_EX + "[ENDED EXTENDED DOMAIN SCAN WITH PAGESEARCH]\n" + Style.RESET_ALL)
                else:
                    print(Fore.RED + "Can't start PageSearch because no subdomains were detected\n" + Style.RESET_ALL)
                    ps_emails_return = ""
                    accessible_subdomains = files_counter = cookies_counter = api_keys_counter = \
                        website_elements_counter = exposed_passwords_counter = total_links_counter = \
                        accessed_links_counter = emails_amount = 'No results because no subdomains were found'
                    ps_string = 'No PageSearch listing provided because no subdomains were found'
                    keywords_messages_list = ['No data was gathered because no subdomains were found']
            elif pagesearch_flag.lower() == 'n':
                accessible_subdomains = files_counter = cookies_counter = api_keys_counter = \
                    website_elements_counter = exposed_passwords_counter = total_links_counter = \
                    accessed_links_counter = emails_amount = keywords_messages_list = \
                    "No results because user did not select PageSearch for this scan"
                ps_emails_return = ""
                ps_string = 'No PageSearch listing provided because user did not select PageSearch mode for this scan'

        if dorking_flag == 'n':
            dorking_status = 'Google Dorking mode was not selected for this scan'
            dorking_file_path = 'Google Dorking mode was not selected for this scan'
        else:
            dorking_db_path, table = establishing_dork_db_connection(dorking_flag.lower())
            print(Fore.LIGHTMAGENTA_EX + f"[STARTED EXTENDED DOMAIN SCAN WITH {dorking_flag.upper()} DORKING TABLE]" + Style.RESET_ALL)
            dorking_status, dorking_file_path = dp.save_results_to_txt(
                report_folder, table, get_dorking_query(short_domain, dorking_db_path, table)
            )
            print(Fore.LIGHTMAGENTA_EX + f"[ENDED EXTENDED DOMAIN SCAN WITH {dorking_flag.upper()} DORKING TABLE]\n" + Style.RESET_ALL)

        api_scan_db = []
        if used_api_flag != ['Empty']:
            print(Fore.LIGHTMAGENTA_EX + "[STARTED EXTENDED DOMAIN SCAN WITH 3RD PARTY API]" + Style.RESET_ALL)
            if '1' in used_api_flag:
                virustotal_output = api_virustotal_check(short_domain)
                api_scan_db.append('VirusTotal')
            else:
                virustotal_output = 'No results because user did not select VirusTotal API scan'

            if '2' in used_api_flag:
                securitytrails_output = api_securitytrails_check(short_domain)
                api_scan_db.append('SecurityTrails')
            else:
                securitytrails_output = 'No results because user did not select SecurityTrails API scan'

            if '3' in used_api_flag:
                if username is None or (isinstance(username, str) and username.lower() == 'n'):
                    username = None
                hudsonrock_output = api_hudsonrock_check(short_domain, ip, mails, username)
                api_scan_db.append('HudsonRock')
            else:
                hudsonrock_output = 'No results because user did not select HudsonRock API scan'

            print(Fore.LIGHTMAGENTA_EX + "[ENDED EXTENDED DOMAIN SCAN WITH 3RD PARTY API]\n" + Style.RESET_ALL)
        else:
            virustotal_output = 'No results because user did not select VirusTotal API scan'
            securitytrails_output = 'No results because user did not select SecurityTrails API scan'
            hudsonrock_output = 'No results because user did not select HudsonRock API scan'
            api_scan_db.append('No')
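        # --- Editor's note: used_api_flag semantics as read above (illustrative values). ---
        # '1' -> VirusTotal, '2' -> SecurityTrails, '3' -> HudsonRock; ['Empty'] skips them all.
        #
        #   used_api_flag = ['1', '3']   # would run VirusTotal and HudsonRock only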
        if snapshotting_flag.lower() in ['s', 'p', 'w']:
            config_values = read_config()
            installed_browser = config_values['installed_browser']
            print(Fore.LIGHTMAGENTA_EX + "[STARTED DOMAIN SNAPSHOTTING]" + Style.RESET_ALL)
            if snapshotting_flag.lower() == 's':
                take_screenshot(installed_browser, url, os.path.join(report_folder, 'screensnapshot.png'))
            elif snapshotting_flag.lower() == 'p':
                save_page_as_html(url, os.path.join(report_folder, 'domain_html_copy.html'))
            elif snapshotting_flag.lower() == 'w':
                download_snapshot(short_domain, from_date, end_date, report_folder)
            print(Fore.LIGHTMAGENTA_EX + "[ENDED DOMAIN SNAPSHOTTING]\n" + Style.RESET_ALL)

        data_array = [
            ip, res, mails, subdomains, subdomains_amount, social_medias,
            subdomain_mails, sd_socials, subdomain_ip, issuer, subject,
            notBefore, notAfter, commonName, serialNumber, mx_records,
            robots_txt_result, sitemap_xml_result, sitemap_links_status,
            web_servers, cms, programming_languages, web_frameworks,
            analytics, javascript_frameworks, ports, hostnames, cpes, tags,
            vulns, common_socials, total_socials, ps_emails_return,
            accessible_subdomains, emails_amount, files_counter,
            cookies_counter, api_keys_counter, website_elements_counter,
            exposed_passwords_counter, total_links_counter,
            accessed_links_counter, keywords_messages_list, dorking_status,
            dorking_file_path, virustotal_output, securitytrails_output,
            hudsonrock_output, ps_string, total_ports, total_ips, total_vulns
        ]

        report_info_array = [
            casename, db_casename, db_creation_date, report_folder,
            ctime, report_file_type, report_ctime, api_scan_db, used_api_flag
        ]
        logging.info(f'### THIS LOG PART FOR {casename} CASE, TIME: {ctime} ENDS HERE')
        return data_array, report_info_array
--------------------------------------------------------------------------------
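# --- Editor's note: end-to-end usage sketch (illustrative, not part of the repository). ---
# All argument values below are hypothetical placeholders; the real CLI wires these
# in from user prompts (see service/cli_init.py).
#
#   from datagather_modules.data_assembler import DataProcessing
#
#   processing = DataProcessing()
#   data_array, report_info = processing.data_gathering(
#       short_domain='example.com',
#       url='http://example.com/',
#       report_file_type='html',
#       pagesearch_flag='n',
#       keywords=[], keywords_flag=0,
#       dorking_flag='n',
#       used_api_flag=['Empty'],
#       snapshotting_flag='n',
#       username=None, from_date=None, end_date=None,
#   )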