├── .env
├── .gitignore
├── Dockerfile
├── docker-compose.yml
├── license.txt
├── modules
│   ├── date_extractor.py
│   ├── link_analyzer.py
│   ├── profile_extractor.py
│   ├── proxy.py
│   └── rpuc.py
├── readme.md
├── requirements.txt
└── run.py
/.env:
--------------------------------------------------------------------------------
1 | # URL of the JSON file containing the site data
2 | WMN_JSON_URL=https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json
3 | PROXY_URL=http://127.0.0.1:8000/proxy
--------------------------------------------------------------------------------
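These two variables are read through `python-dotenv`; `modules/rpuc.py` looks them up with `os.getenv()` and falls back to built-in defaults when `.env` is absent. A minimal sketch of that lookup (the `print` calls are purely illustrative):

```python
# Minimal sketch of how the .env values above are consumed (mirrors modules/rpuc.py).
import os
from dotenv import load_dotenv

load_dotenv()  # reads .env from the current working directory, if present

JSON_URL = os.getenv(
    "WMN_JSON_URL",
    "https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json",
)
PROXY_URL = os.getenv("PROXY_URL", "http://127.0.0.1:8000/proxy")

print(JSON_URL)
print(PROXY_URL)
```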
/.gitignore:
--------------------------------------------------------------------------------
1 | /.DS_Store
2 | /.venv
3 | /.vscode
4 | /data
5 | /results
6 | modules/__pycache__/*
7 | modules/.DS_Store
8 |
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use Python 3.12 slim as base image
2 | FROM python:3.12-slim
3 |
4 | # Set working directory
5 | WORKDIR /app
6 |
7 | # Install system dependencies
8 | RUN apt-get update && apt-get install -y --no-install-recommends \
9 | gcc \
10 | python3-dev \
11 | && rm -rf /var/lib/apt/lists/*
12 |
13 | # Copy requirements first for better layer caching
14 | COPY requirements.txt .
15 | RUN pip install --no-cache-dir -r requirements.txt
16 |
17 | # Create necessary directories
18 | RUN mkdir -p /app/data /app/results /app/modules
19 |
20 | # Copy application files
21 | COPY run.py .
22 | COPY modules/proxy.py modules/
23 | COPY modules/rpuc.py modules/
24 | COPY modules/date_extractor.py modules/
25 | COPY modules/link_analyzer.py modules/
26 | COPY modules/profile_extractor.py modules/
27 |
28 | # Make scripts executable
29 | RUN chmod +x run.py
30 | RUN chmod +x modules/proxy.py
31 | RUN chmod +x modules/rpuc.py
32 |
33 | # Set environment variables
34 | ENV PYTHONUNBUFFERED=1
35 | ENV WMN_JSON_URL=https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json
36 | ENV PROXY_URL=http://127.0.0.1:8000/proxy
37 |
38 | # Create a volume for persistent data
39 | VOLUME ["/app/data", "/app/results"]
40 |
41 | # Run application
42 | CMD ["python", "run.py"]
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 | rhino-user-checker:
3 | build:
4 | context: .
5 | dockerfile: Dockerfile
6 | container_name: rhino-user-checker
7 | volumes:
8 | - ./data:/app/data
9 | - ./results:/app/results
10 | stdin_open: true # Keep STDIN open even if not attached
11 | tty: true # Allocate a pseudo-TTY
12 | restart: "no" # Don't restart automatically
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 |
4 | RhinoUserChecker (RPUC) - OSINT Username Checking Tool
5 | Copyright (C) 2024 DEGUN
6 |
7 | This program is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 |
12 | This program is distributed in the hope that it will be useful,
13 | but WITHOUT ANY WARRANTY; without even the implied warranty of
14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 | GNU General Public License for more details.
16 |
17 | You should have received a copy of the GNU General Public License
18 | along with this program. If not, see <https://www.gnu.org/licenses/>.
19 |
20 | For the full license text, please visit:
21 | https://www.gnu.org/licenses/gpl-3.0.txt
--------------------------------------------------------------------------------
/modules/date_extractor.py:
--------------------------------------------------------------------------------
1 | import re
2 | from datetime import datetime
3 | from typing import Optional
4 |
5 | def extract_profile_date(html_content: str, metadata: dict, site_name: str = "") -> Optional[str]:
6 | """
7 | Extract the profile creation date from the HTML content or the metadata.
8 |
9 | Args:
10 | html_content (str): The HTML content of the page
11 | metadata (dict): The metadata extracted from the profile
12 | site_name (str): The site name, used to apply site-specific rules
13 |
14 | Returns:
15 | Optional[str]: The formatted creation date, or None if no date is found
16 | """
17 | # Exclude certain specific sites or patterns
18 | if site_name.lower() == "behance" and "created_on" in html_content:
19 | return None
20 |
21 | # Check whether the content comes from a <link rel> tag
22 | has_link_rel_date = "<link rel=" in html_content
87 | def normalize_date(date_str: str) -> str:
88 | """
89 | Attempt to normalize the date format for consistent display.
90 | This function is simple and can be improved to handle more formats.
91 |
92 | Args:
93 | date_str (str): The extracted date string
94 |
95 | Returns:
96 | str: The normalized date, or the original string if it cannot be normalized
97 | """
98 | # For now, simply clean up the string
99 | date_str = date_str.strip()
100 |
101 | # Remove commas to keep things simple
102 | date_str = date_str.replace(',', '')
103 |
104 | # For a more robust implementation, the date could be parsed
105 | # with datetime.strptime() and reformatted to a standard format
106 |
107 | return date_str
--------------------------------------------------------------------------------
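The closing comment of `normalize_date()` suggests parsing dates with `datetime.strptime()` and reformatting them to a standard layout. A hedged sketch of that idea; the format list and the `normalize_date_strict` name are illustrative, not part of the project:

```python
from datetime import datetime

def normalize_date_strict(date_str: str) -> str:
    """Try a few common layouts and reformat to YYYY-MM-DD; fall back to the cleaned input."""
    cleaned = date_str.strip().replace(",", "")
    for fmt in ("%Y-%m-%d", "%d %B %Y", "%B %d %Y", "%b %d %Y", "%Y-%m-%dT%H:%M:%S"):
        try:
            return datetime.strptime(cleaned, fmt).strftime("%Y-%m-%d")
        except ValueError:
            continue
    return cleaned  # unrecognized layout: return the cleaned original string

print(normalize_date_strict("Jan 5, 2023"))  # -> 2023-01-05
```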
/modules/link_analyzer.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from urllib.parse import urljoin, urlparse
3 | import re
4 | from typing import List, Dict, Set
5 |
6 | class LinkAnalyzer:
7 | # Known social media domains
8 | SOCIAL_DOMAINS = {
9 | 'twitter.com', 'facebook.com', 'linkedin.com', 'instagram.com',
10 | 'github.com', 'gitlab.com', 'bitbucket.org', 'youtube.com',
11 | 'medium.com', 'dev.to', 'behance.net', 'dribbble.com',
12 | 'stackoverflow.com', 't.me', 'mastodon.social'
13 | }
14 |
15 | # Areas to avoid (navigation, footer, etc.)
16 | EXCLUDE_CONTAINERS = {
17 | 'footer',
18 | 'nav',
19 | 'navigation',
20 | 'navbar',
21 | 'menu',
22 | 'sidebar',
23 | 'header',
24 | 'topbar',
25 | 'bottombar',
26 | 'copyright',
27 | 'legal'
28 | }
29 |
30 | # Areas of interest (profile, bio, etc.)
31 | PROFILE_CONTAINERS = {
32 | 'profile',
33 | 'bio',
34 | 'about',
35 | 'user-info',
36 | 'userinfo',
37 | 'user-profile',
38 | 'userprofile',
39 | 'profile-info',
40 | 'description',
41 | 'user-description',
42 | 'user-details',
43 | 'personal-info',
44 | 'account-info'
45 | }
46 |
47 | EXCLUDE_KEYWORDS = {
48 | # System and legal pages
49 | 'privacy', 'legal', 'terms', 'policy', 'cookie',
50 | 'about', 'contact', 'help', 'support',
51 | 'documentation', 'docs', 'guidelines',
52 | 'static', 'api', 'enterprise', 'showcase', 'policie',
53 | 'advertising', 'welcome',
54 |
55 | # Marketing and sharing
56 | 'share', 'sharer', 'sharing', 'newsletter',
57 | 'subscribe', 'subscription', 'marketing',
58 |
59 | # Authentication and account
60 | 'login', 'signin', 'signup', 'register',
61 | 'authentication', 'password', 'forgot',
62 |
63 | # Commerce
64 | 'shop', 'store', 'pricing', 'payment',
65 | 'checkout', 'cart', 'billing',
66 |
67 | # Miscellaneous
68 | 'sitemap', 'search', 'tag', 'category',
69 | 'feed', 'rss', 'download', 'uploads',
70 | 'status', 'stats', 'analytics', 'envato', 'placeit'
71 | }
72 |
73 | def __init__(self, html_content: str, base_url: str):
74 | self.soup = BeautifulSoup(html_content, 'html.parser')
75 | self.base_url = base_url
76 | parsed_base = urlparse(base_url)
77 | self.base_domain = parsed_base.netloc.lower()
78 |
79 | # Extract the main domain name
80 | domain_parts = self.base_domain.split('.')
81 | if domain_parts[0] == 'www':
82 | domain_parts = domain_parts[1:-1] # Remove www and tld
83 | else:
84 | domain_parts = domain_parts[1:-1] if len(domain_parts) > 2 else domain_parts[:-1] # Remove tld and subdomain if present
85 |
86 | self.domain_name = '.'.join(domain_parts) # For cases with multiple subdomains, keep all
87 |
88 | def _should_exclude_link(self, url: str) -> bool:
89 | """Check if a link should be excluded from results."""
90 | url_lower = url.lower()
91 |
92 | # If domain name appears anywhere in the URL, exclude it
93 | if self.domain_name in url_lower:
94 | return True
95 |
96 | # If URL contains an excluded keyword
97 | if any(keyword.lower() in url_lower for keyword in self.EXCLUDE_KEYWORDS):
98 | return True
99 |
100 | return False
101 |
102 | def _is_in_excluded_container(self, element) -> bool:
103 | """Check if element is in an excluded container.
104 | Partial matching is used, so 'footer' will match 'global-footer', 'footer-wrapper', etc."""
105 | for parent in element.parents:
106 | # Check IDs
107 | if parent.get('id'):
108 | parent_id = parent.get('id').lower()
109 | if any(exc in parent_id or parent_id in exc for exc in self.EXCLUDE_CONTAINERS):
110 | return True
111 |
112 | # Check classes
113 | if parent.get('class'):
114 | parent_classes = ' '.join(parent.get('class')).lower()
115 | if any(exc in parent_classes for exc in self.EXCLUDE_CONTAINERS):
116 | return True
117 |
118 | # Check tag names (exact match as these are standard HTML tags)
119 | if parent.name and parent.name.lower() in self.EXCLUDE_CONTAINERS:
120 | return True
121 |
122 | return False
123 |
124 | def _is_in_profile_container(self, element) -> bool:
125 | """Check if element is in a profile container."""
126 | for parent in element.parents:
127 | # Check IDs
128 | if parent.get('id') and any(prof in parent.get('id').lower() for prof in self.PROFILE_CONTAINERS):
129 | return True
130 | # Check classes
131 | if parent.get('class'):
132 | if any(prof in ' '.join(parent.get('class')).lower() for prof in self.PROFILE_CONTAINERS):
133 | return True
134 | return False
135 |
136 | def _is_valid_external_link(self, url: str) -> bool:
137 | """Check if a link is a valid external link."""
138 | try:
139 | parsed = urlparse(url)
140 | domain = parsed.netloc.lower()
141 |
142 | # Ignore empty links or links to the same domain
143 | if not domain or domain == self.base_domain:
144 | return False
145 |
146 | # If it's a link to a known social media profile, keep it
147 | social_profile_indicators = ['/user/', '/users/', '/profile/', '@', '/u/', '/channel/']
148 | if any(social_domain in domain for social_domain in self.SOCIAL_DOMAINS):
149 | if any(indicator in url.lower() for indicator in social_profile_indicators):
150 | # But still check if source domain name isn't present
151 | return not self._should_exclude_link(url)
152 |
153 | # Exclude based on defined criteria
154 | if self._should_exclude_link(url):
155 | return False
156 |
157 | # Check for URLs that look like user profiles
158 | user_profile_patterns = [
159 | r'/[~@][\w-]+/?$',
160 | r'/users?/[\w-]+/?$',
161 | r'/profiles?/[\w-]+/?$',
162 | r'/members?/[\w-]+/?$',
163 | r'/channel/[\w-]+/?$',
164 | r'/commissions/[\w-]+/?$'
165 | ]
166 |
167 | if any(re.search(pattern, url) for pattern in user_profile_patterns):
168 | return True
169 |
170 | return True # If we get here, the link has passed all filters
171 |
172 | except Exception:
173 | return False
174 |
175 | def analyze(self) -> List[str]:
176 | """Analyze HTML to find relevant external links."""
177 | links = set()
178 | for a_tag in self.soup.find_all('a', href=True):
179 | href = a_tag['href']
180 | if href.startswith(('http://', 'https://')):
181 | full_url = href
182 | else:
183 | full_url = urljoin(self.base_url, href)
184 |
185 | if self._is_valid_external_link(full_url):
186 | links.add(full_url)
187 |
188 | # Clean and normalize URLs
189 | cleaned_links = []
190 | for link in links:
191 | # Remove common tracking parameters
192 | cleaned_url = re.sub(r'\?.*$', '', link)
193 | # Remove trailing slash
194 | cleaned_url = re.sub(r'/$', '', cleaned_url)
195 | cleaned_links.append(cleaned_url)
196 |
197 | return sorted(list(set(cleaned_links))) # Remove duplicates and sort
198 |
199 | def analyze_links(html_content: str, base_url: str) -> List[str]:
200 | """
201 | Utility function to analyze links on a page.
202 |
203 | Args:
204 | html_content (str): The HTML content to analyze
205 | base_url (str): The base URL for resolving relative links
206 |
207 | Returns:
208 | List[str]: List of external links found
209 | """
210 | analyzer = LinkAnalyzer(html_content, base_url)
211 | return analyzer.analyze()
--------------------------------------------------------------------------------
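A usage sketch for `analyze_links()`. The HTML and URLs below are made up, and the snippet assumes it is run from the `modules/` directory (as `run.py` arranges for `rpuc.py`), so that `link_analyzer` is importable:

```python
from link_analyzer import analyze_links

html = """
<div class="profile-info">
  <a href="https://github.com/someuser">GitHub</a>
  <a href="/settings">Settings</a>
</div>
"""

# Same-domain and excluded links are filtered out; query strings and trailing
# slashes are stripped from what remains.
links = analyze_links(html, "https://example.com/user/someuser")
print(links)  # expected: ['https://github.com/someuser']
```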
/modules/profile_extractor.py:
--------------------------------------------------------------------------------
1 | from bs4 import BeautifulSoup
2 | from typing import Dict, Set, List
3 | from urllib.parse import urlparse
4 | import json
5 | import re
6 |
7 | class ProfileExtractor:
8 | # Profile related containers and classes
9 | PROFILE_CONTAINERS = {
10 | # Common profile containers
11 | 'profile',
12 | 'bio',
13 | 'about',
14 | 'description',
15 | 'user-info',
16 | 'user-profile',
17 | 'userprofile',
18 | 'user-bio',
19 | 'userbio',
20 | 'author-info',
21 | 'author-bio',
22 | 'biography',
23 |
24 | # Social media specific
25 | 'profile-header',
26 | 'profile-card',
27 | 'profile-info',
28 | 'profile-details',
29 | 'user-details',
30 | 'personal-info',
31 | 'account-info',
32 |
33 | # Content descriptions
34 | 'user-description',
35 | 'creator-info',
36 | 'artist-info',
37 | 'member-info'
38 | }
39 |
40 | # Common metadata fields that might contain profile information
41 | METADATA_FIELDS = {
42 | 'description',
43 | 'og:description',
44 | 'profile:username',
45 | 'profile:first_name',
46 | 'profile:last_name',
47 | 'author',
48 | 'twitter:description',
49 | 'article:author',
50 | 'profile:gender',
51 | 'profile:location'
52 | }
53 |
54 | # Common UI elements to ignore
55 | UI_ELEMENTS = {
56 | 'menu', 'navigation', 'nav', 'search', 'button',
57 | 'dialog', 'modal', 'popup', 'tooltip', 'dropdown',
58 | 'tab', 'menu-item', 'sidebar', 'widget', 'footer'
59 | }
60 |
61 | # Content to exclude (similar to link analyzer)
62 | EXCLUDE_CONTAINERS = {
63 | 'footer',
64 | 'header',
65 | 'nav',
66 | 'navigation',
67 | 'menu',
68 | 'sidebar',
69 | 'copyright',
70 | 'legal',
71 | 'advertisement',
72 | 'cookie',
73 | 'popup',
74 | 'stats',
75 | 'style',
76 | 'script'
77 | }
78 |
79 | def __init__(self, html_content: str, base_url: str):
80 | """Initialize the ProfileExtractor."""
81 | self.soup = BeautifulSoup(html_content, 'html.parser')
82 | self.base_url = base_url
83 |
84 | # Extract domain name for filtering
85 | parsed_base = urlparse(base_url)
86 | self.base_domain = parsed_base.netloc.lower()
87 | domain_parts = self.base_domain.split('.')
88 | if domain_parts[0] == 'www':
89 | domain_parts = domain_parts[1:-1]
90 | else:
91 | domain_parts = domain_parts[1:-1] if len(domain_parts) > 2 else domain_parts[:-1]
92 | self.domain_name = '.'.join(domain_parts)
93 |
94 | def _clean_text(self, text: str) -> str:
95 | """Clean and normalize text."""
96 | # Remove multiple spaces and newlines
97 | text = ' '.join(text.split())
98 | # Remove common UI text patterns
99 | text = re.sub(r'(Follow|Message|Subscribe|Share|Like|Comment|Post|View|Open|Close|Toggle|Click|Tap)\s*', '', text, flags=re.IGNORECASE)
100 | return text.strip()
101 |
102 | def _is_meaningful_text(self, text: str) -> bool:
103 | """Check if text contains meaningful information."""
104 | # Minimum length check
105 | if len(text) < 3:
106 | return False
107 |
108 | # Check if text is just a single common word
109 | common_words = {'menu', 'home', 'about', 'contact', 'search', 'login', 'signup'}
110 | if text.lower() in common_words:
111 | return False
112 |
113 | # Check if text is just numbers
114 | if text.replace(',', '').replace('.', '').isdigit():
115 | return False
116 |
117 | # Check if text is just a common UI element
118 | if text.lower() in self.UI_ELEMENTS:
119 | return False
120 |
121 | return True
122 |
123 | def _is_in_excluded_container(self, element) -> bool:
124 | """Check if element is in a container that should be excluded."""
125 | for parent in element.parents:
126 | # Check IDs
127 | if parent.get('id'):
128 | parent_id = parent.get('id').lower()
129 | if any(exc in parent_id or parent_id in exc for exc in self.EXCLUDE_CONTAINERS):
130 | return True
131 |
132 | # Check classes
133 | if parent.get('class'):
134 | parent_classes = ' '.join(parent.get('class')).lower()
135 | if any(exc in parent_classes for exc in self.EXCLUDE_CONTAINERS):
136 | return True
137 |
138 | # Check tag names
139 | if parent.name and parent.name.lower() in self.EXCLUDE_CONTAINERS:
140 | return True
141 |
142 | return False
143 |
144 | def _extract_from_metadata(self) -> Dict[str, str]:
145 | """Extract profile information from metadata tags."""
146 | metadata = {}
147 |
148 | # Extract from standard meta tags
149 | for meta in self.soup.find_all('meta'):
150 | name = meta.get('name', meta.get('property', '')).lower()
151 | if name in self.METADATA_FIELDS:
152 | content = self._clean_text(meta.get('content', ''))
153 | if content and not self._should_exclude_content(content):
154 | metadata[name] = content
155 |
156 | # Extract from JSON-LD
157 | for script in self.soup.find_all('script', type='application/ld+json'):
158 | try:
159 | data = json.loads(script.string)
160 | if isinstance(data, dict):
161 | if data.get('@type') in ['Person', 'Profile']:
162 | for key, value in data.items():
163 | if isinstance(value, str):
164 | cleaned_value = self._clean_text(value)
165 | if cleaned_value and not self._should_exclude_content(cleaned_value):
166 | metadata[key] = cleaned_value
167 | except (json.JSONDecodeError, AttributeError):
168 | continue
169 |
170 | return metadata
171 |
172 | def _should_exclude_content(self, text: str) -> bool:
173 | """Check if content should be excluded."""
174 | return self.domain_name.lower() in text.lower()
175 |
176 | def _extract_from_html(self) -> Set[str]:
177 | """Extract profile information from HTML content."""
178 | profile_texts = set()
179 | seen_texts = set()
180 |
181 | # First, get all text elements
182 | for element in self.soup.find_all(text=True):
183 | # Check if element is inside excluded container like footer FIRST
184 | if self._is_in_excluded_container(element):
185 | continue # Skip this element and all its content
186 |
187 | # Only then check if it's in a profile container
188 | parent_element = element.parent
189 | if any(ptn in str(parent_element.get('class', [])).lower() or
190 | ptn in str(parent_element.get('id', '')).lower()
191 | for ptn in self.PROFILE_CONTAINERS):
192 |
193 | text = self._clean_text(element.string)
194 | if (text and
195 | text not in seen_texts and
196 | len(text) >= 3):
197 |
198 | profile_texts.add(text)
199 | seen_texts.add(text)
200 |
201 | return profile_texts
202 |
203 | def extract(self) -> Dict[str, List[str]]:
204 | """Extract all profile information from the page."""
205 | metadata = self._extract_from_metadata()
206 | content = sorted(list(self._extract_from_html())) # Convert set to sorted list
207 |
208 | return {
209 | 'metadata': metadata,
210 | 'content': content
211 | }
212 |
213 | def extract_profile_info(html_content: str, base_url: str) -> Dict[str, List[str]]:
214 | """Utility function to extract profile information from a page."""
215 | extractor = ProfileExtractor(html_content, base_url)
216 | return extractor.extract()
--------------------------------------------------------------------------------
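Similarly, a usage sketch for `extract_profile_info()`, under the same assumption that it runs from the `modules/` directory; the HTML and the expected output are illustrative:

```python
from profile_extractor import extract_profile_info

html = """
<html>
  <head><meta name="description" content="Photographer based in Oslo"></head>
  <body><div class="profile-bio">Photographer based in Oslo</div></body>
</html>
"""

info = extract_profile_info(html, "https://example.com/user/someuser")
print(info["metadata"])  # expected: {'description': 'Photographer based in Oslo'}
print(info["content"])   # text nodes found inside profile/bio containers
```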
/modules/proxy.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # modules/proxy.py
3 |
4 | from fastapi import FastAPI, HTTPException
5 | from fastapi.middleware.cors import CORSMiddleware
6 | import httpx
7 | from urllib.parse import urlparse
8 | import os
9 | from dotenv import load_dotenv
10 |
11 | load_dotenv()
12 |
13 | app = FastAPI()
14 |
15 | # CORS configuration
16 | app.add_middleware(
17 | CORSMiddleware,
18 | allow_origins=["*"],
19 | allow_credentials=True,
20 | allow_methods=["*"],
21 | allow_headers=["*"],
22 | )
23 |
24 | # Domain-specific header configurations
25 | DOMAIN_PATTERNS = {
26 | '.ru': {
27 | 'Accept-Language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7',
28 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36'
29 | },
30 | '.pl': {
31 | 'Accept-Language': 'pl-PL,pl;q=0.9,en-US;q=0.8,en;q=0.7',
32 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Firefox/120.0'
33 | },
34 | '.jp': {
35 | 'Accept-Language': 'ja-JP,ja;q=0.9,en-US;q=0.8,en;q=0.7',
36 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Safari/605.1.15'
37 | },
38 | '.cn': {
39 | 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7',
40 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36'
41 | },
42 | 'behance.net': {
43 | 'Accept-Language': 'en-US,en;q=0.9',
44 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
45 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8',
46 | 'Referer': 'https://www.behance.net/'
47 | },
48 | 'community': {
49 | 'Accept': 'application/activity+json',
50 | 'User-Agent': 'Mozilla/5.0 (compatible; SocialMediaBot/1.0)'
51 | },
52 | 'mastodon': {
53 | 'Accept': 'application/activity+json',
54 | 'User-Agent': 'Mozilla/5.0 (compatible; SocialMediaBot/1.0)'
55 | }
56 | }
57 |
58 | # Default headers
59 | DEFAULT_HEADERS = {
60 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
61 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
62 | 'Accept-Language': 'en-US,en;q=0.9',
63 | 'Cache-Control': 'no-cache',
64 | }
65 |
66 | @app.get("/proxy")
67 | async def proxy(url: str):
68 | if not url:
69 | raise HTTPException(status_code=400, detail='URL parameter is required')
70 |
71 | domain = urlparse(url).netloc.replace('www.', '')
72 |
73 | # Build headers
74 | headers = DEFAULT_HEADERS.copy()
75 | for pattern, pattern_headers in DOMAIN_PATTERNS.items():
76 | if pattern in domain:
77 | headers.update(pattern_headers)
78 | break
79 |
80 | try:
81 | async with httpx.AsyncClient(verify=False, timeout=25.0) as client:
82 | # First request without following redirects
83 | response = await client.get(
84 | url,
85 | headers=headers,
86 | follow_redirects=False
87 | )
88 |
89 | initial_status_code = response.status_code
90 |
91 | # If redirect, follow with a new request
92 | if 300 <= initial_status_code < 400:
93 | response = await client.get(
94 | url,
95 | headers=headers,
96 | follow_redirects=True
97 | )
98 |
99 | # Build response
100 | result = {
101 | 'status': {
102 | 'http_code': response.status_code,
103 | 'initial_http_code': initial_status_code,
104 | 'headers': dict(response.headers)
105 | },
106 | 'contents': response.text,
107 | 'url': str(response.url)
108 | }
109 |
110 | # Add redirect history if present
111 | if response.history:
112 | result['status']['redirect_history'] = [
113 | {
114 | 'url': str(r.url),
115 | 'status_code': r.status_code,
116 | 'headers': dict(r.headers)
117 | }
118 | for r in response.history
119 | ]
120 |
121 | return result
122 |
123 | except httpx.RequestError as e:
124 | error_details = {
125 | 'message': str(e),
126 | 'code': type(e).__name__,
127 | 'url': url
128 | }
129 |
130 | if isinstance(e, httpx.TimeoutException):
131 | return {
132 | 'error': error_details,
133 | 'status': {'http_code': 504}
134 | }
135 |
136 | return {
137 | 'error': error_details,
138 | 'status': {'http_code': 500}
139 | }
140 |
141 | if __name__ == "__main__":
142 | import uvicorn
143 | import logging
144 | logging.getLogger("uvicorn.access").setLevel(logging.WARNING)
145 | uvicorn.run(app, host="127.0.0.1", port=8000, log_level="warning")
--------------------------------------------------------------------------------
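Once the proxy is running (for example via `python modules/proxy.py`, or started by `run.py`), it can be queried directly. A sketch of a client call; the target URL is an example, and the printed keys match the dict built in the `/proxy` handler above, assuming the upstream request succeeds:

```python
import httpx
from urllib.parse import quote

target = "https://github.com/degun-osint"
resp = httpx.get(f"http://127.0.0.1:8000/proxy?url={quote(target)}", timeout=30.0)
data = resp.json()

print(data["status"]["http_code"])              # final HTTP status of the fetched page
print(data["status"].get("initial_http_code"))  # status before any redirect was followed
print(data["url"])                              # final URL after redirects
print(len(data["contents"]))                    # length of the raw HTML body
```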
/modules/rpuc.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # modules/rpuc.py
3 | import aiohttp
4 | import asyncio
5 | import json
6 | import os
7 | from datetime import datetime
8 | from rich.console import Console
9 | from rich.table import Table
10 | from rich.progress import Progress, BarColumn, TimeRemainingColumn, TextColumn
11 | from rich.live import Live
12 | from jinja2 import Environment, BaseLoader
13 | from urllib.parse import urlparse, quote
14 | import logging
15 | from typing import Dict, List, Optional
16 | from dotenv import load_dotenv
17 | from link_analyzer import analyze_links
18 | from profile_extractor import extract_profile_info
19 | from date_extractor import extract_profile_date, normalize_date
20 | import re
21 |
22 | # Load environment variables
23 | load_dotenv()
24 |
25 | # Logging configuration
26 | logging.basicConfig(
27 | level=logging.INFO,
28 | format='%(asctime)s - %(levelname)s - %(message)s'
29 | )
30 | logger = logging.getLogger(__name__)
31 |
32 | # Configuration
33 | BATCH_SIZE = 50 # Process 50 requests simultaneously
34 | MAX_CONNECTIONS = 200 # Maximum connections for aiohttp
35 | REQUEST_TIMEOUT = 15
36 | DEFAULT_JSON_URL = "https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json"
37 | JSON_URL = os.getenv('WMN_JSON_URL', DEFAULT_JSON_URL)
38 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__)))
39 | DATA_DIR = os.path.join(BASE_DIR, "data")
40 | RESULTS_DIR = os.path.join(BASE_DIR, "results")
41 | PROGRESS_DELAY = 0.01
42 |
43 | os.makedirs(DATA_DIR, exist_ok=True)
44 | os.makedirs(RESULTS_DIR, exist_ok=True)
45 |
46 | PROXY_URL = os.getenv('PROXY_URL', 'http://127.0.0.1:8000/proxy')
47 |
48 | HEADERS = {
49 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
50 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8',
51 | 'Accept-Language': 'en-US,en;q=0.9',
52 | 'Cache-Control': 'no-cache',
53 | }
54 |
55 | class SiteChecker:
56 | def __init__(self):
57 | """Initialize the site checker."""
58 | self.console = Console()
59 | self.sites = []
60 | self.results = []
61 | self.data_dir = DATA_DIR
62 | self.results_dir = RESULTS_DIR
63 |
64 | async def download_sites_data(self):
65 | """Download site data from configured URL."""
66 | local_file = os.path.join(self.data_dir, "wmn-data.json")
67 |
68 | try:
69 | async with aiohttp.ClientSession() as session:
70 | self.console.print(f"[cyan]Downloading data from {JSON_URL}...")
71 | async with session.get(JSON_URL) as response:
72 | if response.status == 200:
73 | data = await response.text()
74 | json_data = json.loads(data)
75 | self.sites = json_data.get('sites', [])
76 | with open(local_file, 'w', encoding='utf-8') as f:
77 | f.write(data)
78 | self.console.print("[green]Data downloaded successfully")
79 | else:
80 | if os.path.exists(local_file):
81 | self.console.print("[yellow]Using local data...")
82 | with open(local_file, 'r', encoding='utf-8') as f:
83 | json_data = json.load(f)
84 | self.sites = json_data.get('sites', [])
85 | else:
86 | raise Exception("Unable to download data and no local data available")
87 | except Exception as e:
88 | if os.path.exists(local_file):
89 | self.console.print("[yellow]Using local data...")
90 | with open(local_file, 'r', encoding='utf-8') as f:
91 | json_data = json.load(f)
92 | self.sites = json_data.get('sites', [])
93 | else:
94 | raise
95 |
96 | def is_date_status(self, status):
97 | """Détermine si le statut contient une date."""
98 | if not isinstance(status, str):
99 | return False
100 |
101 | status_lower = status.lower()
102 |
103 | # Check whether "join" or a month name is present
104 | months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec']
105 | if 'join' in status_lower or any(month in status_lower for month in months):
106 | return True
107 |
108 | # Check whether there is at least one digit
109 | if any(c.isdigit() for c in status_lower):
110 | return True
111 |
112 | return False
113 |
114 | async def verify_content(self, content: str, pattern: str, site_name: str) -> bool:
115 | """Check if pattern is present in content."""
116 | if not pattern:
117 | return True
118 | if not isinstance(content, str):
119 | return False
120 |
121 | normalized_content = ' '.join(content.split())
122 | normalized_pattern = ' '.join(pattern.split()).replace('\\"', '"')
123 |
124 | return normalized_pattern.lower() in normalized_content.lower()
125 |
126 | async def check_site(self, site: dict, username: str, session: aiohttp.ClientSession) -> Optional[dict]:
127 | """Check a specific site for a given username."""
128 | original_url = site['uri_check'].replace("{account}", username)
129 | display_url = site.get('uri_pretty', original_url).replace("{account}", username)
130 |
131 | if original_url.startswith('http://'):
132 | original_url = original_url.replace('http://', 'https://')
133 |
134 | try:
135 | # Use proxy
136 | proxy_url = f"{PROXY_URL}?url={quote(original_url)}"
137 | async with session.get(proxy_url, timeout=REQUEST_TIMEOUT) as response:
138 | if response.status != 200:
139 | return None
140 |
141 | json_response = await response.json()
142 | if not json_response or 'status' not in json_response:
143 | return None
144 |
145 | content = json_response.get('contents', '')
146 | status_data = json_response['status']
147 | initial_status = status_data.get('initial_http_code', status_data.get('http_code'))
148 |
149 | # Verify status and patterns
150 | has_miss_string = await self.verify_content(content, site.get('m_string', ''), site['name'])
151 | has_expected_string = await self.verify_content(content, site.get('e_string', ''), site['name'])
152 |
153 | # Case 1: If m_string is present and m_code matches => Not found
154 | if has_miss_string and initial_status == site['m_code']:
155 | return None
156 |
157 | # Case 2: If e_string is present and e_code matches => Found
158 | if has_expected_string and initial_status == site['e_code']:
159 | external_links = analyze_links(content, original_url)
160 | profile_info = extract_profile_info(content, original_url)
161 |
162 | # Extract the profile creation date
163 | profile_date = None
164 | if profile_info and 'metadata' in profile_info:
165 | profile_date = extract_profile_date(content, profile_info.get('metadata', {}), site_name=site['name'])
166 |
167 | # Determine the status (creation date or "found")
168 | status = 'found'
169 | if profile_date:
170 | status = normalize_date(profile_date)
171 |
172 | # Check whether the content comes from a <link rel> tag (to be ignored)
173 | if status != 'found' and "<link rel=" in content:
186 | # Case 3: If e_string is present but e_code does not match => Unsure
187 | if has_expected_string and initial_status != site['e_code']:
188 | external_links = analyze_links(content, original_url)
189 | profile_info = extract_profile_info(content, original_url)
190 |
191 | return {
192 | 'name': site['name'],
193 | 'category': site['cat'],
194 | 'url': display_url,
195 | 'status': 'unsure',
196 | 'http_code': initial_status,
197 | 'external_links': external_links,
198 | 'profile_info': profile_info
199 | }
200 |
201 | # Case 4: If neither e_string nor m_string is present => Not found
202 | if not has_expected_string and not has_miss_string:
203 | return None
204 |
205 | # Any other unanticipated case => Not found
206 | return None
207 |
208 | except Exception as e:
209 | logger.error(f"Error checking {site['name']}: {str(e)}")
210 | return None
211 |
212 | async def process_batch(self, sites: List[dict], username: str) -> List[dict]:
213 | """Process a batch of sites in parallel."""
214 | connector = aiohttp.TCPConnector(limit=50, force_close=True)
215 | async with aiohttp.ClientSession(connector=connector) as session:
216 | tasks = []
217 | for site in sites:
218 | tasks.append(self.check_site(site, username, session))
219 |
220 | results = await asyncio.gather(*tasks, return_exceptions=True)
221 | valid_results = []
222 | for r in results:
223 | if isinstance(r, Exception):
224 | logger.error(f"Error in batch: {str(r)}")
225 | continue
226 | if r is not None:
227 | valid_results.append(r)
228 | return valid_results
229 |
230 | async def check_username(self, username: str):
231 | """Check a username across all sites."""
232 | self.results = []
233 | console = Console()
234 |
235 | with Progress(
236 | TextColumn("{task.description}"),
237 | BarColumn(complete_style="green", finished_style="green"),
238 | TextColumn("[progress.percentage]{task.percentage:>3.0f}%"),
239 | TextColumn("•"),
240 | TimeRemainingColumn(),
241 | console=console,
242 | transient=True,
243 | ) as progress:
244 | main_task = progress.add_task(
245 | f"[cyan]🦏 Searching...",
246 | total=len(self.sites)
247 | )
248 |
249 | found_count = 0
250 | tasks = []
251 |
252 | # Create all batches
253 | for i in range(0, len(self.sites), BATCH_SIZE):
254 | batch = self.sites[i:i + BATCH_SIZE]
255 | tasks.append(self.process_batch(batch, username))
256 |
257 | # Process batches in groups
258 | for i in range(0, len(tasks), 2):
259 | current_tasks = tasks[i:i+2]
260 | batch_results = await asyncio.gather(*current_tasks)
261 |
262 | sites_processed = min(BATCH_SIZE * 2, len(self.sites) - (i * BATCH_SIZE))
263 |
264 | for results in batch_results:
265 | found_in_batch = len(results)
266 | if found_in_batch > 0:
267 | found_count += found_in_batch
268 | for result in results:
269 | console.print(f"[green]✓ Found on {result['name']}[/green]")
270 |
271 | progress.update(
272 | main_task,
273 | advance=sites_processed,
274 | description=f"[cyan]🦏 Searching... ({found_count} found)"
275 | )
276 |
277 | await asyncio.sleep(PROGRESS_DELAY)
278 |
279 | for results in batch_results:
280 | self.results.extend(results)
281 |
282 | def display_results_console(self):
283 | """Display results in console with styling."""
284 | if not self.results:
285 | self.console.print("\n[yellow]No profiles found[/yellow]")
286 | return
287 |
288 | table = Table(title=f"Search Results")
289 |
290 | table.add_column("Site", style="cyan")
291 | table.add_column("Category", style="green")
292 | table.add_column("Status", style="magenta")
293 | table.add_column("URL", style="blue")
294 | table.add_column("External Links", style="yellow")
295 | table.add_column("Profile Info", style="white")
296 |
297 | for result in self.results:
298 | status_style = "green" if result['status'] == 'found' else "yellow" if result['status'] == 'unsure' else "white"
299 |
300 | external_links = result.get('external_links', [])
301 | links_str = ", ".join(external_links) if external_links else "-"
302 |
303 | profile_info = result.get('profile_info', {})
304 | profile_str = ""
305 | if profile_info:
306 | if profile_info.get('metadata'):
307 | profile_str += "Metadata: " + ", ".join(f"{k}: {v}" for k, v in profile_info['metadata'].items())
308 | if profile_info.get('content'):
309 | profile_str += "\nContent: " + ", ".join(profile_info['content'])
310 |
311 | table.add_row(
312 | result['name'],
313 | result['category'],
314 | f"[{status_style}]{result['status']}[/{status_style}]",
315 | result['url'],
316 | links_str,
317 | profile_str or "-"
318 | )
319 | self.console.print(table)
320 |
321 | def export_html(self, output_file: str, username: str = ""):
322 | """Export results to HTML."""
323 | env = Environment(loader=BaseLoader())
324 | template_str = r'''
325 | <!-- The template's HTML markup and CSS (original lines 325-689) were stripped from this dump; only the surviving Jinja2 skeleton is kept below. -->
330 | RPUC Results
690 | {% if results %}
694 | {{ results|length }}
695 | Total Profiles
698 | {{ results|selectattr("status", "equalto", "found")|list|length }}
699 | Confirmed
702 | {{ results|selectattr("status", "equalto", "unsure")|list|length }}
703 | Possible
706 | {{ results|rejectattr("status", "equalto", "found")|rejectattr("status", "equalto", "unsure")|list|length }}
707 | With Dates
713 | {% for result in results %}
741 | {% if result.external_links %}
743 | External Links ({{ result.external_links|length }})
752 | {% endif %}
754 | {% if result.profile_info %}
755 | {% if result.profile_info.metadata %}
766 | {% endif %}
768 | {% if result.profile_info.content %}
770 | Content
772 | {% for item in result.profile_info.content %}
773 | {{ item }}
774 | {% endfor %}
777 | {% endif %}
778 | {% endif %}
781 | {% endfor %}
783 | {% else %}
785 | No profiles found
787 | {% endif %}
791 | '''
792 |
793 | template = env.from_string(template_str)
794 | html_content = template.render(
795 | results=self.results,
796 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
797 | username=username,
798 | is_date_status=self.is_date_status # Add the function to the template context
799 | )
800 |
801 | output_path = os.path.join(self.results_dir, output_file)
802 |
803 | with open(output_path, 'w', encoding='utf-8') as f:
804 | f.write(html_content)
805 | return output_path
806 |
807 | def export_results_csv(self, output_file: str):
808 | """Export results to CSV format."""
809 | import csv
810 | output_path = os.path.join(self.results_dir, output_file)
811 |
812 | with open(output_path, 'w', newline='', encoding='utf-8') as f:
813 | writer = csv.writer(f)
814 | # Write headers
815 | headers = ['Site', 'Category', 'Status', 'URL', 'External Links', 'Profile Info']
816 | writer.writerow(headers)
817 |
818 | # Write data
819 | for result in self.results:
820 | external_links = '; '.join(result.get('external_links', []))
821 |
822 | # Format profile info
823 | profile_info = result.get('profile_info', {})
824 | profile_str = ''
825 | if profile_info:
826 | if profile_info.get('metadata'):
827 | profile_str += 'Metadata: ' + ', '.join(f"{k}: {v}" for k, v in profile_info['metadata'].items())
828 | if profile_info.get('content'):
829 | profile_str += ' | Content: ' + ', '.join(profile_info['content'])
830 |
831 | row = [
832 | result['name'],
833 | result['category'],
834 | result['status'],
835 | result['url'],
836 | external_links,
837 | profile_str
838 | ]
839 | writer.writerow(row)
840 |
841 | return output_path
842 |
843 | async def main():
844 | try:
845 | checker = SiteChecker()
846 | await checker.download_sites_data()
847 |
848 | username = input("\nEnter username to search: ")
849 |
850 | while True:
851 | if not username.strip():
852 | print("Username cannot be empty")
853 | username = input("\nEnter username to search: ")
854 | continue
855 |
856 | print(f"\nSearching profiles for {username}...")
857 | await checker.check_username(username)
858 |
859 | checker.display_results_console()
860 |
861 | # Ask for export format
862 | while True:
863 | export_choice = input("\nDo you want to export results? (CSV / HTML / BOTH / NO): ").upper()
864 | if export_choice in ['CSV', 'HTML', 'BOTH', 'NO']:
865 | break
866 | print("Invalid choice. Please enter CSV, HTML, BOTH, or NO.")
867 |
868 | if export_choice != 'NO':
869 | timestamp = datetime.now().strftime('%Y%m%d_%H%M%S')
870 |
871 | if export_choice in ['HTML', 'BOTH']:
872 | output_file = f"results_{username}_{timestamp}.html"
873 | output_path_html = checker.export_html(output_file, username=username)
874 | print(f"\nHTML results exported to {output_path_html}")
875 |
876 | if export_choice in ['CSV', 'BOTH']:
877 | output_file = f"results_{username}_{timestamp}.csv"
878 | output_path_csv = checker.export_results_csv(output_file)
879 | print(f"CSV results exported to {output_path_csv}")
880 |
881 | # Ask to search another user
882 | username = input("\nSearch another user? (enter alias or ctrl-c to quit): ")
883 | if not username.strip():
884 | break
885 |
886 | except KeyboardInterrupt:
887 | print("\nOperation cancelled by user...")
888 | except asyncio.CancelledError:
889 | print("\nOperation cancelled...")
890 | except Exception as e:
891 | print(f"An error occurred: {str(e)}")
892 | print(f"An error occurred: {str(e)}")
893 |
894 | def run():
895 | try:
896 | asyncio.run(main())
897 | except KeyboardInterrupt:
898 | pass
899 |
900 | if __name__ == "__main__":
901 | run()
--------------------------------------------------------------------------------
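A hedged sketch of driving `SiteChecker` programmatically rather than through the interactive `main()` loop. It assumes the proxy is already running and that the snippet is executed from the `modules/` directory, as `run.py` does; the username and output filename are examples:

```python
import asyncio
from rpuc import SiteChecker

async def lookup(username: str) -> str:
    checker = SiteChecker()
    await checker.download_sites_data()      # fetch or reuse the cached wmn-data.json
    await checker.check_username(username)   # fills checker.results
    checker.display_results_console()
    # export_html() writes into the results/ directory and returns the full path
    return checker.export_html(f"results_{username}.html", username=username)

if __name__ == "__main__":
    print(asyncio.run(lookup("example-user")))
```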
/readme.md:
--------------------------------------------------------------------------------
1 | # 🦏 RhinoUserChecker (RPUC)
2 |
3 | ** PLEASE BE AWARE THAT THIS IS NOT A PRODUCTION VERSION AND SHOULD BE USED WITH CAUTION **
4 |
5 | A Python-based OSINT tool that helps you find usernames across multiple platforms and extract profile information. Built on top of the WhatsMyName project's data, RPUC adds advanced profile extraction and external link analysis capabilities.
6 |
7 | ## 🌟 Features
8 |
9 | - **Multi-platform Search**: Search for usernames across hundreds of social media platforms and websites, powered by the WhatsMyName JSON file
10 | - **Profile Information Extraction**: Automatically extract user profile information, bios, and metadata
11 | - **Profile creation date**: Attempt to find the account creation date
12 | - **External Link Analysis**: Discover related profiles through external link analysis
13 | - **Smart Rate Limiting**: Built-in proxy support and smart rate limiting to avoid blocking
14 | - **Rich Console Output**: Real-time progress tracking and beautiful console output using Rich
15 | - **HTML or CSV Report Generation**: Generate detailed HTML or CSV reports with all findings
16 | - **International Platform Support**: Special handling for international platforms (Russian, Chinese, Japanese, etc.)
17 |
18 | ## Discussion
19 |
20 | You can join the OSCAR ZULU Discord server to discuss this tool: https://discord.gg/4REgJzn4NG
21 |
22 | ## 📋 Requirements
23 |
24 | ```text
25 | Python 3.8+
26 | See requirements.txt for full dependencies
27 | ```
28 |
29 | ## 🚀 Installation
30 |
31 | 1. Clone the repository:
32 | ```bash
33 | git clone https://github.com/degun-osint/RhinoUserChecker
34 | cd RhinoUserChecker
35 | ```
36 |
37 | 2. Create a virtual environment and activate it:
38 | ```bash
39 | python -m venv venv
40 | source venv/bin/activate # On Windows: venv\Scripts\activate
41 | ```
42 |
43 | 3. Install the required packages:
44 | ```bash
45 | pip install -r requirements.txt
46 | ```
47 |
48 | ## ⚙️ Configuration
49 |
50 | RPUC uses environment variables for configuration. Create a `.env` file in the root directory, based on the `.env-sample` file, with:
51 |
52 | ```env
53 | WMN_JSON_URL=https://raw.githubusercontent.com/WebBreacher/WhatsMyName/main/wmn-data.json
54 | PROXY_URL=http://127.0.0.1:8000/proxy
55 | ```
56 | By default, the script uses a forked version of WMN JSON.
57 |
58 | ## 🐳 Docker Installation
59 |
60 | ### Using Docker Compose (recommended)
61 |
62 | 1. Clone the repository:
63 | ```bash
64 | git clone https://github.com/degun-osint/RhinoUserChecker
65 | cd RhinoUserChecker
66 | ```
67 |
68 | 2. Run the application:
69 | ```bash
70 | docker-compose up -d
71 | ```
72 |
73 | 3. Attach to the running container to interact with the application:
74 | ```bash
75 | docker attach rhino-user-checker
76 | ```
77 |
78 | 4. To exit the application, press `Ctrl+C`. To detach from the container without stopping it, press `Ctrl+P` followed by `Ctrl+Q`.
79 |
80 | ### Using Docker directly
81 |
82 | 1. Build the Docker image:
83 | ```bash
84 | docker build -t rhino-user-checker .
85 | ```
86 |
87 | 2. Run the container:
88 | ```bash
89 | docker run -it --name rhino-user-checker -v $(pwd)/data:/app/data -v $(pwd)/results:/app/results rhino-user-checker
90 | ```
91 |
92 | The application creates two directories:
93 | - `./data`: Stores the WhatsMyName database
94 | - `./results`: Stores exported results (HTML and CSV)
95 |
96 | These directories are mounted as volumes to persist data between container runs.
97 |
98 | ### Docker Troubleshooting
99 |
100 | If you encounter any issues with Docker:
101 | 1. Check that the volumes have the correct permissions
102 | 2. If you're having network issues, ensure your Docker container has internet access
103 |
104 | ## 🎮 Usage
105 |
106 | Start the tool by running:
107 |
108 | ```bash
109 | python run.py
110 | ```
111 |
112 | The tool will:
113 | 1. Download the latest site data from the WhatsMyName project
114 | 2. Prompt you for a username to search
115 | 3. Search across hundreds of platforms
116 | 4. Generate an HTML or CSV report with the findings
117 |
118 | ## 📊 Output
119 |
120 | RPUC generates two types of output:
121 | - Real-time console output with progress tracking
122 | - Detailed HTML or CSV report containing:
123 | - Found profiles with links
124 | - Status (found = good chance the profile exists; unsure = the site returned an HTTP 200 where a 404 was expected for a missing profile, so the profile cannot be confirmed)
125 | - Extracted profile information
126 | - Discovered external links
127 | - Metadata from profiles
128 |
129 | ## 🏗️ Project Structure
130 |
131 | ```
132 | rpuc/
133 | ├── run.py # Main entry point
134 | ├── modules/
135 | │ ├── proxy.py # Proxy server for rate limiting
136 | │ ├── rpuc.py # Core functionality
137 | │ ├── date_extractor.py # Profile creation date extraction
138 | │ ├── link_analyzer.py # External link analysis
139 | │ └── profile_extractor.py # Profile information extraction
140 | ├── data/ # Data storage
141 | └── results/ # Generated reports
142 | ```
143 |
144 | ## 🔧 Advanced Usage
145 |
146 | ### Custom Headers
147 |
148 | RPUC supports custom headers for different domains/regions. Edit the `DOMAIN_PATTERNS` in `proxy.py` to add more patterns.
149 |
150 | ### Proxy Configuration
151 |
152 | By default, RPUC runs its own proxy server for rate limiting. You can configure an external proxy by modifying the `PROXY_URL` in your `.env` file.
153 |
154 | ## 🤝 Contributing
155 |
156 | Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
157 |
158 | ## 📜 License
159 |
160 | This project is licensed under the GNU General Public License v3.0 - see the [LICENSE](license.txt) file for details.
161 |
162 | ## 🙏 Credits
163 |
164 | - Based on the [WhatsMyName Project](https://github.com/WebBreacher/WhatsMyName)
165 | - Built with:
166 | - [FastAPI](https://fastapi.tiangolo.com/)
167 | - [Rich](https://rich.readthedocs.io/)
168 | - [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup/)
169 | - [aiohttp](https://docs.aiohttp.org/)
170 |
171 | ## ⚠️ Disclaimer
172 |
173 | This tool is for educational purposes only. Be mindful of the platforms' terms of service and use responsibly.
174 |
175 | ## Author
176 |
177 | DEGUN (https://github.com/degun-osint)
178 |
179 |
180 |
--------------------------------------------------------------------------------
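To illustrate the `DOMAIN_PATTERNS` mechanism mentioned under "Custom Headers" above: the proxy merges the headers of the first pattern that appears as a substring of the request's domain over the defaults. A self-contained sketch of that matching rule; the `.de` entry and the `headers_for` helper are illustrative, not part of the project:

```python
from urllib.parse import urlparse

DEFAULT_HEADERS = {"User-Agent": "Mozilla/5.0"}
EXTRA_PATTERNS = {
    ".de": {"Accept-Language": "de-DE,de;q=0.9,en;q=0.7"},
}

def headers_for(url: str) -> dict:
    # Same rule as proxy(): the first pattern contained in the domain wins.
    domain = urlparse(url).netloc.replace("www.", "")
    headers = DEFAULT_HEADERS.copy()
    for pattern, extra in EXTRA_PATTERNS.items():
        if pattern in domain:
            headers.update(extra)
            break
    return headers

print(headers_for("https://forum.example.de/user/someuser"))
```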
/requirements.txt:
--------------------------------------------------------------------------------
1 | aiohappyeyeballs==2.4.4
2 | aiohttp==3.11.11
3 | aiosignal==1.3.2
4 | annotated-types==0.7.0
5 | anyio==4.8.0
6 | attrs==25.1.0
7 | beautifulsoup4==4.13.1
8 | certifi==2025.1.31
9 | click==8.1.8
10 | fastapi==0.115.8
11 | frozenlist==1.5.0
12 | h11==0.14.0
13 | httpcore==1.0.7
14 | httpx==0.28.1
15 | idna==3.10
16 | Jinja2==3.1.5
17 | markdown-it-py==3.0.0
18 | MarkupSafe==3.0.2
19 | mdurl==0.1.2
20 | multidict==6.1.0
21 | propcache==0.2.1
22 | psutil==6.1.1
23 | pydantic==2.10.6
24 | pydantic_core==2.27.2
25 | Pygments==2.19.1
26 | python-dotenv==1.0.1
27 | rich==13.9.4
28 | sniffio==1.3.1
29 | soupsieve==2.6
30 | starlette==0.45.3
31 | typing_extensions==4.12.2
32 | uvicorn==0.34.0
33 | yarl==1.18.3
34 |
--------------------------------------------------------------------------------
/run.py:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env python3
2 | # run.py
3 |
4 | import subprocess
5 | import sys
6 | import time
7 | import signal
8 | import os
9 | import psutil
10 | from rich.console import Console
11 |
12 | # Path configuration
13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__))
14 | MODULES_DIR = os.path.join(BASE_DIR, "modules")
15 | PROXY_PATH = os.path.join(MODULES_DIR, "proxy.py")
16 | RPUC_PATH = os.path.join(MODULES_DIR, "rpuc.py")
17 | DATA_DIR = os.path.join(BASE_DIR, "data")
18 | RESULTS_DIR = os.path.join(BASE_DIR, "results")
19 |
20 | # Create necessary directories
21 | os.makedirs(DATA_DIR, exist_ok=True)
22 | os.makedirs(RESULTS_DIR, exist_ok=True)
23 |
24 | console = Console()
25 |
26 | def kill_process_tree(pid):
27 | """Kill a process and all its children."""
28 | try:
29 | parent = psutil.Process(pid)
30 | children = parent.children(recursive=True)
31 | for child in children:
32 | try:
33 | child.kill()
34 | except psutil.NoSuchProcess:
35 | pass
36 | parent.kill()
37 | except psutil.NoSuchProcess:
38 | pass
39 |
40 | def cleanup(proxy_process, main_process):
41 | """Clean up processes on shutdown."""
42 | if main_process:
43 | kill_process_tree(main_process.pid)
44 | if proxy_process:
45 | kill_process_tree(proxy_process.pid)
46 |
47 | def run_proxy():
48 | """Start the proxy server without changing the global directory."""
49 | try:
50 | return subprocess.Popen([sys.executable, PROXY_PATH],
51 | stdout=subprocess.PIPE,
52 | stderr=subprocess.PIPE,
53 | cwd=MODULES_DIR)
54 | except Exception as e:
55 | console.print(f"[red]Error starting proxy: {e}[/red]")
56 | sys.exit(1)
57 |
58 | def run_main():
59 | """Start the main script without changing the global directory."""
60 | try:
61 | return subprocess.Popen([sys.executable, RPUC_PATH],
62 | cwd=MODULES_DIR)
63 | except Exception as e:
64 | console.print(f"[red]Error starting main script: {e}[/red]")
65 | return None
66 |
67 | def print_banner():
68 | banner = r"""
69 | .-----------------------------------------.
70 | ( RHINO USER CHECKER - OSCAR ZULU FOREVER ! )
71 | //\'----------------------------------------'\
72 | / , _.-~~-.__ __.,----.
73 | ('; __( ) ~~~'--..--~~ '.
74 | ( . ""..-' ')| . \ '.
75 | \\. |\'.' ; . ; ; ;
76 | \ \" /9) ' . ; ;
77 | ; ) ) ( ' . ; ' .
78 | ) _ __.-'-._ ; ' . , /\ ;
79 | '-"'--' ; "-. '. ' _.-( ". (
80 | ; \,) )--,..----';' > ; .
81 | \ ( | / ( / . ;
82 | , , ) | ; .( . , ) / \ ;
83 | ,;'PjP;.';-.;._,;/;,;)/;.;.);.;,,;,;,,;/;;,),;.,/,;.).,;
84 |
85 | """
86 | console.print("[yellow]" + banner + "[/yellow]")
87 |
88 | def print_title():
89 | title = "Username, profile info and link scrapper \n"
90 | credits = "Based on Whatsmyname JSON (https://github.com/WebBreacher/WhatsMyName)\n"
91 | console.print("[bold cyan]" + title + "[/bold cyan]")
92 | console.print("[italic dim cyan]" + credits + "[/italic dim cyan]")
93 |
94 | def main():
95 | # Display banner
96 | print_banner()
97 | print_title()
98 |
99 | # Check file existence
100 | if not os.path.exists(PROXY_PATH):
101 | console.print(f"[red]Error: {PROXY_PATH} does not exist[/red]")
102 | sys.exit(1)
103 | if not os.path.exists(RPUC_PATH):
104 | console.print(f"[red]Error: {RPUC_PATH} does not exist[/red]")
105 | sys.exit(1)
106 |
107 | proxy_process = None
108 | main_process = None
109 |
110 | def signal_handler(signum, frame):
111 | console.print("\n[yellow]Stopping processes...[/yellow]")
112 | cleanup(proxy_process, main_process)
113 | sys.exit(0)
114 |
115 | # Signal handling
116 | signal.signal(signal.SIGINT, signal_handler)
117 | signal.signal(signal.SIGTERM, signal_handler)
118 |
119 | try:
120 | # Start proxy
121 | console.print("[cyan]Starting proxy...[/cyan]")
122 | proxy_process = run_proxy()
123 |
124 | # Wait for proxy to be ready
125 | time.sleep(2)
126 |
127 | # Start main script
128 | console.print("[cyan]Starting main script...[/cyan]")
129 | main_process = run_main()
130 |
131 | while True:
132 | if main_process.poll() is not None:
133 | break
134 | time.sleep(0.1)
135 |
136 | except KeyboardInterrupt:
137 | console.print("\n[yellow]Operation cancelled by user...[/yellow]")
138 | except Exception as e:
139 | console.print(f"\n[red]Error: {e}[/red]")
140 | finally:
141 | cleanup(proxy_process, main_process)
142 | console.print("[green]Processes stopped[/green]")
143 |
144 | if __name__ == "__main__":
145 | main()
--------------------------------------------------------------------------------