├── .env
├── .gitignore
├── Dockerfile
├── docker-compose.yml
├── license.txt
├── modules
│   ├── .DS_Store
│   ├── date_extractor.py
│   ├── link_analyzer.py
│   ├── profile_extractor.py
│   ├── proxy.py
│   └── rpuc.py
├── readme.md
├── requirements.txt
└── run.py

/.env:
--------------------------------------------------------------------------------
1 | # URL of the JSON file containing the site data
2 | WMN_JSON_URL=https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json
3 | PROXY_URL=http://127.0.0.1:8000/proxy
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.DS_Store
2 | /.venv
3 | /.vscode
4 | /data
5 | /results
6 | modules/__pycache__/*
7 | modules/.DS_Store
8 | modules/.DS_Store
9 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use Python 3.12 slim as base image
2 | FROM python:3.12-slim
3 | 
4 | # Set working directory
5 | WORKDIR /app
6 | 
7 | # Install system dependencies
8 | RUN apt-get update && apt-get install -y --no-install-recommends \
9 |     gcc \
10 |     python3-dev \
11 |     && rm -rf /var/lib/apt/lists/*
12 | 
13 | # Copy requirements first for better layer caching
14 | COPY requirements.txt .
15 | RUN pip install --no-cache-dir -r requirements.txt
16 | 
17 | # Create necessary directories
18 | RUN mkdir -p /app/data /app/results /app/modules
19 | 
20 | # Copy application files
21 | COPY run.py .
22 | COPY modules/proxy.py modules/
23 | COPY modules/rpuc.py modules/
24 | COPY modules/date_extractor.py modules/
25 | COPY modules/link_analyzer.py modules/
26 | COPY modules/profile_extractor.py modules/
27 | 
28 | # Make scripts executable
29 | RUN chmod +x run.py
30 | RUN chmod +x modules/proxy.py
31 | RUN chmod +x modules/rpuc.py
32 | 
33 | # Set environment variables
34 | ENV PYTHONUNBUFFERED=1
35 | ENV WMN_JSON_URL=https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json
36 | ENV PROXY_URL=http://127.0.0.1:8000/proxy
37 | 
38 | # Create a volume for persistent data
39 | VOLUME ["/app/data", "/app/results"]
40 | 
41 | # Run application
42 | CMD ["python", "run.py"]
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 |   rhino-user-checker:
3 |     build:
4 |       context: .
5 |       dockerfile: Dockerfile
6 |     container_name: rhino-user-checker
7 |     volumes:
8 |       - ./data:/app/data
9 |       - ./results:/app/results
10 |     stdin_open: true # Keep STDIN open even if not attached
11 |     tty: true # Allocate a pseudo-TTY
12 |     restart: "no" # Don't restart automatically
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 | 
4 | RhinoUserChecker (RPUC) - OSINT Username Checking Tool
5 | Copyright (C) 2024 DEGUN
6 | 
7 | This program is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | 20 | For the full license text, please visit: 21 | https://www.gnu.org/licenses/gpl-3.0.txt -------------------------------------------------------------------------------- /modules/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/degun-osint/RhinoUserChecker/3f7ca0cea917314b64c59e9923611ac7f05ac0f2/modules/.DS_Store -------------------------------------------------------------------------------- /modules/date_extractor.py: -------------------------------------------------------------------------------- 1 | import re 2 | from datetime import datetime 3 | from typing import Optional 4 | 5 | def extract_profile_date(html_content: str, metadata: dict, site_name: str = "") -> Optional[str]: 6 | """ 7 | Extraire la date de création du profil à partir du contenu HTML ou des métadonnées. 8 | 9 | Args: 10 | html_content (str): Le contenu HTML de la page 11 | metadata (dict): Les métadonnées extraites du profil 12 | site_name (str): Le nom du site pour appliquer des règles spécifiques 13 | 14 | Returns: 15 | Optional[str]: La date de création formatée, ou None si aucune date n'est trouvée 16 | """ 17 | # Exclure certains sites ou patterns spécifiques 18 | if site_name.lower() == "behance" and "created_on" in html_content: 19 | return None 20 | 21 | # Vérifier si le contenu provient d'une balise link rel 22 | has_link_rel_date = " str: 88 | """ 89 | Tenter de normaliser le format de date pour un affichage cohérent. 90 | Cette fonction est simple et peut être améliorée pour gérer plus de formats. 91 | 92 | Args: 93 | date_str (str): La chaîne de date extraite 94 | 95 | Returns: 96 | str: La date normalisée, ou la chaîne originale si impossible à normaliser 97 | """ 98 | # Pour l'instant, simplement nettoyer la chaîne 99 | date_str = date_str.strip() 100 | 101 | # Supprimer les virgules pour simplifier 102 | date_str = date_str.replace(',', '') 103 | 104 | # Pour une implémentation plus robuste, on pourrait tenter de parser la date 105 | # avec datetime.strptime() et la reformater selon un format standard 106 | 107 | return date_str -------------------------------------------------------------------------------- /modules/link_analyzer.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from urllib.parse import urljoin, urlparse 3 | import re 4 | from typing import List, Dict, Set 5 | 6 | class LinkAnalyzer: 7 | # Known social media domains 8 | SOCIAL_DOMAINS = { 9 | 'twitter.com', 'facebook.com', 'linkedin.com', 'instagram.com', 10 | 'github.com', 'gitlab.com', 'bitbucket.org', 'youtube.com', 11 | 'medium.com', 'dev.to', 'behance.net', 'dribbble.com', 12 | 'stackoverflow.com', 't.me', 'mastodon.social' 13 | } 14 | 15 | # Areas to avoid (navigation, footer, etc.) 16 | EXCLUDE_CONTAINERS = { 17 | 'footer', 18 | 'nav', 19 | 'navigation', 20 | 'navbar', 21 | 'menu', 22 | 'sidebar', 23 | 'header', 24 | 'topbar', 25 | 'bottombar', 26 | 'copyright', 27 | 'legal' 28 | } 29 | 30 | # Areas of interest (profile, bio, etc.) 
31 | PROFILE_CONTAINERS = { 32 | 'profile', 33 | 'bio', 34 | 'about', 35 | 'user-info', 36 | 'userinfo', 37 | 'user-profile', 38 | 'userprofile', 39 | 'profile-info', 40 | 'description', 41 | 'user-description', 42 | 'user-details', 43 | 'personal-info', 44 | 'account-info' 45 | } 46 | 47 | EXCLUDE_KEYWORDS = { 48 | # System and legal pages 49 | 'privacy', 'legal', 'terms', 'policy', 'cookie', 50 | 'about', 'contact', 'help', 'support', 51 | 'documentation', 'docs', 'guidelines', 52 | 'static', 'api', 'enterprise', 'showcase', 'policie', 53 | 'advertising', 'welcome', 54 | 55 | # Marketing and sharing 56 | 'share', 'sharer', 'sharing', 'newsletter', 57 | 'subscribe', 'subscription', 'marketing', 58 | 59 | # Authentication and account 60 | 'login', 'signin', 'signup', 'register', 61 | 'authentication', 'password', 'forgot', 62 | 63 | # Commerce 64 | 'shop', 'store', 'pricing', 'payment', 65 | 'checkout', 'cart', 'billing', 66 | 67 | # Miscellaneous 68 | 'sitemap', 'search', 'tag', 'category', 69 | 'feed', 'rss', 'download', 'uploads', 70 | 'status', 'stats', 'analytics', 'envato', 'placeit' 71 | } 72 | 73 | def __init__(self, html_content: str, base_url: str): 74 | self.soup = BeautifulSoup(html_content, 'html.parser') 75 | self.base_url = base_url 76 | parsed_base = urlparse(base_url) 77 | self.base_domain = parsed_base.netloc.lower() 78 | 79 | # Extract the main domain name 80 | domain_parts = self.base_domain.split('.') 81 | if domain_parts[0] == 'www': 82 | domain_parts = domain_parts[1:-1] # Remove www and tld 83 | else: 84 | domain_parts = domain_parts[1:-1] if len(domain_parts) > 2 else domain_parts[:-1] # Remove tld and subdomain if present 85 | 86 | self.domain_name = '.'.join(domain_parts) # For cases with multiple subdomains, keep all 87 | 88 | def _should_exclude_link(self, url: str) -> bool: 89 | """Check if a link should be excluded from results.""" 90 | url_lower = url.lower() 91 | 92 | # If domain name appears anywhere in the URL, exclude it 93 | if self.domain_name in url_lower: 94 | return True 95 | 96 | # If URL contains an excluded keyword 97 | if any(keyword.lower() in url_lower for keyword in self.EXCLUDE_KEYWORDS): 98 | return True 99 | 100 | return False 101 | 102 | def _is_in_excluded_container(self, element) -> bool: 103 | """Check if element is in an excluded container. 
104 | Partial matching is used, so 'footer' will match 'global-footer', 'footer-wrapper', etc.""" 105 | for parent in element.parents: 106 | # Check IDs 107 | if parent.get('id'): 108 | parent_id = parent.get('id').lower() 109 | if any(exc in parent_id or parent_id in exc for exc in self.EXCLUDE_CONTAINERS): 110 | return True 111 | 112 | # Check classes 113 | if parent.get('class'): 114 | parent_classes = ' '.join(parent.get('class')).lower() 115 | if any(exc in parent_classes for exc in self.EXCLUDE_CONTAINERS): 116 | return True 117 | 118 | # Check tag names (exact match as these are standard HTML tags) 119 | if parent.name and parent.name.lower() in self.EXCLUDE_CONTAINERS: 120 | return True 121 | 122 | return False 123 | 124 | def _is_in_profile_container(self, element) -> bool: 125 | """Check if element is in a profile container.""" 126 | for parent in element.parents: 127 | # Check IDs 128 | if parent.get('id') and any(prof in parent.get('id').lower() for prof in self.PROFILE_CONTAINERS): 129 | return True 130 | # Check classes 131 | if parent.get('class'): 132 | if any(prof in ' '.join(parent.get('class')).lower() for prof in self.PROFILE_CONTAINERS): 133 | return True 134 | return False 135 | 136 | def _is_valid_external_link(self, url: str) -> bool: 137 | """Check if a link is a valid external link.""" 138 | try: 139 | parsed = urlparse(url) 140 | domain = parsed.netloc.lower() 141 | 142 | # Ignore empty links or links to the same domain 143 | if not domain or domain == self.base_domain: 144 | return False 145 | 146 | # If it's a link to a known social media profile, keep it 147 | social_profile_indicators = ['/user/', '/users/', '/profile/', '@', '/u/', '/channel/'] 148 | if any(social_domain in domain for social_domain in self.SOCIAL_DOMAINS): 149 | if any(indicator in url.lower() for indicator in social_profile_indicators): 150 | # But still check if source domain name isn't present 151 | return not self._should_exclude_link(url) 152 | 153 | # Exclude based on defined criteria 154 | if self._should_exclude_link(url): 155 | return False 156 | 157 | # Check for URLs that look like user profiles 158 | user_profile_patterns = [ 159 | r'/[~@][\w-]+/?$', 160 | r'/users?/[\w-]+/?$', 161 | r'/profiles?/[\w-]+/?$', 162 | r'/members?/[\w-]+/?$', 163 | r'/channel/[\w-]+/?$', 164 | r'/commissions/[\w-]+/?$' 165 | ] 166 | 167 | if any(re.search(pattern, url) for pattern in user_profile_patterns): 168 | return True 169 | 170 | return True # If we get here, the link has passed all filters 171 | 172 | except Exception: 173 | return False 174 | 175 | def analyze(self) -> List[str]: 176 | """Analyze HTML to find relevant external links.""" 177 | links = set() 178 | for a_tag in self.soup.find_all('a', href=True): 179 | href = a_tag['href'] 180 | if href.startswith(('http://', 'https://')): 181 | full_url = href 182 | else: 183 | full_url = urljoin(self.base_url, href) 184 | 185 | if self._is_valid_external_link(full_url): 186 | links.add(full_url) 187 | 188 | # Clean and normalize URLs 189 | cleaned_links = [] 190 | for link in links: 191 | # Remove common tracking parameters 192 | cleaned_url = re.sub(r'\?.*$', '', link) 193 | # Remove trailing slash 194 | cleaned_url = re.sub(r'/$', '', cleaned_url) 195 | cleaned_links.append(cleaned_url) 196 | 197 | return sorted(list(set(cleaned_links))) # Remove duplicates and sort 198 | 199 | def analyze_links(html_content: str, base_url: str) -> List[str]: 200 | """ 201 | Utility function to analyze links on a page. 
202 | 203 | Args: 204 | html_content (str): The HTML content to analyze 205 | base_url (str): The base URL for resolving relative links 206 | 207 | Returns: 208 | List[str]: List of external links found 209 | """ 210 | analyzer = LinkAnalyzer(html_content, base_url) 211 | return analyzer.analyze() -------------------------------------------------------------------------------- /modules/profile_extractor.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from typing import Dict, Set, List 3 | from urllib.parse import urlparse 4 | import json 5 | import re 6 | 7 | class ProfileExtractor: 8 | # Profile related containers and classes 9 | PROFILE_CONTAINERS = { 10 | # Common profile containers 11 | 'profile', 12 | 'bio', 13 | 'about', 14 | 'description', 15 | 'user-info', 16 | 'user-profile', 17 | 'userprofile', 18 | 'user-bio', 19 | 'userbio', 20 | 'author-info', 21 | 'author-bio', 22 | 'biography', 23 | 24 | # Social media specific 25 | 'profile-header', 26 | 'profile-card', 27 | 'profile-info', 28 | 'profile-details', 29 | 'user-details', 30 | 'personal-info', 31 | 'account-info', 32 | 33 | # Content descriptions 34 | 'user-description', 35 | 'creator-info', 36 | 'artist-info', 37 | 'member-info' 38 | } 39 | 40 | # Common metadata fields that might contain profile information 41 | METADATA_FIELDS = { 42 | 'description', 43 | 'og:description', 44 | 'profile:username', 45 | 'profile:first_name', 46 | 'profile:last_name', 47 | 'author', 48 | 'twitter:description', 49 | 'article:author', 50 | 'profile:gender', 51 | 'profile:location' 52 | } 53 | 54 | # Common UI elements to ignore 55 | UI_ELEMENTS = { 56 | 'menu', 'navigation', 'nav', 'search', 'button', 57 | 'dialog', 'modal', 'popup', 'tooltip', 'dropdown', 58 | 'tab', 'menu-item', 'sidebar', 'widget', 'footer' 59 | } 60 | 61 | # Content to exclude (similar to link analyzer) 62 | EXCLUDE_CONTAINERS = { 63 | 'footer', 64 | 'header', 65 | 'nav', 66 | 'navigation', 67 | 'menu', 68 | 'sidebar', 69 | 'copyright', 70 | 'legal', 71 | 'advertisement', 72 | 'cookie', 73 | 'popup', 74 | 'stats', 75 | 'style', 76 | 'script' 77 | } 78 | 79 | def __init__(self, html_content: str, base_url: str): 80 | """Initialize the ProfileExtractor.""" 81 | self.soup = BeautifulSoup(html_content, 'html.parser') 82 | self.base_url = base_url 83 | 84 | # Extract domain name for filtering 85 | parsed_base = urlparse(base_url) 86 | self.base_domain = parsed_base.netloc.lower() 87 | domain_parts = self.base_domain.split('.') 88 | if domain_parts[0] == 'www': 89 | domain_parts = domain_parts[1:-1] 90 | else: 91 | domain_parts = domain_parts[1:-1] if len(domain_parts) > 2 else domain_parts[:-1] 92 | self.domain_name = '.'.join(domain_parts) 93 | 94 | def _clean_text(self, text: str) -> str: 95 | """Clean and normalize text.""" 96 | # Remove multiple spaces and newlines 97 | text = ' '.join(text.split()) 98 | # Remove common UI text patterns 99 | text = re.sub(r'(Follow|Message|Subscribe|Share|Like|Comment|Post|View|Open|Close|Toggle|Click|Tap)\s*', '', text, flags=re.IGNORECASE) 100 | return text.strip() 101 | 102 | def _is_meaningful_text(self, text: str) -> bool: 103 | """Check if text contains meaningful information.""" 104 | # Minimum length check 105 | if len(text) < 3: 106 | return False 107 | 108 | # Check if text is just a single common word 109 | common_words = {'menu', 'home', 'about', 'contact', 'search', 'login', 'signup'} 110 | if text.lower() in common_words: 111 | return False 112 | 113 | # 
Check if text is just numbers 114 | if text.replace(',', '').replace('.', '').isdigit(): 115 | return False 116 | 117 | # Check if text is just a common UI element 118 | if text.lower() in self.UI_ELEMENTS: 119 | return False 120 | 121 | return True 122 | 123 | def _is_in_excluded_container(self, element) -> bool: 124 | """Check if element is in a container that should be excluded.""" 125 | for parent in element.parents: 126 | # Check IDs 127 | if parent.get('id'): 128 | parent_id = parent.get('id').lower() 129 | if any(exc in parent_id or parent_id in exc for exc in self.EXCLUDE_CONTAINERS): 130 | return True 131 | 132 | # Check classes 133 | if parent.get('class'): 134 | parent_classes = ' '.join(parent.get('class')).lower() 135 | if any(exc in parent_classes for exc in self.EXCLUDE_CONTAINERS): 136 | return True 137 | 138 | # Check tag names 139 | if parent.name and parent.name.lower() in self.EXCLUDE_CONTAINERS: 140 | return True 141 | 142 | return False 143 | 144 | def _extract_from_metadata(self) -> Dict[str, str]: 145 | """Extract profile information from metadata tags.""" 146 | metadata = {} 147 | 148 | # Extract from standard meta tags 149 | for meta in self.soup.find_all('meta'): 150 | name = meta.get('name', meta.get('property', '')).lower() 151 | if name in self.METADATA_FIELDS: 152 | content = self._clean_text(meta.get('content', '')) 153 | if content and not self._should_exclude_content(content): 154 | metadata[name] = content 155 | 156 | # Extract from JSON-LD 157 | for script in self.soup.find_all('script', type='application/ld+json'): 158 | try: 159 | data = json.loads(script.string) 160 | if isinstance(data, dict): 161 | if data.get('@type') in ['Person', 'Profile']: 162 | for key, value in data.items(): 163 | if isinstance(value, str): 164 | cleaned_value = self._clean_text(value) 165 | if cleaned_value and not self._should_exclude_content(cleaned_value): 166 | metadata[key] = cleaned_value 167 | except (json.JSONDecodeError, AttributeError): 168 | continue 169 | 170 | return metadata 171 | 172 | def _should_exclude_content(self, text: str) -> bool: 173 | """Check if content should be excluded.""" 174 | return self.domain_name.lower() in text.lower() 175 | 176 | def _extract_from_html(self) -> Set[str]: 177 | """Extract profile information from HTML content.""" 178 | profile_texts = set() 179 | seen_texts = set() 180 | 181 | # First, get all text elements 182 | for element in self.soup.find_all(text=True): 183 | # Check if element is inside excluded container like footer FIRST 184 | if self._is_in_excluded_container(element): 185 | continue # Skip this element and all its content 186 | 187 | # Only then check if it's in a profile container 188 | parent_element = element.parent 189 | if any(ptn in str(parent_element.get('class', [])).lower() or 190 | ptn in str(parent_element.get('id', '')).lower() 191 | for ptn in self.PROFILE_CONTAINERS): 192 | 193 | text = self._clean_text(element.string) 194 | if (text and 195 | text not in seen_texts and 196 | len(text) >= 3): 197 | 198 | profile_texts.add(text) 199 | seen_texts.add(text) 200 | 201 | return profile_texts 202 | 203 | def extract(self) -> Dict[str, List[str]]: 204 | """Extract all profile information from the page.""" 205 | metadata = self._extract_from_metadata() 206 | content = sorted(list(self._extract_from_html())) # Convert set to sorted list 207 | 208 | return { 209 | 'metadata': metadata, 210 | 'content': content 211 | } 212 | 213 | def extract_profile_info(html_content: str, base_url: str) -> Dict[str, 
List[str]]: 214 | """Utility function to extract profile information from a page.""" 215 | extractor = ProfileExtractor(html_content, base_url) 216 | return extractor.extract() -------------------------------------------------------------------------------- /modules/proxy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # modules/proxy.py 3 | 4 | from fastapi import FastAPI, HTTPException 5 | from fastapi.middleware.cors import CORSMiddleware 6 | import httpx 7 | from urllib.parse import urlparse 8 | import os 9 | from dotenv import load_dotenv 10 | 11 | load_dotenv() 12 | 13 | app = FastAPI() 14 | 15 | # CORS configuration 16 | app.add_middleware( 17 | CORSMiddleware, 18 | allow_origins=["*"], 19 | allow_credentials=True, 20 | allow_methods=["*"], 21 | allow_headers=["*"], 22 | ) 23 | 24 | # Domain-specific header configurations 25 | DOMAIN_PATTERNS = { 26 | '.ru': { 27 | 'Accept-Language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7', 28 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36' 29 | }, 30 | '.pl': { 31 | 'Accept-Language': 'pl-PL,pl;q=0.9,en-US;q=0.8,en;q=0.7', 32 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Firefox/120.0' 33 | }, 34 | '.jp': { 35 | 'Accept-Language': 'ja-JP,ja;q=0.9,en-US;q=0.8,en;q=0.7', 36 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Safari/605.1.15' 37 | }, 38 | '.cn': { 39 | 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7', 40 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36' 41 | }, 42 | 'behance.net': { 43 | 'Accept-Language': 'en-US,en;q=0.9', 44 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', 45 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 46 | 'Referer': 'https://www.behance.net/' 47 | }, 48 | 'community': { 49 | 'Accept': 'application/activity+json', 50 | 'User-Agent': 'Mozilla/5.0 (compatible; SocialMediaBot/1.0)' 51 | }, 52 | 'mastodon': { 53 | 'Accept': 'application/activity+json', 54 | 'User-Agent': 'Mozilla/5.0 (compatible; SocialMediaBot/1.0)' 55 | } 56 | } 57 | 58 | # Default headers 59 | DEFAULT_HEADERS = { 60 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', 61 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 62 | 'Accept-Language': 'en-US,en;q=0.9', 63 | 'Cache-Control': 'no-cache', 64 | } 65 | 66 | @app.get("/proxy") 67 | async def proxy(url: str): 68 | if not url: 69 | raise HTTPException(status_code=400, detail='URL parameter is required') 70 | 71 | domain = urlparse(url).netloc.replace('www.', '') 72 | 73 | # Build headers 74 | headers = DEFAULT_HEADERS.copy() 75 | for pattern, pattern_headers in DOMAIN_PATTERNS.items(): 76 | if pattern in domain: 77 | headers.update(pattern_headers) 78 | break 79 | 80 | try: 81 | async with httpx.AsyncClient(verify=False, timeout=25.0) as client: 82 | # First request without following redirects 83 | response = await client.get( 84 | url, 85 | headers=headers, 86 | follow_redirects=False 87 | ) 88 | 89 | initial_status_code = response.status_code 90 | 91 | # If redirect, follow with a new request 92 | if 300 <= initial_status_code < 400: 93 | response = await client.get( 94 | url, 95 | headers=headers, 96 | follow_redirects=True 97 | ) 98 | 99 | # Build response 100 | result = { 101 | 'status': { 102 | 'http_code': response.status_code, 103 | 'initial_http_code': 
initial_status_code, 104 | 'headers': dict(response.headers) 105 | }, 106 | 'contents': response.text, 107 | 'url': str(response.url) 108 | } 109 | 110 | # Add redirect history if present 111 | if response.history: 112 | result['status']['redirect_history'] = [ 113 | { 114 | 'url': str(r.url), 115 | 'status_code': r.status_code, 116 | 'headers': dict(r.headers) 117 | } 118 | for r in response.history 119 | ] 120 | 121 | return result 122 | 123 | except httpx.RequestError as e: 124 | error_details = { 125 | 'message': str(e), 126 | 'code': type(e).__name__, 127 | 'url': url 128 | } 129 | 130 | if isinstance(e, httpx.TimeoutException): 131 | return { 132 | 'error': error_details, 133 | 'status': {'http_code': 504} 134 | } 135 | 136 | return { 137 | 'error': error_details, 138 | 'status': {'http_code': 500} 139 | } 140 | 141 | if __name__ == "__main__": 142 | import uvicorn 143 | import logging 144 | logging.getLogger("uvicorn.access").setLevel(logging.WARNING) 145 | uvicorn.run(app, host="127.0.0.1", port=8000, log_level="warning") -------------------------------------------------------------------------------- /modules/rpuc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # modules/rpuc.py 3 | import aiohttp 4 | import asyncio 5 | import json 6 | import os 7 | from datetime import datetime 8 | from rich.console import Console 9 | from rich.table import Table 10 | from rich.progress import Progress, BarColumn, TimeRemainingColumn, TextColumn 11 | from rich.live import Live 12 | from jinja2 import Environment, BaseLoader 13 | from urllib.parse import urlparse, quote 14 | import logging 15 | from typing import Dict, List, Optional 16 | from dotenv import load_dotenv 17 | from link_analyzer import analyze_links 18 | from profile_extractor import extract_profile_info 19 | from date_extractor import extract_profile_date, normalize_date 20 | import re 21 | 22 | # Load environment variables 23 | load_dotenv() 24 | 25 | # Logging configuration 26 | logging.basicConfig( 27 | level=logging.INFO, 28 | format='%(asctime)s - %(levelname)s - %(message)s' 29 | ) 30 | logger = logging.getLogger(__name__) 31 | 32 | # Configuration 33 | BATCH_SIZE = 50 # Process 50 requests simultaneously 34 | MAX_CONNECTIONS = 200 # Maximum connections for aiohttp 35 | REQUEST_TIMEOUT = 15 36 | DEFAULT_JSON_URL = "https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json" 37 | JSON_URL = os.getenv('WMN_JSON_URL', DEFAULT_JSON_URL) 38 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 39 | DATA_DIR = os.path.join(BASE_DIR, "data") 40 | RESULTS_DIR = os.path.join(BASE_DIR, "results") 41 | PROGRESS_DELAY = 0.01 42 | 43 | os.makedirs(DATA_DIR, exist_ok=True) 44 | os.makedirs(RESULTS_DIR, exist_ok=True) 45 | 46 | PROXY_URL = os.getenv('PROXY_URL', 'http://127.0.0.1:8000/proxy') 47 | 48 | HEADERS = { 49 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', 50 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 51 | 'Accept-Language': 'en-US,en;q=0.9', 52 | 'Cache-Control': 'no-cache', 53 | } 54 | 55 | class SiteChecker: 56 | def __init__(self): 57 | """Initialize the site checker.""" 58 | self.console = Console() 59 | self.sites = [] 60 | self.results = [] 61 | self.data_dir = DATA_DIR 62 | self.results_dir = RESULTS_DIR 63 | 64 | async def download_sites_data(self): 65 | """Download site data from configured URL.""" 66 | local_file = 
os.path.join(self.data_dir, "wmn-data.json") 67 | 68 | try: 69 | async with aiohttp.ClientSession() as session: 70 | self.console.print(f"[cyan]Downloading data from {JSON_URL}...") 71 | async with session.get(JSON_URL) as response: 72 | if response.status == 200: 73 | data = await response.text() 74 | json_data = json.loads(data) 75 | self.sites = json_data.get('sites', []) 76 | with open(local_file, 'w', encoding='utf-8') as f: 77 | f.write(data) 78 | self.console.print("[green]Data downloaded successfully") 79 | else: 80 | if os.path.exists(local_file): 81 | self.console.print("[yellow]Using local data...") 82 | with open(local_file, 'r', encoding='utf-8') as f: 83 | json_data = json.load(f) 84 | self.sites = json_data.get('sites', []) 85 | else: 86 | raise Exception("Unable to download data and no local data available") 87 | except Exception as e: 88 | if os.path.exists(local_file): 89 | self.console.print("[yellow]Using local data...") 90 | with open(local_file, 'r', encoding='utf-8') as f: 91 | json_data = json.load(f) 92 | self.sites = json_data.get('sites', []) 93 | else: 94 | raise 95 | 96 | def is_date_status(self, status): 97 | """Détermine si le statut contient une date.""" 98 | if not isinstance(status, str): 99 | return False 100 | 101 | status_lower = status.lower() 102 | 103 | # Vérifie si "join" ou un nom de mois est présent 104 | months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] 105 | if 'join' in status_lower or any(month in status_lower for month in months): 106 | return True 107 | 108 | # Vérifie s'il y a au moins un chiffre 109 | if any(c.isdigit() for c in status_lower): 110 | return True 111 | 112 | return False 113 | 114 | async def verify_content(self, content: str, pattern: str, site_name: str) -> bool: 115 | """Check if pattern is present in content.""" 116 | if not pattern: 117 | return True 118 | if not isinstance(content, str): 119 | return False 120 | 121 | normalized_content = ' '.join(content.split()) 122 | normalized_pattern = ' '.join(pattern.split()).replace('\\"', '"') 123 | 124 | return normalized_pattern.lower() in normalized_content.lower() 125 | 126 | async def check_site(self, site: dict, username: str, session: aiohttp.ClientSession) -> Optional[dict]: 127 | """Check a specific site for a given username.""" 128 | original_url = site['uri_check'].replace("{account}", username) 129 | display_url = site.get('uri_pretty', original_url).replace("{account}", username) 130 | 131 | if original_url.startswith('http://'): 132 | original_url = original_url.replace('http://', 'https://') 133 | 134 | try: 135 | # Use proxy 136 | proxy_url = f"{PROXY_URL}?url={quote(original_url)}" 137 | async with session.get(proxy_url, timeout=REQUEST_TIMEOUT) as response: 138 | if response.status != 200: 139 | return None 140 | 141 | json_response = await response.json() 142 | if not json_response or 'status' not in json_response: 143 | return None 144 | 145 | content = json_response.get('contents', '') 146 | status_data = json_response['status'] 147 | initial_status = status_data.get('initial_http_code', status_data.get('http_code')) 148 | 149 | # Verify status and patterns 150 | has_miss_string = await self.verify_content(content, site.get('m_string', ''), site['name']) 151 | has_expected_string = await self.verify_content(content, site.get('e_string', ''), site['name']) 152 | 153 | # Cas 1: Si m_string est présent et m_code correspond => Non trouvé 154 | if has_miss_string and initial_status == site['m_code']: 155 | return None 
156 | 157 | # Cas 2: Si e_string est présent et e_code correspond => Found 158 | if has_expected_string and initial_status == site['e_code']: 159 | external_links = analyze_links(content, original_url) 160 | profile_info = extract_profile_info(content, original_url) 161 | 162 | # Extraire la date de création du profil 163 | profile_date = None 164 | if profile_info and 'metadata' in profile_info: 165 | profile_date = extract_profile_date(content, profile_info.get('metadata', {}), site_name=site['name']) 166 | 167 | # Déterminer le statut (date de création ou "found") 168 | status = 'found' 169 | if profile_date: 170 | status = normalize_date(profile_date) 171 | 172 | # Vérifier si le contenu provient d'une balise link rel (à ignorer) 173 | if status != 'found' and " Unsure 187 | if has_expected_string and initial_status != site['e_code']: 188 | external_links = analyze_links(content, original_url) 189 | profile_info = extract_profile_info(content, original_url) 190 | 191 | return { 192 | 'name': site['name'], 193 | 'category': site['cat'], 194 | 'url': display_url, 195 | 'status': 'unsure', 196 | 'http_code': initial_status, 197 | 'external_links': external_links, 198 | 'profile_info': profile_info 199 | } 200 | 201 | # Cas 4: Si ni e_string ni m_string ne sont présents => Non trouvé 202 | if not has_expected_string and not has_miss_string: 203 | return None 204 | 205 | # Pour tout autre cas non prévu => Non trouvé 206 | return None 207 | 208 | except Exception as e: 209 | logger.error(f"Error checking {site['name']}: {str(e)}") 210 | return None 211 | 212 | async def process_batch(self, sites: List[dict], username: str) -> List[dict]: 213 | """Process a batch of sites in parallel.""" 214 | connector = aiohttp.TCPConnector(limit=50, force_close=True) 215 | async with aiohttp.ClientSession(connector=connector) as session: 216 | tasks = [] 217 | for site in sites: 218 | tasks.append(self.check_site(site, username, session)) 219 | 220 | results = await asyncio.gather(*tasks, return_exceptions=True) 221 | valid_results = [] 222 | for r in results: 223 | if isinstance(r, Exception): 224 | logger.error(f"Error in batch: {str(r)}") 225 | continue 226 | if r is not None: 227 | valid_results.append(r) 228 | return valid_results 229 | 230 | async def check_username(self, username: str): 231 | """Check a username across all sites.""" 232 | self.results = [] 233 | console = Console() 234 | 235 | with Progress( 236 | TextColumn("{task.description}"), 237 | BarColumn(complete_style="green", finished_style="green"), 238 | TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), 239 | TextColumn("•"), 240 | TimeRemainingColumn(), 241 | console=console, 242 | transient=True, 243 | ) as progress: 244 | main_task = progress.add_task( 245 | f"[cyan]🦏 Searching...", 246 | total=len(self.sites) 247 | ) 248 | 249 | found_count = 0 250 | tasks = [] 251 | 252 | # Create all batches 253 | for i in range(0, len(self.sites), BATCH_SIZE): 254 | batch = self.sites[i:i + BATCH_SIZE] 255 | tasks.append(self.process_batch(batch, username)) 256 | 257 | # Process batches in groups 258 | for i in range(0, len(tasks), 2): 259 | current_tasks = tasks[i:i+2] 260 | batch_results = await asyncio.gather(*current_tasks) 261 | 262 | sites_processed = min(BATCH_SIZE * 2, len(self.sites) - (i * BATCH_SIZE)) 263 | 264 | for results in batch_results: 265 | found_in_batch = len(results) 266 | if found_in_batch > 0: 267 | found_count += found_in_batch 268 | for result in results: 269 | console.print(f"[green]✓ Found on 
{result['name']}[/green]") 270 | 271 | progress.update( 272 | main_task, 273 | advance=sites_processed, 274 | description=f"[cyan]🦏 Searching... ({found_count} found)" 275 | ) 276 | 277 | await asyncio.sleep(PROGRESS_DELAY) 278 | 279 | for results in batch_results: 280 | self.results.extend(results) 281 | 282 | def display_results_console(self): 283 | """Display results in console with styling.""" 284 | if not self.results: 285 | self.console.print("\n[yellow]No profiles found[/yellow]") 286 | return 287 | 288 | table = Table(title=f"Search Results") 289 | 290 | table.add_column("Site", style="cyan") 291 | table.add_column("Category", style="green") 292 | table.add_column("Status", style="magenta") 293 | table.add_column("URL", style="blue") 294 | table.add_column("External Links", style="yellow") 295 | table.add_column("Profile Info", style="white") 296 | 297 | for result in self.results: 298 | status_style = "green" if result['status'] == 'found' else "yellow" if result['status'] == 'unsure' else "white" 299 | 300 | external_links = result.get('external_links', []) 301 | links_str = ", ".join(external_links) if external_links else "-" 302 | 303 | profile_info = result.get('profile_info', {}) 304 | profile_str = "" 305 | if profile_info: 306 | if profile_info.get('metadata'): 307 | profile_str += "Metadata: " + ", ".join(f"{k}: {v}" for k, v in profile_info['metadata'].items()) 308 | if profile_info.get('content'): 309 | profile_str += "\nContent: " + ", ".join(profile_info['content']) 310 | 311 | table.add_row( 312 | result['name'], 313 | result['category'], 314 | f"[{status_style}]{result['status']}[/{status_style}]", 315 | result['url'], 316 | links_str, 317 | profile_str or "-" 318 | ) 319 | self.console.print(table) 320 | 321 | def export_html(self, output_file: str, username: str = ""): 322 | """Export results to HTML.""" 323 | env = Environment(loader=BaseLoader()) 324 | template_str = r''' 325 | 326 | 327 | 328 | 329 | 330 | RPUC Results 331 | 332 | 681 | 682 | 683 |
684 |
685 |

Rhino User Checker Results

686 |

Results for: {{ username }}

687 |
Generated on {{ timestamp }}
688 |
689 | 690 | {% if results %} 691 | 692 |
693 |
694 |
{{ results|length }}
695 |
Total Profiles
696 |
697 |
698 |
{{ results|selectattr("status", "equalto", "found")|list|length }}
699 |
Confirmed
700 |
701 |
702 |
{{ results|selectattr("status", "equalto", "unsure")|list|length }}
703 |
Possible
704 |
705 |
706 |
{{ results|rejectattr("status", "equalto", "found")|rejectattr("status", "equalto", "unsure")|list|length }}
707 |
With Dates
708 |
709 |
710 | 711 | 712 |
713 | {% for result in results %} 714 |
715 |
716 |
717 | {{ result.name }} 718 | {{ result.category }} 719 |
720 | 721 |
722 | {% if result.status != 'found' and result.status != 'unsure' %} 723 | 724 | {% elif result.status == 'found' %} 725 | 726 | {% elif result.status == 'unsure' %} 727 | 728 | {% endif %} 729 | {{ result.status }} 730 |
731 |
732 | 733 |
734 | 740 | 741 | {% if result.external_links %} 742 | 752 | {% endif %} 753 | 754 | {% if result.profile_info %} 755 | {% if result.profile_info.metadata %} 756 | 766 | {% endif %} 767 | 768 | {% if result.profile_info.content %} 769 |
770 |
Content
771 |
772 | {% for item in result.profile_info.content %} 773 |
{{ item }}
774 | {% endfor %} 775 |
776 |
777 | {% endif %} 778 | {% endif %} 779 |
780 |
781 | {% endfor %} 782 |
783 | {% else %} 784 |
785 | No profiles found 786 |
787 | {% endif %} 788 |
789 | 790 | 791 | ''' 792 | 793 | template = env.from_string(template_str) 794 | html_content = template.render( 795 | results=self.results, 796 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 797 | username=username, 798 | is_date_status=self.is_date_status # Ajouter la fonction au contexte 799 | ) 800 | 801 | output_path = os.path.join(self.results_dir, output_file) 802 | 803 | with open(output_path, 'w', encoding='utf-8') as f: 804 | f.write(html_content) 805 | return output_path 806 | 807 | def export_results_csv(self, output_file: str): 808 | """Export results to CSV format.""" 809 | import csv 810 | output_path = os.path.join(self.results_dir, output_file) 811 | 812 | with open(output_path, 'w', newline='', encoding='utf-8') as f: 813 | writer = csv.writer(f) 814 | # Write headers 815 | headers = ['Site', 'Category', 'Status', 'URL', 'External Links', 'Profile Info'] 816 | writer.writerow(headers) 817 | 818 | # Write data 819 | for result in self.results: 820 | external_links = '; '.join(result.get('external_links', [])) 821 | 822 | # Format profile info 823 | profile_info = result.get('profile_info', {}) 824 | profile_str = '' 825 | if profile_info: 826 | if profile_info.get('metadata'): 827 | profile_str += 'Metadata: ' + ', '.join(f"{k}: {v}" for k, v in profile_info['metadata'].items()) 828 | if profile_info.get('content'): 829 | profile_str += ' | Content: ' + ', '.join(profile_info['content']) 830 | 831 | row = [ 832 | result['name'], 833 | result['category'], 834 | result['status'], 835 | result['url'], 836 | external_links, 837 | profile_str 838 | ] 839 | writer.writerow(row) 840 | 841 | return output_path 842 | 843 | async def main(): 844 | try: 845 | checker = SiteChecker() 846 | await checker.download_sites_data() 847 | 848 | username = input("\nEnter username to search: ") 849 | 850 | while True: 851 | if not username.strip(): 852 | print("Username cannot be empty") 853 | username = input("\nEnter username to search: ") 854 | continue 855 | 856 | print(f"\nSearching profiles for {username}...") 857 | await checker.check_username(username) 858 | 859 | checker.display_results_console() 860 | 861 | # Ask for export format 862 | while True: 863 | export_choice = input("\nDo you want to export results? (CSV / HTML / BOTH / NO): ").upper() 864 | if export_choice in ['CSV', 'HTML', 'BOTH', 'NO']: 865 | break 866 | print("Invalid choice. Please enter CSV, HTML, BOTH, or NO.") 867 | 868 | if export_choice != 'NO': 869 | timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') 870 | 871 | if export_choice in ['HTML', 'BOTH']: 872 | output_file = f"results_{username}_{timestamp}.html" 873 | output_path_html = checker.export_html(output_file, username=username) 874 | print(f"\nHTML results exported to {output_path_html}") 875 | 876 | if export_choice in ['CSV', 'BOTH']: 877 | output_file = f"results_{username}_{timestamp}.csv" 878 | output_path_csv = checker.export_results_csv(output_file) 879 | print(f"CSV results exported to {output_path_csv}") 880 | 881 | # Ask to search another user 882 | username = input("\nSearch another user? 
(enter alias or ctrl-c to quit): ")
883 |             if not username.strip():
884 |                 break
885 | 
886 |     except KeyboardInterrupt:
887 |         print("\nOperation cancelled by user...")
888 |     except asyncio.CancelledError:
889 |         print("\nOperation cancelled...")
890 |     except Exception as e:
891 |         print(f"An error occurred: {str(e)}")
893 | 
894 | def run():
895 |     try:
896 |         asyncio.run(main())
897 |     except KeyboardInterrupt:
898 |         pass
899 | 
900 | if __name__ == "__main__":
901 |     run()
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # 🦏 RhinoUserChecker (RPUC)
2 | 
3 | **PLEASE BE AWARE THAT THIS IS NOT A PRODUCTION VERSION AND SHOULD BE USED WITH CAUTION**
4 | 
5 | A Python-based OSINT tool that helps you find usernames across multiple platforms and extract profile information. Built on top of the WhatsMyName project's data, RPUC adds advanced profile extraction and external link analysis capabilities.
6 | 
7 | ## 🌟 Features
8 | 
9 | - **Multi-platform Search**: Search for usernames across hundreds of social media platforms and websites, thanks to the WhatsMyName JSON file
10 | - **Profile Information Extraction**: Automatically extract user profile information, bios, and metadata
11 | - **Profile Creation Date**: Attempt to find the account creation date
12 | - **External Link Analysis**: Discover related profiles through external link analysis
13 | - **Smart Rate Limiting**: Built-in proxy support and smart rate limiting to avoid blocking
14 | - **Rich Console Output**: Real-time progress tracking and beautiful console output using Rich
15 | - **HTML or CSV Report Generation**: Generate detailed HTML or CSV reports with all findings
16 | - **International Platform Support**: Special handling for international platforms (Russian, Chinese, Japanese, etc.)
17 | 
18 | ## Discussion
19 | 
20 | You can join the OSCAR ZULU Discord server to discuss this tool: https://discord.gg/4REgJzn4NG
21 | 
22 | ## 📋 Requirements
23 | 
24 | ```text
25 | Python 3.8+
26 | See requirements.txt for full dependencies
27 | ```
28 | 
29 | ## 🚀 Installation
30 | 
31 | 1. Clone the repository:
32 | ```bash
33 | git clone https://github.com/degun-osint/RhinoUserChecker
34 | cd RhinoUserChecker
35 | ```
36 | 
37 | 2. Create a virtual environment and activate it:
38 | ```bash
39 | python -m venv venv
40 | source venv/bin/activate # On Windows: venv\Scripts\activate
41 | ```
42 | 
43 | 3. Install the required packages:
44 | ```bash
45 | pip install -r requirements.txt
46 | ```
47 | 
48 | ## ⚙️ Configuration
49 | 
50 | RPUC uses environment variables for configuration. Create a `.env` file in the root directory, based on the `.env-sample` file, with:
51 | 
52 | ```env
53 | WMN_JSON_URL=https://raw.githubusercontent.com/WebBreacher/WhatsMyName/main/wmn-data.json
54 | PROXY_URL=http://127.0.0.1:8000/proxy
55 | ```
56 | By default, the script uses a forked version of the WMN JSON file.
57 | 
58 | ## 🐳 Docker Installation
59 | 
60 | ### Using Docker Compose (recommended)
61 | 
62 | 1. Clone the repository:
63 | ```bash
64 | git clone https://github.com/degun-osint/RhinoUserChecker
65 | cd RhinoUserChecker
66 | ```
67 | 
68 | 2. Run the application:
69 | ```bash
70 | docker-compose up -d
71 | ```
72 | 
73 | 3. Attach to the running container to interact with the application:
74 | ```bash
75 | docker attach rhino-user-checker
76 | ```
77 | 
78 | 4. To exit the application, press `Ctrl+C`. To detach from the container without stopping it, press `Ctrl+P` followed by `Ctrl+Q`.
79 | 
80 | ### Using Docker directly
81 | 
82 | 1. Build the Docker image:
83 | ```bash
84 | docker build -t rhino-user-checker .
85 | ```
86 | 
87 | 2. Run the container:
88 | ```bash
89 | docker run -it --name rhino-user-checker -v $(pwd)/data:/app/data -v $(pwd)/results:/app/results rhino-user-checker
90 | ```
91 | 
92 | The application creates two directories:
93 | - `./data`: Stores the WhatsMyName database
94 | - `./results`: Stores exported results (HTML and CSV)
95 | 
96 | These directories are mounted as volumes to persist data between container runs.
97 | 
98 | ### Docker Troubleshooting
99 | 
100 | If you encounter any issues with Docker:
101 | 1. Check that the volumes have the correct permissions
102 | 2. If you're having network issues, ensure your Docker container has internet access
103 | 
104 | ## 🎮 Usage
105 | 
106 | Start the tool by running:
107 | 
108 | ```bash
109 | python run.py
110 | ```
111 | 
112 | The tool will:
113 | 1. Download the latest site data from the WhatsMyName project
114 | 2. Prompt you for a username to search
115 | 3. Search across hundreds of platforms
116 | 4. Generate an HTML or CSV report with findings
117 | 
118 | ## 📊 Output
119 | 
120 | RPUC generates two types of output:
121 | - Real-time console output with progress tracking
122 | - Detailed HTML or CSV report containing:
123 |   - Found profiles with links
124 |   - Status (`found` = the profile most likely exists; `unsure` = the site returned an HTTP 200 where a 404 was expected for a missing profile, so the profile cannot be confirmed)
125 |   - Extracted profile information
126 |   - Discovered external links
127 |   - Metadata from profiles
128 | 
129 | ## 🏗️ Project Structure
130 | 
131 | ```
132 | rpuc/
133 | ├── run.py                   # Main entry point
134 | ├── modules/
135 | │   ├── proxy.py             # Proxy server for rate limiting
136 | │   ├── rpuc.py              # Core functionality
137 | │   ├── date_extractor.py    # Creation date search
138 | │   ├── link_analyzer.py     # External link analysis
139 | │   └── profile_extractor.py # Profile information extraction
140 | ├── data/                    # Data storage
141 | └── results/                 # Generated reports
142 | ```
143 | 
144 | ## 🔧 Advanced Usage
145 | 
146 | ### Custom Headers
147 | 
148 | RPUC supports custom headers for different domains/regions. Edit the `DOMAIN_PATTERNS` dictionary in `proxy.py` to add more patterns (a sketch follows at the end of this section).
149 | 
150 | ### Proxy Configuration
151 | 
152 | By default, RPUC runs its own proxy server for rate limiting. You can configure an external proxy by modifying the `PROXY_URL` in your `.env` file.
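
The proxy exposes a single `GET /proxy?url=...` endpoint and returns the upstream response wrapped in a JSON envelope (`status`, `contents`, `url`), which is the same envelope that `check_site()` in `modules/rpuc.py` consumes. The snippet below is a minimal sketch of querying it directly with `httpx`; it assumes the proxy is already running on its default `127.0.0.1:8000` address, and the target URL is only a placeholder.

```python
import httpx

# Minimal sketch: query the bundled proxy directly. Assumes modules/proxy.py
# is already running on the default 127.0.0.1:8000 (run.py starts it for you);
# the target URL below is only a placeholder.
PROXY_URL = "http://127.0.0.1:8000/proxy"
target = "https://example.com/some-profile"

response = httpx.get(PROXY_URL, params={"url": target}, timeout=30.0)
data = response.json()

# The envelope: status.initial_http_code (before redirects), status.http_code,
# contents (the HTML body) and the final url.
status = data.get("status", {})
print(status.get("initial_http_code"), status.get("http_code"))
print(data.get("url"))
print(len(data.get("contents", "")), "characters of HTML")
```

For the custom header patterns described above, a new entry could look like the hedged sketch below (the `.de` pattern is purely hypothetical): each key is a substring matched against the target domain, and its headers are merged over `DEFAULT_HEADERS` before the request is sent.

```python
# Hypothetical addition to DOMAIN_PATTERNS in modules/proxy.py: send German
# Accept-Language headers to .de domains. The key is a substring matched
# against the target domain; the headers are merged over DEFAULT_HEADERS.
DOMAIN_PATTERNS = {
    # ... existing entries ...
    '.de': {
        'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
    },
}
```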
153 | 
154 | ## 🤝 Contributing
155 | 
156 | Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
157 | 
158 | ## 📜 License
159 | 
160 | This project is licensed under the GNU General Public License v3.0 - see the [license.txt](license.txt) file for details.
161 | 
162 | ## 🙏 Credits
163 | 
164 | - Based on the [WhatsMyName Project](https://github.com/WebBreacher/WhatsMyName)
165 | - Built with:
166 |   - [FastAPI](https://fastapi.tiangolo.com/)
167 |   - [Rich](https://rich.readthedocs.io/)
168 |   - [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup/)
169 |   - [aiohttp](https://docs.aiohttp.org/)
170 | 
171 | ## ⚠️ Disclaimer
172 | 
173 | This tool is for educational purposes only. Be mindful of the platforms' terms of service and use responsibly.
174 | 175 | ## Author 176 | 177 | DEGUN (https://github.com/degun-osint) 178 | 179 | 180 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.4.4 2 | aiohttp==3.11.11 3 | aiosignal==1.3.2 4 | annotated-types==0.7.0 5 | anyio==4.8.0 6 | attrs==25.1.0 7 | beautifulsoup4==4.13.1 8 | certifi==2025.1.31 9 | click==8.1.8 10 | fastapi==0.115.8 11 | frozenlist==1.5.0 12 | h11==0.14.0 13 | httpcore==1.0.7 14 | httpx==0.28.1 15 | idna==3.10 16 | Jinja2==3.1.5 17 | markdown-it-py==3.0.0 18 | MarkupSafe==3.0.2 19 | mdurl==0.1.2 20 | multidict==6.1.0 21 | propcache==0.2.1 22 | psutil==6.1.1 23 | pydantic==2.10.6 24 | pydantic_core==2.27.2 25 | Pygments==2.19.1 26 | python-dotenv==1.0.1 27 | rich==13.9.4 28 | sniffio==1.3.1 29 | soupsieve==2.6 30 | starlette==0.45.3 31 | typing_extensions==4.12.2 32 | uvicorn==0.34.0 33 | yarl==1.18.3 34 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # run.py 3 | 4 | import subprocess 5 | import sys 6 | import time 7 | import signal 8 | import os 9 | import psutil 10 | from rich.console import Console 11 | 12 | # Path configuration 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | MODULES_DIR = os.path.join(BASE_DIR, "modules") 15 | PROXY_PATH = os.path.join(MODULES_DIR, "proxy.py") 16 | RPUC_PATH = os.path.join(MODULES_DIR, "rpuc.py") 17 | DATA_DIR = os.path.join(BASE_DIR, "data") 18 | RESULTS_DIR = os.path.join(BASE_DIR, "results") 19 | 20 | # Create necessary directories 21 | os.makedirs(DATA_DIR, exist_ok=True) 22 | os.makedirs(RESULTS_DIR, exist_ok=True) 23 | 24 | console = Console() 25 | 26 | def kill_process_tree(pid): 27 | """Kill a process and all its children.""" 28 | try: 29 | parent = psutil.Process(pid) 30 | children = parent.children(recursive=True) 31 | for child in children: 32 | try: 33 | child.kill() 34 | except psutil.NoSuchProcess: 35 | pass 36 | parent.kill() 37 | except psutil.NoSuchProcess: 38 | pass 39 | 40 | def cleanup(proxy_process, main_process): 41 | """Clean up processes on shutdown.""" 42 | if main_process: 43 | kill_process_tree(main_process.pid) 44 | if proxy_process: 45 | kill_process_tree(proxy_process.pid) 46 | 47 | def run_proxy(): 48 | """Start the proxy server without changing the global directory.""" 49 | try: 50 | return subprocess.Popen([sys.executable, PROXY_PATH], 51 | stdout=subprocess.PIPE, 52 | stderr=subprocess.PIPE, 53 | cwd=MODULES_DIR) 54 | except Exception as e: 55 | console.print(f"[red]Error starting proxy: {e}[/red]") 56 | sys.exit(1) 57 | 58 | def run_main(): 59 | """Start the main script without changing the global directory.""" 60 | try: 61 | return subprocess.Popen([sys.executable, RPUC_PATH], 62 | cwd=MODULES_DIR) 63 | except Exception as e: 64 | console.print(f"[red]Error starting main script: {e}[/red]") 65 | return None 66 | 67 | def print_banner(): 68 | banner = r""" 69 | .-----------------------------------------. 70 | ( RHINO USER CHECKER - OSCAR ZULU FOREVER ! ) 71 | //\'----------------------------------------'\ 72 | / , _.-~~-.__ __.,----. 73 | ('; __( ) ~~~'--..--~~ '. 74 | ( . ""..-' ')| . \ '. 75 | \\. |\'.' ; . ; ; ; 76 | \ \" /9) ' . ; ; 77 | ; ) ) ( ' . ; ' . 78 | ) _ __.-'-._ ; ' . , /\ ; 79 | '-"'--' ; "-. '. ' _.-( ". ( 80 | ; \,) )--,..----';' > ; . 81 | \ ( | / ( / . 
; 82 | , , ) | ; .( . , ) / \ ; 83 | ,;'PjP;.';-.;._,;/;,;)/;.;.);.;,,;,;,,;/;;,),;.,/,;.).,; 84 | 85 | """ 86 | console.print("[yellow]" + banner + "[/yellow]") 87 | 88 | def print_title(): 89 | title = "Username, profile info and link scrapper \n" 90 | credits = "Based on Whatsmyname JSON (https://github.com/WebBreacher/WhatsMyName)\n" 91 | console.print("[bold cyan]" + title + "[/bold cyan]") 92 | console.print("[italic dim cyan]" + credits + "[/italic dim cyan]") 93 | 94 | def main(): 95 | # Display banner 96 | print_banner() 97 | print_title() 98 | 99 | # Check file existence 100 | if not os.path.exists(PROXY_PATH): 101 | console.print(f"[red]Error: {PROXY_PATH} does not exist[/red]") 102 | sys.exit(1) 103 | if not os.path.exists(RPUC_PATH): 104 | console.print(f"[red]Error: {RPUC_PATH} does not exist[/red]") 105 | sys.exit(1) 106 | 107 | proxy_process = None 108 | main_process = None 109 | 110 | def signal_handler(signum, frame): 111 | console.print("\n[yellow]Stopping processes...[/yellow]") 112 | cleanup(proxy_process, main_process) 113 | sys.exit(0) 114 | 115 | # Signal handling 116 | signal.signal(signal.SIGINT, signal_handler) 117 | signal.signal(signal.SIGTERM, signal_handler) 118 | 119 | try: 120 | # Start proxy 121 | console.print("[cyan]Starting proxy...[/cyan]") 122 | proxy_process = run_proxy() 123 | 124 | # Wait for proxy to be ready 125 | time.sleep(2) 126 | 127 | # Start main script 128 | console.print("[cyan]Starting main script...[/cyan]") 129 | main_process = run_main() 130 | 131 | while True: 132 | if main_process.poll() is not None: 133 | break 134 | time.sleep(0.1) 135 | 136 | except KeyboardInterrupt: 137 | console.print("\n[yellow]Operation cancelled by user...[/yellow]") 138 | except Exception as e: 139 | console.print(f"\n[red]Error: {e}[/red]") 140 | finally: 141 | cleanup(proxy_process, main_process) 142 | console.print("[green]Processes stopped[/green]") 143 | 144 | if __name__ == "__main__": 145 | main() --------------------------------------------------------------------------------