├── .env
├── .gitignore
├── Dockerfile
├── docker-compose.yml
├── license.txt
├── modules
│   ├── .DS_Store
│   ├── date_extractor.py
│   ├── link_analyzer.py
│   ├── profile_extractor.py
│   ├── proxy.py
│   └── rpuc.py
├── readme.md
├── requirements.txt
└── run.py

/.env:
--------------------------------------------------------------------------------
1 | # URL of the JSON file containing the site data
2 | WMN_JSON_URL=https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json
3 | PROXY_URL=http://127.0.0.1:8000/proxy
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | /.DS_Store
2 | /.venv
3 | /.vscode
4 | /data
5 | /results
6 | modules/__pycache__/*
7 | modules/.DS_Store
8 | modules/.DS_Store
9 | 
--------------------------------------------------------------------------------
/Dockerfile:
--------------------------------------------------------------------------------
1 | # Use Python 3.12 slim as base image
2 | FROM python:3.12-slim
3 | 
4 | # Set working directory
5 | WORKDIR /app
6 | 
7 | # Install system dependencies
8 | RUN apt-get update && apt-get install -y --no-install-recommends \
9 |     gcc \
10 |     python3-dev \
11 |     && rm -rf /var/lib/apt/lists/*
12 | 
13 | # Copy requirements first for better layer caching
14 | COPY requirements.txt .
15 | RUN pip install --no-cache-dir -r requirements.txt
16 | 
17 | # Create necessary directories
18 | RUN mkdir -p /app/data /app/results /app/modules
19 | 
20 | # Copy application files
21 | COPY run.py .
22 | COPY modules/proxy.py modules/
23 | COPY modules/rpuc.py modules/
24 | COPY modules/date_extractor.py modules/
25 | COPY modules/link_analyzer.py modules/
26 | COPY modules/profile_extractor.py modules/
27 | 
28 | # Make scripts executable
29 | RUN chmod +x run.py
30 | RUN chmod +x modules/proxy.py
31 | RUN chmod +x modules/rpuc.py
32 | 
33 | # Set environment variables
34 | ENV PYTHONUNBUFFERED=1
35 | ENV WMN_JSON_URL=https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json
36 | ENV PROXY_URL=http://127.0.0.1:8000/proxy
37 | 
38 | # Create a volume for persistent data
39 | VOLUME ["/app/data", "/app/results"]
40 | 
41 | # Run application
42 | CMD ["python", "run.py"]
--------------------------------------------------------------------------------
/docker-compose.yml:
--------------------------------------------------------------------------------
1 | services:
2 |   rhino-user-checker:
3 |     build:
4 |       context: .
5 |       dockerfile: Dockerfile
6 |     container_name: rhino-user-checker
7 |     volumes:
8 |       - ./data:/app/data
9 |       - ./results:/app/results
10 |     stdin_open: true # Keep STDIN open even if not attached
11 |     tty: true # Allocate a pseudo-TTY
12 |     restart: "no" # Don't restart automatically
--------------------------------------------------------------------------------
/license.txt:
--------------------------------------------------------------------------------
1 | GNU GENERAL PUBLIC LICENSE
2 | Version 3, 29 June 2007
3 | 
4 | RhinoUserChecker (RPUC) - OSINT Username Checking Tool
5 | Copyright (C) 2024 DEGUN
6 | 
7 | This program is free software: you can redistribute it and/or modify
8 | it under the terms of the GNU General Public License as published by
9 | the Free Software Foundation, either version 3 of the License, or
10 | (at your option) any later version.
11 | 12 | This program is distributed in the hope that it will be useful, 13 | but WITHOUT ANY WARRANTY; without even the implied warranty of 14 | MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 | GNU General Public License for more details. 16 | 17 | You should have received a copy of the GNU General Public License 18 | along with this program. If not, see . 19 | 20 | For the full license text, please visit: 21 | https://www.gnu.org/licenses/gpl-3.0.txt -------------------------------------------------------------------------------- /modules/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/degun-osint/RhinoUserChecker/3f7ca0cea917314b64c59e9923611ac7f05ac0f2/modules/.DS_Store -------------------------------------------------------------------------------- /modules/date_extractor.py: -------------------------------------------------------------------------------- 1 | import re 2 | from datetime import datetime 3 | from typing import Optional 4 | 5 | def extract_profile_date(html_content: str, metadata: dict, site_name: str = "") -> Optional[str]: 6 | """ 7 | Extraire la date de création du profil à partir du contenu HTML ou des métadonnées. 8 | 9 | Args: 10 | html_content (str): Le contenu HTML de la page 11 | metadata (dict): Les métadonnées extraites du profil 12 | site_name (str): Le nom du site pour appliquer des règles spécifiques 13 | 14 | Returns: 15 | Optional[str]: La date de création formatée, ou None si aucune date n'est trouvée 16 | """ 17 | # Exclure certains sites ou patterns spécifiques 18 | if site_name.lower() == "behance" and "created_on" in html_content: 19 | return None 20 | 21 | # Vérifier si le contenu provient d'une balise link rel 22 | has_link_rel_date = " str: 88 | """ 89 | Tenter de normaliser le format de date pour un affichage cohérent. 90 | Cette fonction est simple et peut être améliorée pour gérer plus de formats. 91 | 92 | Args: 93 | date_str (str): La chaîne de date extraite 94 | 95 | Returns: 96 | str: La date normalisée, ou la chaîne originale si impossible à normaliser 97 | """ 98 | # Pour l'instant, simplement nettoyer la chaîne 99 | date_str = date_str.strip() 100 | 101 | # Supprimer les virgules pour simplifier 102 | date_str = date_str.replace(',', '') 103 | 104 | # Pour une implémentation plus robuste, on pourrait tenter de parser la date 105 | # avec datetime.strptime() et la reformater selon un format standard 106 | 107 | return date_str -------------------------------------------------------------------------------- /modules/link_analyzer.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from urllib.parse import urljoin, urlparse 3 | import re 4 | from typing import List, Dict, Set 5 | 6 | class LinkAnalyzer: 7 | # Known social media domains 8 | SOCIAL_DOMAINS = { 9 | 'twitter.com', 'facebook.com', 'linkedin.com', 'instagram.com', 10 | 'github.com', 'gitlab.com', 'bitbucket.org', 'youtube.com', 11 | 'medium.com', 'dev.to', 'behance.net', 'dribbble.com', 12 | 'stackoverflow.com', 't.me', 'mastodon.social' 13 | } 14 | 15 | # Areas to avoid (navigation, footer, etc.) 16 | EXCLUDE_CONTAINERS = { 17 | 'footer', 18 | 'nav', 19 | 'navigation', 20 | 'navbar', 21 | 'menu', 22 | 'sidebar', 23 | 'header', 24 | 'topbar', 25 | 'bottombar', 26 | 'copyright', 27 | 'legal' 28 | } 29 | 30 | # Areas of interest (profile, bio, etc.) 
31 | PROFILE_CONTAINERS = { 32 | 'profile', 33 | 'bio', 34 | 'about', 35 | 'user-info', 36 | 'userinfo', 37 | 'user-profile', 38 | 'userprofile', 39 | 'profile-info', 40 | 'description', 41 | 'user-description', 42 | 'user-details', 43 | 'personal-info', 44 | 'account-info' 45 | } 46 | 47 | EXCLUDE_KEYWORDS = { 48 | # System and legal pages 49 | 'privacy', 'legal', 'terms', 'policy', 'cookie', 50 | 'about', 'contact', 'help', 'support', 51 | 'documentation', 'docs', 'guidelines', 52 | 'static', 'api', 'enterprise', 'showcase', 'policie', 53 | 'advertising', 'welcome', 54 | 55 | # Marketing and sharing 56 | 'share', 'sharer', 'sharing', 'newsletter', 57 | 'subscribe', 'subscription', 'marketing', 58 | 59 | # Authentication and account 60 | 'login', 'signin', 'signup', 'register', 61 | 'authentication', 'password', 'forgot', 62 | 63 | # Commerce 64 | 'shop', 'store', 'pricing', 'payment', 65 | 'checkout', 'cart', 'billing', 66 | 67 | # Miscellaneous 68 | 'sitemap', 'search', 'tag', 'category', 69 | 'feed', 'rss', 'download', 'uploads', 70 | 'status', 'stats', 'analytics', 'envato', 'placeit' 71 | } 72 | 73 | def __init__(self, html_content: str, base_url: str): 74 | self.soup = BeautifulSoup(html_content, 'html.parser') 75 | self.base_url = base_url 76 | parsed_base = urlparse(base_url) 77 | self.base_domain = parsed_base.netloc.lower() 78 | 79 | # Extract the main domain name 80 | domain_parts = self.base_domain.split('.') 81 | if domain_parts[0] == 'www': 82 | domain_parts = domain_parts[1:-1] # Remove www and tld 83 | else: 84 | domain_parts = domain_parts[1:-1] if len(domain_parts) > 2 else domain_parts[:-1] # Remove tld and subdomain if present 85 | 86 | self.domain_name = '.'.join(domain_parts) # For cases with multiple subdomains, keep all 87 | 88 | def _should_exclude_link(self, url: str) -> bool: 89 | """Check if a link should be excluded from results.""" 90 | url_lower = url.lower() 91 | 92 | # If domain name appears anywhere in the URL, exclude it 93 | if self.domain_name in url_lower: 94 | return True 95 | 96 | # If URL contains an excluded keyword 97 | if any(keyword.lower() in url_lower for keyword in self.EXCLUDE_KEYWORDS): 98 | return True 99 | 100 | return False 101 | 102 | def _is_in_excluded_container(self, element) -> bool: 103 | """Check if element is in an excluded container. 
104 | Partial matching is used, so 'footer' will match 'global-footer', 'footer-wrapper', etc.""" 105 | for parent in element.parents: 106 | # Check IDs 107 | if parent.get('id'): 108 | parent_id = parent.get('id').lower() 109 | if any(exc in parent_id or parent_id in exc for exc in self.EXCLUDE_CONTAINERS): 110 | return True 111 | 112 | # Check classes 113 | if parent.get('class'): 114 | parent_classes = ' '.join(parent.get('class')).lower() 115 | if any(exc in parent_classes for exc in self.EXCLUDE_CONTAINERS): 116 | return True 117 | 118 | # Check tag names (exact match as these are standard HTML tags) 119 | if parent.name and parent.name.lower() in self.EXCLUDE_CONTAINERS: 120 | return True 121 | 122 | return False 123 | 124 | def _is_in_profile_container(self, element) -> bool: 125 | """Check if element is in a profile container.""" 126 | for parent in element.parents: 127 | # Check IDs 128 | if parent.get('id') and any(prof in parent.get('id').lower() for prof in self.PROFILE_CONTAINERS): 129 | return True 130 | # Check classes 131 | if parent.get('class'): 132 | if any(prof in ' '.join(parent.get('class')).lower() for prof in self.PROFILE_CONTAINERS): 133 | return True 134 | return False 135 | 136 | def _is_valid_external_link(self, url: str) -> bool: 137 | """Check if a link is a valid external link.""" 138 | try: 139 | parsed = urlparse(url) 140 | domain = parsed.netloc.lower() 141 | 142 | # Ignore empty links or links to the same domain 143 | if not domain or domain == self.base_domain: 144 | return False 145 | 146 | # If it's a link to a known social media profile, keep it 147 | social_profile_indicators = ['/user/', '/users/', '/profile/', '@', '/u/', '/channel/'] 148 | if any(social_domain in domain for social_domain in self.SOCIAL_DOMAINS): 149 | if any(indicator in url.lower() for indicator in social_profile_indicators): 150 | # But still check if source domain name isn't present 151 | return not self._should_exclude_link(url) 152 | 153 | # Exclude based on defined criteria 154 | if self._should_exclude_link(url): 155 | return False 156 | 157 | # Check for URLs that look like user profiles 158 | user_profile_patterns = [ 159 | r'/[~@][\w-]+/?$', 160 | r'/users?/[\w-]+/?$', 161 | r'/profiles?/[\w-]+/?$', 162 | r'/members?/[\w-]+/?$', 163 | r'/channel/[\w-]+/?$', 164 | r'/commissions/[\w-]+/?$' 165 | ] 166 | 167 | if any(re.search(pattern, url) for pattern in user_profile_patterns): 168 | return True 169 | 170 | return True # If we get here, the link has passed all filters 171 | 172 | except Exception: 173 | return False 174 | 175 | def analyze(self) -> List[str]: 176 | """Analyze HTML to find relevant external links.""" 177 | links = set() 178 | for a_tag in self.soup.find_all('a', href=True): 179 | href = a_tag['href'] 180 | if href.startswith(('http://', 'https://')): 181 | full_url = href 182 | else: 183 | full_url = urljoin(self.base_url, href) 184 | 185 | if self._is_valid_external_link(full_url): 186 | links.add(full_url) 187 | 188 | # Clean and normalize URLs 189 | cleaned_links = [] 190 | for link in links: 191 | # Remove common tracking parameters 192 | cleaned_url = re.sub(r'\?.*$', '', link) 193 | # Remove trailing slash 194 | cleaned_url = re.sub(r'/$', '', cleaned_url) 195 | cleaned_links.append(cleaned_url) 196 | 197 | return sorted(list(set(cleaned_links))) # Remove duplicates and sort 198 | 199 | def analyze_links(html_content: str, base_url: str) -> List[str]: 200 | """ 201 | Utility function to analyze links on a page. 
202 | 203 | Args: 204 | html_content (str): The HTML content to analyze 205 | base_url (str): The base URL for resolving relative links 206 | 207 | Returns: 208 | List[str]: List of external links found 209 | """ 210 | analyzer = LinkAnalyzer(html_content, base_url) 211 | return analyzer.analyze() -------------------------------------------------------------------------------- /modules/profile_extractor.py: -------------------------------------------------------------------------------- 1 | from bs4 import BeautifulSoup 2 | from typing import Dict, Set, List 3 | from urllib.parse import urlparse 4 | import json 5 | import re 6 | 7 | class ProfileExtractor: 8 | # Profile related containers and classes 9 | PROFILE_CONTAINERS = { 10 | # Common profile containers 11 | 'profile', 12 | 'bio', 13 | 'about', 14 | 'description', 15 | 'user-info', 16 | 'user-profile', 17 | 'userprofile', 18 | 'user-bio', 19 | 'userbio', 20 | 'author-info', 21 | 'author-bio', 22 | 'biography', 23 | 24 | # Social media specific 25 | 'profile-header', 26 | 'profile-card', 27 | 'profile-info', 28 | 'profile-details', 29 | 'user-details', 30 | 'personal-info', 31 | 'account-info', 32 | 33 | # Content descriptions 34 | 'user-description', 35 | 'creator-info', 36 | 'artist-info', 37 | 'member-info' 38 | } 39 | 40 | # Common metadata fields that might contain profile information 41 | METADATA_FIELDS = { 42 | 'description', 43 | 'og:description', 44 | 'profile:username', 45 | 'profile:first_name', 46 | 'profile:last_name', 47 | 'author', 48 | 'twitter:description', 49 | 'article:author', 50 | 'profile:gender', 51 | 'profile:location' 52 | } 53 | 54 | # Common UI elements to ignore 55 | UI_ELEMENTS = { 56 | 'menu', 'navigation', 'nav', 'search', 'button', 57 | 'dialog', 'modal', 'popup', 'tooltip', 'dropdown', 58 | 'tab', 'menu-item', 'sidebar', 'widget', 'footer' 59 | } 60 | 61 | # Content to exclude (similar to link analyzer) 62 | EXCLUDE_CONTAINERS = { 63 | 'footer', 64 | 'header', 65 | 'nav', 66 | 'navigation', 67 | 'menu', 68 | 'sidebar', 69 | 'copyright', 70 | 'legal', 71 | 'advertisement', 72 | 'cookie', 73 | 'popup', 74 | 'stats', 75 | 'style', 76 | 'script' 77 | } 78 | 79 | def __init__(self, html_content: str, base_url: str): 80 | """Initialize the ProfileExtractor.""" 81 | self.soup = BeautifulSoup(html_content, 'html.parser') 82 | self.base_url = base_url 83 | 84 | # Extract domain name for filtering 85 | parsed_base = urlparse(base_url) 86 | self.base_domain = parsed_base.netloc.lower() 87 | domain_parts = self.base_domain.split('.') 88 | if domain_parts[0] == 'www': 89 | domain_parts = domain_parts[1:-1] 90 | else: 91 | domain_parts = domain_parts[1:-1] if len(domain_parts) > 2 else domain_parts[:-1] 92 | self.domain_name = '.'.join(domain_parts) 93 | 94 | def _clean_text(self, text: str) -> str: 95 | """Clean and normalize text.""" 96 | # Remove multiple spaces and newlines 97 | text = ' '.join(text.split()) 98 | # Remove common UI text patterns 99 | text = re.sub(r'(Follow|Message|Subscribe|Share|Like|Comment|Post|View|Open|Close|Toggle|Click|Tap)\s*', '', text, flags=re.IGNORECASE) 100 | return text.strip() 101 | 102 | def _is_meaningful_text(self, text: str) -> bool: 103 | """Check if text contains meaningful information.""" 104 | # Minimum length check 105 | if len(text) < 3: 106 | return False 107 | 108 | # Check if text is just a single common word 109 | common_words = {'menu', 'home', 'about', 'contact', 'search', 'login', 'signup'} 110 | if text.lower() in common_words: 111 | return False 112 | 113 | # 
Check if text is just numbers 114 | if text.replace(',', '').replace('.', '').isdigit(): 115 | return False 116 | 117 | # Check if text is just a common UI element 118 | if text.lower() in self.UI_ELEMENTS: 119 | return False 120 | 121 | return True 122 | 123 | def _is_in_excluded_container(self, element) -> bool: 124 | """Check if element is in a container that should be excluded.""" 125 | for parent in element.parents: 126 | # Check IDs 127 | if parent.get('id'): 128 | parent_id = parent.get('id').lower() 129 | if any(exc in parent_id or parent_id in exc for exc in self.EXCLUDE_CONTAINERS): 130 | return True 131 | 132 | # Check classes 133 | if parent.get('class'): 134 | parent_classes = ' '.join(parent.get('class')).lower() 135 | if any(exc in parent_classes for exc in self.EXCLUDE_CONTAINERS): 136 | return True 137 | 138 | # Check tag names 139 | if parent.name and parent.name.lower() in self.EXCLUDE_CONTAINERS: 140 | return True 141 | 142 | return False 143 | 144 | def _extract_from_metadata(self) -> Dict[str, str]: 145 | """Extract profile information from metadata tags.""" 146 | metadata = {} 147 | 148 | # Extract from standard meta tags 149 | for meta in self.soup.find_all('meta'): 150 | name = meta.get('name', meta.get('property', '')).lower() 151 | if name in self.METADATA_FIELDS: 152 | content = self._clean_text(meta.get('content', '')) 153 | if content and not self._should_exclude_content(content): 154 | metadata[name] = content 155 | 156 | # Extract from JSON-LD 157 | for script in self.soup.find_all('script', type='application/ld+json'): 158 | try: 159 | data = json.loads(script.string) 160 | if isinstance(data, dict): 161 | if data.get('@type') in ['Person', 'Profile']: 162 | for key, value in data.items(): 163 | if isinstance(value, str): 164 | cleaned_value = self._clean_text(value) 165 | if cleaned_value and not self._should_exclude_content(cleaned_value): 166 | metadata[key] = cleaned_value 167 | except (json.JSONDecodeError, AttributeError): 168 | continue 169 | 170 | return metadata 171 | 172 | def _should_exclude_content(self, text: str) -> bool: 173 | """Check if content should be excluded.""" 174 | return self.domain_name.lower() in text.lower() 175 | 176 | def _extract_from_html(self) -> Set[str]: 177 | """Extract profile information from HTML content.""" 178 | profile_texts = set() 179 | seen_texts = set() 180 | 181 | # First, get all text elements 182 | for element in self.soup.find_all(text=True): 183 | # Check if element is inside excluded container like footer FIRST 184 | if self._is_in_excluded_container(element): 185 | continue # Skip this element and all its content 186 | 187 | # Only then check if it's in a profile container 188 | parent_element = element.parent 189 | if any(ptn in str(parent_element.get('class', [])).lower() or 190 | ptn in str(parent_element.get('id', '')).lower() 191 | for ptn in self.PROFILE_CONTAINERS): 192 | 193 | text = self._clean_text(element.string) 194 | if (text and 195 | text not in seen_texts and 196 | len(text) >= 3): 197 | 198 | profile_texts.add(text) 199 | seen_texts.add(text) 200 | 201 | return profile_texts 202 | 203 | def extract(self) -> Dict[str, List[str]]: 204 | """Extract all profile information from the page.""" 205 | metadata = self._extract_from_metadata() 206 | content = sorted(list(self._extract_from_html())) # Convert set to sorted list 207 | 208 | return { 209 | 'metadata': metadata, 210 | 'content': content 211 | } 212 | 213 | def extract_profile_info(html_content: str, base_url: str) -> Dict[str, 
List[str]]: 214 | """Utility function to extract profile information from a page.""" 215 | extractor = ProfileExtractor(html_content, base_url) 216 | return extractor.extract() -------------------------------------------------------------------------------- /modules/proxy.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # modules/proxy.py 3 | 4 | from fastapi import FastAPI, HTTPException 5 | from fastapi.middleware.cors import CORSMiddleware 6 | import httpx 7 | from urllib.parse import urlparse 8 | import os 9 | from dotenv import load_dotenv 10 | 11 | load_dotenv() 12 | 13 | app = FastAPI() 14 | 15 | # CORS configuration 16 | app.add_middleware( 17 | CORSMiddleware, 18 | allow_origins=["*"], 19 | allow_credentials=True, 20 | allow_methods=["*"], 21 | allow_headers=["*"], 22 | ) 23 | 24 | # Domain-specific header configurations 25 | DOMAIN_PATTERNS = { 26 | '.ru': { 27 | 'Accept-Language': 'ru-RU,ru;q=0.9,en-US;q=0.8,en;q=0.7', 28 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36' 29 | }, 30 | '.pl': { 31 | 'Accept-Language': 'pl-PL,pl;q=0.9,en-US;q=0.8,en;q=0.7', 32 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Firefox/120.0' 33 | }, 34 | '.jp': { 35 | 'Accept-Language': 'ja-JP,ja;q=0.9,en-US;q=0.8,en;q=0.7', 36 | 'User-Agent': 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) Safari/605.1.15' 37 | }, 38 | '.cn': { 39 | 'Accept-Language': 'zh-CN,zh;q=0.9,en-US;q=0.8,en;q=0.7', 40 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36' 41 | }, 42 | 'behance.net': { 43 | 'Accept-Language': 'en-US,en;q=0.9', 44 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', 45 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 46 | 'Referer': 'https://www.behance.net/' 47 | }, 48 | 'community': { 49 | 'Accept': 'application/activity+json', 50 | 'User-Agent': 'Mozilla/5.0 (compatible; SocialMediaBot/1.0)' 51 | }, 52 | 'mastodon': { 53 | 'Accept': 'application/activity+json', 54 | 'User-Agent': 'Mozilla/5.0 (compatible; SocialMediaBot/1.0)' 55 | } 56 | } 57 | 58 | # Default headers 59 | DEFAULT_HEADERS = { 60 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', 61 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 62 | 'Accept-Language': 'en-US,en;q=0.9', 63 | 'Cache-Control': 'no-cache', 64 | } 65 | 66 | @app.get("/proxy") 67 | async def proxy(url: str): 68 | if not url: 69 | raise HTTPException(status_code=400, detail='URL parameter is required') 70 | 71 | domain = urlparse(url).netloc.replace('www.', '') 72 | 73 | # Build headers 74 | headers = DEFAULT_HEADERS.copy() 75 | for pattern, pattern_headers in DOMAIN_PATTERNS.items(): 76 | if pattern in domain: 77 | headers.update(pattern_headers) 78 | break 79 | 80 | try: 81 | async with httpx.AsyncClient(verify=False, timeout=25.0) as client: 82 | # First request without following redirects 83 | response = await client.get( 84 | url, 85 | headers=headers, 86 | follow_redirects=False 87 | ) 88 | 89 | initial_status_code = response.status_code 90 | 91 | # If redirect, follow with a new request 92 | if 300 <= initial_status_code < 400: 93 | response = await client.get( 94 | url, 95 | headers=headers, 96 | follow_redirects=True 97 | ) 98 | 99 | # Build response 100 | result = { 101 | 'status': { 102 | 'http_code': response.status_code, 103 | 'initial_http_code': 
initial_status_code, 104 | 'headers': dict(response.headers) 105 | }, 106 | 'contents': response.text, 107 | 'url': str(response.url) 108 | } 109 | 110 | # Add redirect history if present 111 | if response.history: 112 | result['status']['redirect_history'] = [ 113 | { 114 | 'url': str(r.url), 115 | 'status_code': r.status_code, 116 | 'headers': dict(r.headers) 117 | } 118 | for r in response.history 119 | ] 120 | 121 | return result 122 | 123 | except httpx.RequestError as e: 124 | error_details = { 125 | 'message': str(e), 126 | 'code': type(e).__name__, 127 | 'url': url 128 | } 129 | 130 | if isinstance(e, httpx.TimeoutException): 131 | return { 132 | 'error': error_details, 133 | 'status': {'http_code': 504} 134 | } 135 | 136 | return { 137 | 'error': error_details, 138 | 'status': {'http_code': 500} 139 | } 140 | 141 | if __name__ == "__main__": 142 | import uvicorn 143 | import logging 144 | logging.getLogger("uvicorn.access").setLevel(logging.WARNING) 145 | uvicorn.run(app, host="127.0.0.1", port=8000, log_level="warning") -------------------------------------------------------------------------------- /modules/rpuc.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # modules/rpuc.py 3 | import aiohttp 4 | import asyncio 5 | import json 6 | import os 7 | from datetime import datetime 8 | from rich.console import Console 9 | from rich.table import Table 10 | from rich.progress import Progress, BarColumn, TimeRemainingColumn, TextColumn 11 | from rich.live import Live 12 | from jinja2 import Environment, BaseLoader 13 | from urllib.parse import urlparse, quote 14 | import logging 15 | from typing import Dict, List, Optional 16 | from dotenv import load_dotenv 17 | from link_analyzer import analyze_links 18 | from profile_extractor import extract_profile_info 19 | from date_extractor import extract_profile_date, normalize_date 20 | import re 21 | 22 | # Load environment variables 23 | load_dotenv() 24 | 25 | # Logging configuration 26 | logging.basicConfig( 27 | level=logging.INFO, 28 | format='%(asctime)s - %(levelname)s - %(message)s' 29 | ) 30 | logger = logging.getLogger(__name__) 31 | 32 | # Configuration 33 | BATCH_SIZE = 50 # Process 50 requests simultaneously 34 | MAX_CONNECTIONS = 200 # Maximum connections for aiohttp 35 | REQUEST_TIMEOUT = 15 36 | DEFAULT_JSON_URL = "https://raw.githubusercontent.com/degun-osint/WhatsMyName/main/wmn-data.json" 37 | JSON_URL = os.getenv('WMN_JSON_URL', DEFAULT_JSON_URL) 38 | BASE_DIR = os.path.dirname(os.path.dirname(os.path.abspath(__file__))) 39 | DATA_DIR = os.path.join(BASE_DIR, "data") 40 | RESULTS_DIR = os.path.join(BASE_DIR, "results") 41 | PROGRESS_DELAY = 0.01 42 | 43 | os.makedirs(DATA_DIR, exist_ok=True) 44 | os.makedirs(RESULTS_DIR, exist_ok=True) 45 | 46 | PROXY_URL = os.getenv('PROXY_URL', 'http://127.0.0.1:8000/proxy') 47 | 48 | HEADERS = { 49 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36', 50 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8', 51 | 'Accept-Language': 'en-US,en;q=0.9', 52 | 'Cache-Control': 'no-cache', 53 | } 54 | 55 | class SiteChecker: 56 | def __init__(self): 57 | """Initialize the site checker.""" 58 | self.console = Console() 59 | self.sites = [] 60 | self.results = [] 61 | self.data_dir = DATA_DIR 62 | self.results_dir = RESULTS_DIR 63 | 64 | async def download_sites_data(self): 65 | """Download site data from configured URL.""" 66 | local_file = 
os.path.join(self.data_dir, "wmn-data.json") 67 | 68 | try: 69 | async with aiohttp.ClientSession() as session: 70 | self.console.print(f"[cyan]Downloading data from {JSON_URL}...") 71 | async with session.get(JSON_URL) as response: 72 | if response.status == 200: 73 | data = await response.text() 74 | json_data = json.loads(data) 75 | self.sites = json_data.get('sites', []) 76 | with open(local_file, 'w', encoding='utf-8') as f: 77 | f.write(data) 78 | self.console.print("[green]Data downloaded successfully") 79 | else: 80 | if os.path.exists(local_file): 81 | self.console.print("[yellow]Using local data...") 82 | with open(local_file, 'r', encoding='utf-8') as f: 83 | json_data = json.load(f) 84 | self.sites = json_data.get('sites', []) 85 | else: 86 | raise Exception("Unable to download data and no local data available") 87 | except Exception as e: 88 | if os.path.exists(local_file): 89 | self.console.print("[yellow]Using local data...") 90 | with open(local_file, 'r', encoding='utf-8') as f: 91 | json_data = json.load(f) 92 | self.sites = json_data.get('sites', []) 93 | else: 94 | raise 95 | 96 | def is_date_status(self, status): 97 | """Détermine si le statut contient une date.""" 98 | if not isinstance(status, str): 99 | return False 100 | 101 | status_lower = status.lower() 102 | 103 | # Vérifie si "join" ou un nom de mois est présent 104 | months = ['jan', 'feb', 'mar', 'apr', 'may', 'jun', 'jul', 'aug', 'sep', 'oct', 'nov', 'dec'] 105 | if 'join' in status_lower or any(month in status_lower for month in months): 106 | return True 107 | 108 | # Vérifie s'il y a au moins un chiffre 109 | if any(c.isdigit() for c in status_lower): 110 | return True 111 | 112 | return False 113 | 114 | async def verify_content(self, content: str, pattern: str, site_name: str) -> bool: 115 | """Check if pattern is present in content.""" 116 | if not pattern: 117 | return True 118 | if not isinstance(content, str): 119 | return False 120 | 121 | normalized_content = ' '.join(content.split()) 122 | normalized_pattern = ' '.join(pattern.split()).replace('\\"', '"') 123 | 124 | return normalized_pattern.lower() in normalized_content.lower() 125 | 126 | async def check_site(self, site: dict, username: str, session: aiohttp.ClientSession) -> Optional[dict]: 127 | """Check a specific site for a given username.""" 128 | original_url = site['uri_check'].replace("{account}", username) 129 | display_url = site.get('uri_pretty', original_url).replace("{account}", username) 130 | 131 | if original_url.startswith('http://'): 132 | original_url = original_url.replace('http://', 'https://') 133 | 134 | try: 135 | # Use proxy 136 | proxy_url = f"{PROXY_URL}?url={quote(original_url)}" 137 | async with session.get(proxy_url, timeout=REQUEST_TIMEOUT) as response: 138 | if response.status != 200: 139 | return None 140 | 141 | json_response = await response.json() 142 | if not json_response or 'status' not in json_response: 143 | return None 144 | 145 | content = json_response.get('contents', '') 146 | status_data = json_response['status'] 147 | initial_status = status_data.get('initial_http_code', status_data.get('http_code')) 148 | 149 | # Verify status and patterns 150 | has_miss_string = await self.verify_content(content, site.get('m_string', ''), site['name']) 151 | has_expected_string = await self.verify_content(content, site.get('e_string', ''), site['name']) 152 | 153 | # Cas 1: Si m_string est présent et m_code correspond => Non trouvé 154 | if has_miss_string and initial_status == site['m_code']: 155 | return None 
156 | 157 | # Cas 2: Si e_string est présent et e_code correspond => Found 158 | if has_expected_string and initial_status == site['e_code']: 159 | external_links = analyze_links(content, original_url) 160 | profile_info = extract_profile_info(content, original_url) 161 | 162 | # Extraire la date de création du profil 163 | profile_date = None 164 | if profile_info and 'metadata' in profile_info: 165 | profile_date = extract_profile_date(content, profile_info.get('metadata', {}), site_name=site['name']) 166 | 167 | # Déterminer le statut (date de création ou "found") 168 | status = 'found' 169 | if profile_date: 170 | status = normalize_date(profile_date) 171 | 172 | # Vérifier si le contenu provient d'une balise link rel (à ignorer) 173 | if status != 'found' and " Unsure 187 | if has_expected_string and initial_status != site['e_code']: 188 | external_links = analyze_links(content, original_url) 189 | profile_info = extract_profile_info(content, original_url) 190 | 191 | return { 192 | 'name': site['name'], 193 | 'category': site['cat'], 194 | 'url': display_url, 195 | 'status': 'unsure', 196 | 'http_code': initial_status, 197 | 'external_links': external_links, 198 | 'profile_info': profile_info 199 | } 200 | 201 | # Cas 4: Si ni e_string ni m_string ne sont présents => Non trouvé 202 | if not has_expected_string and not has_miss_string: 203 | return None 204 | 205 | # Pour tout autre cas non prévu => Non trouvé 206 | return None 207 | 208 | except Exception as e: 209 | logger.error(f"Error checking {site['name']}: {str(e)}") 210 | return None 211 | 212 | async def process_batch(self, sites: List[dict], username: str) -> List[dict]: 213 | """Process a batch of sites in parallel.""" 214 | connector = aiohttp.TCPConnector(limit=50, force_close=True) 215 | async with aiohttp.ClientSession(connector=connector) as session: 216 | tasks = [] 217 | for site in sites: 218 | tasks.append(self.check_site(site, username, session)) 219 | 220 | results = await asyncio.gather(*tasks, return_exceptions=True) 221 | valid_results = [] 222 | for r in results: 223 | if isinstance(r, Exception): 224 | logger.error(f"Error in batch: {str(r)}") 225 | continue 226 | if r is not None: 227 | valid_results.append(r) 228 | return valid_results 229 | 230 | async def check_username(self, username: str): 231 | """Check a username across all sites.""" 232 | self.results = [] 233 | console = Console() 234 | 235 | with Progress( 236 | TextColumn("{task.description}"), 237 | BarColumn(complete_style="green", finished_style="green"), 238 | TextColumn("[progress.percentage]{task.percentage:>3.0f}%"), 239 | TextColumn("•"), 240 | TimeRemainingColumn(), 241 | console=console, 242 | transient=True, 243 | ) as progress: 244 | main_task = progress.add_task( 245 | f"[cyan]🦏 Searching...", 246 | total=len(self.sites) 247 | ) 248 | 249 | found_count = 0 250 | tasks = [] 251 | 252 | # Create all batches 253 | for i in range(0, len(self.sites), BATCH_SIZE): 254 | batch = self.sites[i:i + BATCH_SIZE] 255 | tasks.append(self.process_batch(batch, username)) 256 | 257 | # Process batches in groups 258 | for i in range(0, len(tasks), 2): 259 | current_tasks = tasks[i:i+2] 260 | batch_results = await asyncio.gather(*current_tasks) 261 | 262 | sites_processed = min(BATCH_SIZE * 2, len(self.sites) - (i * BATCH_SIZE)) 263 | 264 | for results in batch_results: 265 | found_in_batch = len(results) 266 | if found_in_batch > 0: 267 | found_count += found_in_batch 268 | for result in results: 269 | console.print(f"[green]✓ Found on 
{result['name']}[/green]") 270 | 271 | progress.update( 272 | main_task, 273 | advance=sites_processed, 274 | description=f"[cyan]🦏 Searching... ({found_count} found)" 275 | ) 276 | 277 | await asyncio.sleep(PROGRESS_DELAY) 278 | 279 | for results in batch_results: 280 | self.results.extend(results) 281 | 282 | def display_results_console(self): 283 | """Display results in console with styling.""" 284 | if not self.results: 285 | self.console.print("\n[yellow]No profiles found[/yellow]") 286 | return 287 | 288 | table = Table(title=f"Search Results") 289 | 290 | table.add_column("Site", style="cyan") 291 | table.add_column("Category", style="green") 292 | table.add_column("Status", style="magenta") 293 | table.add_column("URL", style="blue") 294 | table.add_column("External Links", style="yellow") 295 | table.add_column("Profile Info", style="white") 296 | 297 | for result in self.results: 298 | status_style = "green" if result['status'] == 'found' else "yellow" if result['status'] == 'unsure' else "white" 299 | 300 | external_links = result.get('external_links', []) 301 | links_str = ", ".join(external_links) if external_links else "-" 302 | 303 | profile_info = result.get('profile_info', {}) 304 | profile_str = "" 305 | if profile_info: 306 | if profile_info.get('metadata'): 307 | profile_str += "Metadata: " + ", ".join(f"{k}: {v}" for k, v in profile_info['metadata'].items()) 308 | if profile_info.get('content'): 309 | profile_str += "\nContent: " + ", ".join(profile_info['content']) 310 | 311 | table.add_row( 312 | result['name'], 313 | result['category'], 314 | f"[{status_style}]{result['status']}[/{status_style}]", 315 | result['url'], 316 | links_str, 317 | profile_str or "-" 318 | ) 319 | self.console.print(table) 320 | 321 | def export_html(self, output_file: str, username: str = ""): 322 | """Export results to HTML.""" 323 | env = Environment(loader=BaseLoader()) 324 | template_str = r''' 325 | 326 | 327 | 328 | 329 | 330 | RPUC Results 331 | 332 | 681 | 682 | 683 |
684 |
685 |

Rhino User Checker Results

686 |

Results for: {{ username }}

687 |
Generated on {{ timestamp }}
688 |
689 | 690 | {% if results %} 691 | 692 |
693 |
694 |
{{ results|length }}
695 |
Total Profiles
696 |
697 |
698 |
{{ results|selectattr("status", "equalto", "found")|list|length }}
699 |
Confirmed
700 |
701 |
702 |
{{ results|selectattr("status", "equalto", "unsure")|list|length }}
703 |
Possible
704 |
705 |
706 |
{{ results|rejectattr("status", "equalto", "found")|rejectattr("status", "equalto", "unsure")|list|length }}
707 |
With Dates
708 |
709 |
710 | 711 | 712 |
713 | {% for result in results %} 714 |
715 |
716 |
717 | {{ result.name }} 718 | {{ result.category }} 719 |
720 | 721 |
722 | {% if result.status != 'found' and result.status != 'unsure' %} 723 | 724 | {% elif result.status == 'found' %} 725 | 726 | {% elif result.status == 'unsure' %} 727 | 728 | {% endif %} 729 | {{ result.status }} 730 |
731 |
732 | 733 |
734 | 740 | 741 | {% if result.external_links %} 742 | 752 | {% endif %} 753 | 754 | {% if result.profile_info %} 755 | {% if result.profile_info.metadata %} 756 | 766 | {% endif %} 767 | 768 | {% if result.profile_info.content %} 769 |
770 |
Content
771 |
772 | {% for item in result.profile_info.content %} 773 |
{{ item }}
774 | {% endfor %} 775 |
776 |
777 | {% endif %} 778 | {% endif %} 779 |
780 |
781 | {% endfor %} 782 |
783 | {% else %} 784 |
785 | No profiles found 786 |
787 | {% endif %} 788 |
789 | 790 | 791 | ''' 792 | 793 | template = env.from_string(template_str) 794 | html_content = template.render( 795 | results=self.results, 796 | timestamp=datetime.now().strftime("%Y-%m-%d %H:%M:%S"), 797 | username=username, 798 | is_date_status=self.is_date_status # Ajouter la fonction au contexte 799 | ) 800 | 801 | output_path = os.path.join(self.results_dir, output_file) 802 | 803 | with open(output_path, 'w', encoding='utf-8') as f: 804 | f.write(html_content) 805 | return output_path 806 | 807 | def export_results_csv(self, output_file: str): 808 | """Export results to CSV format.""" 809 | import csv 810 | output_path = os.path.join(self.results_dir, output_file) 811 | 812 | with open(output_path, 'w', newline='', encoding='utf-8') as f: 813 | writer = csv.writer(f) 814 | # Write headers 815 | headers = ['Site', 'Category', 'Status', 'URL', 'External Links', 'Profile Info'] 816 | writer.writerow(headers) 817 | 818 | # Write data 819 | for result in self.results: 820 | external_links = '; '.join(result.get('external_links', [])) 821 | 822 | # Format profile info 823 | profile_info = result.get('profile_info', {}) 824 | profile_str = '' 825 | if profile_info: 826 | if profile_info.get('metadata'): 827 | profile_str += 'Metadata: ' + ', '.join(f"{k}: {v}" for k, v in profile_info['metadata'].items()) 828 | if profile_info.get('content'): 829 | profile_str += ' | Content: ' + ', '.join(profile_info['content']) 830 | 831 | row = [ 832 | result['name'], 833 | result['category'], 834 | result['status'], 835 | result['url'], 836 | external_links, 837 | profile_str 838 | ] 839 | writer.writerow(row) 840 | 841 | return output_path 842 | 843 | async def main(): 844 | try: 845 | checker = SiteChecker() 846 | await checker.download_sites_data() 847 | 848 | username = input("\nEnter username to search: ") 849 | 850 | while True: 851 | if not username.strip(): 852 | print("Username cannot be empty") 853 | username = input("\nEnter username to search: ") 854 | continue 855 | 856 | print(f"\nSearching profiles for {username}...") 857 | await checker.check_username(username) 858 | 859 | checker.display_results_console() 860 | 861 | # Ask for export format 862 | while True: 863 | export_choice = input("\nDo you want to export results? (CSV / HTML / BOTH / NO): ").upper() 864 | if export_choice in ['CSV', 'HTML', 'BOTH', 'NO']: 865 | break 866 | print("Invalid choice. Please enter CSV, HTML, BOTH, or NO.") 867 | 868 | if export_choice != 'NO': 869 | timestamp = datetime.now().strftime('%Y%m%d_%H%M%S') 870 | 871 | if export_choice in ['HTML', 'BOTH']: 872 | output_file = f"results_{username}_{timestamp}.html" 873 | output_path_html = checker.export_html(output_file, username=username) 874 | print(f"\nHTML results exported to {output_path_html}") 875 | 876 | if export_choice in ['CSV', 'BOTH']: 877 | output_file = f"results_{username}_{timestamp}.csv" 878 | output_path_csv = checker.export_results_csv(output_file) 879 | print(f"CSV results exported to {output_path_csv}") 880 | 881 | # Ask to search another user 882 | username = input("\nSearch another user? 
(enter alias or ctrl-c to quit): ")
883 |             if not username.strip():
884 |                 break
885 | 
886 |     except KeyboardInterrupt:
887 |         print("\nOperation cancelled by user...")
888 |     except asyncio.CancelledError:
889 |         print("\nOperation cancelled...")
890 |     except Exception as e:
891 |         print(f"An error occurred: {str(e)}")
893 | 
894 | def run():
895 |     try:
896 |         asyncio.run(main())
897 |     except KeyboardInterrupt:
898 |         pass
899 | 
900 | if __name__ == "__main__":
901 |     run()
--------------------------------------------------------------------------------
/readme.md:
--------------------------------------------------------------------------------
1 | # 🦏 RhinoUserChecker (RPUC)
2 | 
3 | **PLEASE BE AWARE THAT THIS IS NOT A PRODUCTION VERSION AND SHOULD BE USED WITH CAUTION**
4 | 
5 | A Python-based OSINT tool that helps you find usernames across multiple platforms and extract profile information. Built on top of the WhatsMyName project's data, RPUC adds advanced profile extraction and external link analysis capabilities.
6 | 
7 | ## 🌟 Features
8 | 
9 | - **Multi-platform Search**: Search for usernames across hundreds of social media platforms and websites, thanks to the WhatsMyName JSON file
10 | - **Profile Information Extraction**: Automatically extract user profile information, bios, and metadata
11 | - **Profile Creation Date**: Attempt to find the account creation date
12 | - **External Link Analysis**: Discover related profiles through external link analysis
13 | - **Smart Rate Limiting**: Built-in proxy support and smart rate limiting to avoid blocking
14 | - **Rich Console Output**: Real-time progress tracking and beautiful console output using Rich
15 | - **HTML or CSV Report Generation**: Generate detailed HTML or CSV reports with all findings
16 | - **International Platform Support**: Special handling for international platforms (Russian, Chinese, Japanese, etc.)
17 | 
18 | ## Discussion
19 | 
20 | You can join the OSCAR ZULU Discord server to discuss this tool: https://discord.gg/4REgJzn4NG
21 | 
22 | ## 📋 Requirements
23 | 
24 | ```text
25 | Python 3.8+
26 | See requirements.txt for full dependencies
27 | ```
28 | 
29 | ## 🚀 Installation
30 | 
31 | 1. Clone the repository:
32 | ```bash
33 | git clone https://github.com/degun-osint/RhinoUserChecker
34 | cd RhinoUserChecker
35 | ```
36 | 
37 | 2. Create a virtual environment and activate it:
38 | ```bash
39 | python -m venv venv
40 | source venv/bin/activate # On Windows: venv\Scripts\activate
41 | ```
42 | 
43 | 3. Install the required packages:
44 | ```bash
45 | pip install -r requirements.txt
46 | ```
47 | 
48 | ## ⚙️ Configuration
49 | 
50 | RPUC uses environment variables for configuration. Create a `.env` file in the root directory, based on the `.env-sample` file, with:
51 | 
52 | ```env
53 | WMN_JSON_URL=https://raw.githubusercontent.com/WebBreacher/WhatsMyName/main/wmn-data.json
54 | PROXY_URL=http://127.0.0.1:8000/proxy
55 | ```
56 | By default, the script uses a forked version of the WMN JSON file.
57 | 
58 | ## 🐳 Docker Installation
59 | 
60 | ### Using Docker Compose (recommended)
61 | 
62 | 1. Clone the repository:
63 | ```bash
64 | git clone https://github.com/degun-osint/RhinoUserChecker
65 | cd RhinoUserChecker
66 | ```
67 | 
68 | 2. Run the application:
69 | ```bash
70 | docker-compose up -d
71 | ```
72 | 
73 | 3. Attach to the running container to interact with the application:
74 | ```bash
75 | docker attach rhino-user-checker
76 | ```
77 | 
78 | 4. To exit the application, press `Ctrl+C`. To detach from the container without stopping it, press `Ctrl+P` followed by `Ctrl+Q`.
79 | 
80 | ### Using Docker directly
81 | 
82 | 1. Build the Docker image:
83 | ```bash
84 | docker build -t rhino-user-checker .
85 | ```
86 | 
87 | 2. Run the container:
88 | ```bash
89 | docker run -it --name rhino-user-checker -v $(pwd)/data:/app/data -v $(pwd)/results:/app/results rhino-user-checker
90 | ```
91 | 
92 | The application creates two directories:
93 | - `./data`: Stores the WhatsMyName database
94 | - `./results`: Stores exported results (HTML and CSV)
95 | 
96 | These directories are mounted as volumes to persist data between container runs.
97 | 
98 | ### Docker Troubleshooting
99 | 
100 | If you encounter any issues with Docker:
101 | 1. Check that the volumes have the correct permissions
102 | 2. If you're having network issues, ensure your Docker container has internet access
103 | 
104 | ## 🎮 Usage
105 | 
106 | Start the tool by running:
107 | 
108 | ```bash
109 | python run.py
110 | ```
111 | 
112 | The tool will:
113 | 1. Download the latest site data from the WhatsMyName project
114 | 2. Prompt you for a username to search
115 | 3. Search across hundreds of platforms
116 | 4. Generate an HTML or CSV report with findings
117 | 
118 | ## 📊 Output
119 | 
120 | RPUC generates two types of output:
121 | - Real-time console output with progress tracking
122 | - Detailed HTML or CSV report containing:
123 |   - Found profiles with links
124 |   - Status (`found` = the profile most likely exists; `unsure` = the site returned an HTTP 200 where a 404 was expected for a missing profile, so the profile cannot be confirmed)
125 |   - Extracted profile information
126 |   - Discovered external links
127 |   - Metadata from profiles
128 | 
129 | ## 🏗️ Project Structure
130 | 
131 | ```
132 | rpuc/
133 | ├── run.py                   # Main entry point
134 | ├── modules/
135 | │   ├── proxy.py             # Proxy server for rate limiting
136 | │   ├── rpuc.py              # Core functionality
137 | │   ├── date_extractor.py    # Creation date search
138 | │   ├── link_analyzer.py     # External link analysis
139 | │   └── profile_extractor.py # Profile information extraction
140 | ├── data/                    # Data storage
141 | └── results/                 # Generated reports
142 | ```
143 | 
144 | ## 🔧 Advanced Usage
145 | 
146 | ### Custom Headers
147 | 
148 | RPUC supports custom headers for different domains/regions. Edit the `DOMAIN_PATTERNS` dictionary in `proxy.py` to add more patterns (a sketch follows at the end of this section).
149 | 
150 | ### Proxy Configuration
151 | 
152 | By default, RPUC runs its own proxy server for rate limiting. You can configure an external proxy by modifying the `PROXY_URL` in your `.env` file.
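
The proxy exposes a single `GET /proxy?url=...` endpoint and returns the upstream response wrapped in a JSON envelope (`status`, `contents`, `url`), which is the same envelope that `check_site()` in `modules/rpuc.py` consumes. The snippet below is a minimal sketch of querying it directly with `httpx`; it assumes the proxy is already running on its default `127.0.0.1:8000` address, and the target URL is only a placeholder.

```python
import httpx

# Minimal sketch: query the bundled proxy directly. Assumes modules/proxy.py
# is already running on the default 127.0.0.1:8000 (run.py starts it for you);
# the target URL below is only a placeholder.
PROXY_URL = "http://127.0.0.1:8000/proxy"
target = "https://example.com/some-profile"

response = httpx.get(PROXY_URL, params={"url": target}, timeout=30.0)
data = response.json()

# The envelope: status.initial_http_code (before redirects), status.http_code,
# contents (the HTML body) and the final url.
status = data.get("status", {})
print(status.get("initial_http_code"), status.get("http_code"))
print(data.get("url"))
print(len(data.get("contents", "")), "characters of HTML")
```

For the custom header patterns described above, a new entry could look like the hedged sketch below (the `.de` pattern is purely hypothetical): each key is a substring matched against the target domain, and its headers are merged over `DEFAULT_HEADERS` before the request is sent.

```python
# Hypothetical addition to DOMAIN_PATTERNS in modules/proxy.py: send German
# Accept-Language headers to .de domains. The key is a substring matched
# against the target domain; the headers are merged over DEFAULT_HEADERS.
DOMAIN_PATTERNS = {
    # ... existing entries ...
    '.de': {
        'Accept-Language': 'de-DE,de;q=0.9,en-US;q=0.8,en;q=0.7',
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) Chrome/120.0.0.0 Safari/537.36',
    },
}
```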
153 | 
154 | ## 🤝 Contributing
155 | 
156 | Contributions are welcome! Please feel free to submit a Pull Request. For major changes, please open an issue first to discuss what you would like to change.
157 | 
158 | ## 📜 License
159 | 
160 | This project is licensed under the GNU General Public License v3.0 - see the [license.txt](license.txt) file for details.
161 | 
162 | ## 🙏 Credits
163 | 
164 | - Based on the [WhatsMyName Project](https://github.com/WebBreacher/WhatsMyName)
165 | - Built with:
166 |   - [FastAPI](https://fastapi.tiangolo.com/)
167 |   - [Rich](https://rich.readthedocs.io/)
168 |   - [BeautifulSoup4](https://www.crummy.com/software/BeautifulSoup/)
169 |   - [aiohttp](https://docs.aiohttp.org/)
170 | 
171 | ## ⚠️ Disclaimer
172 | 
173 | This tool is for educational purposes only. Be mindful of the platforms' terms of service and use responsibly.
174 | 175 | ## Author 176 | 177 | DEGUN (https://github.com/degun-osint) 178 | 179 | 180 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohappyeyeballs==2.4.4 2 | aiohttp==3.11.11 3 | aiosignal==1.3.2 4 | annotated-types==0.7.0 5 | anyio==4.8.0 6 | attrs==25.1.0 7 | beautifulsoup4==4.13.1 8 | certifi==2025.1.31 9 | click==8.1.8 10 | fastapi==0.115.8 11 | frozenlist==1.5.0 12 | h11==0.14.0 13 | httpcore==1.0.7 14 | httpx==0.28.1 15 | idna==3.10 16 | Jinja2==3.1.5 17 | markdown-it-py==3.0.0 18 | MarkupSafe==3.0.2 19 | mdurl==0.1.2 20 | multidict==6.1.0 21 | propcache==0.2.1 22 | psutil==6.1.1 23 | pydantic==2.10.6 24 | pydantic_core==2.27.2 25 | Pygments==2.19.1 26 | python-dotenv==1.0.1 27 | rich==13.9.4 28 | sniffio==1.3.1 29 | soupsieve==2.6 30 | starlette==0.45.3 31 | typing_extensions==4.12.2 32 | uvicorn==0.34.0 33 | yarl==1.18.3 34 | -------------------------------------------------------------------------------- /run.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # run.py 3 | 4 | import subprocess 5 | import sys 6 | import time 7 | import signal 8 | import os 9 | import psutil 10 | from rich.console import Console 11 | 12 | # Path configuration 13 | BASE_DIR = os.path.dirname(os.path.abspath(__file__)) 14 | MODULES_DIR = os.path.join(BASE_DIR, "modules") 15 | PROXY_PATH = os.path.join(MODULES_DIR, "proxy.py") 16 | RPUC_PATH = os.path.join(MODULES_DIR, "rpuc.py") 17 | DATA_DIR = os.path.join(BASE_DIR, "data") 18 | RESULTS_DIR = os.path.join(BASE_DIR, "results") 19 | 20 | # Create necessary directories 21 | os.makedirs(DATA_DIR, exist_ok=True) 22 | os.makedirs(RESULTS_DIR, exist_ok=True) 23 | 24 | console = Console() 25 | 26 | def kill_process_tree(pid): 27 | """Kill a process and all its children.""" 28 | try: 29 | parent = psutil.Process(pid) 30 | children = parent.children(recursive=True) 31 | for child in children: 32 | try: 33 | child.kill() 34 | except psutil.NoSuchProcess: 35 | pass 36 | parent.kill() 37 | except psutil.NoSuchProcess: 38 | pass 39 | 40 | def cleanup(proxy_process, main_process): 41 | """Clean up processes on shutdown.""" 42 | if main_process: 43 | kill_process_tree(main_process.pid) 44 | if proxy_process: 45 | kill_process_tree(proxy_process.pid) 46 | 47 | def run_proxy(): 48 | """Start the proxy server without changing the global directory.""" 49 | try: 50 | return subprocess.Popen([sys.executable, PROXY_PATH], 51 | stdout=subprocess.PIPE, 52 | stderr=subprocess.PIPE, 53 | cwd=MODULES_DIR) 54 | except Exception as e: 55 | console.print(f"[red]Error starting proxy: {e}[/red]") 56 | sys.exit(1) 57 | 58 | def run_main(): 59 | """Start the main script without changing the global directory.""" 60 | try: 61 | return subprocess.Popen([sys.executable, RPUC_PATH], 62 | cwd=MODULES_DIR) 63 | except Exception as e: 64 | console.print(f"[red]Error starting main script: {e}[/red]") 65 | return None 66 | 67 | def print_banner(): 68 | banner = r""" 69 | .-----------------------------------------. 70 | ( RHINO USER CHECKER - OSCAR ZULU FOREVER ! ) 71 | //\'----------------------------------------'\ 72 | / , _.-~~-.__ __.,----. 73 | ('; __( ) ~~~'--..--~~ '. 74 | ( . ""..-' ')| . \ '. 75 | \\. |\'.' ; . ; ; ; 76 | \ \" /9) ' . ; ; 77 | ; ) ) ( ' . ; ' . 78 | ) _ __.-'-._ ; ' . , /\ ; 79 | '-"'--' ; "-. '. ' _.-( ". ( 80 | ; \,) )--,..----';' > ; . 81 | \ ( | / ( / . 
; 82 | , , ) | ; .( . , ) / \ ; 83 | ,;'PjP;.';-.;._,;/;,;)/;.;.);.;,,;,;,,;/;;,),;.,/,;.).,; 84 | 85 | """ 86 | console.print("[yellow]" + banner + "[/yellow]") 87 | 88 | def print_title(): 89 | title = "Username, profile info and link scrapper \n" 90 | credits = "Based on Whatsmyname JSON (https://github.com/WebBreacher/WhatsMyName)\n" 91 | console.print("[bold cyan]" + title + "[/bold cyan]") 92 | console.print("[italic dim cyan]" + credits + "[/italic dim cyan]") 93 | 94 | def main(): 95 | # Display banner 96 | print_banner() 97 | print_title() 98 | 99 | # Check file existence 100 | if not os.path.exists(PROXY_PATH): 101 | console.print(f"[red]Error: {PROXY_PATH} does not exist[/red]") 102 | sys.exit(1) 103 | if not os.path.exists(RPUC_PATH): 104 | console.print(f"[red]Error: {RPUC_PATH} does not exist[/red]") 105 | sys.exit(1) 106 | 107 | proxy_process = None 108 | main_process = None 109 | 110 | def signal_handler(signum, frame): 111 | console.print("\n[yellow]Stopping processes...[/yellow]") 112 | cleanup(proxy_process, main_process) 113 | sys.exit(0) 114 | 115 | # Signal handling 116 | signal.signal(signal.SIGINT, signal_handler) 117 | signal.signal(signal.SIGTERM, signal_handler) 118 | 119 | try: 120 | # Start proxy 121 | console.print("[cyan]Starting proxy...[/cyan]") 122 | proxy_process = run_proxy() 123 | 124 | # Wait for proxy to be ready 125 | time.sleep(2) 126 | 127 | # Start main script 128 | console.print("[cyan]Starting main script...[/cyan]") 129 | main_process = run_main() 130 | 131 | while True: 132 | if main_process.poll() is not None: 133 | break 134 | time.sleep(0.1) 135 | 136 | except KeyboardInterrupt: 137 | console.print("\n[yellow]Operation cancelled by user...[/yellow]") 138 | except Exception as e: 139 | console.print(f"\n[red]Error: {e}[/red]") 140 | finally: 141 | cleanup(proxy_process, main_process) 142 | console.print("[green]Processes stopped[/green]") 143 | 144 | if __name__ == "__main__": 145 | main() --------------------------------------------------------------------------------