├── README.md ├── assets ├── demo.mp4 └── demo_preview.jpeg ├── requirements.txt └── wallet_analysis.py /README.md: -------------------------------------------------------------------------------- 1 | # crawler-scripts 2 | 3 | ## GMGN 錢包分析工具 4 | 5 | 這是一個自動化爬蟲腳本,可以訪問 GMGN.ai 網站並獲取 Solana 錢包的勝率統計和交易記錄。 6 | 7 | ### Demo 8 | 9 | https://github.com/user-attachments/assets/7959a28c-e215-45ad-9c8d-15b4cba798db 10 | 11 | ### 功能特點 12 | 13 | - 自動訪問 GMGN.ai 上指定錢包地址的頁面 14 | - 自動處理網頁操作(關閉彈窗、點擊按鈕等) 15 | - 抓取錢包的統計數據和持倉資訊 16 | - 支持批量分析多個錢包地址 17 | - 可選擇保持瀏覽器開啟以便調試 18 | 19 | ### 安裝要求 20 | 21 | 在使用前,需要安裝以下依賴: 22 | 23 | ```bash 24 | pip install -r requirements.txt 25 | ``` 26 | 27 | 主要依賴: 28 | 29 | - undetected-chromedriver >= 3.5.0 30 | - selenium >= 4.10 31 | 32 | ### 使用方法 33 | 34 | 基本用法: 35 | 36 | ```bash 37 | python wallet_analysis.py <錢包地址> 38 | ``` 39 | 40 | 分析多個錢包: 41 | 42 | ```bash 43 | python wallet_analysis.py <錢包地址1> <錢包地址2> <錢包地址3> ... 44 | ``` 45 | 46 | ### 命令行參數 47 | 48 | - `<錢包地址>`: 一個或多個要分析的 Solana 錢包地址 49 | - `keep_open`: 分析完成後保持瀏覽器開啟(用於調試) 50 | - `clean`: 只輸出乾淨的 JSON 數據,不輸出調試信息 51 | 52 | ### 使用示例 53 | 54 | 分析單個錢包地址: 55 | 56 | ```bash 57 | python wallet_analysis.py 8zab1batbJZZz5MnawzLz3MqkWJBP9LF4AdZCE3y2JJF 58 | ``` 59 | 60 | 分析多個錢包地址並保持瀏覽器開啟: 61 | 62 | ```bash 63 | python wallet_analysis.py 8zab1batbJZZz5MnawzLz3MqkWJBP9LF4AdZCE3y2JJF 4Xky4NEi6rPsLzQxNhZ3JvKnasocUL4cT3x4fso76qxN keep_open 64 | ``` 65 | 66 | 只輸出乾淨的 JSON 數據(適合後續處理): 67 | 68 | ```bash 69 | python wallet_analysis.py 8zab1batbJZZz5MnawzLz3MqkWJBP9LF4AdZCE3y2JJF clean 70 | ``` 71 | 72 | ### 輸出數據 73 | 74 | 腳本將輸出 JSON 格式的數據,包含每個錢包的: 75 | 76 | - wallet_summary: 錢包的統計摘要,包括勝率等信息 77 | - wallet_holdings: 錢包交易過的代幣信息 78 | 79 | 如果發生錯誤,將返回錯誤信息。 80 | -------------------------------------------------------------------------------- /assets/demo.mp4: -------------------------------------------------------------------------------- 
# https://raw.githubusercontent.com/a00012025/crawler-scripts/93406f694f9abab2b477c4818370dfca41032e18/assets/demo.mp4
# --------------------------------------------------------------------------------
# /assets/demo_preview.jpeg:
# --------------------------------------------------------------------------------
# https://raw.githubusercontent.com/a00012025/crawler-scripts/93406f694f9abab2b477c4818370dfca41032e18/assets/demo_preview.jpeg
# --------------------------------------------------------------------------------
# /requirements.txt:
# --------------------------------------------------------------------------------
# undetected-chromedriver>=3.5.0
# selenium>=4.10.0
# --------------------------------------------------------------------------------
# /wallet_analysis.py:
# --------------------------------------------------------------------------------
#!/usr/bin/env python3
import base64
import json
import os
import sys
import time
import traceback
from typing import List, Dict, Any, Optional, Union

import undetected_chromedriver as uc
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException, NoSuchElementException

# Constants
BASE_URL = "https://gmgn.ai/sol/address/{}"
DEFAULT_TIMEOUT = 10  # seconds given to each WebDriverWait
PAGE_LOAD_WAIT = 3    # seconds slept after driver.get()
ANIMATION_WAIT = 2    # seconds slept after clicks / scrolls


def log(message: str, debug: bool = False) -> None:
    """Print a diagnostic message to stderr when debug mode is enabled.

    Args:
        message: The message to log
        debug: Whether to print the message; nothing is written when False
    """
    if debug:
        print(message, file=sys.stderr)


def setup_driver(keep_browser_open: bool = False) -> uc.Chrome:
    """Configure and initialize the Chrome WebDriver.

    Args:
        keep_browser_open: Accepted for interface compatibility; this
            function does not currently read it.

    Returns:
        An instance of undetected_chromedriver Chrome with performance
        logging enabled and a 60 second page-load timeout.
    """
    options = uc.ChromeOptions()
    for flag in (
        "--no-sandbox",
        "--disable-dev-shm-usage",
        "--disable-blink-features=AutomationControlled",
    ):
        options.add_argument(flag)

    # Forward the DISPLAY environment variable to Chrome when it is set.
    display = os.environ.get("DISPLAY")
    if display:
        options.add_argument(f"--display={display}")

    # The performance log is what extract_network_data() later reads.
    options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})

    driver = uc.Chrome(options=options)
    driver.set_page_load_timeout(60)
    return driver


def click_element_by_js(driver: uc.Chrome, element, debug: bool = False) -> bool:
    """Click an element using JavaScript to avoid potential WebDriver issues.

    The element's markup is read *before* the click, and only in debug mode.
    Reading it afterwards can fail when the click detaches or re-renders the
    element, which would report a successful click as a failure.

    Args:
        driver: The WebDriver instance
        element: The WebElement to click
        debug: Whether to log debug messages

    Returns:
        True if the click script ran without raising, False otherwise
    """
    element_html = None
    if debug:
        try:
            element_html = element.get_attribute('outerHTML')
        except Exception as e:
            # Failing to describe the element must not prevent the click.
            log(f"Could not read element markup before click: {e}", debug)

    try:
        driver.execute_script("arguments[0].click();", element)
    except Exception as e:
        log(f"Error clicking element: {e}", debug)
        return False

    log(f"Clicked element using JavaScript: {element_html}", debug)
    return True


def find_and_click_element(driver: uc.Chrome, by: str, selector: str,
                           description: str, debug: bool = False) -> bool:
    """Wait for an element to be present, then click it via JavaScript.

    Args:
        driver: The WebDriver instance
        by: The locator strategy (one of the ``By.*`` string constants)
        selector: The selector string
        description: Description of the element for logging
        debug: Whether to log debug messages

    Returns:
        True if the element was found and clicked, False otherwise
    """
    log(f"Looking for '{description}' element...", debug)
    try:
        element = WebDriverWait(driver, DEFAULT_TIMEOUT).until(
            EC.presence_of_element_located((by, selector))
        )
    except Exception as e:
        log(f"Error finding or clicking '{description}' element: {e}", debug)
        return False

    log(f"Found '{description}' element", debug)
    return click_element_by_js(driver, element, debug)


def complete_onboarding_flow(driver: uc.Chrome, debug: bool = False) -> bool:
    """Complete the onboarding flow by clicking through intro screens.

    Closes the intro modal if its close icon is present, clicks "Next"
    three times, then clicks "Finish".

    Args:
        driver: The WebDriver instance
        debug: Whether to log debug messages

    Returns:
        True if every "Next" button was located and the "Finish" button was
        clicked, False otherwise
    """
    # Best effort: the modal may simply not be shown.
    try:
        close_icons = driver.find_elements(By.CLASS_NAME, "css-pt4g3d")
        if close_icons:
            click_element_by_js(driver, close_icons[0], debug)
    except Exception as e:
        log(f"Error closing intro modal: {e}", debug)

    next_locator = (
        By.XPATH,
        "//button[contains(@class, 'pi-btn')]//span[text()='Next']/..",
    )
    try:
        for _ in range(3):
            next_button = WebDriverWait(driver, DEFAULT_TIMEOUT).until(
                EC.presence_of_element_located(next_locator)
            )
            click_element_by_js(driver, next_button, debug)
            time.sleep(ANIMATION_WAIT)
    except Exception as e:
        log(f"Error in onboarding flow: {e}", debug)
        return False

    finish_selector = "//button[contains(@class, 'pi-btn')]//span[text()='Finish']/.."
    finished = find_and_click_element(
        driver, By.XPATH, finish_selector, "Finish button", debug
    )
    time.sleep(ANIMATION_WAIT)
    # Report the real outcome so the caller's warning fires when Finish
    # was never clicked.
    return finished


def navigate_to_wallet_page(driver: uc.Chrome, wallet_address: str, debug: bool = False) -> bool:
    """Navigate to the wallet page and prepare it for data extraction.

    Args:
        driver: The WebDriver instance
        wallet_address: The wallet address to analyze
        debug: Whether to log debug messages

    Returns:
        True if the sequence ran without raising, False otherwise
    """
    try:
        log("Setting up network interception...", debug)
        driver.execute_cdp_cmd('Network.enable', {})

        # Reading the log drains it, so entries from a previous wallet are
        # not attributed to this one.
        driver.get_log('performance')

        wallet_url = BASE_URL.format(wallet_address)
        log(f"Navigating to {wallet_url}...", debug)
        driver.get(wallet_url)

        log("Waiting for page to load...", debug)
        time.sleep(PAGE_LOAD_WAIT)

        if not complete_onboarding_flow(driver, debug):
            log("Warning: Onboarding flow may not have completed successfully", debug)

        # Open the "Recent PnL" tab (failure is logged by the helper).
        find_and_click_element(
            driver, By.XPATH, "//*[contains(text(), 'Recent PnL')]",
            "Recent PnL", debug
        )

        scroll_page_for_content(driver, debug)
        return True
    except Exception as e:
        log(f"Error navigating to wallet page: {e}", debug)
        return False


def scroll_page_for_content(driver: uc.Chrome, debug: bool = False) -> None:
    """Scroll all scrollable containers to the bottom, twice.

    Args:
        driver: The WebDriver instance
        debug: Whether to log debug messages
    """
    log("Scrolling to load all content...", debug)

    # Finds every element whose overflow-y is scroll/auto and whose content
    # overflows, and scrolls each to its bottom.
    js_find_and_scroll = """
        let containers = Array.from(document.querySelectorAll('*')).filter(el => {
            let style = window.getComputedStyle(el);
            return (style.overflowY === 'scroll' || style.overflowY === 'auto') &&
                   el.scrollHeight > el.clientHeight;
        });

        let scrolledContainers = 0;
        containers.forEach(container => {
            container.scrollTop = container.scrollHeight;
            scrolledContainers++;
        });

        return {
            totalContainers: containers.length,
            scrolledContainers: scrolledContainers
        };
    """

    scroll_result = driver.execute_script(js_find_and_scroll)
    log(f"Found {scroll_result['totalContainers']} scrollable containers and "
        f"scrolled {scroll_result['scrolledContainers']}", debug)
    time.sleep(ANIMATION_WAIT)

    # Second pass for anything that appeared after the first scroll.
    driver.execute_script(js_find_and_scroll)
    log("Performed second scroll to catch dynamically loaded content", debug)
    time.sleep(ANIMATION_WAIT)


def _read_json_response_body(driver: uc.Chrome, request_id: str) -> Optional[Any]:
    """Fetch a captured response body over CDP and parse it as JSON.

    Returns None when the CDP reply carries no ``body``. Bodies flagged
    ``base64Encoded`` are decoded before parsing.
    """
    response = driver.execute_cdp_cmd('Network.getResponseBody', {'requestId': request_id})
    if 'body' not in response:
        return None
    body = response['body']
    if response.get('base64Encoded'):
        body = base64.b64decode(body).decode('utf-8')
    return json.loads(body)


def extract_network_data(driver: uc.Chrome, debug: bool = False) -> Dict[str, Any]:
    """Extract wallet data from the responses recorded in the performance log.

    Args:
        driver: The WebDriver instance
        debug: Whether to log debug messages

    Returns:
        Dictionary with ``wallet_summary`` (the ``data`` field of the last
        matching wallet_stat response, or None) and ``wallet_holdings`` (the
        concatenated ``data.holdings`` lists of all wallet_holdings responses)
    """
    wallet_summary_data = None
    wallet_holdings_data: List[Any] = []

    for entry in driver.get_log('performance'):
        try:
            if 'message' not in entry:
                continue

            event = json.loads(entry['message']).get('message')
            if not event or event.get('method') != 'Network.responseReceived':
                continue

            params = event['params']
            request_id = params['requestId']
            url = params['response']['url']

            if '/api/v1/wallet_stat/sol/' in url:
                log(f"Found wallet summary request: {url}", debug)
                try:
                    payload = _read_json_response_body(driver, request_id)
                    if payload is not None:
                        wallet_summary_data = payload['data']
                        log("Successfully captured wallet summary data", debug)
                except Exception as e:
                    log(f"Error extracting wallet summary data: {e}", debug)

            elif '/api/v1/wallet_holdings' in url:
                log(f"Found wallet holdings request: {url}", debug)
                try:
                    payload = _read_json_response_body(driver, request_id)
                    if payload is not None:
                        # A null holdings list counts as "no holdings".
                        wallet_holdings_data.extend(payload['data']['holdings'] or [])
                        log("Successfully captured wallet holdings data", debug)
                except Exception as e:
                    log(f"Error extracting wallet holdings data: {e}", debug)
        except Exception as e:
            # One malformed entry must not abort the whole scan.
            log(f"Error processing network log entry: {e}", debug)

    return {
        "wallet_summary": wallet_summary_data,
        "wallet_holdings": wallet_holdings_data
    }


def analyze_wallet(wallet_address: str, driver: "uc.Chrome", debug: bool = False) -> Dict[str, Any]:
    """Analyze a single wallet address.

    Args:
        wallet_address: The wallet address to analyze
        driver: The WebDriver instance
        debug: Whether to log debug messages

    Returns:
        The extracted network data, or ``{"error": <message>}`` when
        navigation fails or an exception is raised
    """
    log(f"Processing wallet: {wallet_address}", debug)

    try:
        if not navigate_to_wallet_page(driver, wallet_address, debug):
            return {"error": "Failed to navigate to wallet page"}
        return extract_network_data(driver, debug)
    except Exception as e:
        log(f"Error analyzing wallet {wallet_address}: {e}", debug)
        return {"error": str(e)}


def _print_results(results: Dict[str, Any], clean_output: bool) -> None:
    """Write the results to stdout: compact JSON when clean, else indented."""
    if clean_output:
        print(json.dumps(results))
    else:
        print(json.dumps(results, indent=2))


def _hold_browser_open(debug: bool) -> None:
    """Block until the user presses Ctrl+C so the browser can be inspected."""
    log("Browser will remain open for inspection.", debug)
    log("Press Ctrl+C to close the browser and exit.", debug)
    try:
        while True:
            time.sleep(3)
    except KeyboardInterrupt:
        log("Interrupt received.", debug)


def wallet_analysis(
    wallet_addresses: Union[str, List[str]],
    keep_browser_open: bool = False,
    clean_output: bool = False
) -> Dict[str, Any]:
    """Analyze one or more wallet addresses and print the results as JSON.

    Results are printed exactly once, honouring ``clean_output``, before the
    optional keep-open wait. The browser is always quit on the way out,
    including after Ctrl+C.

    Args:
        wallet_addresses: Single wallet address or list of addresses
        keep_browser_open: Whether to keep the browser open (until Ctrl+C)
            after the analysis finishes or fails
        clean_output: Whether to produce clean output (compact JSON on
            stdout, no debug messages on stderr)

    Returns:
        Dictionary mapping wallet addresses to their analysis results
    """
    if isinstance(wallet_addresses, str):
        wallet_addresses = [wallet_addresses]

    debug = not clean_output
    log(f"Starting wallet analysis for {len(wallet_addresses)} wallets...", debug)
    all_results: Dict[str, Any] = {}
    driver = None

    try:
        try:
            driver = setup_driver(keep_browser_open)
            for wallet_address in wallet_addresses:
                all_results[wallet_address] = analyze_wallet(wallet_address, driver, debug)
            log("Debug session complete.", debug)
        except Exception as e:
            log(f"Error in wallet analysis: {e}", debug)
            log(traceback.format_exc(), debug)

        # Print before blocking so the data is available while the browser
        # is being inspected.
        _print_results(all_results, clean_output)

        if keep_browser_open and driver:
            _hold_browser_open(debug)
    finally:
        if driver:
            log("Closing browser.", debug)
            try:
                driver.quit()
            except Exception as e:
                log(f"Error closing browser: {e}", debug)

    return all_results


def parse_arguments() -> tuple:
    """Parse command line arguments from ``sys.argv``.

    ``keep_open`` and ``clean`` are matched case-insensitively and may
    appear anywhere; every other argument is taken as a wallet address.
    Usage and error text go to stderr so stdout only ever carries JSON.

    Returns:
        Tuple of (wallet_addresses, keep_browser_open, clean_output)

    Raises:
        SystemExit: With code 1 when no arguments or no wallet addresses
            are given
    """
    args = sys.argv[1:]
    if not args:
        usage_lines = (
            "Usage: python3 wallet_analysis.py <wallet_address> [wallet_address2 ...] [keep_open] [clean]",
            "  <wallet_address> [wallet_address2 ...] - One or more wallet addresses to analyze",
            "  [keep_open] - Optional: 'keep_open' to keep the browser open after completion",
            "  [clean] - Optional: 'clean' to output only the JSON data",
        )
        print("\n".join(usage_lines), file=sys.stderr)
        sys.exit(1)

    wallet_addresses = []
    keep_browser_open = False
    clean_output = False

    for arg in args:
        flag = arg.lower()
        if flag == 'keep_open':
            keep_browser_open = True
        elif flag == 'clean':
            clean_output = True
        else:
            wallet_addresses.append(arg)

    if not wallet_addresses:
        print("Error: No wallet addresses provided", file=sys.stderr)
        sys.exit(1)

    return wallet_addresses, keep_browser_open, clean_output


if __name__ == "__main__":
    wallet_addresses, keep_browser_open, clean_output = parse_arguments()
    wallet_analysis(wallet_addresses, keep_browser_open, clean_output)
# --------------------------------------------------------------------------------