├── README.md
├── assets
    ├── demo.mp4
    └── demo_preview.jpeg
├── requirements.txt
└── wallet_analysis.py


/README.md:
--------------------------------------------------------------------------------
 1 | # crawler-scripts
 2 | 
 3 | ## GMGN 錢包分析工具
 4 | 
 5 | 這是一個自動化爬蟲腳本，可以訪問 GMGN.ai 網站並獲取 Solana 錢包的勝率統計和交易記錄。
 6 | 
 7 | ### Demo
 8 | 
 9 | https://github.com/user-attachments/assets/7959a28c-e215-45ad-9c8d-15b4cba798db
10 | 
11 | ### 功能特點
12 | 
13 | - 自動訪問 GMGN.ai 上指定錢包地址的頁面
14 | - 自動處理網頁操作（關閉彈窗、點擊按鈕等）
15 | - 抓取錢包的統計數據和持倉資訊
16 | - 支持批量分析多個錢包地址
17 | - 可選擇保持瀏覽器開啟以便調試
18 | 
19 | ### 安裝要求
20 | 
21 | 在使用前，需要安裝以下依賴：
22 | 
23 | ```bash
24 | pip install -r requirements.txt
25 | ```
26 | 
27 | 主要依賴：
28 | 
29 | - undetected-chromedriver >= 3.5.0
30 | - selenium >= 4.10
31 | 
32 | ### 使用方法
33 | 
34 | 基本用法：
35 | 
36 | ```bash
37 | python wallet_analysis.py <錢包地址>
38 | ```
39 | 
40 | 分析多個錢包：
41 | 
42 | ```bash
43 | python wallet_analysis.py <錢包地址1> <錢包地址2> <錢包地址3> ...
44 | ```
45 | 
46 | ### 命令行參數
47 | 
48 | - `<錢包地址>`: 一個或多個要分析的 Solana 錢包地址
49 | - `keep_open`: 分析完成後保持瀏覽器開啟（用於調試）
50 | - `clean`: 只輸出乾淨的 JSON 數據，不輸出調試信息
51 | 
52 | ### 使用示例
53 | 
54 | 分析單個錢包地址：
55 | 
56 | ```bash
57 | python wallet_analysis.py 8zab1batbJZZz5MnawzLz3MqkWJBP9LF4AdZCE3y2JJF
58 | ```
59 | 
60 | 分析多個錢包地址並保持瀏覽器開啟：
61 | 
62 | ```bash
63 | python wallet_analysis.py 8zab1batbJZZz5MnawzLz3MqkWJBP9LF4AdZCE3y2JJF 4Xky4NEi6rPsLzQxNhZ3JvKnasocUL4cT3x4fso76qxN keep_open
64 | ```
65 | 
66 | 只輸出乾淨的 JSON 數據（適合後續處理）：
67 | 
68 | ```bash
69 | python wallet_analysis.py 8zab1batbJZZz5MnawzLz3MqkWJBP9LF4AdZCE3y2JJF clean
70 | ```
71 | 
72 | ### 輸出數據
73 | 
74 | 腳本將輸出 JSON 格式的數據，包含每個錢包的：
75 | 
76 | - wallet_summary: 錢包的統計摘要，包括勝率等信息
77 | - wallet_holdings: 錢包交易過的代幣信息
78 | 
79 | 如果發生錯誤，將返回錯誤信息。
80 | 


--------------------------------------------------------------------------------
/assets/demo.mp4:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a00012025/crawler-scripts/93406f694f9abab2b477c4818370dfca41032e18/assets/demo.mp4


--------------------------------------------------------------------------------
/assets/demo_preview.jpeg:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/a00012025/crawler-scripts/93406f694f9abab2b477c4818370dfca41032e18/assets/demo_preview.jpeg


--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | undetected-chromedriver>=3.5.0
2 | selenium>=4.10.0


--------------------------------------------------------------------------------
/wallet_analysis.py:
--------------------------------------------------------------------------------
  1 | #!/usr/bin/env python3
  2 | import sys
  3 | import json
  4 | import time
  5 | import os
  6 | import traceback
  7 | from typing import List, Dict, Any, Optional, Union
  8 | 
  9 | import undetected_chromedriver as uc
 10 | from selenium.webdriver.common.by import By
 11 | from selenium.webdriver.support.ui import WebDriverWait
 12 | from selenium.webdriver.support import expected_conditions as EC
 13 | from selenium.common.exceptions import TimeoutException, NoSuchElementException
 14 | 
# Constants
# URL template for a wallet's page on gmgn.ai; "{}" is filled with the wallet address.
BASE_URL = "https://gmgn.ai/sol/address/{}"
# Seconds passed to WebDriverWait as the maximum wait for an element to appear.
DEFAULT_TIMEOUT = 10
# Seconds slept (time.sleep) after driver.get() before interacting with the page.
PAGE_LOAD_WAIT = 3
# Seconds slept (time.sleep) after clicks and scrolls so the page can settle.
ANIMATION_WAIT = 2
 20 | 
 21 | def log(message: str, debug: bool = False) -> None:
 22 |     """Print debug messages to stderr if debug mode is enabled.
 23 |     
 24 |     Args:
 25 |         message: The message to log
 26 |         debug: Whether to print the message
 27 |     """
 28 |     if debug:
 29 |         print(message, file=sys.stderr)
 30 | 
 31 | def setup_driver(keep_browser_open: bool = False) -> uc.Chrome:
 32 |     """Configure and initialize the Chrome WebDriver.
 33 |     
 34 |     Args:
 35 |         keep_browser_open: Whether to keep the browser open after completion
 36 |         
 37 |     Returns:
 38 |         An instance of undetected_chromedriver Chrome
 39 |     """
 40 |     # Configure Chrome options
 41 |     options = uc.ChromeOptions()
 42 |     options.add_argument("--no-sandbox")
 43 |     options.add_argument("--disable-dev-shm-usage")
 44 |     options.add_argument("--disable-blink-features=AutomationControlled")
 45 |     
 46 |     # Handle display environment variable if present
 47 |     display = os.environ.get("DISPLAY", None)
 48 |     if display:
 49 |         options.add_argument(f"--display={display}")
 50 |     
 51 |     # Enable performance logging
 52 |     options.set_capability('goog:loggingPrefs', {'performance': 'ALL'})
 53 |     
 54 |     # Create driver
 55 |     driver = uc.Chrome(options=options)
 56 |     driver.set_page_load_timeout(60)
 57 |     
 58 |     return driver
 59 | 
 60 | def click_element_by_js(driver: uc.Chrome, element, debug: bool = False) -> bool:
 61 |     """Click an element using JavaScript to avoid potential WebDriver issues.
 62 |     
 63 |     Args:
 64 |         driver: The WebDriver instance
 65 |         element: The WebElement to click
 66 |         debug: Whether to log debug messages
 67 |         
 68 |     Returns:
 69 |         True if successful, False otherwise
 70 |     """
 71 |     try:
 72 |         driver.execute_script("arguments[0].click();", element)
 73 |         log(f"Clicked element using JavaScript: {element.get_attribute('outerHTML')}", debug)
 74 |         return True
 75 |     except Exception as e:
 76 |         log(f"Error clicking element: {e}", debug)
 77 |         return False
 78 | 
 79 | def find_and_click_element(driver: uc.Chrome, by: By, selector: str, 
 80 |                           description: str, debug: bool = False) -> bool:
 81 |     """Find an element by selector and click it.
 82 |     
 83 |     Args:
 84 |         driver: The WebDriver instance
 85 |         by: The method to locate elements
 86 |         selector: The selector string
 87 |         description: Description of the element for logging
 88 |         debug: Whether to log debug messages
 89 |         
 90 |     Returns:
 91 |         True if successful, False otherwise
 92 |     """
 93 |     log(f"Looking for '{description}' element...", debug)
 94 |     try:
 95 |         element = WebDriverWait(driver, DEFAULT_TIMEOUT).until(
 96 |             EC.presence_of_element_located((by, selector))
 97 |         )
 98 |         log(f"Found '{description}' element", debug)
 99 |         return click_element_by_js(driver, element, debug)
100 |     except Exception as e:
101 |         log(f"Error finding or clicking '{description}' element: {e}", debug)
102 |         return False
103 | 
def complete_onboarding_flow(driver: uc.Chrome, debug: bool = False) -> bool:
    """Dismiss the intro modal and step through the onboarding dialogs.

    Args:
        driver: The WebDriver instance
        debug: Whether to log debug messages

    Returns:
        True when three "Next" buttons were clicked and the "Finish" step was
        attempted; False if anything in that sequence raised (including a
        timeout while waiting for a "Next" button).
    """
    # Best effort: a failure to close the intro modal is logged, not fatal.
    try:
        modal_close_icons = driver.find_elements(By.CLASS_NAME, "css-pt4g3d")
        if modal_close_icons:
            click_element_by_js(driver, modal_close_icons[0], debug)
    except Exception as exc:
        log(f"Error closing intro modal: {exc}", debug)

    try:
        next_locator = (
            By.XPATH,
            "//button[contains(@class, 'pi-btn')]//span[text()='Next']/..",
        )
        for _step in range(3):
            next_btn = WebDriverWait(driver, DEFAULT_TIMEOUT).until(
                EC.presence_of_element_located(next_locator)
            )
            click_element_by_js(driver, next_btn, debug)
            time.sleep(ANIMATION_WAIT)

        # The final dialog shows "Finish" instead of "Next".
        find_and_click_element(
            driver,
            By.XPATH,
            "//button[contains(@class, 'pi-btn')]//span[text()='Finish']/..",
            "Finish button",
            debug,
        )
        time.sleep(ANIMATION_WAIT)
    except Exception as exc:
        log(f"Error in onboarding flow: {exc}", debug)
        return False
    return True
142 | 
def navigate_to_wallet_page(driver: uc.Chrome, wallet_address: str, debug: bool = False) -> bool:
    """Open a wallet's page and drive the UI so its data requests are issued.

    Steps, in order: enable CDP network events, drain the performance log,
    load the wallet URL, run the onboarding flow, click the "Recent PnL" tab,
    and scroll scrollable containers.

    Args:
        driver: The WebDriver instance
        wallet_address: The wallet address to analyze
        debug: Whether to log debug messages

    Returns:
        True if the sequence ran without raising, False otherwise
    """
    try:
        log("Setting up network interception...", debug)
        driver.execute_cdp_cmd('Network.enable', {})

        # Reading the log discards entries collected before this navigation.
        driver.get_log('performance')

        target_url = BASE_URL.format(wallet_address)
        log(f"Navigating to {target_url}...", debug)
        driver.get(target_url)

        log("Waiting for page to load...", debug)
        time.sleep(PAGE_LOAD_WAIT)

        # An incomplete onboarding flow is only a warning; keep going.
        onboarding_ok = complete_onboarding_flow(driver, debug)
        if not onboarding_ok:
            log("Warning: Onboarding flow may not have completed successfully", debug)

        find_and_click_element(
            driver,
            By.XPATH,
            "//*[contains(text(), 'Recent PnL')]",
            "Recent PnL",
            debug,
        )

        scroll_page_for_content(driver, debug)
    except Exception as exc:
        log(f"Error navigating to wallet page: {exc}", debug)
        return False
    return True
188 | 
def scroll_page_for_content(driver: uc.Chrome, debug: bool = False) -> None:
    """Scroll every overflowing container to its bottom, twice.

    The injected script selects elements whose computed ``overflowY`` is
    ``scroll`` or ``auto`` and whose content is taller than the element, sets
    ``scrollTop`` to ``scrollHeight`` on each, and returns the counts. The
    script is run a second time after a pause to reach content added by the
    first pass.

    Args:
        driver: The WebDriver instance
        debug: Whether to log debug messages
    """
    log("Scrolling to load all content...", debug)

    scroll_script = """
    let containers = Array.from(document.querySelectorAll('*')).filter(el => {
      let style = window.getComputedStyle(el);
      return (style.overflowY === 'scroll' || style.overflowY === 'auto') &&
             el.scrollHeight > el.clientHeight;
    });
    
    let scrolledContainers = 0;
    containers.forEach(container => {
      container.scrollTop = container.scrollHeight;
      scrolledContainers++;
    });
    
    return {
      totalContainers: containers.length,
      scrolledContainers: scrolledContainers
    };
    """

    # First pass: scroll and report how many containers were touched.
    first_pass = driver.execute_script(scroll_script)
    total = first_pass['totalContainers']
    scrolled = first_pass['scrolledContainers']
    log(f"Found {total} scrollable containers and scrolled {scrolled}", debug)
    time.sleep(ANIMATION_WAIT)

    # Second pass: the result is not needed, only the scrolling side effect.
    driver.execute_script(scroll_script)
    log("Performed second scroll to catch dynamically loaded content", debug)
    time.sleep(ANIMATION_WAIT)
230 | 
def extract_network_data(driver: uc.Chrome, debug: bool = False) -> Dict[str, Any]:
    """Collect wallet API responses from the browser's performance log.

    Each ``Network.responseReceived`` entry is matched by URL; the response
    body is then fetched through CDP ``Network.getResponseBody`` and parsed
    as JSON. Errors on individual entries are logged and skipped.

    Args:
        driver: The WebDriver instance
        debug: Whether to log debug messages

    Returns:
        Dict with ``wallet_summary`` (the ``data`` field of the last matching
        summary response, or None) and ``wallet_holdings`` (the concatenated
        ``data.holdings`` lists of all matching holdings responses).
    """
    summary = None
    holdings = []

    for record in driver.get_log('performance'):
        try:
            if 'message' not in record:
                continue

            envelope = json.loads(record['message'])
            is_response_event = (
                'message' in envelope
                and 'method' in envelope['message']
                and envelope['message']['method'] == 'Network.responseReceived'
            )
            if not is_response_event:
                continue

            event_params = envelope['message']['params']
            request_id = event_params['requestId']
            url = event_params['response']['url']

            if '/api/v1/wallet_stat/sol/' in url:
                # Wallet summary endpoint: keep the payload's "data" field.
                log(f"Found wallet summary request: {url}", debug)
                try:
                    reply = driver.execute_cdp_cmd(
                        'Network.getResponseBody', {'requestId': request_id}
                    )
                    if 'body' in reply:
                        summary = json.loads(reply['body'])['data']
                        log("Successfully captured wallet summary data", debug)
                except Exception as exc:
                    log(f"Error extracting wallet summary data: {exc}", debug)

            elif '/api/v1/wallet_holdings' in url:
                # Holdings endpoint: accumulate every response's holdings list.
                log(f"Found wallet holdings request: {url}", debug)
                try:
                    reply = driver.execute_cdp_cmd(
                        'Network.getResponseBody', {'requestId': request_id}
                    )
                    if 'body' in reply:
                        payload = json.loads(reply['body'])
                        holdings.extend(payload['data']['holdings'])
                        log("Successfully captured wallet holdings data", debug)
                except Exception as exc:
                    log(f"Error extracting wallet holdings data: {exc}", debug)
        except Exception as exc:
            log(f"Error processing network log entry: {exc}", debug)

    return {
        "wallet_summary": summary,
        "wallet_holdings": holdings,
    }
289 | 
def analyze_wallet(wallet_address: str, driver: uc.Chrome, debug: bool = False) -> Dict[str, Any]:
    """Run navigation and data extraction for one wallet address.

    Args:
        wallet_address: The wallet address to analyze
        driver: The WebDriver instance
        debug: Whether to log debug messages

    Returns:
        The extracted data dictionary, or ``{"error": <message>}`` when
        navigation fails or an exception is raised.
    """
    log(f"Processing wallet: {wallet_address}", debug)

    try:
        reached_page = navigate_to_wallet_page(driver, wallet_address, debug)
        if not reached_page:
            return {"error": "Failed to navigate to wallet page"}
        return extract_network_data(driver, debug)
    except Exception as exc:
        log(f"Error analyzing wallet {wallet_address}: {exc}", debug)
        return {"error": str(exc)}
311 | 
def wallet_analysis(
    wallet_addresses: Union[str, List[str]], 
    keep_browser_open: bool = False, 
    clean_output: bool = False
) -> Dict[str, Any]:
    """Analyze one or more wallet addresses and print the results as JSON.

    Results are printed to stdout exactly once, before any keep-open wait:
    compact when ``clean_output`` is set, indented otherwise. When
    ``keep_browser_open`` is set and a browser was started, the function then
    blocks until Ctrl+C. The browser is always closed before the function
    returns or an exception propagates.

    Args:
        wallet_addresses: Single wallet address or list of addresses
        keep_browser_open: Whether to keep the browser open (until Ctrl+C)
            after the analysis finishes or fails
        clean_output: Whether to print compact JSON and suppress debug logs

    Returns:
        Dictionary mapping wallet addresses to their analysis results
    """
    # Normalize input to list
    if isinstance(wallet_addresses, str):
        wallet_addresses = [wallet_addresses]

    debug = not clean_output
    log(f"Starting wallet analysis for {len(wallet_addresses)} wallets...", debug)
    all_results: Dict[str, Any] = {}
    driver = None
    failed = False

    try:
        try:
            driver = setup_driver(keep_browser_open)
            for wallet_address in wallet_addresses:
                all_results[wallet_address] = analyze_wallet(wallet_address, driver, debug)
        except Exception as e:
            failed = True
            log(f"Error in wallet analysis: {e}", debug)
            log(traceback.format_exc(), debug)

        # Print before waiting so the data is available even if the process
        # is later killed; flush because stdout may be a block-buffered pipe.
        print(json.dumps(all_results, indent=None if clean_output else 2), flush=True)

        if keep_browser_open and driver:
            if failed:
                log("Browser will remain open for inspection due to error.", debug)
            else:
                log("Debug session complete. Browser will remain open for inspection.", debug)
            log("Press Ctrl+C to close the browser and exit.", debug)
            try:
                while True:
                    time.sleep(3)
            except KeyboardInterrupt:
                # Ctrl+C is the documented way out of the wait, not an error.
                log("Interrupt received.", debug)
    finally:
        # Runs on normal completion, on Ctrl+C, and when an exception
        # propagates, so the browser process is not left behind.
        if driver:
            log("Closing browser.", debug)
            try:
                driver.quit()
            except Exception as e:
                log(f"Error closing browser: {e}", debug)

    return all_results
374 | 
def parse_arguments() -> tuple:
    """Parse command line arguments from ``sys.argv``.

    The words ``keep_open`` and ``clean`` (case-insensitive) are flags; every
    other argument is treated as a wallet address. Usage and error text are
    written to stderr so that stdout carries only the JSON results.

    Returns:
        Tuple of (wallet_addresses, keep_browser_open, clean_output)

    Raises:
        SystemExit: With code 1 when no arguments or no wallet addresses
            are given.
    """
    if len(sys.argv) < 2:
        print("Usage: python3 wallet_analysis.py <wallet_address1> [wallet_address2 ...] [keep_open] [clean]", file=sys.stderr)
        print("  <wallet_address1> [wallet_address2 ...] - One or more wallet addresses to analyze", file=sys.stderr)
        print("  [keep_open] - Optional: 'keep_open' to keep the browser open after completion", file=sys.stderr)
        print("  [clean] - Optional: 'clean' to output only the JSON data", file=sys.stderr)
        sys.exit(1)

    wallet_addresses = []
    keep_browser_open = False
    clean_output = False

    for arg in sys.argv[1:]:
        keyword = arg.lower()
        if keyword == 'keep_open':
            keep_browser_open = True
        elif keyword == 'clean':
            clean_output = True
        else:
            wallet_addresses.append(arg)

    if not wallet_addresses:
        print("Error: No wallet addresses provided", file=sys.stderr)
        sys.exit(1)

    return wallet_addresses, keep_browser_open, clean_output
405 | 
if __name__ == "__main__":
    # parse_arguments() returns (wallet_addresses, keep_browser_open,
    # clean_output), which matches wallet_analysis's positional parameters.
    wallet_analysis(*parse_arguments())


--------------------------------------------------------------------------------