├── .gitignore ├── requirements.txt ├── README.md └── simpcity.py /.gitignore: -------------------------------------------------------------------------------- 1 | .venv/ 2 | downloads/ 3 | cookies.json -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp 2 | yarl 3 | aiolimiter 4 | bs4 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SimpCity Downloader 2 | 3 | This Python script downloads media files (images, videos, attachments) from a specific thread on the SimpCity forum. 4 | 5 | **Features:** 6 | 7 | * **Media Download:** Downloads images, videos, and attachments from forum posts. 8 | * **Thread Navigation:** Automatically navigates through multiple pages of a thread. 9 | * **Login Support:** Supports login using a username/password or an `xf_user` cookie. 10 | * **Rate Limiting:** Implements rate limiting to prevent server overload. 11 | * **Progress Tracking:** Displays download progress for each file. 12 | * **Error Handling:** Includes basic error handling and logging. 13 | 14 | **Installation:** 15 | 16 | 1. **Clone the repository:** 17 | 18 | To get started, clone the repository to your local machine: 19 | 20 | ```bash 21 | git clone https://github.com/Emy69/SimpCityCLI.git 22 | ``` 23 | 24 | 2. **Install the required libraries:** 25 | 26 | Navigate to the project directory and install the dependencies: 27 | 28 | ```bash 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | **Usage:** 33 | 34 | 1. **Run the script from the command line:** 35 | 36 | After installation, run the script with the following command: 37 | 38 | ```bash 39 | python simpcity.py <thread_url> 40 | ``` 41 | 42 | Replace `<thread_url>` with the URL of the SimpCity thread you want to download.
43 | 44 | **Example:** 45 | 46 | ```bash 47 | python simpcity.py https://simpcity.su/threads/user/ 48 | ``` 49 | ## Support My Work 50 | 51 | If you find this tool helpful, please consider supporting my efforts: 52 | 53 | [![Donate with PayPal](https://img.shields.io/badge/Donate-PayPal-blue.svg?logo=paypal&style=for-the-badge)](https://www.paypal.com/paypalme/Emy699) 54 | [![Buy Me a Coffee](https://img.shields.io/badge/Buy%20Me%20a%20Coffee-FFDD00.svg?style=for-the-badge&logo=buy-me-a-coffee&logoColor=black)](https://buymeacoffee.com/emy_69) 55 | [![Support on Patreon](https://img.shields.io/badge/Support%20on%20Patreon-FF424D.svg?style=for-the-badge&logo=patreon&logoColor=white)](https://www.patreon.com/emy69) 56 | 57 | 58 | -------------------------------------------------------------------------------- /simpcity.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import aiohttp 3 | import logging 4 | import sys 5 | import os 6 | import json 7 | from pathlib import Path 8 | from typing import Optional, List, Tuple 9 | from yarl import URL 10 | from aiolimiter import AsyncLimiter 11 | from bs4 import BeautifulSoup 12 | import re 13 | import getpass 14 | 15 | # Configure logging 16 | logging.basicConfig( 17 | level=logging.INFO, 18 | format='%(asctime)s - %(levelname)s - %(message)s', 19 | datefmt='%Y-%m-%d %H:%M:%S' 20 | ) 21 | logger = logging.getLogger(__name__) 22 | 23 | class SimpCityDownloader: 24 | def __init__(self): 25 | self.base_url = URL("https://www.simpcity.su") 26 | self.session = None 27 | self.logged_in = False 28 | self.login_attempts = 0 29 | self.request_limiter = AsyncLimiter(10, 1) # 10 requests per second 30 | self.download_path = Path("downloads/simpcity") 31 | self.download_path.mkdir(parents=True, exist_ok=True) 32 | # Path to store cookies 33 | self.cookie_file = Path("cookies.json") 34 | 35 | # Selectors according to the original crawler 36 | self.title_selector = "h1[class=p-title-value]" 37 | self.posts_selector = "div[class*=message-main]" 38 | self.post_content_selector = "div[class*=message-userContent]" 39 | self.images_selector = "img[class*=bbImage]" 40 | self.videos_selector = "video source" 41 | self.iframe_selector = "iframe[class=saint-iframe]" 42 | self.attachments_block_selector = "section[class=message-attachments]" 43 | self.attachments_selector = "a" 44 | self.next_page_selector = "a[class*=pageNav-jump--next]" 45 | 46 | async def init_session(self): 47 | """Initialize the aiohttp session and load persistent cookies (if available)""" 48 | if not self.session: 49 | self.session = aiohttp.ClientSession() 50 | self.load_cookies() 51 | 52 | async def close(self): 53 | """Save cookies and close the session""" 54 | if self.session: 55 | self.save_cookies() 56 | await self.session.close() 57 | self.session = None 58 | 59 | def save_cookies(self): 60 | """Save the current cookies to a JSON file""" 61 | if self.session and self.session.cookie_jar: 62 | # Obtain cookies for the base domain 63 | simple_cookie = self.session.cookie_jar.filter_cookies(str(self.base_url)) 64 | cookies = {key: morsel.value for key, morsel in simple_cookie.items()} 65 | try: 66 | with open(self.cookie_file, 'w') as f: 67 | json.dump(cookies, f) 68 | logger.info("Cookies saved in %s", self.cookie_file) 69 | except Exception as e: 70 | logger.error("Error saving cookies: %s", str(e)) 71 | 72 | def load_cookies(self): 73 | """Load cookies from the file (if it exists) and add them to the session""" 74 | if 
self.cookie_file.exists(): 75 | try: 76 | with open(self.cookie_file, 'r') as f: 77 | cookies = json.load(f) 78 | self.session.cookie_jar.update_cookies(cookies) 79 | logger.info("Cookies loaded from %s", self.cookie_file) 80 | except Exception as e: 81 | logger.error("Error loading cookies: %s", str(e)) 82 | 83 | async def check_login_required(self, url: str) -> bool: 84 | """Check if login is required to access the given URL""" 85 | try: 86 | async with self.session.get(url) as response: 87 | if response.status == 200: 88 | text = await response.text() 89 | # Look for login indicators in the response 90 | return 'You must be logged-in to do that.' in text or 'Login or register' in text 91 | return True # Assume login is required if the page cannot be accessed 92 | except Exception: 93 | return True 94 | 95 | async def prompt_and_login(self) -> bool: 96 | """Prompt for credentials and perform login""" 97 | print("\nLogin required for SimpCity") 98 | print("1. Login with username/password") 99 | print("2. Login with xf_user cookie") 100 | print("3. Continue without login") 101 | 102 | choice = input("\nEnter your choice (1-3): ").strip() 103 | 104 | if choice == "1": 105 | username = input("Username: ").strip() 106 | password = getpass.getpass("Password: ").strip() 107 | return await self.login(username, password) 108 | 109 | elif choice == "2": 110 | print("\nTo obtain your xf_user cookie:") 111 | print("1. Visit SimpCity in your browser") 112 | print("2. Open the developer tools (F12)") 113 | print("3. Go to Application/Storage -> Cookies") 114 | print("4. Copy the value of the 'xf_user' cookie") 115 | xf_user = input("\nEnter the value of the xf_user cookie: ").strip() 116 | return await self.login(None, None, xf_user) 117 | 118 | else: 119 | logger.warning("Continuing without authentication") 120 | return False 121 | 122 | async def verify_login(self) -> bool: 123 | """Verify if we are logged in by checking the account details page""" 124 | headers = { 125 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', 126 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 127 | 'Accept-Language': 'en-US,en;q=0.5', 128 | 'Referer': str(self.base_url) 129 | } 130 | 131 | try: 132 | async with self.session.get(self.base_url / "account/account-details", headers=headers) as response: 133 | if response.status != 200: 134 | return False 135 | text = await response.text() 136 | return 'You must be logged in to view this page.' 
not in text 137 | except Exception: 138 | return False 139 | 140 | async def login(self, username: str = None, password: str = None, xf_user_cookie: str = None) -> bool: 141 | """Log in to SimpCity through https://www.simpcity.su/login/ and save the cookies""" 142 | await self.init_session() 143 | 144 | # Common headers for all requests 145 | headers = { 146 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', 147 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 148 | 'Accept-Language': 'en-US,en;q=0.5', 149 | 'Accept-Encoding': 'gzip, deflate, br', 150 | 'Connection': 'keep-alive', 151 | 'Upgrade-Insecure-Requests': '1', 152 | 'Sec-Fetch-Dest': 'document', 153 | 'Sec-Fetch-Mode': 'navigate', 154 | 'Sec-Fetch-Site': 'none', 155 | 'Sec-Fetch-User': '?1', 156 | 'Cache-Control': 'max-age=0', 157 | 'DNT': '1' 158 | } 159 | 160 | if xf_user_cookie: 161 | self.session.cookie_jar.update_cookies({'xf_user': xf_user_cookie}) 162 | if await self.verify_login(): 163 | self.logged_in = True 164 | logger.info("Successful login using xf_user cookie") 165 | return True 166 | else: 167 | logger.error("Login failed: invalid or expired xf_user cookie") 168 | return False 169 | 170 | if not username or not password: 171 | return False 172 | 173 | try: 174 | # First, get the login page to extract the CSRF token and any hidden fields 175 | login_page_url = self.base_url / "login" 176 | headers['Referer'] = str(self.base_url) 177 | 178 | async with self.session.get(login_page_url, headers=headers) as response: 179 | if response.status == 403: 180 | logger.error("Access forbidden. The site may be blocking automated access.") 181 | logger.info("Try using the xf_user cookie method.") 182 | return False 183 | elif response.status != 200: 184 | logger.error(f"Error getting the login page: {response.status}") 185 | return False 186 | 187 | text = await response.text() 188 | soup = BeautifulSoup(text, 'html.parser') 189 | 190 | # Extract CSRF token 191 | csrf_token_elem = soup.select_one('input[name=_xfToken]') 192 | if not csrf_token_elem: 193 | logger.error("CSRF token not found. The login page structure may have changed.") 194 | return False 195 | csrf_token = csrf_token_elem['value'] 196 | 197 | # Extract hidden fields (if any) 198 | hidden_fields = {} 199 | for hidden in soup.find_all('input', type='hidden'): 200 | if hidden.get('name') and hidden.get('value'): 201 | hidden_fields[hidden['name']] = hidden['value'] 202 | 203 | # Prepare data for login 204 | login_url = self.base_url / "login/login" 205 | data = { 206 | 'login': username, 207 | 'password': password, 208 | '_xfToken': csrf_token, 209 | '_xfRedirect': str(self.base_url), # Will redirect to the homepage (then the user enters the desired URL) 210 | 'remember': '1' 211 | } 212 | data.update(hidden_fields) 213 | 214 | # Update headers for the login request 215 | headers.update({ 216 | 'Content-Type': 'application/x-www-form-urlencoded', 217 | 'Origin': str(self.base_url), 218 | 'Referer': str(login_page_url) 219 | }) 220 | 221 | # Attempt login 222 | async with self.session.post(login_url, data=data, headers=headers, allow_redirects=True) as response: 223 | if response.status == 403: 224 | logger.error("Access forbidden during login. 
The site may be blocking automated access.") 225 | logger.info("Try using the xf_user cookie method.") 226 | return False 227 | elif response.status not in [200, 303]: 228 | logger.error(f"Login failed: unexpected status code {response.status}") 229 | return False 230 | 231 | # Verify that login was successful 232 | if await self.verify_login(): 233 | self.logged_in = True 234 | logger.info("Successful login") 235 | return True 236 | 237 | # If verification fails, look for error messages in the response 238 | text = await response.text() 239 | if any(error in text.lower() for error in ['invalid password', 'invalid username', 'incorrect password']): 240 | logger.error("Invalid username or password") 241 | else: 242 | logger.error("Login failed: could not verify authentication status") 243 | return False 244 | 245 | except Exception as e: 246 | logger.error(f"Error during login: {str(e)}") 247 | return False 248 | 249 | async def get_page(self, url: URL) -> Optional[BeautifulSoup]: 250 | """Get the content of a page while applying rate limiting""" 251 | headers = { 252 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', 253 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 254 | 'Accept-Language': 'en-US,en;q=0.5', 255 | 'Accept-Encoding': 'gzip, deflate, br', 256 | 'Connection': 'keep-alive', 257 | 'Upgrade-Insecure-Requests': '1', 258 | 'DNT': '1', 259 | 'Referer': str(self.base_url) 260 | } 261 | 262 | async with self.request_limiter: 263 | try: 264 | async with self.session.get(url, headers=headers) as response: 265 | if response.status == 403: 266 | logger.error(f"Access forbidden for {url}. The site may be blocking automated access.") 267 | return None 268 | elif response.status != 200: 269 | logger.error(f"Error getting page {url}: {response.status}") 270 | return None 271 | text = await response.text() 272 | return BeautifulSoup(text, 'html.parser') 273 | except Exception as e: 274 | logger.error(f"Error getting page {url}: {str(e)}") 275 | return None 276 | 277 | async def download_file(self, url: str, filename: str, subfolder: str = ""): 278 | """Download a file showing progress""" 279 | save_path = self.download_path / subfolder 280 | save_path.mkdir(exist_ok=True) 281 | filepath = save_path / filename 282 | 283 | if filepath.exists(): 284 | logger.info(f"File already exists: {filename}") 285 | return True 286 | 287 | headers = { 288 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' 289 | } 290 | 291 | try: 292 | async with self.request_limiter: 293 | async with self.session.get(url, headers=headers) as response: 294 | if response.status != 200: 295 | logger.error(f"Error downloading {filename}: {response.status}") 296 | return False 297 | 298 | file_size = int(response.headers.get('content-length', 0)) 299 | if file_size == 0: 300 | logger.error(f"Empty file: {filename}") 301 | return False 302 | 303 | logger.info(f"Downloading {filename} ({file_size/1024/1024:.1f} MB)") 304 | 305 | temp_filepath = filepath.with_suffix('.temp') 306 | try: 307 | with open(temp_filepath, 'wb') as f: 308 | downloaded = 0 309 | async for chunk in response.content.iter_chunked(8192): 310 | if chunk: 311 | f.write(chunk) 312 | downloaded += len(chunk) 313 | if file_size: 314 | progress = (downloaded / file_size) * 100 315 | if downloaded % (8192 * 100) == 0: 316 | print(f"\rProgress: {progress:.1f}%", end='', flush=True) 317 | 318 | print() # New line after 
progress 319 | 320 | temp_filepath.replace(filepath) 321 | logger.info(f"File downloaded successfully: {filename}") 322 | return True 323 | except Exception as e: 324 | if temp_filepath.exists(): 325 | temp_filepath.unlink() 326 | raise e 327 | except Exception as e: 328 | logger.error(f"Error downloading {filename}: {str(e)}") 329 | if filepath.exists(): 330 | filepath.unlink() 331 | return False 332 | 333 | async def process_post(self, post_content: BeautifulSoup, subfolder: str) -> List[Tuple[str, str]]: 334 | """Process a forum post and extract multimedia files""" 335 | files = [] 336 | try: 337 | # Process images 338 | images = post_content.select(self.images_selector) 339 | logger.debug(f"Found {len(images)} images in the post") 340 | for img in images: 341 | src = img.get('src') 342 | if src: 343 | if src.startswith('//'): 344 | src = 'https:' + src 345 | elif src.startswith('/'): 346 | src = str(self.base_url / src[1:]) 347 | filename = src.split('/')[-1] 348 | files.append((src, filename)) 349 | 350 | # Process videos 351 | videos = post_content.select(self.videos_selector) 352 | logger.debug(f"Found {len(videos)} videos in the post") 353 | for video in videos: 354 | src = video.get('src') 355 | if src: 356 | if src.startswith('//'): 357 | src = 'https:' + src 358 | elif src.startswith('/'): 359 | src = str(self.base_url / src[1:]) 360 | filename = src.split('/')[-1] 361 | files.append((src, filename)) 362 | 363 | # Process attachments 364 | attachments_block = post_content.select_one(self.attachments_block_selector) 365 | if attachments_block: 366 | attachments = attachments_block.select(self.attachments_selector) 367 | logger.debug(f"Found {len(attachments)} attachments in the post") 368 | for attachment in attachments: 369 | href = attachment.get('href') 370 | if href: 371 | if href.startswith('//'): 372 | href = 'https:' + href 373 | elif href.startswith('/'): 374 | href = str(self.base_url / href[1:]) 375 | filename = href.split('/')[-1] 376 | files.append((href, filename)) 377 | 378 | if files: 379 | logger.debug(f"Total files found in the post: {len(files)}") 380 | return files 381 | except Exception as e: 382 | logger.error(f"Error processing post: {str(e)}") 383 | return [] 384 | 385 | async def process_thread(self, url: str) -> None: 386 | """Process a forum thread and download all multimedia files""" 387 | logger.info(f"Starting processing thread: {url}") 388 | 389 | if not url.startswith(('http://', 'https://')): 390 | url = f"https://www.simpcity.su/{url.lstrip('/')}" 391 | logger.info(f"Converted URL to: {url}") 392 | 393 | thread_url = URL(url) 394 | current_url = thread_url 395 | 396 | # Check if login is required 397 | logger.info("Verifying if login is required...") 398 | if await self.check_login_required(str(current_url)): 399 | if not await self.prompt_and_login(): 400 | logger.error("Login is required but authentication failed") 401 | return 402 | 403 | # Once logged in, redirect to the requested thread 404 | logger.info("Getting thread page...") 405 | soup = await self.get_page(current_url) 406 | if not soup: 407 | logger.error("Error getting thread page") 408 | return 409 | 410 | title_elem = soup.select_one(self.title_selector) 411 | if not title_elem: 412 | logger.error("Thread title not found") 413 | return 414 | 415 | thread_title = re.sub(r'[<>:"/\\|?*]', '_', title_elem.text.strip()) 416 | logger.info(f"Processing thread: {thread_title}") 417 | 418 | page_num = 1 419 | total_files = 0 420 | 421 | while True: 422 | logger.info(f"Processing page 
{page_num}") 423 | soup = await self.get_page(current_url) 424 | if not soup: 425 | logger.error(f"Error getting page {page_num}") 426 | break 427 | 428 | # Process each post 429 | posts = soup.select(self.posts_selector) 430 | if not posts: 431 | logger.warning(f"No posts found on page {page_num}") 432 | break 433 | 434 | logger.info(f"Found {len(posts)} posts on page {page_num}") 435 | 436 | for post_index, post in enumerate(posts, 1): 437 | logger.info(f"Processing post {post_index}/{len(posts)} on page {page_num}") 438 | post_content = post.select_one(self.post_content_selector) 439 | if post_content: 440 | files = await self.process_post(post_content, thread_title) 441 | if files: 442 | logger.info(f"Found {len(files)} files in post {post_index}") 443 | for file_url, filename in files: 444 | if await self.download_file(file_url, filename, thread_title): 445 | total_files += 1 446 | else: 447 | logger.warning(f"No content found in post {post_index}") 448 | 449 | # Check if there is a next page 450 | next_page = soup.select_one(self.next_page_selector) 451 | if next_page and (href := next_page.get('href')): 452 | if href.startswith('/'): 453 | current_url = self.base_url / href[1:] 454 | else: 455 | current_url = URL(href) 456 | logger.info(f"Moving to page {page_num + 1}: {current_url}") 457 | page_num += 1 458 | else: 459 | logger.info("No more pages found") 460 | break 461 | 462 | if total_files > 0: 463 | logger.info(f"Thread processing complete. Downloaded {total_files} files.") 464 | else: 465 | logger.warning("No files were downloaded from this thread.") 466 | 467 | async def main(): 468 | if len(sys.argv) != 2: 469 | print("Usage: python simpcity.py ") 470 | print("Example: python simpcity.py https://www.simpcity.su/threads/thread-title.12345") 471 | return 472 | 473 | url = sys.argv[1] 474 | downloader = SimpCityDownloader() 475 | 476 | try: 477 | # Timeout for the entire process (1 hour) 478 | timeout = 3600 # 1 hour timeout 479 | async with asyncio.timeout(timeout): 480 | await downloader.init_session() 481 | await downloader.process_thread(url) 482 | 483 | except asyncio.TimeoutError: 484 | logger.error(f"The operation exceeded the timeout limit of {timeout} seconds") 485 | except KeyboardInterrupt: 486 | logger.info("Operation cancelled by the user") 487 | except Exception as e: 488 | logger.error(f"An error occurred: {str(e)}") 489 | finally: 490 | logger.info("Cleaning up resources...") 491 | await downloader.close() 492 | logger.info("Done!") 493 | 494 | if __name__ == "__main__": 495 | try: 496 | if sys.platform == 'win32': 497 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) 498 | asyncio.run(main()) 499 | except KeyboardInterrupt: 500 | print("\nOperation cancelled by the user") 501 | except Exception as e: 502 | print(f"\nFatal error: {str(e)}") 503 | --------------------------------------------------------------------------------