├── .gitignore ├── requirements.txt ├── README.md └── simpcity.py /.gitignore: -------------------------------------------------------------------------------- 1 | .venv/ 2 | downloads/ 3 | cookies.json -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | aiohttp 2 | yarl 3 | aiolimiter 4 | bs4 -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SimpCity Downloader 2 | 3 | This Python script downloads media files (images, videos, attachments) from a specific thread on the SimpCity forum. 4 | 5 | **Features:** 6 | 7 | * **Media Download:** Downloads images, videos, and attachments from forum posts. 8 | * **Thread Navigation:** Automatically navigates through multiple pages of a thread. 9 | * **Login Support:** Supports login using a username/password or an `xf_user` cookie. 10 | * **Rate Limiting:** Implements rate limiting to prevent server overload. 11 | * **Progress Tracking:** Displays download progress for each file. 12 | * **Error Handling:** Includes basic error handling and logging. 13 | 14 | **Installation:** 15 | 16 | 1. **Clone the repository:** 17 | 18 | To get started, clone the repository to your local machine: 19 | 20 | ```bash 21 | git clone https://github.com/Emy69/SimpCityCLI.git 22 | ``` 23 | 24 | 2. **Install the required libraries:** 25 | 26 | Navigate to the project directory and install the dependencies: 27 | 28 | ```bash 29 | pip install -r requirements.txt 30 | ``` 31 | 32 | **Usage:** 33 | 34 | 1. **Run the script from the command line:** 35 | 36 | After installation, run the script with the following command: 37 | 38 | ```bash 39 | python simpcity.py <thread_url> 40 | ``` 41 | 42 | Replace `<thread_url>` with the URL of the SimpCity thread you want to download.
43 | 44 | **Example:** 45 | 46 | ```bash 47 | python simpcity.py https://simpcity.su/threads/user/ 48 | ``` 49 | ## Support My Work 50 | 51 | If you find this tool helpful, please consider supporting my efforts: 52 | 53 | [![Donate with PayPal](https://img.shields.io/badge/Donate-PayPal-blue.svg?logo=paypal&style=for-the-badge)](https://www.paypal.com/paypalme/Emy699) 54 | [![Buy Me a Coffee](https://img.shields.io/badge/Buy%20Me%20a%20Coffee-FFDD00.svg?style=for-the-badge&logo=buy-me-a-coffee&logoColor=black)](https://buymeacoffee.com/emy_69) 55 | [![Support on Patreon](https://img.shields.io/badge/Support%20on%20Patreon-FF424D.svg?style=for-the-badge&logo=patreon&logoColor=white)](https://www.patreon.com/emy69) 56 | 57 | 58 | -------------------------------------------------------------------------------- /simpcity.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import aiohttp 3 | import logging 4 | import sys 5 | import os 6 | import json 7 | from pathlib import Path 8 | from typing import Optional, List, Tuple 9 | from yarl import URL 10 | from aiolimiter import AsyncLimiter 11 | from bs4 import BeautifulSoup 12 | import re 13 | import getpass 14 | 15 | # Configure logging 16 | logging.basicConfig( 17 | level=logging.INFO, 18 | format='%(asctime)s - %(levelname)s - %(message)s', 19 | datefmt='%Y-%m-%d %H:%M:%S' 20 | ) 21 | logger = logging.getLogger(__name__) 22 | 23 | class SimpCityDownloader: 24 | def __init__(self): 25 | self.base_url = URL("https://www.simpcity.su") 26 | self.session = None 27 | self.logged_in = False 28 | self.login_attempts = 0 29 | self.request_limiter = AsyncLimiter(10, 1) # 10 requests per second 30 | self.download_path = Path("downloads/simpcity") 31 | self.download_path.mkdir(parents=True, exist_ok=True) 32 | # Path to store cookies 33 | self.cookie_file = Path("cookies.json") 34 | 35 | # Selectors according to the original crawler 36 | self.title_selector = "h1[class=p-title-value]" 37 | self.posts_selector = "div[class*=message-main]" 38 | self.post_content_selector = "div[class*=message-userContent]" 39 | self.images_selector = "img[class*=bbImage]" 40 | self.videos_selector = "video source" 41 | self.iframe_selector = "iframe[class=saint-iframe]" 42 | self.attachments_block_selector = "section[class=message-attachments]" 43 | self.attachments_selector = "a" 44 | self.next_page_selector = "a[class*=pageNav-jump--next]" 45 | 46 | async def init_session(self): 47 | """Initialize the aiohttp session and load persistent cookies (if available)""" 48 | if not self.session: 49 | self.session = aiohttp.ClientSession() 50 | self.load_cookies() 51 | 52 | async def close(self): 53 | """Save cookies and close the session""" 54 | if self.session: 55 | self.save_cookies() 56 | await self.session.close() 57 | self.session = None 58 | 59 | def save_cookies(self): 60 | """Save the current cookies to a JSON file""" 61 | if self.session and self.session.cookie_jar: 62 | # Obtain cookies for the base domain 63 | simple_cookie = self.session.cookie_jar.filter_cookies(str(self.base_url)) 64 | cookies = {key: morsel.value for key, morsel in simple_cookie.items()} 65 | try: 66 | with open(self.cookie_file, 'w') as f: 67 | json.dump(cookies, f) 68 | logger.info("Cookies saved in %s", self.cookie_file) 69 | except Exception as e: 70 | logger.error("Error saving cookies: %s", str(e)) 71 | 72 | def load_cookies(self): 73 | """Load cookies from the file (if it exists) and add them to the session""" 74 | if 
self.cookie_file.exists(): 75 | try: 76 | with open(self.cookie_file, 'r') as f: 77 | cookies = json.load(f) 78 | self.session.cookie_jar.update_cookies(cookies) 79 | logger.info("Cookies loaded from %s", self.cookie_file) 80 | except Exception as e: 81 | logger.error("Error loading cookies: %s", str(e)) 82 | 83 | async def check_login_required(self, url: str) -> bool: 84 | """Check if login is required to access the given URL""" 85 | try: 86 | async with self.session.get(url) as response: 87 | if response.status == 200: 88 | text = await response.text() 89 | # Look for login indicators in the response 90 | return 'You must be logged-in to do that.' in text or 'Login or register' in text 91 | return True # Assume login is required if the page cannot be accessed 92 | except Exception: 93 | return True 94 | 95 | async def prompt_and_login(self) -> bool: 96 | """Prompt for credentials and perform login""" 97 | print("\nLogin required for SimpCity") 98 | print("1. Login with username/password") 99 | print("2. Login with xf_user cookie") 100 | print("3. Continue without login") 101 | 102 | choice = input("\nEnter your choice (1-3): ").strip() 103 | 104 | if choice == "1": 105 | username = input("Username: ").strip() 106 | password = getpass.getpass("Password: ").strip() 107 | return await self.login(username, password) 108 | 109 | elif choice == "2": 110 | print("\nTo obtain your xf_user cookie:") 111 | print("1. Visit SimpCity in your browser") 112 | print("2. Open the developer tools (F12)") 113 | print("3. Go to Application/Storage -> Cookies") 114 | print("4. Copy the value of the 'xf_user' cookie") 115 | xf_user = input("\nEnter the value of the xf_user cookie: ").strip() 116 | return await self.login(None, None, xf_user) 117 | 118 | else: 119 | logger.warning("Continuing without authentication") 120 | return False 121 | 122 | async def verify_login(self) -> bool: 123 | """Verify if we are logged in by checking the account details page""" 124 | headers = { 125 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', 126 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 127 | 'Accept-Language': 'en-US,en;q=0.5', 128 | 'Referer': str(self.base_url) 129 | } 130 | 131 | try: 132 | async with self.session.get(self.base_url / "account/account-details", headers=headers) as response: 133 | if response.status != 200: 134 | return False 135 | text = await response.text() 136 | return 'You must be logged in to view this page.' 
not in text 137 | except Exception: 138 | return False 139 | 140 | async def login(self, username: str = None, password: str = None, xf_user_cookie: str = None) -> bool: 141 | """Log in to SimpCity through https://www.simpcity.su/login/ and save the cookies""" 142 | await self.init_session() 143 | 144 | # Common headers for all requests 145 | headers = { 146 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', 147 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 148 | 'Accept-Language': 'en-US,en;q=0.5', 149 | 'Accept-Encoding': 'gzip, deflate, br', 150 | 'Connection': 'keep-alive', 151 | 'Upgrade-Insecure-Requests': '1', 152 | 'Sec-Fetch-Dest': 'document', 153 | 'Sec-Fetch-Mode': 'navigate', 154 | 'Sec-Fetch-Site': 'none', 155 | 'Sec-Fetch-User': '?1', 156 | 'Cache-Control': 'max-age=0', 157 | 'DNT': '1' 158 | } 159 | 160 | if xf_user_cookie: 161 | self.session.cookie_jar.update_cookies({'xf_user': xf_user_cookie}) 162 | if await self.verify_login(): 163 | self.logged_in = True 164 | logger.info("Successful login using xf_user cookie") 165 | return True 166 | else: 167 | logger.error("Login failed: invalid or expired xf_user cookie") 168 | return False 169 | 170 | if not username or not password: 171 | return False 172 | 173 | try: 174 | # First, get the login page to extract the CSRF token and any hidden fields 175 | login_page_url = self.base_url / "login" 176 | headers['Referer'] = str(self.base_url) 177 | 178 | async with self.session.get(login_page_url, headers=headers) as response: 179 | if response.status == 403: 180 | logger.error("Access forbidden. The site may be blocking automated access.") 181 | logger.info("Try using the xf_user cookie method.") 182 | return False 183 | elif response.status != 200: 184 | logger.error(f"Error getting the login page: {response.status}") 185 | return False 186 | 187 | text = await response.text() 188 | soup = BeautifulSoup(text, 'html.parser') 189 | 190 | # Extract CSRF token 191 | csrf_token_elem = soup.select_one('input[name=_xfToken]') 192 | if not csrf_token_elem: 193 | logger.error("CSRF token not found. The login page structure may have changed.") 194 | return False 195 | csrf_token = csrf_token_elem['value'] 196 | 197 | # Extract hidden fields (if any) 198 | hidden_fields = {} 199 | for hidden in soup.find_all('input', type='hidden'): 200 | if hidden.get('name') and hidden.get('value'): 201 | hidden_fields[hidden['name']] = hidden['value'] 202 | 203 | # Prepare data for login 204 | login_url = self.base_url / "login/login" 205 | data = { 206 | 'login': username, 207 | 'password': password, 208 | '_xfToken': csrf_token, 209 | '_xfRedirect': str(self.base_url), # Will redirect to the homepage (then the user enters the desired URL) 210 | 'remember': '1' 211 | } 212 | data.update(hidden_fields) 213 | 214 | # Update headers for the login request 215 | headers.update({ 216 | 'Content-Type': 'application/x-www-form-urlencoded', 217 | 'Origin': str(self.base_url), 218 | 'Referer': str(login_page_url) 219 | }) 220 | 221 | # Attempt login 222 | async with self.session.post(login_url, data=data, headers=headers, allow_redirects=True) as response: 223 | if response.status == 403: 224 | logger.error("Access forbidden during login. 
The site may be blocking automated access.") 225 | logger.info("Try using the xf_user cookie method.") 226 | return False 227 | elif response.status not in [200, 303]: 228 | logger.error(f"Login failed: unexpected status code {response.status}") 229 | return False 230 | 231 | # Verify that login was successful 232 | if await self.verify_login(): 233 | self.logged_in = True 234 | logger.info("Successful login") 235 | return True 236 | 237 | # If verification fails, look for error messages in the response 238 | text = await response.text() 239 | if any(error in text.lower() for error in ['invalid password', 'invalid username', 'incorrect password']): 240 | logger.error("Invalid username or password") 241 | else: 242 | logger.error("Login failed: could not verify authentication status") 243 | return False 244 | 245 | except Exception as e: 246 | logger.error(f"Error during login: {str(e)}") 247 | return False 248 | 249 | async def get_page(self, url: URL) -> Optional[BeautifulSoup]: 250 | """Get the content of a page while applying rate limiting""" 251 | headers = { 252 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/119.0.0.0 Safari/537.36', 253 | 'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8', 254 | 'Accept-Language': 'en-US,en;q=0.5', 255 | 'Accept-Encoding': 'gzip, deflate, br', 256 | 'Connection': 'keep-alive', 257 | 'Upgrade-Insecure-Requests': '1', 258 | 'DNT': '1', 259 | 'Referer': str(self.base_url) 260 | } 261 | 262 | async with self.request_limiter: 263 | try: 264 | async with self.session.get(url, headers=headers) as response: 265 | if response.status == 403: 266 | logger.error(f"Access forbidden for {url}. The site may be blocking automated access.") 267 | return None 268 | elif response.status != 200: 269 | logger.error(f"Error getting page {url}: {response.status}") 270 | return None 271 | text = await response.text() 272 | return BeautifulSoup(text, 'html.parser') 273 | except Exception as e: 274 | logger.error(f"Error getting page {url}: {str(e)}") 275 | return None 276 | 277 | async def download_file(self, url: str, filename: str, subfolder: str = ""): 278 | """Download a file showing progress""" 279 | save_path = self.download_path / subfolder 280 | save_path.mkdir(exist_ok=True) 281 | filepath = save_path / filename 282 | 283 | if filepath.exists(): 284 | logger.info(f"File already exists: {filename}") 285 | return True 286 | 287 | headers = { 288 | 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36' 289 | } 290 | 291 | try: 292 | async with self.request_limiter: 293 | async with self.session.get(url, headers=headers) as response: 294 | if response.status != 200: 295 | logger.error(f"Error downloading {filename}: {response.status}") 296 | return False 297 | 298 | file_size = int(response.headers.get('content-length', 0)) 299 | if file_size == 0: 300 | logger.error(f"Empty file: {filename}") 301 | return False 302 | 303 | logger.info(f"Downloading {filename} ({file_size/1024/1024:.1f} MB)") 304 | 305 | temp_filepath = filepath.with_suffix('.temp') 306 | try: 307 | with open(temp_filepath, 'wb') as f: 308 | downloaded = 0 309 | async for chunk in response.content.iter_chunked(8192): 310 | if chunk: 311 | f.write(chunk) 312 | downloaded += len(chunk) 313 | if file_size: 314 | progress = (downloaded / file_size) * 100 315 | if downloaded % (8192 * 100) == 0: 316 | print(f"\rProgress: {progress:.1f}%", end='', flush=True) 317 | 318 | print() # New line after 
progress 319 | 320 | temp_filepath.replace(filepath) 321 | logger.info(f"File downloaded successfully: {filename}") 322 | return True 323 | except Exception as e: 324 | if temp_filepath.exists(): 325 | temp_filepath.unlink() 326 | raise e 327 | except Exception as e: 328 | logger.error(f"Error downloading {filename}: {str(e)}") 329 | if filepath.exists(): 330 | filepath.unlink() 331 | return False 332 | 333 | async def process_post(self, post_content: BeautifulSoup, subfolder: str) -> List[Tuple[str, str]]: 334 | """Process a forum post and extract multimedia files""" 335 | files = [] 336 | try: 337 | # Process images 338 | images = post_content.select(self.images_selector) 339 | logger.debug(f"Found {len(images)} images in the post") 340 | for img in images: 341 | src = img.get('src') 342 | if src: 343 | if src.startswith('//'): 344 | src = 'https:' + src 345 | elif src.startswith('/'): 346 | src = str(self.base_url / src[1:]) 347 | filename = src.split('/')[-1] 348 | files.append((src, filename)) 349 | 350 | # Process videos 351 | videos = post_content.select(self.videos_selector) 352 | logger.debug(f"Found {len(videos)} videos in the post") 353 | for video in videos: 354 | src = video.get('src') 355 | if src: 356 | if src.startswith('//'): 357 | src = 'https:' + src 358 | elif src.startswith('/'): 359 | src = str(self.base_url / src[1:]) 360 | filename = src.split('/')[-1] 361 | files.append((src, filename)) 362 | 363 | # Process attachments 364 | attachments_block = post_content.select_one(self.attachments_block_selector) 365 | if attachments_block: 366 | attachments = attachments_block.select(self.attachments_selector) 367 | logger.debug(f"Found {len(attachments)} attachments in the post") 368 | for attachment in attachments: 369 | href = attachment.get('href') 370 | if href: 371 | if href.startswith('//'): 372 | href = 'https:' + href 373 | elif href.startswith('/'): 374 | href = str(self.base_url / href[1:]) 375 | filename = href.split('/')[-1] 376 | files.append((href, filename)) 377 | 378 | if files: 379 | logger.debug(f"Total files found in the post: {len(files)}") 380 | return files 381 | except Exception as e: 382 | logger.error(f"Error processing post: {str(e)}") 383 | return [] 384 | 385 | async def process_thread(self, url: str) -> None: 386 | """Process a forum thread and download all multimedia files""" 387 | logger.info(f"Starting processing thread: {url}") 388 | 389 | if not url.startswith(('http://', 'https://')): 390 | url = f"https://www.simpcity.su/{url.lstrip('/')}" 391 | logger.info(f"Converted URL to: {url}") 392 | 393 | thread_url = URL(url) 394 | current_url = thread_url 395 | 396 | # Check if login is required 397 | logger.info("Verifying if login is required...") 398 | if await self.check_login_required(str(current_url)): 399 | if not await self.prompt_and_login(): 400 | logger.error("Login is required but authentication failed") 401 | return 402 | 403 | # Once logged in, redirect to the requested thread 404 | logger.info("Getting thread page...") 405 | soup = await self.get_page(current_url) 406 | if not soup: 407 | logger.error("Error getting thread page") 408 | return 409 | 410 | title_elem = soup.select_one(self.title_selector) 411 | if not title_elem: 412 | logger.error("Thread title not found") 413 | return 414 | 415 | thread_title = re.sub(r'[<>:"/\\|?*]', '_', title_elem.text.strip()) 416 | logger.info(f"Processing thread: {thread_title}") 417 | 418 | page_num = 1 419 | total_files = 0 420 | 421 | while True: 422 | logger.info(f"Processing page 
{page_num}") 423 | soup = await self.get_page(current_url) 424 | if not soup: 425 | logger.error(f"Error getting page {page_num}") 426 | break 427 | 428 | # Process each post 429 | posts = soup.select(self.posts_selector) 430 | if not posts: 431 | logger.warning(f"No posts found on page {page_num}") 432 | break 433 | 434 | logger.info(f"Found {len(posts)} posts on page {page_num}") 435 | 436 | for post_index, post in enumerate(posts, 1): 437 | logger.info(f"Processing post {post_index}/{len(posts)} on page {page_num}") 438 | post_content = post.select_one(self.post_content_selector) 439 | if post_content: 440 | files = await self.process_post(post_content, thread_title) 441 | if files: 442 | logger.info(f"Found {len(files)} files in post {post_index}") 443 | for file_url, filename in files: 444 | if await self.download_file(file_url, filename, thread_title): 445 | total_files += 1 446 | else: 447 | logger.warning(f"No content found in post {post_index}") 448 | 449 | # Check if there is a next page 450 | next_page = soup.select_one(self.next_page_selector) 451 | if next_page and (href := next_page.get('href')): 452 | if href.startswith('/'): 453 | current_url = self.base_url / href[1:] 454 | else: 455 | current_url = URL(href) 456 | logger.info(f"Moving to page {page_num + 1}: {current_url}") 457 | page_num += 1 458 | else: 459 | logger.info("No more pages found") 460 | break 461 | 462 | if total_files > 0: 463 | logger.info(f"Thread processing complete. Downloaded {total_files} files.") 464 | else: 465 | logger.warning("No files were downloaded from this thread.") 466 | 467 | async def main(): 468 | if len(sys.argv) != 2: 469 | print("Usage: python simpcity.py ") 470 | print("Example: python simpcity.py https://www.simpcity.su/threads/thread-title.12345") 471 | return 472 | 473 | url = sys.argv[1] 474 | downloader = SimpCityDownloader() 475 | 476 | try: 477 | # Timeout for the entire process (1 hour) 478 | timeout = 3600 # 1 hour timeout 479 | async with asyncio.timeout(timeout): 480 | await downloader.init_session() 481 | await downloader.process_thread(url) 482 | 483 | except asyncio.TimeoutError: 484 | logger.error(f"The operation exceeded the timeout limit of {timeout} seconds") 485 | except KeyboardInterrupt: 486 | logger.info("Operation cancelled by the user") 487 | except Exception as e: 488 | logger.error(f"An error occurred: {str(e)}") 489 | finally: 490 | logger.info("Cleaning up resources...") 491 | await downloader.close() 492 | logger.info("Done!") 493 | 494 | if __name__ == "__main__": 495 | try: 496 | if sys.platform == 'win32': 497 | asyncio.set_event_loop_policy(asyncio.WindowsSelectorEventLoopPolicy()) 498 | asyncio.run(main()) 499 | except KeyboardInterrupt: 500 | print("\nOperation cancelled by the user") 501 | except Exception as e: 502 | print(f"\nFatal error: {str(e)}") 503 | --------------------------------------------------------------------------------