├── Procfile ├── Readme.md ├── app.json ├── bot.py ├── pin.py ├── requirements.txt └── runtime.txt /Procfile: -------------------------------------------------------------------------------- 1 | worker: python3 bot.py -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | ## Pinterest Downloader 2 | 3 | Telegram bot with download video and image from Pinterest. 4 | 5 | 6 | ## ~~Deploy To HEROKU - Doesnt support yet.~~ (Heroku currently not free) 7 | 8 | [![Deploy](https://www.herokucdn.com/deploy/button.svg)](https://heroku.com/deploy?template=https://github.com/muhammedfurkan/pinterest_downloader_telegram) 9 | 10 | #### Deoploy to Manually 11 | Simply clone the repository and run the main file: 12 | ```sh 13 | git clone https://github.com/muhammedfurkan/pinterest_downloader_telegram.git 14 | cd pinterest_downloader_telegram 15 | virtualenv -p /usr/bin/python3 venv 16 | . ./venv/bin/activate 17 | pip install -r requirements.txt 18 | python3 bot.py 19 | ``` 20 | 21 | ## Special Thanks 22 | 23 | [Pinterest Downloader](https://github.com/kamronbek29/pinterst_downloader) 24 | 25 | 26 | ## Contact With Me 27 | 28 | If you want to contact with me feel free to text from Telegram using [my link](https://t.me/By_Azade) 29 | -------------------------------------------------------------------------------- /app.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "pinterest_downloader_telegram", 3 | "description": "pinterest downloader!", 4 | "logo": "https://i.imgur.com/dePcvmz.jpg", 5 | "keywords": [ 6 | "telegram", 7 | "pinterest", 8 | "download", 9 | "video", 10 | "image" 11 | ], 12 | "repository": "https://github.com/muhammedfurkan/TelethonUserBot", 13 | "website": "https://telegram.dog/KanalLinkleri", 14 | "success_url": "https://t.me/KanalLinkleri", 15 | "env": { 16 | "ENV": { 17 | "description": "Setting this to ANYTHING will enable environment variables.", 18 | "value": "ANYTHING", 19 | "required": true 20 | }, 21 | "APP_ID": { 22 | "description": "Get this value from my.telegram.org! Please do not steal", 23 | "value": "", 24 | "required": true 25 | }, 26 | "API_HASH": { 27 | "description": "Get this value from my.telegram.org! Please do not steal", 28 | "value": "", 29 | "required": true 30 | }, 31 | "TMP_DOWNLOAD_DIRECTORY": { 32 | "description": "This is required for the plugins involving the file system.", 33 | "value": "./DOWNLOADS/", 34 | "required": false 35 | }, 36 | "BOT_TOKEN": { 37 | "description": "Telegram Bot Token frim @Botfather", 38 | "value": "", 39 | "required": true 40 | } 41 | }, 42 | "buildpacks": [ 43 | { 44 | "url": "https://github.com/jonathanong/heroku-buildpack-ffmpeg-latest" 45 | }, 46 | { 47 | "url": "heroku/python" 48 | } 49 | ] 50 | } 51 | -------------------------------------------------------------------------------- /bot.py: -------------------------------------------------------------------------------- 1 | import asyncio 2 | import importlib 3 | import logging 4 | import math 5 | import os 6 | import re 7 | import time 8 | from typing import List 9 | 10 | import pymongo 11 | from hachoir.metadata import extractMetadata 12 | from hachoir.parser import createParser 13 | from telethon import TelegramClient, events 14 | from telethon.sync import TelegramClient 15 | from telethon.tl.custom import Button 16 | from telethon.tl.types import DocumentAttributeVideo 17 | 18 | logging.basicConfig( 19 | format="[%(levelname) 5s/%(asctime)s] %(name)s: %(message)s", level=logging.WARNING 20 | ) 21 | logger = logging.getLogger(__name__) 22 | 23 | 24 | APP_ID = os.environ.get("APP_ID", None) 25 | APP_HASH = os.environ.get("APP_HASH", None) 26 | BOT_TOKEN = os.environ.get("BOT_TOKEN", None) 27 | TMP_DOWNLOAD_DIRECTORY = os.environ.get("TMP_DOWNLOAD_DIRECTORY", "./DOWNLOADS/") 28 | MONGO_DB = os.environ.get("MONGO_DB", None) 29 | # type yout telegram id or username 30 | LOG = os.environ.get("LOG", None) 31 | ADMIN = os.environ.get("ADMIN", None) 32 | 33 | 34 | bot = TelegramClient("pinterestbot", APP_ID, APP_HASH).start(bot_token=BOT_TOKEN) 35 | 36 | msg = """ 37 | Merhaba ben Pinterest üzerinden Video ve Resim indirebilen bir botum. 38 | `Hello, I am a bot that can download Videos and Images via Pinterest.` 39 | 40 | Şunları yapabilirim: 41 | `I can:` 42 | 43 | 👉 **Video indirmek için:** `/pvid pinterestURL` 44 | 👉 **To download a video:** `/pvid pinterestURL` 45 | 46 | 47 | 👉 **Resim indirebilmek için:** `/pimg pinterestURL` 48 | 👉 **To download a image:** `/pimg pinterestURL` 49 | """ 50 | 51 | 52 | SESSION_ADI = "pinterest" 53 | 54 | 55 | class pinterest_db: 56 | def __init__(self): 57 | client = pymongo.MongoClient(MONGO_DB) 58 | db = client["Telegram"] 59 | self.collection = db[SESSION_ADI] 60 | 61 | def ara(self, sorgu: dict): 62 | say = self.collection.count_documents(sorgu) 63 | if say == 1: 64 | return self.collection.find_one(sorgu, {"_id": 0}) 65 | elif say > 1: 66 | cursor = self.collection.find(sorgu, {"_id": 0}) 67 | return { 68 | bak["uye_id"]: {"uye_nick": bak["uye_nick"], "uye_adi": bak["uye_adi"]} 69 | for bak in cursor 70 | } 71 | else: 72 | return None 73 | 74 | def ekle(self, uye_id, uye_nick, uye_adi): 75 | if not self.ara({"uye_id": {"$in": [str(uye_id), int(uye_id)]}}): 76 | return self.collection.insert_one( 77 | { 78 | "uye_id": uye_id, 79 | "uye_nick": uye_nick, 80 | "uye_adi": uye_adi, 81 | } 82 | ) 83 | else: 84 | return None 85 | 86 | def sil(self, uye_id): 87 | if not self.ara({"uye_id": {"$in": [str(uye_id), int(uye_id)]}}): 88 | return None 89 | 90 | self.collection.delete_one({"uye_id": {"$in": [str(uye_id), int(uye_id)]}}) 91 | return True 92 | 93 | @property 94 | def kullanici_idleri(self): 95 | return list(self.ara({"uye_id": {"$exists": True}}).keys()) 96 | 97 | 98 | async def log_yolla(event): 99 | j = await event.client.get_input_entity(event.chat_id) 100 | print(j, "j--------------------------------") 101 | uye_id = j.id 102 | uye_nick = f"@{j.username}" if j.username else None 103 | uye_adi = f"{j.first_name or ''} {j.last_name or ''}".strip() 104 | komut = event.text 105 | 106 | # Kullanıcı Kaydet 107 | db = pinterest_db() 108 | db.ekle(uye_id, uye_nick, uye_adi) 109 | 110 | 111 | # total number of users using the bot 112 | @bot.on(events.NewMessage(pattern="/kul_say")) 113 | async def say(event): 114 | j = await event.client.get_entity(event.chat_id) 115 | 116 | db = pinterest_db() 117 | db.ekle(j.user.id, j.user.username, j.user.first_name) 118 | 119 | def KULLANICILAR(): 120 | return db.kullanici_idleri 121 | 122 | await event.client.send_message( 123 | ADMIN, f"ℹ️ `{len(KULLANICILAR())}` __Adet Kullanıcıya Sahipsin..__" 124 | ) 125 | 126 | 127 | # Command to make an announcement to users using the bot 128 | @bot.on(events.NewMessage(pattern="/duyuru ?(.*)")) 129 | async def duyuru(event): 130 | # < Başlangıç 131 | 132 | ilk_mesaj = await event.client.send_message( 133 | event.chat_id, "⌛️ `Hallediyorum..`", reply_to=event.chat_id, link_preview=False 134 | ) 135 | # ------------------------------------------------------------- Başlangıç > 136 | 137 | db = pinterest_db() 138 | 139 | def KULLANICILAR(): 140 | return db.kullanici_idleri 141 | 142 | if not KULLANICILAR(): 143 | await ilk_mesaj.edit("ℹ️ __Start vermiş kimse yok kanka..__") 144 | return 145 | 146 | if not event.message.reply_to: 147 | await ilk_mesaj.edit("⚠️ __Duyurmak için mesaj yanıtlayın..__") 148 | return 149 | 150 | basarili = 0 151 | hatalar = [] 152 | mesaj_giden_kisiler = [] 153 | get_reply_msg = await event.get_reply_message() 154 | for kullanici_id in KULLANICILAR(): 155 | try: 156 | await event.client.send_message( 157 | entity=kullanici_id, message=get_reply_msg.message 158 | ) 159 | mesaj_giden_kisiler.append(kullanici_id) 160 | basarili += 1 161 | except Exception as hata: 162 | hatalar.append(type(hata).__name__) 163 | db.sil(kullanici_id) 164 | 165 | mesaj = ( 166 | f"⁉️ `{len(hatalar)}` __Adet Kişiye Mesaj Atamadım ve DB'den Sildim..__\n\n" 167 | if hatalar 168 | else "" 169 | ) 170 | mesaj += f"📜 `{basarili}` __Adet Kullanıcıya Mesaj Attım..__" 171 | 172 | await ilk_mesaj.edit(mesaj) 173 | 174 | 175 | @bot.on(events.NewMessage(pattern="/start", func=lambda e: e.is_private)) 176 | async def start(event): 177 | j = await event.client.get_entity(event.chat_id) 178 | mesaj = f"Gönderen [{j.first_name}](tg://user?id={event.chat_id})\nMesaj: {event.message.message}" 179 | await bot.send_message(ADMIN, mesaj) 180 | if event: 181 | markup = bot.build_reply_markup( 182 | [ 183 | [ 184 | Button.url(text="📍 Kanal Linki", url="t.me/KanalLinkleri"), 185 | Button.url(text="👤 Yapımcı", url="t.me/ADMIN"), 186 | ], 187 | [ 188 | Button.url( 189 | text="🔗 GitHub Repo", 190 | url="https://github.com/muhammedfurkan/pinterest_downloader_telegram", 191 | ) 192 | ], 193 | [Button.inline(text="🤖 Diğer Botlar", data="digerbotlar")], 194 | ] 195 | ) 196 | await bot.send_message(event.chat_id, msg, buttons=markup, link_preview=False) 197 | 198 | 199 | @bot.on(events.NewMessage(pattern="/pvid ?(.*)", func=lambda e: e.is_private)) 200 | async def vid(event): 201 | try: 202 | j = await event.client.get_entity(event.chat_id) 203 | mesaj = f"Gönderen [{j.first_name}](tg://user?id={event.chat_id})\nMesaj: {event.message.message}" 204 | await bot.send_message(ADMIN, mesaj) 205 | markup = bot.build_reply_markup( 206 | [ 207 | [ 208 | Button.url(text="📍 Kanal Linki", url="t.me/KanalLinkleri"), 209 | Button.url(text="👤 Yapımcı", url="t.me/ADMIN"), 210 | ], 211 | [Button.inline(text="🤖 Diğer Botlar", data="digerbotlar")], 212 | ] 213 | ) 214 | 215 | url = event.pattern_match.group(1) 216 | if url: 217 | x = await event.reply("`işlem yapılıyor bekleyiniz...`") 218 | 219 | # get_url = get_download_url(url) 220 | pin_dl = importlib.import_module("pin") 221 | pin_dl.run_library_main( 222 | url, 223 | TMP_DOWNLOAD_DIRECTORY, 224 | 0, 225 | -1, 226 | False, 227 | False, 228 | False, 229 | False, 230 | False, 231 | False, 232 | True, 233 | False, 234 | None, 235 | None, 236 | None, 237 | ) 238 | j = None 239 | for file in os.listdir(TMP_DOWNLOAD_DIRECTORY): 240 | if file.endswith(".log"): 241 | os.remove(f"{TMP_DOWNLOAD_DIRECTORY}/{file}") 242 | continue 243 | if file.endswith(".mp4"): 244 | j = TMP_DOWNLOAD_DIRECTORY + file 245 | 246 | # j = download_video(get_url) 247 | thumb_image_path = TMP_DOWNLOAD_DIRECTORY + "thumb_image.jpg" 248 | 249 | if not os.path.isdir(TMP_DOWNLOAD_DIRECTORY): 250 | os.makedirs(TMP_DOWNLOAD_DIRECTORY) 251 | 252 | metadata = extractMetadata(createParser(j)) 253 | duration = 0 254 | 255 | if metadata.has("duration"): 256 | duration = metadata.get("duration").seconds 257 | width = 0 258 | height = 0 259 | thumb = None 260 | 261 | if os.path.exists(thumb_image_path): 262 | thumb = thumb_image_path 263 | else: 264 | thumb = await take_screen_shot( 265 | j, os.path.dirname(os.path.abspath(j)), (duration / 2) 266 | ) 267 | width = 0 268 | height = 0 269 | if os.path.exists(thumb_image_path): 270 | metadata = extractMetadata(createParser(thumb_image_path)) 271 | if metadata.has("width"): 272 | width = metadata.get("width") 273 | if metadata.has("height"): 274 | height = metadata.get("height") 275 | c_time = time.time() 276 | await event.client.send_file( 277 | event.chat_id, 278 | j, 279 | thumb=thumb, 280 | caption="**@Pinterestdown_Robot** tarafından indirilmiştir\n\nDownloaded by **@Pinterestdown_Robot**", 281 | force_document=False, 282 | allow_cache=False, 283 | reply_to=event.message.id, 284 | buttons=markup, 285 | attributes=[ 286 | DocumentAttributeVideo( 287 | duration=duration, 288 | w=width, 289 | h=height, 290 | round_message=False, 291 | supports_streaming=True, 292 | ) 293 | ], 294 | progress_callback=lambda d, t: asyncio.get_event_loop().create_task( 295 | progress(d, t, event, c_time, "yükleniyor...") 296 | ), 297 | ) 298 | await event.delete() 299 | await x.delete() 300 | os.remove(j) 301 | os.remove(thumb_image_path) 302 | else: 303 | await event.reply( 304 | "**bana komutla beraber link gönder.**\n\n`send me the link with the command.`" 305 | ) 306 | except FileNotFoundError: 307 | return 308 | 309 | 310 | @bot.on(events.NewMessage(pattern="/pimg ?(.*)", func=lambda e: e.is_private)) 311 | async def img(event): 312 | j = await event.client.get_entity(event.chat_id) 313 | mesaj = f"Gönderen [{j.first_name}](tg://user?id={event.chat_id})\nMesaj: {event.message.message}" 314 | await bot.send_message(ADMIN, mesaj) 315 | markup = bot.build_reply_markup( 316 | [ 317 | [ 318 | Button.url(text="📍 Kanal Linki", url="t.me/KanalLinkleri"), 319 | Button.url(text="👤 Yapımcı", url="t.me/ADMIN"), 320 | ], 321 | [Button.inline(text="🤖 Diğer Botlar", data="digerbotlar")], 322 | ] 323 | ) 324 | url = event.pattern_match.group(1) 325 | if url: 326 | x = await event.reply( 327 | "`İşlem yapılıyor lütfen bekleyiniz...`\n\nProcessing please wait ..." 328 | ) 329 | # get_url = await get_download_url(url) 330 | # j = await download_image(get_url) 331 | pin_dl = importlib.import_module("pin") 332 | pin_dl.run_library_main( 333 | url, 334 | TMP_DOWNLOAD_DIRECTORY, 335 | 0, 336 | -1, 337 | False, 338 | False, 339 | False, 340 | False, 341 | False, 342 | True, 343 | False, 344 | False, 345 | None, 346 | None, 347 | None, 348 | ) 349 | j = None 350 | for file in os.listdir(TMP_DOWNLOAD_DIRECTORY): 351 | if file.endswith(".log"): 352 | os.remove(f"{TMP_DOWNLOAD_DIRECTORY}/{file}") 353 | continue 354 | if file.endswith(".jpg"): 355 | j = TMP_DOWNLOAD_DIRECTORY + file 356 | 357 | if not os.path.isdir(TMP_DOWNLOAD_DIRECTORY): 358 | os.makedirs(TMP_DOWNLOAD_DIRECTORY) 359 | c_time = time.time() 360 | await event.client.send_file( 361 | event.chat_id, 362 | j, 363 | caption="**@Pinterestdown_Robot** tarafından indirilmiştir\n\nDownloaded by **@Pinterestdown_Robot**", 364 | force_document=False, 365 | allow_cache=False, 366 | reply_to=event.message.id, 367 | buttons=markup, 368 | progress_callback=lambda d, t: asyncio.get_event_loop().create_task( 369 | progress(d, t, event, c_time, "yükleniyor...") 370 | ), 371 | ) 372 | await event.delete() 373 | await x.delete() 374 | os.remove(j) 375 | else: 376 | await event.reply( 377 | "**bana komutla beraber link gönder.**\n\n`send me the link with the command.`" 378 | ) 379 | 380 | 381 | @bot.on(events.CallbackQuery(pattern=b"digerbotlar")) 382 | async def digerbotlar(event): 383 | markup = bot.build_reply_markup( 384 | [ 385 | [ 386 | Button.url(text="📍 Kanal Linki", url="t.me/KanalLinkleri"), 387 | Button.url(text="👤 Yapımcı", url="t.me/ADMIN"), 388 | ], 389 | [Button.inline(text="Ana Sayfa", data="ana")], 390 | ] 391 | ) 392 | await event.edit( 393 | "**Diğer Botlarımız:**\n\n" 394 | + "📍 [A101 Katalog Bot](t.me/A101KatalogBot)\n" 395 | + "📍 [Osmanlıca Bot](t.me/OsmanlicaBot)\n" 396 | + "📍 [Pinterest Video Resim İndirici Bot](t.me/A101KatalogBot)\n" 397 | + "📍 [Arşiv Çıkarıcı Bot](t.me/ExtractorRobot)\n" 398 | + "📍 [Vimeo Video İndirici Bot](t.me/vimeo_robot)\n" 399 | + "📍 [Tureng Bot](t.me/TurengRobot)\n" 400 | + "📍 [TDK Bot](t.me/TDK_ROBOT)\n" 401 | + "📍 [Müzik Arama Bot](t.me/muzikaramabot)\n" 402 | + "📍 [ÖSYM Bot](t.me/OSYMRobot)\n" 403 | + "📍 [Youtube Playlist İndirici Bot](t.me/PlaylistIndirRobot)\n" 404 | + "📍 [Drive Upload Bot](t.me/driveyuklebot)\n" 405 | + "📍 [GoFile Upload Bot](t.me/GofileRobot)\n" 406 | + "📍 [Bim Aktuel Ürünler Bot](t.me/BimAktuelBot)\n", 407 | buttons=markup, 408 | link_preview=False, 409 | ) 410 | 411 | 412 | @bot.on(events.CallbackQuery(pattern=b"ana")) 413 | async def ana(event): 414 | markup = bot.build_reply_markup( 415 | [ 416 | [ 417 | Button.url(text="📍 Kanal Linki", url="t.me/KanalLinkleri"), 418 | Button.url(text="👤 Yapımcı", url="t.me/ADMIN"), 419 | ], 420 | [ 421 | Button.url( 422 | text="🔗 GitHub Repo", 423 | url="https://github.com/muhammedfurkan/pinterest_downloader_telegram", 424 | ) 425 | ], 426 | [Button.inline(text="🤖 Diğer Botlar", data="digerbotlar")], 427 | ] 428 | ) 429 | await event.edit(msg, buttons=markup, link_preview=False) 430 | 431 | 432 | async def run_command(command: List[str]): 433 | process = await asyncio.create_subprocess_exec( 434 | *command, 435 | # stdout must a pipe to be accessible as process.stdout 436 | stdout=asyncio.subprocess.PIPE, 437 | stderr=asyncio.subprocess.PIPE, 438 | ) 439 | # Wait for the subprocess to finish 440 | stdout, stderr = await process.communicate() 441 | e_response: str = stderr.decode().strip() 442 | t_response: str = stdout.decode().strip() 443 | print(e_response) 444 | print(t_response) 445 | return t_response, e_response 446 | 447 | 448 | async def take_screen_shot(video_file, output_directory, ttl): 449 | # https://stackoverflow.com/a/13891070/4723940 450 | out_put_file_name = output_directory + "/" + str(time.time()) + ".jpg" 451 | file_genertor_command = [ 452 | "ffmpeg", 453 | "-ss", 454 | str(ttl), 455 | "-i", 456 | video_file, 457 | "-vframes", 458 | "1", 459 | out_put_file_name, 460 | ] 461 | # width = "90" 462 | t_response, e_response = await run_command(file_genertor_command) 463 | if os.path.lexists(out_put_file_name): 464 | return out_put_file_name 465 | logger.info(e_response) 466 | logger.info(t_response) 467 | return None 468 | 469 | 470 | def humanbytes(size): 471 | """Input size in bytes, 472 | outputs in a human readable format""" 473 | # https://stackoverflow.com/a/49361727/4723940 474 | if not size: 475 | return "" 476 | # 2 ** 10 = 1024 477 | power = 2**10 478 | raised_to_pow = 0 479 | dict_power_n = {0: "", 1: "Ki", 2: "Mi", 3: "Gi", 4: "Ti"} 480 | while size > power: 481 | size /= power 482 | raised_to_pow += 1 483 | return str(round(size, 2)) + " " + dict_power_n[raised_to_pow] + "B" 484 | 485 | 486 | def time_formatter(seconds: int) -> str: 487 | """Inputs time in seconds, to get beautified time, 488 | as string""" 489 | result = "" 490 | v_m = 0 491 | remainder = seconds 492 | r_ange_s = {"days": 24 * 60 * 60, "hours": 60**2, "minutes": 60, "seconds": 1} 493 | for age, divisor in r_ange_s.items(): 494 | v_m, remainder = divmod(remainder, divisor) 495 | v_m = int(v_m) 496 | if v_m != 0: 497 | result += f" {v_m} {age} " 498 | return result 499 | 500 | 501 | async def progress(current, total, event, start, type_of_ps): 502 | """Generic progress_callback for both 503 | upload.py and download.py""" 504 | now = time.time() 505 | diff = now - start 506 | if round(diff % 10.00) == 0 or current == total: 507 | percentage = current * 100 / total 508 | elapsed_time = round(diff) 509 | if elapsed_time == 0: 510 | return 511 | speed = current / diff 512 | time_to_completion = round((total - current) / speed) 513 | estimated_total_time = elapsed_time + time_to_completion 514 | progress_str = "[{0}{1}]\nPercent: {2}%\n".format( 515 | "".join(["█" for _ in range(math.floor(percentage / 5))]), 516 | "".join(["░" for _ in range(20 - math.floor(percentage / 5))]), 517 | round(percentage, 2), 518 | ) 519 | tmp = progress_str + "{0} of {1}\nETA: {2}".format( 520 | humanbytes(current), humanbytes(total), time_formatter(estimated_total_time) 521 | ) 522 | await event.edit("{}\n {}".format(type_of_ps, tmp)) 523 | 524 | 525 | bot.start() 526 | bot.run_until_disconnected() 527 | -------------------------------------------------------------------------------- /pin.py: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env python3 2 | # -*- coding: utf-8 -*- 3 | 4 | __author__ = "Lim Kok Hole" 5 | __copyright__ = "Copyright 2020" 6 | __credits__ = [ 7 | "Inspired by https://github.com/SevenLines/pinterest-board-downloader", 8 | "S/O", 9 | ] 10 | __license__ = "MIT" 11 | # Version increase if the output file/dir naming incompatible with existing 12 | # , which might re-download for some files of previous version because of dir/filename not match 13 | # Or log files structure changed reference. 14 | __version__ = 1.9 15 | __maintainer__ = "Lim Kok Hole" 16 | __email__ = "limkokhole@gmail.com" 17 | __status__ = "Production" 18 | 19 | # Note: Support python 3 but not python 2 20 | 21 | import os 22 | import platform 23 | import sys 24 | import traceback 25 | from pathlib import PurePath 26 | 27 | plat = platform.system().lower() 28 | if ("window" in plat) or plat.startswith("win"): 29 | # Darwin should treat as Linux 30 | IS_WIN = True 31 | # https://stackoverflow.com/questions/16755142/how-to-make-win32-console-recognize-ansi-vt100-escape-sequences 32 | # Even though ANSI escape sequence can be enable via `REG ADD HKCU\CONSOLE /f /v VirtualTerminalLevel /t REG_DWORD /d 1` 33 | # But since this is not big deal to hide logo testing for this project, so no need. 34 | ANSI_CLEAR = "\r" # Default cmd settings not support ANSI sequence 35 | ANSI_END_COLOR = "" 36 | ANSI_BLUE = "" 37 | else: 38 | IS_WIN = False 39 | ANSI_CLEAR = "\r\x1b[0m\x1b[K" 40 | ANSI_END_COLOR = "\x1b[0m\x1b[K" 41 | ANSI_BLUE = "\x1b[1;44m" 42 | try: 43 | import readline # to make input() edit-able by LEFT key 44 | except ModuleNotFoundError: 45 | if not IS_WIN: # pyreadline for Windows? overkill 46 | print("Please install readline module.") 47 | raise 48 | 49 | # IS_WIN = True # TESTING PURPOSE 50 | 51 | import colorama 52 | from colorama import Fore 53 | from termcolor import cprint 54 | 55 | colorama.init() # Windows need this 56 | 57 | HIGHER_GREEN = Fore.LIGHTGREEN_EX 58 | HIGHER_RED = Fore.LIGHTRED_EX 59 | HIGHER_YELLOW = Fore.LIGHTYELLOW_EX 60 | BOLD_ONLY = ["bold"] 61 | 62 | 63 | def quit(msgs, exit=True): 64 | if not isinstance(msgs, list): 65 | msgs = [msgs] 66 | if exit: 67 | msgs[-1] += " Abort." 68 | for msg in msgs: 69 | if msg == "\n": 70 | print("\n") 71 | else: 72 | cprint("".join([HIGHER_RED, "%s" % (msg)]), attrs=BOLD_ONLY, end="\n") 73 | 74 | 75 | try: 76 | x_tag = "✖" 77 | done_tag = "✔" 78 | plus_tag = "➕" 79 | pinterest_logo = "🅿️" 80 | # Test Windows unicode capability by printing logo, throws if not: 81 | print(pinterest_logo, end=ANSI_CLEAR, flush=True) 82 | except Exception: # UnicodeEncodeError: # Will error later if not do this, so better quit() early 83 | cprint( 84 | "".join( 85 | [ 86 | HIGHER_RED, 87 | "%s" 88 | % ("Please run `export PYTHONIOENCODING=utf-8;` to support Unicode."), 89 | ] 90 | ), 91 | attrs=BOLD_ONLY, 92 | end="\n", 93 | ) 94 | quit("") 95 | sys.exit(1) 96 | 97 | import argparse 98 | import json 99 | import time 100 | import urllib 101 | from collections import OrderedDict 102 | from concurrent.futures import ThreadPoolExecutor, as_completed 103 | from datetime import datetime, timedelta 104 | from http.cookies import SimpleCookie 105 | from urllib.parse import unquote 106 | 107 | import lxml.html as html 108 | import requests 109 | from fake_useragent import UserAgent 110 | from requests.cookies import cookiejar_from_dict 111 | 112 | ua = UserAgent() 113 | # RIP UA, https://groups.google.com/a/chromium.org/forum/m/#!msg/blink-dev/-2JIRNMWJ7s/yHe4tQNLCgAJ 114 | # UA = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/83.0.0.0 Safari/537.36' 115 | UA = ua.chrome 116 | 117 | # MAX_PATH 260 need exclude 1 terminating null character 118 | # if prefix \\?\ + abspath to use Windows extented-length(i.e. in my case, individual filename/dir can use full 259, no more 259 is fit full path), then the MAX_PATH is 259 - 4 = 255 119 | # [DEPRECATED] no more 259 since always -el now AND Windows 259 - \\?\ == 255 normal Linux 120 | WIN_MAX_PATH = 255 # MAX_PATH 260 need exclude 1 terminating null character 121 | 122 | # https://stackoverflow.com/a/34325723/1074998 123 | def printProgressBar( 124 | iteration, total, prefix="", suffix="", decimals=1, length=100, fill="#" 125 | ): 126 | """ 127 | Call in a loop to create terminal progress bar 128 | @params: 129 | iteration - Required : current iteration (Int) 130 | total - Required : total iterations (Int) 131 | prefix - Optional : prefix string (Str) 132 | suffix - Optional : suffix string (Str) 133 | decimals - Optional : positive number of decimals in percent complete (Int) 134 | length - Optional : character length of bar (Int) 135 | fill - Optional : bar fill character (Str) 136 | """ 137 | if total != 0: # ZeroDivisionError: float division by zero 138 | percent = ("{0:." + str(decimals) + "f}").format( 139 | 100 * (iteration / float(total)) 140 | ) 141 | filledLength = int(length * iteration // total) 142 | bar = fill * filledLength + "-" * (length - filledLength) 143 | # sys.stdout.write('\r{} |{}| {}%% {}'.format(prefix, bar, percent, suffix)) 144 | cprint( 145 | "".join( 146 | [ 147 | HIGHER_GREEN, 148 | "%s" % ("\r{} |{}| {}% {}".format(prefix, bar, percent, suffix)), 149 | ] 150 | ), 151 | attrs=BOLD_ONLY, 152 | end="", 153 | ) 154 | sys.stdout.flush() 155 | 156 | 157 | # imgs: 158 | # source_url=%2Fmistafisha%2Fanimals%2F&data=%7B%22options%22%3A%7B%22isPrefetch%22%3Afalse%2C%22board_id%22%3A 159 | #%2253761857990790784%22%2C%22board_url%22%3A%22%2Fmistafisha%2Fanimals%2F%22%2C%22field_set_key%22%3A%22react_grid_pin 160 | #%22%2C%22filter_section_pins%22%3Atrue%2C%22sort%22%3A%22default%22%2C%22layout%22%3A%22default%22%2C%22page_size 161 | #%22%3A25%2C%22redux_normalize_feed%22%3Atrue%7D%2C%22context%22%3A%7B%7D%7D&_=1592340515565 162 | # unquote: 163 | #'source_url=/mistafisha/animals/&data={"options":{"isPrefetch":false,"board_id":"53761857990790784" 164 | # ,"board_url":"/mistafisha/animals/","field_set_key":"react_grid_pin","filter_section_pins":true,"sort":"default" 165 | # ,"layout":"default","page_size":25,"redux_normalize_feed":true},"context":{}}&_=1592340515565 166 | VER = (None, "c643827", "4c8c36f") 167 | 168 | 169 | def get_session(ver_i, proxies, cookie_file): 170 | s = requests.Session() 171 | s.proxies = proxies 172 | 173 | try: 174 | with open(cookie_file) as f: 175 | rawdata = f.read() 176 | 177 | my_cookie = SimpleCookie() 178 | my_cookie.load(rawdata) 179 | cookies = {key: morsel.value for key, morsel in my_cookie.items()} 180 | 181 | except: 182 | cookies = None 183 | 184 | try: 185 | s.cookies = cookiejar_from_dict(cookies) 186 | except: 187 | pass 188 | 189 | if ver_i == 0: 190 | s.headers = { 191 | #'Host': 'www.pinterest.com', 192 | "User-Agent": UA, 193 | "Accept": "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,*/*;q=0.8", 194 | "Accept-Language": "en-US,en;q=0.5", 195 | "DNT": "1", 196 | "Upgrade-Insecure-Requests": "1", 197 | "Connection": "keep-alive", 198 | } 199 | elif ver_i == 3: 200 | s.headers = { 201 | #'Host': 'www.pinterest.com', #Image can be https://i.pinimg.com, so let it auto or else fail 202 | "User-Agent": UA, 203 | "Accept": "image/webp,*/*", 204 | "Accept-Language": "en-US,en;q=0.5", 205 | "Referer": "https://www.pinterest.com/", 206 | "Connection": "keep-alive", 207 | "Pragma": "no-cache", 208 | "Cache-Control": "no-cache", 209 | "TE": "Trailers", 210 | } 211 | 212 | elif ver_i == 4: 213 | # 'https://v.pinimg.com/videos/mc/hls/8a/99/7d/8a997df97cab576795be2a4490457ea3.m3u8' 214 | s.headers = { 215 | "User-Agent": UA, 216 | "Accept": "*/*", 217 | "Accept-Language": "en-US,en;q=0.5", 218 | "Origin": "https://www.pinterest.com", 219 | "DNT": "1", 220 | "Referer": "https://www.pinterest.com/", 221 | "Connection": "keep-alive", 222 | "Pragma": "no-cache", 223 | "Cache-Control": "no-cache", 224 | } 225 | 226 | else: # == 1, 2 227 | s.headers = { 228 | #'Host': 'www.pinterest.com', 229 | "User-Agent": UA, 230 | "Accept": "application/json, text/javascript, */*, q=0.01", 231 | "Accept-Language": "en-US,en;q=0.5", 232 | "Accept-Encoding": "gzip, deflate, br", 233 | "Referer": "https://www.pinterest.com/", 234 | "X-Requested-With": "XMLHttpRequest", 235 | "X-APP-VERSION": VER[ver_i], 236 | "X-Pinterest-AppState": "active", 237 | #'X-Pinterest-Source-Url': '/ackohole/a/sec2/', #[todo:0] 238 | "X-Pinterest-PWS-Handler": "www/[username]/[slug]/[section_slug].js", 239 | "DNT": "1", 240 | "Connection": "keep-alive", 241 | "Sec-Fetch-Dest": "empty", 242 | "Sec-Fetch-Mode": "cors", 243 | "Sec-Fetch-Site": "same-origin", 244 | "TE": "Trailers", 245 | } 246 | 247 | return s 248 | 249 | 250 | def dj(j, tag=None): 251 | if tag: 252 | print("### [" + tag + "] ###") 253 | print(json.dumps(j, sort_keys=True, indent=4)) 254 | 255 | 256 | def get_pin_info( 257 | pin_id, 258 | arg_timestamp_log, 259 | url_path, 260 | arg_force_update, 261 | arg_img_only, 262 | arg_v_only, 263 | arg_dir, 264 | arg_cut, 265 | arg_el, 266 | fs_f_max, 267 | IMG_SESSION, 268 | V_SESSION, 269 | PIN_SESSION, 270 | proxies, 271 | cookie_file, 272 | get_data_only, 273 | ): 274 | 275 | scripts = [] 276 | is_success = False 277 | image = None 278 | for t in (15, 30, 40, 50, 60, 120, 250, 500): 279 | # print('https://www.pinterest.com/pin/{}/'.format(pin_id)) 280 | # comment the next block to add video data 281 | if is_success: 282 | break 283 | 284 | try: 285 | with open(cookie_file) as f: 286 | rawdata = f.read() 287 | my_cookie = SimpleCookie() 288 | my_cookie.load(rawdata) 289 | cookies = {key: morsel.value for key, morsel in my_cookie.items()} 290 | cookies = cookiejar_from_dict(cookies) 291 | except: 292 | cookies = None 293 | 294 | try: 295 | r = PIN_SESSION.get( 296 | "https://www.pinterest.com/pin/{}/".format(pin_id), 297 | timeout=(t, t), 298 | cookies=cookies, 299 | ) 300 | except ( 301 | requests.exceptions.ReadTimeout, 302 | requests.exceptions.ConnectionError, 303 | ) as e: 304 | # print('[E1][pin] Failed. Retry after 5 seconds...') 305 | time.sleep(5) 306 | PIN_SESSION = get_session(0, proxies, cookies) 307 | continue 308 | 309 | root = html.fromstring(r.content) 310 | # print(r.content) 311 | try: 312 | # tag = root.xpath("//script[@id='initial-state']")[0] 313 | scripts = root.xpath("//script/text()") 314 | except IndexError: # list index out of range 315 | time.sleep(5) 316 | PIN_SESSION = get_session(0, proxies, cookies) 317 | continue 318 | 319 | indexErr = False 320 | for script in scripts: 321 | # if 'imageSpec_orig' in script: 322 | # print("script:", script) 323 | try: 324 | data = json.loads(script) 325 | try: 326 | _image_url = data["response"]["data"]["v3GetPinQuery"]["data"][ 327 | "imageSpec_orig" 328 | ]["url"] 329 | print("found v3GetPinQuery:", _image_url) 330 | _id = "an_id" 331 | _split = _image_url.split("/") 332 | if len(_split) > 0: 333 | _split = _split[-1].split(".") 334 | if len(_split) > 0: 335 | _id = _split[0] 336 | if image is None: 337 | image = {} 338 | image["id"] = _id 339 | image["images"] = {"orig": {"url": _image_url}} 340 | is_success = True 341 | # try: 342 | # _video_urls = data['response']['data']['v3GetPinQuery']['data']['videos']['videoUrls'] 343 | # print("found v3GetPinQuery video_list:", _video_urls) 344 | # image["videos"] = { 345 | # "video_list": _video_urls 346 | # } 347 | # break 348 | # except (TypeError, KeyError): 349 | # pass 350 | break 351 | except (TypeError, KeyError): 352 | if "v3GetPinQuery" in script: 353 | print("error in exracting info from v3GetPinQuery:", script) 354 | if "props" in data: 355 | pins = data["props"]["initialReduxState"]["pins"] 356 | try: 357 | image = pins[list(pins.keys())[0]] 358 | is_success = True 359 | break 360 | except IndexError: # Sometime `"pins":{}``, need retry 361 | indexErr = True 362 | except json.decoder.JSONDecodeError: 363 | pass 364 | 365 | if not is_success: 366 | if indexErr: 367 | print("\n[Retry] Getting error pin id: " + repr(pin_id) + "...\n\n") 368 | continue 369 | 370 | if not is_success: 371 | if not get_data_only: # get data error show later 372 | print("### HTML START ###") 373 | print(r.content) 374 | print( 375 | "### HTML END ###\n\nPlease report this issue at https://github.com/limkokhole/pinterest-downloader/issues , thanks.\n\n" 376 | ) 377 | cprint( 378 | "".join( 379 | [ 380 | HIGHER_RED, 381 | "%s %s%s" 382 | % ("\n[" + x_tag + "] Get this pin id failed :", pin_id, "\n"), 383 | ] 384 | ), 385 | attrs=BOLD_ONLY, 386 | end="", 387 | ) 388 | return 389 | 390 | if get_data_only: 391 | return image 392 | try: 393 | # This is the User Responsibilities to ensure -d is not too long 394 | # Program can't automate for you, imagine -d already 2045th bytes in full path 395 | # , is unwise if program make dir in parent directory. 396 | create_dir(arg_dir) 397 | write_log( 398 | arg_timestamp_log, 399 | url_path, 400 | None, 401 | arg_img_only, 402 | arg_v_only, 403 | arg_dir, 404 | [image], 405 | image["id"], 406 | arg_cut, 407 | False, 408 | ) 409 | print( 410 | "[i] Download Pin id: " 411 | + str(image["id"]) 412 | + " into directory: " 413 | + arg_dir.rstrip(os.sep) 414 | + os.sep 415 | ) 416 | printProgressBar( 417 | 0, 1, prefix="[...] Downloading:", suffix="Complete", length=50 418 | ) 419 | download_img( 420 | image, 421 | arg_dir, 422 | arg_force_update, 423 | arg_img_only, 424 | arg_v_only, 425 | IMG_SESSION, 426 | V_SESSION, 427 | PIN_SESSION, 428 | proxies, 429 | cookie_file, 430 | arg_cut, 431 | arg_el, 432 | fs_f_max, 433 | ) 434 | printProgressBar( 435 | 1, 436 | 1, 437 | prefix="[" + done_tag + "] Downloaded:", 438 | suffix="Complete ", 439 | length=50, 440 | ) 441 | except KeyError: 442 | return quit(traceback.format_exc()) 443 | print() 444 | 445 | 446 | def get_board_info( 447 | board_or_sec_path, 448 | exclude_section, 449 | section, 450 | board_path, 451 | proxies, 452 | cookie_file, 453 | retry=False, 454 | ): 455 | try: 456 | with open(cookie_file) as f: 457 | rawdata = f.read() 458 | my_cookie = SimpleCookie() 459 | my_cookie.load(rawdata) 460 | cookies = {key: morsel.value for key, morsel in my_cookie.items()} 461 | cookies = cookiejar_from_dict(cookies) 462 | except: 463 | cookies = None 464 | 465 | s = get_session(0, proxies, cookies) 466 | # s.cookies = cookies 467 | 468 | # dj(data, 'board main') 469 | boards = {} 470 | sections = [] 471 | 472 | is_success = False 473 | # print('https://www.pinterest.com/{}/'.format(board_or_sec_path)) 474 | for t in (15, 30, 40, 50, 60): 475 | try: 476 | with open(cookie_file) as f: 477 | rawdata = f.read() 478 | my_cookie = SimpleCookie() 479 | my_cookie.load(rawdata) 480 | cookies = {key: morsel.value for key, morsel in my_cookie.items()} 481 | cookies = cookiejar_from_dict(cookies) 482 | except: 483 | cookies = None 484 | try: 485 | r = s.get( 486 | "https://www.pinterest.com/{}/".format(board_or_sec_path), 487 | timeout=(t, t), 488 | cookies=cookies, 489 | ) 490 | is_success = True 491 | break 492 | except ( 493 | requests.exceptions.ReadTimeout, 494 | requests.exceptions.ConnectionError, 495 | ) as e: 496 | # print('[E1][pin] Failed. Retry after 5 seconds...') 497 | time.sleep(5) 498 | s = get_session(0, proxies, cookies) 499 | 500 | if is_success: 501 | root = html.fromstring(r.content) 502 | # print(str(r.content)) 503 | # tag = root.xpath("//script[@id='initial-state']")[0] 504 | scripts = root.xpath("//script/text()") 505 | board_d = {} 506 | for script in scripts: 507 | try: 508 | data = json.loads(script) 509 | if "props" in data: 510 | # dj(data) 511 | board_d = data["props"]["initialReduxState"]["boards"] 512 | # dj(board_d) 513 | board_sec_d = data["props"]["initialReduxState"]["boardsections"] 514 | # dj(board_sec_d) 515 | is_success = True 516 | break 517 | except json.decoder.JSONDecodeError: 518 | is_success = False 519 | 520 | if not is_success: 521 | cprint( 522 | "".join( 523 | [ 524 | HIGHER_RED, 525 | "%s %s%s" 526 | % ( 527 | "\n[" + x_tag + "] Get this board/section failed :", 528 | board_or_sec_path, 529 | "\n", 530 | ), 531 | ] 532 | ), 533 | attrs=BOLD_ONLY, 534 | end="", 535 | ) 536 | if section: 537 | return boards 538 | else: 539 | return boards, sections 540 | 541 | board_dk = list(board_d.keys()) 542 | if section: 543 | path_to_compare = board_path 544 | else: 545 | path_to_compare = board_or_sec_path 546 | for k in board_dk: 547 | if unquote(board_d[k].get("url", "").strip("/")) == unquote(path_to_compare): 548 | b_dk = board_d[k] 549 | board_d_map = {} 550 | board_d_map["url"] = b_dk.get("url", "") 551 | # board_d_map['modified_at'] = b_dk.get('board_order_modified_at', '') 552 | # print('Board modified: ' + repr(board_d_map['modified_at'])) 553 | # dj(b_dk, 'board d') # [todo:0] board_order_modified_at help decide re-scrape? 554 | board_d_map["id"] = b_dk.get("id", "") 555 | board_d_map["name"] = b_dk.get("name", "") 556 | board_d_map["section_count"] = b_dk.get("section_count", "") 557 | boards["board"] = board_d_map 558 | break 559 | 560 | if not exclude_section: 561 | board_sec_dk = list(board_sec_d.keys()) 562 | for k in board_sec_dk: 563 | b_dk = board_sec_d[k] 564 | sec_d_map = {} 565 | # dj(b_dk) 566 | sec_slug = unquote(b_dk.get("slug", "")) 567 | if section and (sec_slug != section): 568 | continue 569 | 570 | # sec_d_map['modified_at'] = b_dk.get('board_order_modified_at', '') 571 | # print('Section modified: ' + repr(sec_d_map['modified_at'])) 572 | 573 | sec_d_map["slug"] = sec_slug 574 | sec_d_map["id"] = b_dk.get("id", "") 575 | sec_d_map["title"] = b_dk.get("title", "") 576 | 577 | if section: 578 | boards["section"] = sec_d_map 579 | else: 580 | sections.append(sec_d_map) 581 | 582 | # dj(board_d, 'board raw') 583 | # dj(boards, 'boarded') 584 | # dj(board_sec_d, 'sect raw') 585 | # dj(sections, 'sectioned') 586 | 587 | if section: 588 | return boards 589 | else: 590 | return boards, sections 591 | 592 | 593 | def fetch_boards(uname, proxies, cookie_file): 594 | 595 | try: 596 | with open(cookie_file) as f: 597 | rawdata = f.read() 598 | my_cookie = SimpleCookie() 599 | my_cookie.load(rawdata) 600 | cookies = {key: morsel.value for key, morsel in my_cookie.items()} 601 | cookies = cookiejar_from_dict(cookies) 602 | except: 603 | cookies = None 604 | 605 | s = get_session(1, proxies, cookies) 606 | # s.cookies = cookies 607 | 608 | bookmark = None 609 | boards = [] 610 | 611 | # print('Username: ' + uname) 612 | 613 | # if url != '/mistafisha/animals/': 614 | # continue 615 | 616 | while bookmark != "-end-": 617 | 618 | options = { 619 | "isPrefetch": "false", 620 | "privacy_filter": "all", 621 | "sort": "alphabetical", 622 | "field_set_key": "profile_grid_item", 623 | "username": uname, 624 | "page_size": 25, 625 | "group_by": "visibility", 626 | "include_archived": "true", 627 | "redux_normalize_feed": "true", 628 | } 629 | 630 | if bookmark: 631 | options.update( 632 | { 633 | "bookmarks": [bookmark], 634 | } 635 | ) 636 | 637 | b_len = len(boards) - 1 638 | if b_len < 0: 639 | b_len = 0 640 | # Got end='' here to make flush work 641 | print("\r[...] Getting all boards [ " + str(b_len) + " / ? ]", end="") 642 | sys.stdout.flush() 643 | 644 | post_d = ( 645 | urllib.parse.urlencode( 646 | { 647 | "source_url": uname, 648 | "data": {"options": options, "context": {}}, 649 | "_": int(time.time() * 1000), 650 | } 651 | ) 652 | .replace("+", "") 653 | .replace("%27", "%22") 654 | .replace("%3A%22true%22", "%3Atrue") 655 | .replace("%3A%22false%22", "%3Afalse") 656 | ) 657 | 658 | # print('[boards] called headers: ' + repr(s.headers)) 659 | 660 | is_success = False 661 | for t in (15, 30, 40, 50, 60): 662 | try: 663 | with open(cookie_file) as f: 664 | rawdata = f.read() 665 | my_cookie = SimpleCookie() 666 | my_cookie.load(rawdata) 667 | cookies = {key: morsel.value for key, morsel in my_cookie.items()} 668 | cookies = cookiejar_from_dict(cookies) 669 | except: 670 | cookies = None 671 | try: 672 | r = s.get( 673 | "https://www.pinterest.com/resource/BoardsResource/get/", 674 | params=post_d, 675 | timeout=(t, t), 676 | cookies=cookies, 677 | ) 678 | is_success = True 679 | break 680 | except ( 681 | requests.exceptions.ReadTimeout, 682 | requests.exceptions.ConnectionError, 683 | ) as e: 684 | time.sleep(5) 685 | s = get_session(1, proxies, cookies) 686 | # s.cookies = cookies 687 | if not is_success: 688 | cprint( 689 | "".join( 690 | [ 691 | HIGHER_RED, 692 | "%s %s%s" 693 | % ("\n[" + x_tag + "] Get this username failed :", uname, "\n"), 694 | ] 695 | ), 696 | attrs=BOLD_ONLY, 697 | end="", 698 | ) 699 | break 700 | # print('[Boards url]: ' + r.url) 701 | data = r.json() 702 | # print('res data: ' + repr(data)) 703 | try: 704 | boards.extend(data["resource_response"]["data"]) 705 | bookmark = data["resource"]["options"]["bookmarks"][0] 706 | except TypeError: # Normal if invalid username 707 | cprint( 708 | "".join( 709 | [ 710 | HIGHER_RED, 711 | "%s" % ("\n[" + x_tag + "] Possible invalid username.\n\n"), 712 | ] 713 | ), 714 | attrs=BOLD_ONLY, 715 | end="", 716 | ) 717 | break 718 | 719 | b_len = len(boards) 720 | print( 721 | "[" + plus_tag + "] Found {} Board{}.".format(b_len, "s" if b_len > 1 else "") 722 | ) 723 | 724 | return boards 725 | 726 | 727 | def sanitize(path): 728 | # trim multiple whitespaces # ".." is the responsibilities of get max path 729 | 730 | # Use PurePath instead of os.path.basename https://stackoverflow.com/a/31273488/1074998 , e.g.: 731 | # >>> PurePath( '/home/iced/..'.replace('..', '') ).parts[-1] # get 'iced' 732 | # >>> os.path.basename('/home/iced/..'.replace('..', '')) # get empty '' 733 | # Ensure .replace('..', '') is last replacement before .strip() AND not replace back to dot '.' 734 | # https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file 735 | 736 | # [todo:0] Handle case sensitive and reserved file names in Windows like Chrome "Save page as" do 737 | # For portable to move filename between linux <-> win, should use IS_WIN only (but still can't care if case sensitive filename move to case in-sensitive filesystem). 738 | # IS_WIN: 739 | path = ( 740 | path.replace("<", "") 741 | .replace(">", "") 742 | .replace('"', "'") 743 | .replace("?", "") 744 | .replace("*", "") 745 | .replace("/", "_") 746 | .replace("\\", "_") 747 | .replace("|", "_") 748 | .replace(":", "_") 749 | .replace(".", "_") 750 | .strip() 751 | ) 752 | # Linux: 753 | # path.replace('/', '|').replace(':', '_').replace('.', '_').strip() 754 | 755 | # Put this after replace patterns above bcoz 2 distinct spaces may merge together become multiple-spaces, e.g. after ' ? ' replace to ' ' 756 | # If using .replace(' ', ' ') will only replace once round, e.g. ' ' become 757 | path = " ".join(path.split()) 758 | 759 | p = PurePath(path) 760 | 761 | if p.parts: 762 | return p.parts[-1] 763 | else: 764 | return "" 765 | 766 | 767 | # The filesystem limits is 255(normal) , 242(docker) or 143((eCryptfs) bytes 768 | # So can't blindly [:] slice without encode first (which most downloaders do the wrong way) 769 | # And need decode back after slice 770 | # And to ensure mix sequence byte in UTF-8 and work 771 | # , e.g. abc𪍑𪍑𪍑 772 | # , need try/catch to skip next full bytes of "1" byte ascii" OR "3 bytes 我" or "4 bytes 𪍑" 773 | # ... by looping 4 bytes(UTF-8 max) from right to left 774 | # HTML5 forbid UTF-16, UTF-16/32 not encourage to use in web page 775 | # So only encode/decode in utf-8 776 | # https://stackoverflow.com/questions/13132976 777 | # https://stackoverflow.com/questions/50385123 778 | # https://stackoverflow.com/questions/11820006 779 | 780 | 781 | def get_max_path(arg_cut, fs_f_max, fpart_excluded_immutable, immutable): 782 | # print('before f: ' + fpart_excluded_immutable) 783 | if arg_cut >= 0: 784 | fpart_excluded_immutable = fpart_excluded_immutable[:arg_cut] 785 | if immutable: 786 | # immutable shouldn't limit to 1 byte(may be change next time or next project), so need encode also 787 | immutable_len = len(immutable.encode("utf-8")) 788 | else: 789 | immutable_len = 0 790 | 791 | space_remains = fs_f_max - immutable_len 792 | if space_remains < 1: 793 | return "" # No more spaces to trim(bcoz directories name too long), so only shows PinID.jpg 794 | 795 | # range([start], stop[, step]) 796 | # -1 step * 4 loop = -4, means looping 4 bytes(UTF-8 max) from right to left 797 | for gostan in range(space_remains, space_remains - 4, -1): 798 | try: 799 | fpart_excluded_immutable = fpart_excluded_immutable.encode("utf-8")[ 800 | :gostan 801 | ].decode("utf-8") 802 | break # No break still same result, but waste 803 | except UnicodeDecodeError: 804 | pass # print('Calm down, this is normal: ' + str(gostan) + ' f: ' + fpart_excluded_immutable) 805 | # print('after f: ' + fpart_excluded_immutable) 806 | # Last safety resort, in case any bug: 807 | fpart_excluded_immutable_base = sanitize(fpart_excluded_immutable) 808 | if ( 809 | fpart_excluded_immutable_base != fpart_excluded_immutable.strip() 810 | ): # Original need strip bcoz it might cut in space 811 | cprint( 812 | "".join( 813 | [ 814 | HIGHER_RED, 815 | "\n[! A] Please report to me which Link/scenario it print this log.\ 816 | Thanks:\n{} # {} # {} # {} # {}\n\n".format( 817 | arg_cut, 818 | fs_f_max, 819 | repr(fpart_excluded_immutable), 820 | repr(fpart_excluded_immutable_base), 821 | immutable, 822 | ), 823 | ] 824 | ), 825 | attrs=BOLD_ONLY, 826 | end="", 827 | ) 828 | return fpart_excluded_immutable_base 829 | 830 | 831 | def get_output_file_path(url, arg_cut, fs_f_max, image_id, human_fname, save_dir): 832 | 833 | pin_id_str = sanitize(str(image_id)) 834 | basename = os.path.basename( 835 | url 836 | ) # basename not enough to handle '..', but will sanitize later 837 | # throws ValueError is fine bcoz it's not normal 838 | 839 | # Test case need consider what if multiple dots in basename 840 | # human_fname_unused = '.'.join(basename.split('.')[:-1]) # this project already has human_fname, but other project can use this 841 | ext = basename.split(".")[-1] 842 | 843 | ext = sanitize(ext) 844 | if ( 845 | not ext.strip() 846 | ): # Ensure add hard-coded extension to avoid empty id and leave single dot in next step 847 | ext = "unknown" 848 | # Currently not possible ..jpg here bcoz above must single '.' do not throws 849 | # , even replace ..jpg to _.jpg is fine, just can't preview in explorer only 850 | immutable = sanitize(pin_id_str + "." + ext) 851 | 852 | fpart_excluded_ext_before = sanitize(human_fname) 853 | # print( 'get output f:' + repr(fpart_excluded_ext_before) ) 854 | 855 | # [DEPRECATED, now always use extended length which apply to single component instead of full path] 856 | # if IS_WIN: # Windows MAX_PATH 260 is full path not single component (https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file , https://docs.microsoft.com/en-us/windows/win32/fileio/naming-a-file#maximum-path-length-limitation) 857 | # immutable_file_path = os.path.abspath( os.path.join(save_dir, '{}'.format( immutable)) ) 858 | # fpart_excluded_ext = get_max_path(arg_cut, fs_f_max, fpart_excluded_ext_before 859 | # , immutable_file_path) 860 | # else: 861 | fpart_excluded_ext = get_max_path( 862 | arg_cut, fs_f_max, fpart_excluded_ext_before, immutable 863 | ) 864 | if fpart_excluded_ext: 865 | if fpart_excluded_ext_before == fpart_excluded_ext: # means not truncat 866 | # Prevent confuse when trailing period become '..'ext and looks like '...' 867 | if fpart_excluded_ext[-1] == ".": 868 | fpart_excluded_ext = fpart_excluded_ext[:-1] 869 | else: # Truncated 870 | # No need care if two/three/... dots, overkill to trim more and loss information 871 | if fpart_excluded_ext[-1] == ".": 872 | fpart_excluded_ext = fpart_excluded_ext[:-1] 873 | 874 | # if IS_WIN: # [DEPRECATED] Now always use -el 875 | # # Need set ... here, not abspath below which trimmed ... if ... at the end. 876 | # # Also ensures sanitize replace single '.', not '..' which causes number not equal after added ... later 877 | # immutable = sanitize( pin_id_str + '....' + ext ) 878 | # immutable_file_path = os.path.abspath( os.path.join(save_dir, '{}'.format( immutable)) ) 879 | # fpart_excluded_ext = get_max_path(arg_cut, fs_f_max, fpart_excluded_ext_before 880 | # , immutable_file_path) 881 | # else: 882 | fpart_excluded_ext = get_max_path( 883 | arg_cut, fs_f_max, fpart_excluded_ext, "..." + immutable 884 | ) 885 | 886 | fpart_excluded_ext = fpart_excluded_ext + "..." 887 | 888 | # To make final output path consistent with IS_WIN's abspath above, so also do abspath here: 889 | # (Please ensure below PurePath's file_path checking is using abspath if remove abspath here in future) 890 | file_path = os.path.abspath( 891 | os.path.join(save_dir, "{}".format(pin_id_str + fpart_excluded_ext + "." + ext)) 892 | ) 893 | # if '111' in file_path: 894 | # print('last fp: ' + file_path + ' len: ' + str(len(file_path.encode('utf-8')))) 895 | try: 896 | # Note this is possible here if only . while the rest is empty, e.g. './.' 897 | # But better throws and inform me if that abnormal case. 898 | if ( 899 | PurePath(os.path.abspath(save_dir)).parts[:] 900 | != PurePath(file_path).parts[:-1] 901 | ): 902 | cprint( 903 | "".join( 904 | [ 905 | HIGHER_RED, 906 | "\n[! B] Please report to me which Link/scenario it print this log.\ 907 | Thanks: {} # {} # {} # {} # {} \n\n".format( 908 | arg_cut, 909 | fs_f_max, 910 | pin_id_str + fpart_excluded_ext + "." + ext, 911 | save_dir, 912 | file_path, 913 | ), 914 | ] 915 | ), 916 | attrs=BOLD_ONLY, 917 | end="", 918 | ) 919 | file_path = os.path.join( 920 | save_dir, 921 | "{}".format(sanitize(pin_id_str + fpart_excluded_ext + "." + ext)), 922 | ) 923 | if ( 924 | PurePath(os.path.abspath(save_dir)).parts[:] 925 | != PurePath(file_path).parts[:-1] 926 | ): 927 | cprint( 928 | "".join( 929 | [ 930 | HIGHER_RED, 931 | "\n[! C] Please report to me which Link/scenario it print this log.\ 932 | Thanks: {} # {} # {} # {} # {} \n\n".format( 933 | arg_cut, 934 | fs_f_max, 935 | pin_id_str + fpart_excluded_ext + "." + ext, 936 | save_dir, 937 | file_path, 938 | ), 939 | ] 940 | ), 941 | attrs=BOLD_ONLY, 942 | end="", 943 | ) 944 | raise 945 | except IndexError: 946 | cprint( 947 | "".join( 948 | [ 949 | HIGHER_RED, 950 | "\n[! D] Please report to me which Link/scenario it print this log.\ 951 | Thanks: {} # {} # {}\n\n".format( 952 | arg_cut, fs_f_max, pin_id_str + fpart_excluded_ext + "." + ext 953 | ), 954 | ] 955 | ), 956 | attrs=BOLD_ONLY, 957 | end="", 958 | ) 959 | raise 960 | # print('final f: ' + file_path) 961 | return file_path 962 | 963 | 964 | def isVideoExist(image): 965 | # dj(image) 966 | if ("videos" in image) and image["videos"]: # image['videos'] may None 967 | return 1 # Video type 1 in 'V_720P' 968 | elif ( 969 | "story_pin_data" in image 970 | and image["story_pin_data"] 971 | and ("pages" in image["story_pin_data"]) 972 | ): # image['story_pin_data'] may null 973 | pg = image["story_pin_data"]["pages"] 974 | if (len(pg) > 0) and ("blocks" in pg[0]): 975 | blocks = pg[0]["blocks"] 976 | if ( 977 | len(blocks) > 0 978 | and "video" in blocks[0] 979 | and ("video_list" in blocks[0]["video"]) 980 | ): 981 | return 2 # Video type 2 in 'V_EXP7' 982 | return 0 # No video 983 | 984 | 985 | def download_img( 986 | image, 987 | save_dir, 988 | arg_force_update, 989 | arg_img_only, 990 | arg_v_only, 991 | IMG_SESSION, 992 | V_SESSION, 993 | PIN_SESSION, 994 | proxies, 995 | cookie_file, 996 | arg_cut, 997 | arg_el, 998 | fs_f_max, 999 | ): 1000 | 1001 | try: 1002 | # Using threading.Lock() if necessary 1003 | if "id" not in image: 1004 | print("\n\nSkip no id\n\n") 1005 | return 1006 | image_id = image["id"] 1007 | 1008 | human_fname = "" 1009 | if ("grid_title" in image) and image["grid_title"]: 1010 | human_fname = "_" + image["grid_title"] 1011 | # Got case NoneType 1012 | if ( 1013 | ("closeup_unified_description" in image) 1014 | and image["closeup_unified_description"] 1015 | and image["closeup_unified_description"].strip() 1016 | ): 1017 | human_fname = "_".join( 1018 | (human_fname, image["closeup_unified_description"].strip()) 1019 | ) 1020 | elif ( 1021 | ("description" in image) 1022 | and image["description"] 1023 | and image["description"].strip() 1024 | ): 1025 | human_fname = "_".join((human_fname, image["description"].strip())) 1026 | if ("created_at" in image) and image["created_at"]: 1027 | # Don't want ':' become '..' later, so remove ':' early 1028 | img_created_at = image["created_at"].replace(":", "").replace(" +0000", "") 1029 | # Trim `Tue, 01 Sep 2015 011033` to 01Sep2015 to save space in filename 1030 | # , can check log if want details 1031 | img_created_at_l = img_created_at.split(" ") 1032 | if len(img_created_at_l) == 5: 1033 | img_created_at = "".join(img_created_at_l[1:4]) 1034 | human_fname = "_".join([human_fname, img_created_at]) 1035 | # Avoid DD/MM/YYYY truncated when do basename 1036 | # But inside get_output_file_path got sanitize also # So no need do here 1037 | # human_fname = human_fname.replace('/', '|').replace(':', '_') 1038 | 1039 | # print(human_fname) 1040 | 1041 | if not arg_v_only and ("images" in image): 1042 | url = image["images"]["orig"]["url"] 1043 | 1044 | # hn_bk = human_fname # TESTING -el 1045 | # human_fname = human_fname + 'A'*500 # TESTING -el 1046 | file_path = get_output_file_path( 1047 | url, arg_cut, fs_f_max, image_id, human_fname, save_dir 1048 | ) 1049 | # human_fname = hn_bk # TESTING -el 1050 | if arg_el: 1051 | file_path = "\\\\?\\" + os.path.abspath(file_path) 1052 | 1053 | if not os.path.exists(file_path) or arg_force_update: 1054 | # print(IMG_SESSION.headers) 1055 | 1056 | # url = 'https://httpbin.org/get' 1057 | is_ok = False 1058 | for t in (15, 30, 40, 50, 60): 1059 | try: 1060 | with open(cookie_file) as f: 1061 | rawdata = f.read() 1062 | my_cookie = SimpleCookie() 1063 | my_cookie.load(rawdata) 1064 | cookies = { 1065 | key: morsel.value for key, morsel in my_cookie.items() 1066 | } 1067 | cookies = cookiejar_from_dict(cookies) 1068 | except: 1069 | cookies = None 1070 | try: 1071 | r = IMG_SESSION.get( 1072 | url, stream=True, timeout=(t, t), cookies=cookies 1073 | ) 1074 | is_ok = True 1075 | # raise(requests.exceptions.ConnectionError) 1076 | break 1077 | except ( 1078 | requests.exceptions.ReadTimeout, 1079 | requests.exceptions.ConnectionError, 1080 | ) as e: 1081 | # Shouldn't print bcoz quite common 1082 | time.sleep(5) 1083 | IMG_SESSION = get_session(3, proxies, cookies) 1084 | # cprint(''.join([ HIGHER_RED, '{}'.format('\n[' + x_tag + '] Image Timeout (Retry next).\n') ]), attrs=BOLD_ONLY, end='' ) 1085 | 1086 | # print(url + ' ok? ' + str(r.ok)) 1087 | 1088 | # print('https://www.pinterest.com/pin/' + image['id']) 1089 | if ( 1090 | is_ok and r.ok 1091 | ): # not `or`, 1st check is ensure no throws, 2nd check is ensure valid url 1092 | # print(r.text) 1093 | try: 1094 | with open(file_path, "wb") as f: 1095 | for chunk in r: 1096 | f.write(chunk) 1097 | # raise(requests.exceptions.ConnectionError) 1098 | except ( 1099 | requests.exceptions.ReadTimeout, 1100 | requests.exceptions.ConnectionError, 1101 | ) as e: 1102 | is_success = False 1103 | for t in (15, 30, 40, 50, 60): 1104 | time.sleep(5) 1105 | try: 1106 | with open(cookie_file) as f: 1107 | rawdata = f.read() 1108 | my_cookie = SimpleCookie() 1109 | my_cookie.load(rawdata) 1110 | cookies = { 1111 | key: morsel.value 1112 | for key, morsel in my_cookie.items() 1113 | } 1114 | cookies = cookiejar_from_dict(cookies) 1115 | except: 1116 | cookies = None 1117 | try: 1118 | IMG_SESSION_RETY = get_session(3, proxies, cookies) 1119 | r = IMG_SESSION_RETY.get( 1120 | url, stream=True, timeout=(t, t), cookies=cookies 1121 | ) # Need higher timeout 1122 | with open(file_path, "wb") as f: 1123 | for chunk in r: 1124 | f.write(chunk) 1125 | # raise(requests.exceptions.ConnectionError) 1126 | is_success = True 1127 | break 1128 | except ( 1129 | requests.exceptions.ReadTimeout, 1130 | requests.exceptions.ConnectionError, 1131 | ) as e: 1132 | pass 1133 | if not is_success: 1134 | cprint( 1135 | "".join( 1136 | [ 1137 | HIGHER_RED, 1138 | "%s %s %s %s%s" 1139 | % ( 1140 | "\n[" + x_tag + "] Download this image at", 1141 | file_path, 1142 | "failed URL:", 1143 | url, 1144 | "\n", 1145 | ), 1146 | ] 1147 | ), 1148 | attrs=BOLD_ONLY, 1149 | end="", 1150 | ) 1151 | cprint( 1152 | "".join( 1153 | [ 1154 | HIGHER_RED, 1155 | "%s" 1156 | % ( 1157 | "\n[e1] You may want to delete this image manually and retry later(with -rs or try with single pin " 1158 | + ( 1159 | "https://www.pinterest.com/pin/" 1160 | + repr(image["id"]).strip("'") 1161 | ) 1162 | + ").\n\n" 1163 | ), 1164 | ] 1165 | ), 1166 | attrs=BOLD_ONLY, 1167 | end="", 1168 | ) 1169 | except OSError: # e.g. File name too long 1170 | cprint( 1171 | "".join( 1172 | [ 1173 | HIGHER_RED, 1174 | "%s %s %s %s%s" 1175 | % ( 1176 | "\n[" + x_tag + "] Download this image at", 1177 | file_path, 1178 | "failed :", 1179 | url, 1180 | "\n", 1181 | ), 1182 | ] 1183 | ), 1184 | attrs=BOLD_ONLY, 1185 | end="", 1186 | ) 1187 | cprint( 1188 | "".join( 1189 | [ 1190 | HIGHER_RED, 1191 | "%s" 1192 | % ( 1193 | "\nYou may want to use -c \n\n" 1194 | ), 1195 | ] 1196 | ), 1197 | attrs=BOLD_ONLY, 1198 | end="", 1199 | ) 1200 | return quit(traceback.format_exc()) 1201 | 1202 | else: 1203 | # cprint(''.join([ HIGHER_RED, '%s %s %s %s%s' % ('\n[' + x_tag + '] Download this image at' 1204 | # , file_path, 'failed :', url, '\n') ]), attrs=BOLD_ONLY, end='' ) 1205 | imgDimens = [] 1206 | imgDimensD = {} 1207 | for ik, iv in image["images"].items(): 1208 | if "x" in ik: 1209 | imgDimens.append(iv["width"]) 1210 | imgDimensD[iv["width"]] = iv["url"] 1211 | if imgDimens: 1212 | imgDimens.sort(key=int) 1213 | url = imgDimensD[int(imgDimens[-1])] 1214 | # double \n\n to make if unlikely same line behind thread progress bar 1215 | # cprint('\n\n[...] Retry with second best quality url: {}'.format(url), attrs=BOLD_ONLY) 1216 | 1217 | file_path = get_output_file_path( 1218 | url, arg_cut, fs_f_max, image_id, human_fname, save_dir 1219 | ) 1220 | if arg_el: 1221 | file_path = "\\\\?\\" + os.path.abspath(file_path) 1222 | 1223 | if not os.path.exists(file_path) or arg_force_update: 1224 | is_ok = False 1225 | for t in (15, 30, 40, 50, 60): 1226 | try: 1227 | with open(cookie_file) as f: 1228 | rawdata = f.read() 1229 | my_cookie = SimpleCookie() 1230 | my_cookie.load(rawdata) 1231 | cookies = { 1232 | key: morsel.value 1233 | for key, morsel in my_cookie.items() 1234 | } 1235 | cookies = cookiejar_from_dict(cookies) 1236 | except: 1237 | cookies = None 1238 | try: 1239 | # timeout=(connect_timeout, read_timeout) 1240 | # https://github.com/psf/requests/issues/3099#issuecomment-215498005 1241 | 1242 | r = IMG_SESSION.get( 1243 | url, 1244 | stream=True, 1245 | timeout=(t, t), 1246 | cookies=cookies, 1247 | ) 1248 | is_ok = True 1249 | break 1250 | except ( 1251 | requests.exceptions.ReadTimeout, 1252 | requests.exceptions.ConnectionError, 1253 | ) as e: 1254 | time.sleep(5) 1255 | IMG_SESSION = get_session(3, proxies, cookies) 1256 | if is_ok and r.ok: 1257 | 1258 | try: 1259 | with open(file_path, "wb") as f: 1260 | for chunk in r: 1261 | f.write(chunk) 1262 | # raise(requests.exceptions.ConnectionError) 1263 | except ( 1264 | requests.exceptions.ReadTimeout, 1265 | requests.exceptions.ConnectionError, 1266 | requests.exceptions.ChunkedEncodingError, 1267 | ) as e: 1268 | is_success = False 1269 | for t in (15, 30, 40, 50, 60): 1270 | time.sleep(5) 1271 | try: 1272 | with open(cookie_file) as f: 1273 | rawdata = f.read() 1274 | my_cookie = SimpleCookie() 1275 | my_cookie.load(rawdata) 1276 | cookies = { 1277 | key: morsel.value 1278 | for key, morsel in my_cookie.items() 1279 | } 1280 | cookies = cookiejar_from_dict(cookies) 1281 | except: 1282 | cookies = None 1283 | try: 1284 | IMG_SESSION_RETY = get_session( 1285 | 3, proxies, cookies 1286 | ) 1287 | r = IMG_SESSION_RETY.get( 1288 | url, 1289 | stream=True, 1290 | timeout=(t, t), 1291 | cookies=cookies, 1292 | ) 1293 | with open(file_path, "wb") as f: 1294 | for chunk in r: 1295 | f.write(chunk) 1296 | # raise(requests.exceptions.ConnectionError) 1297 | is_success = True 1298 | break 1299 | except ( 1300 | requests.exceptions.ReadTimeout, 1301 | requests.exceptions.ConnectionError, 1302 | requests.exceptions.ChunkedEncodingError, 1303 | ) as e: 1304 | pass 1305 | if not is_success: 1306 | cprint( 1307 | "".join( 1308 | [ 1309 | HIGHER_RED, 1310 | "%s %s %s %s%s" 1311 | % ( 1312 | "\n[" 1313 | + x_tag 1314 | + "] Download this image at", 1315 | file_path, 1316 | "failed :", 1317 | url, 1318 | "\n", 1319 | ), 1320 | ] 1321 | ), 1322 | attrs=BOLD_ONLY, 1323 | end="", 1324 | ) 1325 | cprint( 1326 | "".join( 1327 | [ 1328 | HIGHER_RED, 1329 | "%s" 1330 | % ( 1331 | "\n[e2] You may want to delete this image manually and retry later.\n\n" 1332 | ), 1333 | ] 1334 | ), 1335 | attrs=BOLD_ONLY, 1336 | end="", 1337 | ) 1338 | 1339 | except OSError: # e.g. File name too long 1340 | cprint( 1341 | "".join( 1342 | [ 1343 | HIGHER_RED, 1344 | "%s %s %s %s%s" 1345 | % ( 1346 | "\n[" 1347 | + x_tag 1348 | + "] Retried this image at", 1349 | file_path, 1350 | "failed :", 1351 | url, 1352 | "\n", 1353 | ), 1354 | ] 1355 | ), 1356 | attrs=BOLD_ONLY, 1357 | end="", 1358 | ) 1359 | cprint( 1360 | "".join( 1361 | [ 1362 | HIGHER_RED, 1363 | "%s" 1364 | % ( 1365 | "\nYou may want to use -c \n\n" 1366 | ), 1367 | ] 1368 | ), 1369 | attrs=BOLD_ONLY, 1370 | end="", 1371 | ) 1372 | return quit(traceback.format_exc()) 1373 | 1374 | # print('\n\n[' + plus_tag + '] ', end='') # konsole has issue if BOLD_ONLY with cprint with plus_tag 1375 | # Got case replace /originals/(detected is .png by imghdr)->covert to .png replace 736x bigger size than orig's png (but compare quality is not trivial), better use orig as first choice 1376 | # e.g. https://www.pinterest.com/antonellomiglio/computer/ 's https://i.pinimg.com/736x/3d/f0/88/3df088200b94f0b6b8325ae0a118b401--apple-computer-next-computer.jpg 1377 | # cprint('\nRetried with second best quality url success :D {} saved to {}\n'.format(url, file_path), attrs=BOLD_ONLY) 1378 | else: 1379 | cprint( 1380 | "".join( 1381 | [ 1382 | HIGHER_RED, 1383 | "%s %s %s %s%s" 1384 | % ( 1385 | "\n[" 1386 | + x_tag 1387 | + "] Retried this image at", 1388 | file_path, 1389 | "failed :", 1390 | url, 1391 | "\n", 1392 | ), 1393 | ] 1394 | ), 1395 | attrs=BOLD_ONLY, 1396 | end="", 1397 | ) 1398 | else: 1399 | pass # cprint('\nFile at {} already exist.\n'.format(file_path), attrs=BOLD_ONLY) 1400 | 1401 | else: 1402 | pass # print('No image found in this image index. This is normal (may be 1))') 1403 | 1404 | if not arg_img_only: 1405 | video_type = isVideoExist(image) 1406 | if video_type == 0: # No video 1407 | return 1408 | 1409 | # dj(image, 'before override') # override m3u8-only data with pin details page mp4 1410 | v_pin_id = image["id"] 1411 | image = get_pin_info( 1412 | v_pin_id, 1413 | None, 1414 | None, 1415 | None, 1416 | False, 1417 | False, 1418 | None, 1419 | None, 1420 | None, 1421 | None, 1422 | IMG_SESSION, 1423 | V_SESSION, 1424 | PIN_SESSION, 1425 | proxies, 1426 | cookie_file, 1427 | True, 1428 | ) 1429 | # dj(image, 'after override') # [todo:0] Rich Metadata for video write to log (only pin can get) 1430 | if not image: 1431 | cprint( 1432 | "".join( 1433 | [ 1434 | HIGHER_RED, 1435 | "%s %s%s" 1436 | % ( 1437 | "\n[" + x_tag + "] Get this video pin id failed :", 1438 | v_pin_id, 1439 | "\n", 1440 | ), 1441 | ] 1442 | ), 1443 | attrs=BOLD_ONLY, 1444 | end="", 1445 | ) 1446 | return 1447 | if video_type == 1: 1448 | v_d = image["videos"]["video_list"] 1449 | elif video_type == 2: # Already check index/key in isVideoExist() 1450 | v_d_unsort = image["story_pin_data"]["pages"][0]["blocks"][0]["video"][ 1451 | "video_list" 1452 | ] 1453 | v_d = OrderedDict( 1454 | sorted(v_d_unsort.items(), key=lambda t: t[0]) 1455 | ) # Sort by V_EXP{3-7} (V_HLSV3_MOBILE will ignore below since not .mp4) 1456 | # dj(v_d) 1457 | vDimens = [] 1458 | vDimensD = {} 1459 | for v_format, v_v in v_d.items(): 1460 | if "url" in v_v and v_v["url"].endswith("mp4"): 1461 | vDimens.append(v_v["width"]) 1462 | vDimensD[v_v["width"]] = v_v["url"] 1463 | if vDimens: 1464 | vDimens.sort(key=int) 1465 | vurl = vDimensD[int(vDimens[-1])] 1466 | # print('\n' + vurl) 1467 | # cprint('\n\n[...] Try with best quality video: {}'.format(vurl), attrs=BOLD_ONLY) 1468 | 1469 | file_path = get_output_file_path( 1470 | vurl, arg_cut, fs_f_max, image_id, human_fname, save_dir 1471 | ) 1472 | # print(file_path) 1473 | 1474 | # We MUST get correct file_path first to avoid final filename != trimmed filename 1475 | # ... which causes `not os.path.exists(file_path)` failed and re-download 1476 | if arg_el: 1477 | file_path = "\\\\?\\" + os.path.abspath(file_path) 1478 | 1479 | if not os.path.exists(file_path) or arg_force_update: 1480 | 1481 | is_ok = False 1482 | for t in (15, 30, 40, 50, 60): 1483 | try: 1484 | with open(cookie_file) as f: 1485 | rawdata = f.read() 1486 | my_cookie = SimpleCookie() 1487 | my_cookie.load(rawdata) 1488 | cookies = { 1489 | key: morsel.value for key, morsel in my_cookie.items() 1490 | } 1491 | cookies = cookiejar_from_dict(cookies) 1492 | except: 1493 | cookies = None 1494 | try: 1495 | r = V_SESSION.get( 1496 | vurl, stream=True, timeout=(t, t), cookies=cookies 1497 | ) 1498 | is_ok = True 1499 | break 1500 | except ( 1501 | requests.exceptions.ReadTimeout, 1502 | requests.exceptions.ConnectionError, 1503 | ) as e: 1504 | # Shouldn't print bcoz quite common 1505 | time.sleep( 1506 | 5 1507 | ) # cprint(''.join([ HIGHER_RED, '{}'.format('\n[' + x_tag + '] Video Timeout (Retry next).\n') ]), attrs=BOLD_ONLY, end='' ) 1508 | V_SESSION = get_session(4, proxies, cookies) 1509 | 1510 | # print(vurl + ' ok? ' + str(r.ok)) 1511 | 1512 | if is_ok and r.ok: 1513 | # print(r.text) 1514 | try: 1515 | with open(file_path, "wb") as f: 1516 | for chunk in r: 1517 | f.write(chunk) 1518 | # raise(requests.exceptions.ConnectionError) 1519 | except ( 1520 | requests.exceptions.ReadTimeout, 1521 | requests.exceptions.ConnectionError, 1522 | requests.exceptions.ChunkedEncodingError, 1523 | ) as e: 1524 | # requests.exceptions.ChunkedEncodingError: ("Connection broken: ConnectionResetError(104, 'Connection reset by peer')", ConnectionResetError(104, 'Connection reset by peer')) 1525 | is_success = False 1526 | for t in (15, 30, 40, 50, 60): 1527 | time.sleep(5) 1528 | try: 1529 | with open(cookie_file) as f: 1530 | rawdata = f.read() 1531 | my_cookie = SimpleCookie() 1532 | my_cookie.load(rawdata) 1533 | cookies = { 1534 | key: morsel.value 1535 | for key, morsel in my_cookie.items() 1536 | } 1537 | cookies = cookiejar_from_dict(cookies) 1538 | except: 1539 | cookies = None 1540 | try: 1541 | V_SESSION_RETY = get_session(4, proxies, cookies) 1542 | r = V_SESSION_RETY.get( 1543 | vurl, 1544 | stream=True, 1545 | timeout=(t, t), 1546 | cookies=cookies, 1547 | ) 1548 | with open(file_path, "wb") as f: 1549 | for chunk in r: 1550 | f.write(chunk) 1551 | # raise(requests.exceptions.ConnectionError) 1552 | is_success = True 1553 | break 1554 | except ( 1555 | requests.exceptions.ReadTimeout, 1556 | requests.exceptions.ConnectionError, 1557 | requests.exceptions.ChunkedEncodingError, 1558 | ) as e: 1559 | pass 1560 | if not is_success: 1561 | cprint( 1562 | "".join( 1563 | [ 1564 | HIGHER_RED, 1565 | "%s %s %s %s%s" 1566 | % ( 1567 | "\n[" 1568 | + x_tag 1569 | + "] Download this video at", 1570 | file_path, 1571 | "failed :", 1572 | vurl, 1573 | "\n", 1574 | ), 1575 | ] 1576 | ), 1577 | attrs=BOLD_ONLY, 1578 | end="", 1579 | ) 1580 | cprint( 1581 | "".join( 1582 | [ 1583 | HIGHER_RED, 1584 | "%s" 1585 | % ( 1586 | "\n[e3] You may want to delete this video manually and retry later.\n\n" 1587 | ), 1588 | ] 1589 | ), 1590 | attrs=BOLD_ONLY, 1591 | end="", 1592 | ) 1593 | except OSError: # e.g. File name still too long 1594 | cprint( 1595 | "".join( 1596 | [ 1597 | HIGHER_RED, 1598 | "%s %s %s %s%s" 1599 | % ( 1600 | "\n[" + x_tag + "] Download this video at", 1601 | file_path, 1602 | "failed :", 1603 | vurl, 1604 | "\n", 1605 | ), 1606 | ] 1607 | ), 1608 | attrs=BOLD_ONLY, 1609 | end="", 1610 | ) 1611 | cprint( 1612 | "".join( 1613 | [ 1614 | HIGHER_RED, 1615 | "%s" 1616 | % ( 1617 | "\nYou may want to use -c \n\n" 1618 | ), 1619 | ] 1620 | ), 1621 | attrs=BOLD_ONLY, 1622 | end="", 1623 | ) 1624 | return quit(traceback.format_exc()) 1625 | 1626 | else: 1627 | cprint( 1628 | "".join( 1629 | [ 1630 | HIGHER_RED, 1631 | "%s %s %s %s%s" 1632 | % ( 1633 | "\n[" + x_tag + "] Download this video at", 1634 | save_dir, 1635 | "failed :", 1636 | vurl, 1637 | "\n", 1638 | ), 1639 | ] 1640 | ), 1641 | attrs=BOLD_ONLY, 1642 | end="", 1643 | ) 1644 | 1645 | except: # Need catch inside job, or else it doesn't throws 1646 | print() 1647 | return quit(traceback.format_exc()) 1648 | 1649 | 1650 | def create_dir(save_dir): 1651 | 1652 | try: 1653 | if IS_WIN: 1654 | os.makedirs("\\\\?\\" + os.path.abspath(save_dir)) 1655 | else: 1656 | os.makedirs(save_dir) 1657 | except FileExistsError: # Check this first to avoid OSError cover this 1658 | pass # Normal if re-download 1659 | except OSError: # e.g. File name too long 1660 | 1661 | # Only need to care for individual path component 1662 | # , i.e. os.statvfs('./').f_namemax = 255(normal fs), 242(docker) or 143(eCryptfs) ) 1663 | # , not full path( os.statvfs('./').f_frsize - 1 = 2045) 1664 | # Overkill seems even you do extra work to truncate path, then what if user give arg_dir at 1665 | # ... 2045th path? how is it possible create new dir/file from that point? 1666 | # So only need to care for individual component 1667 | # ... which max total(estimate) is uname 100 + (boardname 50*4)+( section 50*3) = ~450 bytes only. 1668 | # Then add max file 255 bcome 705, still far away from 2045th byte(or 335 4_bytes utf-8) 1669 | # So you direct throws OSError enough to remind that user don't make insane fs hier 1670 | 1671 | cprint( 1672 | "".join( 1673 | [ 1674 | HIGHER_RED, 1675 | "%s" 1676 | % ( 1677 | "\nIt might causes by too long(2045 bytes) in full path.\ 1678 | You may want to to use -d OR -c .\n\n" 1679 | ), 1680 | ] 1681 | ), 1682 | attrs=BOLD_ONLY, 1683 | end="", 1684 | ) 1685 | raise 1686 | 1687 | 1688 | def write_log( 1689 | arg_timestamp_log, 1690 | url_path, 1691 | shortform, 1692 | arg_img_only, 1693 | arg_v_only, 1694 | save_dir, 1695 | images, 1696 | pin, 1697 | arg_cut, 1698 | break_from_latest_pin, 1699 | ): 1700 | 1701 | got_img = False 1702 | 1703 | if arg_timestamp_log: 1704 | if pin: 1705 | log_timestamp = ( 1706 | "log-pinterest-downloader_" 1707 | + str(pin) 1708 | + "_" 1709 | + datetime.now().strftime("%Y-%m-%d %H.%M.%S") 1710 | ) 1711 | else: # None 1712 | log_timestamp = "log-pinterest-downloader_" + datetime.now().strftime( 1713 | "%Y-%m-%d %H.%M.%S" 1714 | ) 1715 | else: 1716 | if pin: 1717 | log_timestamp = "log-pinterest-downloader_" + str(pin) 1718 | else: 1719 | log_timestamp = "log-pinterest-downloader" 1720 | # sanitize enough, no nid max path in case PIN id too long, throws err (too long path) 1721 | # to inform me better than silently guess to slice [:100] early and hide this issue 1722 | # Currently possible long non-number A8pQTwIQQLQGWEacY5vc6og pin id 1723 | log_path = os.path.join(save_dir, "{}".format(sanitize(log_timestamp) + ".log")) 1724 | 1725 | if not pin: 1726 | # Since no image will not log, so need separate file store for urls 1727 | # Don't want combine with old log or else you need open the file to see got title/desc or not even though no content. 1728 | log_url_path = os.path.join(save_dir, "urls-pinterest-downloader.urls") 1729 | 1730 | with open(log_url_path, "w", encoding="utf-8") as f: 1731 | f.write( 1732 | "Pinterest Downloader: Version " + str(__version__) + "\n\n" 1733 | ) # Easy to recognize if future want to change something 1734 | # Ensure -ua same parsing format 1735 | f.write( 1736 | "Input URL: https://www.pinterest.com/" + url_path.rstrip("/") + "/\n" 1737 | ) # Reuse/refer when want to update 1738 | if shortform: # single pin no need 1739 | f.write( 1740 | "Folder URL: https://www.pinterest.com/" 1741 | + shortform.rstrip("/") 1742 | + "/\n\n" 1743 | ) # Reuse/refer when want to update specific folder only 1744 | 1745 | if images: 1746 | # dj(images) 1747 | # print('len(images) IF: ' + str(len(images))) 1748 | index_last = 0 1749 | existing_indexes = [] 1750 | 1751 | if break_from_latest_pin and not arg_timestamp_log: 1752 | try: 1753 | with open(log_path, encoding="utf-8") as f: 1754 | index_line = [l for l in f.readlines() if l.startswith("[ ")] 1755 | index_last_tmp = index_line[-1].split("[ ")[1].split(" ]")[0] 1756 | if index_last_tmp.isdigit(): 1757 | index_last = int(index_last_tmp) 1758 | for l in index_line: 1759 | existing_indexes.append( 1760 | l.split("[ ")[1].split(" ] Pin Id: ")[1].strip() 1761 | ) 1762 | except (FileNotFoundError, OSError, KeyError, TypeError): 1763 | cprint( 1764 | "".join( 1765 | [ 1766 | HIGHER_YELLOW, 1767 | "%s" 1768 | % ( 1769 | "\nWrite log increment from last log stored index failed. Fallback to -lt\n\n" 1770 | ), 1771 | ] 1772 | ), 1773 | attrs=BOLD_ONLY, 1774 | end="", 1775 | ) 1776 | log_timestamp = "log-pinterest-downloader_" + datetime.now().strftime( 1777 | "%Y-%m-%d %H.%M.%S" 1778 | ) 1779 | log_path = os.path.join( 1780 | save_dir, "{}".format(sanitize(log_timestamp) + ".log") 1781 | ) 1782 | with open(log_path, "w", encoding="utf-8") as f: # Refer below else: 1783 | f.write( 1784 | "Pinterest Downloader: Version " + str(__version__) + "\n\n" 1785 | ) 1786 | else: 1787 | 1788 | if pin: 1789 | img_total = 1 1790 | elif break_from_latest_pin: # Already cut last non-image, so no need -1 1791 | img_total = len( 1792 | images 1793 | ) # possible single video without extra padding, so still need loop all+check id/image/video to get real total count 1794 | else: 1795 | img_total = len(images) - 1 1796 | if img_total == 0: 1797 | if (not arg_img_only and isVideoExist(images[0])) or ( 1798 | not arg_v_only and ("images" in images[0]) 1799 | ): 1800 | img_total = ( 1801 | 1 # 1st index may valid item if single video in board 1802 | ) 1803 | if ( 1804 | img_total == 0 1805 | ): # No need create log when empty folder, but still created .urls above 1806 | return False 1807 | else: 1808 | with open(log_path, "w", encoding="utf-8") as f: # Reset before append 1809 | f.write( 1810 | "Pinterest Downloader: Version " + str(__version__) + "\n\n" 1811 | ) # Easy to recognize if future want to change something 1812 | f.write( 1813 | "Input URL: https://www.pinterest.com/" 1814 | + url_path.rstrip("/") 1815 | + "/\n" 1816 | ) # Reuse/refer when want to update 1817 | if shortform: # single pin no need 1818 | f.write( 1819 | "Folder URL: https://www.pinterest.com/" 1820 | + shortform.rstrip("/") 1821 | + "/\n\n" 1822 | ) # Reuse/refer when want to update specific folder only 1823 | else: 1824 | f.write("\n") 1825 | skipped_total = 0 1826 | # print(existing_indexes) 1827 | for log_i, image in enumerate(images): 1828 | if "id" not in image: 1829 | # dj(image) 1830 | skipped_total += 1 1831 | continue 1832 | got_img = True 1833 | image_id = image["id"] 1834 | # print('valid id:' + str(image_id)) 1835 | if image_id in existing_indexes: 1836 | print("dup image_id " + str(image_id)) 1837 | # Still got_img True to try re-download flow since only want to ensure log don't want duplicated if reorder 1838 | continue 1839 | # Exclude image log if --video-only, and vice-versa. 1840 | if not ( 1841 | (not arg_img_only and isVideoExist(image)) 1842 | or (not arg_v_only and ("images" in image)) 1843 | ): 1844 | skipped_total += 1 1845 | continue 1846 | 1847 | # print('last:'+str(index_last) + ' curr_i:' + str(log_i) + ' curr:' + str(skipped_total)) 1848 | 1849 | # dj(image) 1850 | # print('got img: ' + image_id) # Possible got id but empty section 1851 | # , so still failed to use got_img to skip showing estimated 1 image if actually empty 1852 | story = "" 1853 | # print(image) 1854 | # print(image_id) 1855 | # if use 'title' may returns dict {'format': 'Find more ideas', 'text': None, 'args': []} 1856 | if ("grid_title" in image) and image["grid_title"]: 1857 | story = "\nTitle: " + image["grid_title"].replace("\n", " ").strip() 1858 | # Got case NoneType 1859 | if ( 1860 | ("closeup_unified_description" in image) 1861 | and image["closeup_unified_description"] 1862 | and image["closeup_unified_description"].strip() 1863 | ): 1864 | story += ( 1865 | "\nDescription: " 1866 | + image["closeup_unified_description"].replace("\n", " ").strip() 1867 | ) 1868 | elif ( 1869 | ("description" in image) 1870 | and image["description"] 1871 | and image["description"].strip() 1872 | ): 1873 | story += ( 1874 | "\nDescription: " + image["description"].replace("\n", " ").strip() 1875 | ) 1876 | if ("created_at" in image) and image["created_at"]: 1877 | story += ( 1878 | "\nCreated at: " + image["created_at"].replace("\n", " ").strip() 1879 | ) 1880 | if ("link" in image) and image["link"]: 1881 | story += "\nLink: " + image["link"].replace("\n", " ").strip() 1882 | if ("rich_metadata" in image) and image["rich_metadata"]: 1883 | story += ( 1884 | "\n\nMetadata: " 1885 | + repr(image["rich_metadata"]).replace("\n", " ").strip() 1886 | ) 1887 | if story: 1888 | try: 1889 | # Windows need utf-8 1890 | with open(log_path, "a", encoding="utf-8") as f: 1891 | # print('last index:'+ str(index_last) + ' curr_i:' + str(log_i) + ' curr:' + str(skipped_total)) 1892 | # print('last log:'+ str(index_last + log_i + 1 - skipped_total)) 1893 | f.write( 1894 | "[ " 1895 | + str(index_last + log_i + 1 - skipped_total) 1896 | + " ] Pin Id: " 1897 | + str(image_id) 1898 | + "\n" 1899 | ) 1900 | f.write(story + "\n\n") 1901 | except OSError: # e.g. File name too long 1902 | cprint( 1903 | "".join( 1904 | [ 1905 | HIGHER_RED, 1906 | "%s" 1907 | % ( 1908 | "\nYou may want to use -c \n\n" 1909 | ), 1910 | ] 1911 | ), 1912 | attrs=BOLD_ONLY, 1913 | end="", 1914 | ) 1915 | return quit(traceback.format_exc()) 1916 | else: 1917 | skipped_total += 1 1918 | continue 1919 | 1920 | return got_img 1921 | 1922 | 1923 | def sort_func(x): 1924 | prefix = x.split(".")[0].split("_")[0] 1925 | if prefix.isdigit(): 1926 | return int(prefix) 1927 | return 0 1928 | 1929 | 1930 | def get_latest_pin(save_dir): 1931 | # Currently possible long non-number A8pQTwIQQLQGWEacY5vc6og pin id but should rare case and ignore/re-scrape is fine 1932 | latest_pin = "0" 1933 | depth = 1 1934 | # rf: https://stackoverflow.com/a/42720847/1074998 # Don't use expanduser and expandvars for arbitrary input 1935 | # [1] abspath() already acts as normpath() to remove trailing os.sep 1936 | # , and we need ensures trailing os.sep not exists to make slicing accurate. 1937 | # [2] abspath() also make /../ and ////, "." get resolved even though os.walk can returns it literally. 1938 | walk_dir = os.path.abspath(save_dir) 1939 | for root, dirs, files in os.walk(walk_dir): 1940 | if root[len(walk_dir) :].count(os.sep) < depth: 1941 | imgs_f = [ 1942 | _ 1943 | for _ in files 1944 | if _.lower().endswith( 1945 | ( 1946 | ".png", 1947 | ".jpg", 1948 | ".jpeg", 1949 | ".bmp", 1950 | ".gif", 1951 | ".mp4", 1952 | ".mkv", 1953 | ".webp", 1954 | ".svg", 1955 | ".m4a", 1956 | ".mp3", 1957 | ".flac", 1958 | ".m3u8", 1959 | ".wmv", 1960 | ".webm", 1961 | ".mov", 1962 | ".flv", 1963 | ".m4v", 1964 | ".ogg", 1965 | ".avi", 1966 | ".wav", 1967 | ".apng", 1968 | ".avif", 1969 | ) 1970 | ) 1971 | ] # paranoid list 1972 | imgs_f_sorted = sorted(imgs_f, key=sort_func) 1973 | if not imgs_f_sorted: # only 1 depth 1974 | break 1975 | latest_pin = imgs_f_sorted[-1].split(".")[0].split("_")[0] 1976 | 1977 | # if latest pin deleted remote then will acts as -rs 1978 | # print('latest_pin: ' + latest_pin) 1979 | return latest_pin 1980 | 1981 | 1982 | def fetch_imgs( 1983 | board, 1984 | uname, 1985 | board_slug, 1986 | section_slug, 1987 | is_main_board, 1988 | arg_timestamp, 1989 | arg_timestamp_log, 1990 | url_path, 1991 | arg_force_update, 1992 | arg_rescrape, 1993 | arg_img_only, 1994 | arg_v_only, 1995 | arg_dir, 1996 | arg_thread_max, 1997 | IMGS_SESSION, 1998 | IMG_SESSION, 1999 | V_SESSION, 2000 | PIN_SESSION, 2001 | proxies, 2002 | cookie_file, 2003 | arg_cut, 2004 | arg_el, 2005 | fs_f_max, 2006 | ): 2007 | 2008 | bookmark = None 2009 | images = [] 2010 | 2011 | if is_main_board: 2012 | shortform = uname 2013 | else: 2014 | if section_slug: 2015 | shortform = "/".join((uname, board_slug, section_slug)) 2016 | else: 2017 | shortform = "/".join((uname, board_slug)) 2018 | 2019 | if arg_timestamp: 2020 | timestamp_d = "_" + datetime.now().strftime("%Y-%m-%d %H.%M.%S") + ".d" 2021 | else: 2022 | timestamp_d = "" 2023 | try: 2024 | # dj(board, 'fetch imgs') 2025 | if "owner" in board: 2026 | # uname = board['owner']['username'] 2027 | # save_dir = os.path.join(arg_dir, uname, board['name'] + timestamp_d) 2028 | # url = board['url'] 2029 | bid = board["id"] 2030 | # Might unicode, so copy from web browser become %E4%Bd 2031 | # ... which is not the board filename I want 2032 | board_name_folder = board["name"] 2033 | # print('root bname: ' + repr(board_name_folder)) 2034 | elif "board" in board: 2035 | # uname = board['pinner']['username'] 2036 | # save_dir = os.path.join(arg_dir, uname, board['board']['name'] + timestamp_d) 2037 | # url = board['board']['url'] 2038 | bid = board["board"]["id"] 2039 | board_name_folder = board["board"]["name"] 2040 | # print('child bname: ' + repr(board_name_folder)) 2041 | if section_slug: 2042 | try: 2043 | section_id = board["section"]["id"] 2044 | except (KeyError, TypeError): 2045 | return quit( 2046 | "{}".format("\n[" + x_tag + "] Section may not exist.\n") 2047 | ) 2048 | section_folder = board["section"]["title"] 2049 | else: 2050 | return quit( 2051 | "{}".format( 2052 | "\n[" 2053 | + x_tag 2054 | + "] No item found.\n\ 2055 | Please ensure your username/boardname/[section] or link has media item.\n" 2056 | ) 2057 | ) 2058 | except (KeyError, TypeError): 2059 | cprint( 2060 | "".join( 2061 | [ 2062 | HIGHER_RED, 2063 | "%s %s %s" % ("\n[" + x_tag + "] Failed. Path:", shortform, "\n\n"), 2064 | ] 2065 | ), 2066 | attrs=BOLD_ONLY, 2067 | end="", 2068 | ) 2069 | return quit( 2070 | traceback.format_exc() 2071 | + "\n[!] Something wrong with Pinterest URL. Please report this issue at https://github.com/limkokhole/pinterest-downloader/issues , thanks." 2072 | ) 2073 | 2074 | fs_d_max = fs_f_max 2075 | # if IS_WIN: # [DEPRECATED] since always -el now AND Windows 259 - \\?\ = 255 normal Linux 2076 | # if arg_el: # Directory cannot use -el 2077 | # fs_d_max = WIN_MAX_PATH 2078 | 2079 | if section_slug: 2080 | # Put -1 fot arg_cut arg bcoz don't want cut on directory 2081 | # to avoid cut become empty (or provide new arg -c-cut-directory 2082 | # , but overcomplicated and in reality who want to cut dir? 2083 | # ... Normally only want cut filename bcoz of included title/description ) 2084 | save_dir = os.path.join( 2085 | arg_dir, 2086 | get_max_path(-1, fs_d_max, sanitize(uname), None), 2087 | get_max_path(-1, fs_d_max, sanitize(board_name_folder + timestamp_d), None), 2088 | get_max_path(-1, fs_d_max, sanitize(section_folder), None), 2089 | ) 2090 | # Impossible is_main_board here 2091 | url = "/" + "/".join((uname, board_slug, section_slug)) + "/" 2092 | else: 2093 | save_dir = os.path.join( 2094 | arg_dir, 2095 | get_max_path(-1, fs_d_max, sanitize(uname), None), 2096 | get_max_path(-1, fs_d_max, sanitize(board_name_folder + timestamp_d), None), 2097 | ) 2098 | # If boardname in url is lowercase but title startswith ' which quotes to %22 and cause err 2099 | # ... So don't use board_name_folder as board_name in url below to call API 2100 | if is_main_board: 2101 | url = uname 2102 | else: 2103 | url = "/".join((uname, board_slug)) 2104 | 2105 | # if not section_slug: 2106 | # print('[Board id]: '+ repr(bid)) 2107 | 2108 | if not arg_rescrape: 2109 | latest_pin = get_latest_pin(save_dir) 2110 | 2111 | break_from_latest_pin = False 2112 | sorted_api = True 2113 | while bookmark != "-end-": 2114 | 2115 | if section_slug: 2116 | 2117 | options = { 2118 | "isPrefetch": "false", 2119 | "field_set_key": "react_grid_pin", 2120 | "is_own_profile_pins": "false", 2121 | "page_size": 25, 2122 | "redux_normalize_feed": "true", 2123 | "section_id": section_id, 2124 | } 2125 | 2126 | else: 2127 | options = { 2128 | "isPrefetch": "false", 2129 | "board_id": bid, 2130 | "board_url": url, 2131 | "field_set_key": "react_grid_pin", 2132 | "filter_section_pins": "true", 2133 | #'order': 'DESCENDING',#'oldest',#'default', 2134 | #'order': 'default', 2135 | #'sort':'last_pinned_to', 2136 | #'sortDirection': 'newest', 2137 | #'most_recent_board_sort_order': 'first_pinned_to', 2138 | "layout": "default", 2139 | "page_size": 25, # 10,#25, 2140 | "redux_normalize_feed": "true", 2141 | } 2142 | 2143 | # print('bookmark: ' + repr(bookmark)) 2144 | if bookmark: 2145 | options.update( 2146 | { 2147 | "bookmarks": [bookmark], 2148 | } 2149 | ) 2150 | 2151 | i_len = len(images) - 1 2152 | if i_len < 0: 2153 | i_len = 0 2154 | # Got end='' here also not able make flush work 2155 | if section_slug: 2156 | print( 2157 | "\r[...] Getting all images in this section: {}/{} ... [ {} / ? ]".format( 2158 | board_slug, section_slug, str(i_len) 2159 | ), 2160 | end="", 2161 | ) 2162 | else: 2163 | print( 2164 | "\r[...] Getting all images in this board: {} ... [ {} / ? ]".format( 2165 | board_slug, str(i_len) 2166 | ), 2167 | end="", 2168 | ) 2169 | sys.stdout.flush() 2170 | 2171 | post_d = ( 2172 | urllib.parse.urlencode( 2173 | { 2174 | "source_url": url, 2175 | "data": {"options": options, "context": {}}, 2176 | "_": int(time.time() * 1000), 2177 | } 2178 | ) 2179 | .replace("+", "") 2180 | .replace("%27", "%22") 2181 | .replace("%3A%22true%22", "%3Atrue") 2182 | .replace("%3A%22false%22", "%3Afalse") 2183 | ) 2184 | 2185 | # print(post_d) 2186 | # print('[imgs] called headers: ' + repr(IMGS_SESSION.headers)) 2187 | 2188 | for t in (15, 30, 40, 50, 60): 2189 | try: 2190 | if section_slug: 2191 | try: 2192 | with open(cookie_file) as f: 2193 | rawdata = f.read() 2194 | my_cookie = SimpleCookie() 2195 | my_cookie.load(rawdata) 2196 | cookies = { 2197 | key: morsel.value for key, morsel in my_cookie.items() 2198 | } 2199 | cookies = cookiejar_from_dict(cookies) 2200 | except: 2201 | cookies = None 2202 | r = IMGS_SESSION.get( 2203 | "https://www.pinterest.com/resource/BoardSectionPinsResource/get/", 2204 | params=post_d, 2205 | timeout=(t, t), 2206 | cookies=cookies, 2207 | ) 2208 | else: 2209 | try: 2210 | with open(cookie_file) as f: 2211 | rawdata = f.read() 2212 | my_cookie = SimpleCookie() 2213 | my_cookie.load(rawdata) 2214 | cookies = { 2215 | key: morsel.value for key, morsel in my_cookie.items() 2216 | } 2217 | cookies = cookiejar_from_dict(cookies) 2218 | except: 2219 | cookies = None 2220 | r = IMGS_SESSION.get( 2221 | "https://www.pinterest.com/resource/BoardFeedResource/get/", 2222 | params=post_d, 2223 | timeout=(t, t), 2224 | cookies=cookies, 2225 | ) 2226 | data = r.json() 2227 | if data["resource_response"]["data"] is None: 2228 | cprint( 2229 | "".join( 2230 | [HIGHER_YELLOW, "%s" % ("Failed. Retry after 30 seconds.")] 2231 | ), 2232 | attrs=BOLD_ONLY, 2233 | end="\n", 2234 | ) 2235 | time.sleep(30) 2236 | IMGS_SESSION = get_session(2, proxies, cookies) 2237 | continue # Retry for issues #19 2238 | break 2239 | except ( 2240 | requests.exceptions.ReadTimeout, 2241 | requests.exceptions.ConnectionError, 2242 | ) as e: 2243 | time.sleep(5) 2244 | IMGS_SESSION = get_session(2, proxies, cookies) 2245 | 2246 | # print('Imgs url ok: ' + str(r.ok)) 2247 | # print('Imgs url: ' + r.url) 2248 | # dj(data, 'imgs loop raw') 2249 | # Useful for debug with print only specific id log 2250 | # if 'e07614d79a22d22c83d51649e2e01e43' in repr(data): 2251 | # print('res data: ' + repr(data)) 2252 | imgs_round = data["resource_response"]["data"] 2253 | 2254 | # print() 2255 | # for img in imgs_round: 2256 | # print('before img: ' + repr(img['id'])) 2257 | 2258 | reach_lastest_pin = False 2259 | if not arg_rescrape and sorted_api and (latest_pin != "0"): 2260 | img_prev = 0 2261 | on_hold_break = False 2262 | # Video + thumbnails has 2 same id files with diff extension 2263 | # , but API only return single item, so no nid handle equal flow 2264 | for img_round_i, img in enumerate(imgs_round): 2265 | # print('Check: ' + repr(img['id'])) 2266 | if (isVideoExist(img)) or "images" in img: 2267 | if img["id"].isdigit(): 2268 | img_curr = img["id"] 2269 | if img_prev and (int(img_curr) > int(img_prev)): 2270 | cprint( 2271 | "".join( 2272 | [ 2273 | HIGHER_YELLOW, 2274 | "%s" 2275 | % ( 2276 | "\n[W] This images list is not sorted(Due to user reorder), fallback to -rs for this list.\n\n" 2277 | ), 2278 | ] 2279 | ), 2280 | attrs=BOLD_ONLY, 2281 | end="", 2282 | ) 2283 | sorted_api = False 2284 | reach_lastest_pin = False 2285 | if on_hold_break: 2286 | imgs_round = data["resource_response"][ 2287 | "data" 2288 | ] # replaced back below 2289 | break 2290 | if latest_pin == img_curr: 2291 | # print('\nAlready scroll to latest downloaded pin. Break.') 2292 | # print('bookmark: ' + repr(bookmark)) 2293 | imgs_round = imgs_round[:img_round_i] 2294 | reach_lastest_pin = True 2295 | # Next check all 25 items in current page to know owner recently has reorder habit or not 2296 | on_hold_break = True 2297 | img_prev = img_curr 2298 | else: 2299 | cprint( 2300 | "".join( 2301 | [ 2302 | HIGHER_YELLOW, 2303 | "%s" 2304 | % ( 2305 | "\n[W] This images list is not sorted(Due to alphanumeric pin ID), fallback to -rs for this list.\n\n" 2306 | ), 2307 | ] 2308 | ), 2309 | attrs=BOLD_ONLY, 2310 | end="", 2311 | ) 2312 | sorted_api = False 2313 | reach_lastest_pin = False 2314 | imgs_round = data["resource_response"][ 2315 | "data" 2316 | ] # replaced back above 2317 | break 2318 | else: 2319 | pass # print('Not media.') 2320 | # for img in imgs_round: 2321 | # print('real img: ' + repr(img['id'])) 2322 | images.extend(imgs_round) 2323 | if reach_lastest_pin: 2324 | break_from_latest_pin = True 2325 | break 2326 | 2327 | # dj(data['resource_response']['data'], 'img raw') 2328 | # print(data.keys()) 2329 | # dj(data['client_context'], 'img raw') 2330 | # dj(data['resource'], 'img raw') 2331 | # dj(data['request_identifier'], 'img raw') # "" only 2332 | # dj(data['resource_response'], 'img raw') 2333 | bookmark = data["resource"]["options"]["bookmarks"][0] 2334 | 2335 | # break # hole: testing purpose # Remember remove this after test lolr 2336 | 2337 | if sorted_api: 2338 | images = images[ 2339 | ::-1 2340 | ] # reverse order to oldest pin id -> latest pin id for -u to work 2341 | # for img in images: 2342 | # print(img['id']) 2343 | 2344 | create_dir(save_dir) 2345 | got_img = write_log( 2346 | arg_timestamp_log, 2347 | url_path, 2348 | shortform, 2349 | arg_img_only, 2350 | arg_v_only, 2351 | save_dir, 2352 | images, 2353 | None, 2354 | arg_cut, 2355 | break_from_latest_pin, 2356 | ) 2357 | 2358 | if got_img: 2359 | # Always got extra index is not media, so -1 # [UPDATE] single video board might a media 2360 | # Didn't bring loop above detect early 2361 | if ( 2362 | break_from_latest_pin 2363 | ): # Already check got video/image for images, so no need -1 2364 | img_total = len(images) 2365 | else: 2366 | img_total = len(images) - 1 2367 | if img_total == 0: 2368 | if (not arg_img_only and isVideoExist(images[0])) or ( 2369 | not arg_v_only and ("images" in images[0]) 2370 | ): 2371 | img_total = 1 # 1st index may valid item if single video in board 2372 | if img_total == 0: 2373 | print( 2374 | "\n[i] No {}item found.".format("new " if break_from_latest_pin else "") 2375 | ) 2376 | return 2377 | print( 2378 | ( 2379 | " [" 2380 | + plus_tag 2381 | + "] Found {} {}image/video" 2382 | + ("s" if img_total > 1 else "") 2383 | ).format(img_total, "new " if break_from_latest_pin else "") 2384 | ) 2385 | print("Download into directory: " + save_dir.rstrip(os.sep) + os.sep) 2386 | else: 2387 | print("\n[i] No {}item found.".format("new " if break_from_latest_pin else "")) 2388 | return 2389 | 2390 | if arg_thread_max < 1: 2391 | arg_thread_max = ( 2392 | None # Use default: "number of processors on the machine, multiplied by 5" 2393 | ) 2394 | 2395 | with ThreadPoolExecutor(max_workers=arg_thread_max) as executor: 2396 | 2397 | # Create threads 2398 | futures = { 2399 | executor.submit( 2400 | download_img, 2401 | image, 2402 | save_dir, 2403 | arg_force_update, 2404 | arg_img_only, 2405 | arg_v_only, 2406 | IMG_SESSION, 2407 | V_SESSION, 2408 | PIN_SESSION, 2409 | proxies, 2410 | cookie_file, 2411 | arg_cut, 2412 | arg_el, 2413 | fs_f_max, 2414 | ) 2415 | for image in images 2416 | } 2417 | 2418 | # as_completed() gives you the threads once finished 2419 | for index, f in enumerate(as_completed(futures)): 2420 | # Get the results 2421 | # rs = f.result() 2422 | # print('done') 2423 | printProgressBar( 2424 | index + 1, 2425 | len(images), 2426 | prefix="[...] Downloading:", 2427 | suffix="Complete", 2428 | length=50, 2429 | ) 2430 | 2431 | # Need suffix with extra 3 spaces to replace previous longer ... + Downloading->ed line 2432 | # ... to avoid see wrong word "Complete" 2433 | printProgressBar( 2434 | len(images), 2435 | len(images), 2436 | prefix="[" + done_tag + "] Downloaded:", 2437 | suffix="Complete ", 2438 | length=50, 2439 | ) 2440 | 2441 | print() 2442 | 2443 | 2444 | def update_all( 2445 | arg_thread_max: int, 2446 | arg_cut: int, 2447 | arg_rescrape: bool, 2448 | arg_img_only, 2449 | arg_v_only, 2450 | arg_https_proxy: str, 2451 | arg_http_proxy: str, 2452 | arg_cookies: str, 2453 | ): 2454 | 2455 | bk_cwd = os.path.abspath(os.getcwd()) 2456 | cwd_component_total = len(PurePath(os.path.abspath(bk_cwd)).parts[:]) 2457 | imgs_f = [] 2458 | for root, dirs, files in os.walk(bk_cwd): 2459 | # print('#r: ' + repr(root) + ' #d: ' + repr(dirs) + ' #f: ' + repr(files)) 2460 | imgs_f.extend( 2461 | [ 2462 | os.path.join(root, _) 2463 | for _ in files 2464 | if (_ == "urls-pinterest-downloader.urls") 2465 | ] 2466 | ) 2467 | 2468 | urls_map = {} 2469 | cd_back_fixed_range = (1, 2, 3) 2470 | for f in imgs_f: 2471 | r = open(f, "r") 2472 | input_url = None 2473 | folder_url = None 2474 | for line in r: 2475 | l_strip = line.strip() 2476 | if l_strip.startswith("Input URL: "): # re.search('^Input URL: ', line): 2477 | input_url = l_strip.split("Input URL: ")[1].strip() 2478 | elif l_strip.startswith("Folder URL: "): 2479 | folder_url = l_strip.split("Folder URL: ")[1].strip() 2480 | if input_url and folder_url: 2481 | cd_back_count = len(folder_url.split("/")[3:]) - 1 # -1 is trailing '/' 2482 | if cd_back_count not in cd_back_fixed_range: 2483 | return quit( 2484 | [ 2485 | "[E1][-ua] Input url: " 2486 | + input_url 2487 | + "\nFolder url: " 2488 | + folder_url, 2489 | "Something is not right. Please report this issue at https://github.com/limkokhole/pinterest-downloader/issues , thanks.", 2490 | ] 2491 | ) 2492 | # +1 is the upper path to run script previously 2493 | dir_origin = os.path.abspath( 2494 | os.path.join(f, "../" * (cd_back_count + 1)) 2495 | ) 2496 | dir_split = PurePath(dir_origin).parts[:] 2497 | # Safeguard to avoid travel to parent of current directory 2498 | if len(dir_split) < cwd_component_total: 2499 | cprint( 2500 | "".join( 2501 | [ 2502 | HIGHER_YELLOW, 2503 | "%s" 2504 | % ( 2505 | "\n" 2506 | + "Update from parent directory of current directory is forbidden. Skipped.\n" 2507 | + "You should cd to parent directory to update this folder:" 2508 | + "\nurls file: " 2509 | + f 2510 | + "\nInput url: " 2511 | + input_url 2512 | + "\nFolder url: " 2513 | + folder_url 2514 | + "\nParent directory: " 2515 | + dir_origin 2516 | + "\nCurrent directory: " 2517 | + bk_cwd 2518 | + "\n\n" 2519 | ), 2520 | ] 2521 | ) 2522 | ) 2523 | break 2524 | if dir_origin in urls_map: 2525 | # cd_back_count: 3 means section, 2 means board, 1 means username 2526 | # section separate scrape, not by username/board, while board filter by username below 2527 | # -es force later so no section repeat. 2528 | # So not included new created section(new board possible if got username) 2529 | if cd_back_count in (2, 3): 2530 | urls_map[dir_origin]["info"].append( 2531 | {"url": folder_url, "cd": cd_back_count} 2532 | ) 2533 | # print(urls_map[dir_origin]) 2534 | elif cd_back_count == 1: 2535 | urls_map[dir_origin]["username"] = True 2536 | 2537 | else: 2538 | urls_map[dir_origin] = { 2539 | "info": [{"url": input_url, "cd": cd_back_count}], 2540 | "username": True if (cd_back_count == 1) else False, 2541 | } 2542 | break # Only read headers 2543 | 2544 | pre_calc_total = 0 2545 | for i, (dir_origin, map_d) in enumerate(urls_map.items()): 2546 | got_username = map_d["username"] 2547 | for info in map_d["info"]: 2548 | if got_username and info["cd"] == 2: 2549 | # print('Skip board ' + info['url'] + ' since got username already.') 2550 | continue 2551 | pre_calc_total += 1 2552 | real_run_index = 1 2553 | for i, (dir_origin, map_d) in enumerate(urls_map.items()): 2554 | os.chdir(dir_origin) 2555 | got_username = map_d["username"] 2556 | for info in map_d["info"]: 2557 | if got_username and info["cd"] == 2: 2558 | # print('Skip board ' + info['url'] + ' since got username already.') 2559 | continue 2560 | # if info['cd'] == 2: 2561 | # print('THIS board can use bcoz no username!') 2562 | print( 2563 | "\n" 2564 | + ANSI_BLUE 2565 | + "[U] Updating [ " 2566 | + str(real_run_index) 2567 | + " / " 2568 | + str(pre_calc_total) 2569 | + " ] \n" 2570 | + ANSI_END_COLOR 2571 | + ANSI_BLUE 2572 | + "[U] Changed to directory: " 2573 | + str(dir_origin).rstrip(os.sep) 2574 | + os.sep 2575 | + ANSI_END_COLOR 2576 | ) 2577 | real_run_index += 1 2578 | input_url = info["url"] 2579 | # print('run URL:' + input_url) 2580 | while 1: 2581 | try: 2582 | run_library_main( 2583 | input_url, 2584 | ".", 2585 | arg_thread_max, 2586 | arg_cut, 2587 | False, 2588 | False, 2589 | False, 2590 | True, 2591 | arg_rescrape, 2592 | arg_img_only, 2593 | arg_v_only, 2594 | False, 2595 | arg_https_proxy, 2596 | arg_http_proxy, 2597 | arg_cookies, 2598 | ) 2599 | break 2600 | except requests.exceptions.ReadTimeout: 2601 | cprint( 2602 | "".join( 2603 | [ 2604 | HIGHER_RED, 2605 | "{}".format( 2606 | "\n[" 2607 | + x_tag 2608 | + "] [U] Suddenly not able to connect. Please check your network.\n" 2609 | ), 2610 | ] 2611 | ), 2612 | attrs=BOLD_ONLY, 2613 | end="", 2614 | ) 2615 | time.sleep(5) 2616 | except requests.exceptions.ConnectionError: 2617 | cprint( 2618 | "".join( 2619 | [ 2620 | HIGHER_RED, 2621 | "{}".format( 2622 | "\n[" 2623 | + x_tag 2624 | + "] [U] Not able to connect. Please check your network.\n" 2625 | ), 2626 | ] 2627 | ), 2628 | attrs=BOLD_ONLY, 2629 | end="", 2630 | ) 2631 | time.sleep(5) 2632 | 2633 | 2634 | # Caller script example: 2635 | # import importlib 2636 | # pin_dl = importlib.import_module('pinterest-downloader') 2637 | # pin_dl.run_library_main('antonellomiglio/computer', '.', 0, -1, False, False, False, False, False, False, False, False, None, None) 2638 | 2639 | 2640 | def run_library_main( 2641 | arg_path: str, 2642 | arg_dir: str, 2643 | arg_thread_max: int, 2644 | arg_cut: int, 2645 | arg_board_timestamp: bool, 2646 | arg_log_timestamp: bool, 2647 | arg_force: bool, 2648 | arg_exclude_section: bool, 2649 | arg_rescrape: bool, 2650 | arg_img_only: bool, 2651 | arg_v_only: bool, 2652 | arg_update_all: bool, 2653 | arg_https_proxy: str, 2654 | arg_http_proxy: str, 2655 | arg_cookies: str, 2656 | ): 2657 | 2658 | # Not feasible update based on latest pin if v/img only 2659 | # , unless download zero size img if video only(vice-versa) which seems not desired. 2660 | if arg_img_only or arg_v_only: 2661 | arg_rescrape = True 2662 | 2663 | if arg_update_all: 2664 | return update_all( 2665 | arg_thread_max, 2666 | arg_cut, 2667 | arg_rescrape, 2668 | arg_img_only, 2669 | arg_v_only, 2670 | arg_https_proxy, 2671 | arg_http_proxy, 2672 | arg_cookies, 2673 | ) 2674 | 2675 | start_time = int(time.time()) 2676 | 2677 | if not arg_path: 2678 | return quit("Path cannot be empty. ") 2679 | 2680 | proxies = dict(http=arg_http_proxy, https=arg_https_proxy) 2681 | cookies = str(arg_cookies) 2682 | print("[i] User Agent: " + UA) 2683 | 2684 | arg_path = arg_path.strip() 2685 | if arg_path.startswith("https://pin.it/"): 2686 | print("[i] Try to expand shorten url") 2687 | SHARE_SESSION = get_session(0, proxies, cookies) 2688 | r = SHARE_SESSION.get(arg_path, timeout=(15, 15)) 2689 | if (r.status_code == 200) and "/sent" in r.url: 2690 | arg_path = r.url.split("/sent")[0] 2691 | print("[i] Pin url is: " + arg_path + "/") # may err without trailing '/' 2692 | 2693 | url_path = arg_path.split("?")[0].split("#")[0] 2694 | # Convert % format of unicode url when copied from Firefox 2695 | # This is important especially section need compare the section name later 2696 | url_path = unquote(url_path).rstrip("/") 2697 | if "://" in url_path: 2698 | url_path = "/".join(url_path.split("/")[3:]) 2699 | if not url_path: 2700 | return quit( 2701 | "{} {} {}".format( 2702 | "\n[" + x_tag + "] Neither username/boardname nor valid link: ", 2703 | arg_path, 2704 | "\n", 2705 | ) 2706 | ) 2707 | url_path = url_path.lstrip("/") 2708 | slash_path = url_path.split("/") 2709 | if "." in slash_path[0]: 2710 | # Impossible dot in username, so it means host without https:// and nid remove 2711 | slash_path = slash_path[1:] 2712 | if len(slash_path) == 0: 2713 | return quit( 2714 | "{} {} {}".format( 2715 | "\n[" + x_tag + "] Neither username/boardname nor valid link: ", 2716 | arg_path, 2717 | "\n", 2718 | ) 2719 | ) 2720 | elif len(slash_path) > 3: 2721 | return quit( 2722 | "[!] Something wrong with Pinterest URL. Please report this issue at https://github.com/limkokhole/pinterest-downloader/issues , thanks." 2723 | ) 2724 | 2725 | fs_f_max = None 2726 | if IS_WIN: 2727 | # if arg_extended_len >= 0: 2728 | # fs_f_max = arg_extended_len 2729 | arg_el = True 2730 | # else: [DEPRECATED] now always -el now AND Windows 259 - \\?\ == 255 normal Linux 2731 | fs_f_max = WIN_MAX_PATH 2732 | else: 2733 | arg_el = False 2734 | # 255 bytes is normaly fs max, 242 is docker max, 143 bytes is eCryptfs max 2735 | # https://github.com/moby/moby/issues/1413 , https://unix.stackexchange.com/questions/32795/ 2736 | # To test eCryptfs: https://unix.stackexchange.com/questions/426950/ 2737 | # If IS_WIN check here then need add \\?\\ for WIN-only 2738 | for fs_f_max_i in (255, 242, 143): 2739 | try: 2740 | with open("A" * fs_f_max_i, "r") as f: 2741 | fs_f_max = ( 2742 | fs_f_max_i # if got really this long A exists will come here 2743 | ) 2744 | break 2745 | except FileNotFoundError: 2746 | # Will throws OSError first if both FileNotFoundError and OSError met 2747 | # , BUT if folder not exist then will throws FileNotFoundError first 2748 | # But current directory already there, so can use this trick 2749 | # In worst case just raise it 2750 | fs_f_max = fs_f_max_i # Normally came here in first loop 2751 | break 2752 | except OSError: # e.g. File name too long 2753 | pass # print('Try next') # Or here first if eCryptfs 2754 | # print('fs filename max len is ' + repr(fs_f_max)) 2755 | # https://github.com/ytdl-org/youtube-dl/pull/25475 2756 | # https://stackoverflow.com/questions/54823541/what-do-f-bsize-and-f-frsize-in-struct-statvfs-stand-for 2757 | if ( 2758 | fs_f_max is None 2759 | ): # os.statvfs ,ay not avaiable in Windows, so lower priority 2760 | # os.statvfs('.').f_frsize - 1 = 4095 # full path max bytes 2761 | fs_f_max = os.statvfs(".").f_namemax 2762 | 2763 | if len(slash_path) == 2: 2764 | # may copy USERNAME/boards/ links 2765 | # _saved and _created only shows instead of boards if logged in, e.g. user maryellengolden 2766 | # pins under _saved, e.g. user maryellengolden 2767 | if slash_path[-1].strip() in ("boards", "_saved", "_created", "pins"): 2768 | slash_path = slash_path[:-1] 2769 | elif slash_path[-2].strip() == "pin": 2770 | print("[i] Job is download video/image of single pin page.") 2771 | pin_id = slash_path[-1] # bk first before reset 2772 | slash_path = [] # reset for later in case exception 2773 | PIN_SESSION = get_session(0, proxies, cookies) 2774 | IMG_SESSION = get_session(3, proxies, cookies) 2775 | V_SESSION = get_session(4, proxies, cookies) 2776 | get_pin_info( 2777 | pin_id.strip(), 2778 | arg_log_timestamp, 2779 | url_path, 2780 | arg_force, 2781 | arg_img_only, 2782 | arg_v_only, 2783 | arg_dir, 2784 | arg_cut, 2785 | arg_el, 2786 | fs_f_max, 2787 | IMG_SESSION, 2788 | V_SESSION, 2789 | PIN_SESSION, 2790 | proxies, 2791 | cookies, 2792 | False, 2793 | ) 2794 | 2795 | if len(slash_path) == 3: 2796 | sec_path = "/".join(slash_path) 2797 | board_path = "/".join(slash_path[:-1]) 2798 | print( 2799 | "[i] Job is download single section by username/boardname/section: {}".format( 2800 | sec_path 2801 | ) 2802 | ) 2803 | # Will err if try to create section by naming 'more_ideas' 2804 | if (slash_path[-3] in ("search", "categories", "topics")) or ( 2805 | slash_path[-1] in ["more_ideas"] 2806 | ): 2807 | return quit( 2808 | "{}".format( 2809 | "\n[" 2810 | + x_tag 2811 | + "] Search, Categories, Topics, more_ideas are not supported.\n" 2812 | ) 2813 | ) 2814 | board = get_board_info( 2815 | sec_path, False, slash_path[-1], board_path, proxies, cookies 2816 | ) # need_get_section's True/False not used 2817 | try: 2818 | PIN_SESSION = get_session(0, proxies, cookies) 2819 | IMGS_SESSION = get_session(2, proxies, cookies) 2820 | IMG_SESSION = get_session(3, proxies, cookies) 2821 | V_SESSION = get_session(4, proxies, cookies) 2822 | fetch_imgs( 2823 | board, 2824 | slash_path[-3], 2825 | slash_path[-2], 2826 | slash_path[-1], 2827 | False, 2828 | arg_board_timestamp, 2829 | arg_log_timestamp, 2830 | url_path, 2831 | arg_force, 2832 | arg_rescrape, 2833 | arg_img_only, 2834 | arg_v_only, 2835 | arg_dir, 2836 | arg_thread_max, 2837 | IMGS_SESSION, 2838 | IMG_SESSION, 2839 | V_SESSION, 2840 | PIN_SESSION, 2841 | proxies, 2842 | cookies, 2843 | arg_cut, 2844 | arg_el, 2845 | fs_f_max, 2846 | ) 2847 | except KeyError: 2848 | return quit(traceback.format_exc()) 2849 | 2850 | elif len(slash_path) == 2: 2851 | board_path = "/".join(slash_path) 2852 | print( 2853 | "[i] Job is download single board by username/boardname: {}".format( 2854 | board_path 2855 | ) 2856 | ) 2857 | if slash_path[-2] in ("search", "categories", "topics"): 2858 | return quit( 2859 | "{}".format( 2860 | "\n[" + x_tag + "] Search, Categories and Topics not supported.\n" 2861 | ) 2862 | ) 2863 | board, sections = get_board_info( 2864 | board_path, arg_exclude_section, None, None, proxies, cookies 2865 | ) 2866 | try: 2867 | PIN_SESSION = get_session(0, proxies, cookies) 2868 | IMGS_SESSION = get_session(2, proxies, cookies) 2869 | IMG_SESSION = get_session(3, proxies, cookies) 2870 | V_SESSION = get_session(4, proxies, cookies) 2871 | fetch_imgs( 2872 | board, 2873 | slash_path[-2], 2874 | slash_path[-1], 2875 | None, 2876 | False, 2877 | arg_board_timestamp, 2878 | arg_log_timestamp, 2879 | url_path, 2880 | arg_force, 2881 | arg_rescrape, 2882 | arg_img_only, 2883 | arg_v_only, 2884 | arg_dir, 2885 | arg_thread_max, 2886 | IMGS_SESSION, 2887 | IMG_SESSION, 2888 | V_SESSION, 2889 | PIN_SESSION, 2890 | proxies, 2891 | cookies, 2892 | arg_cut, 2893 | arg_el, 2894 | fs_f_max, 2895 | ) 2896 | if (not arg_exclude_section) and sections: 2897 | sec_c = len(sections) 2898 | print( 2899 | "[i] Trying to get " 2900 | + str(sec_c) 2901 | + " section{}".format("s" if sec_c > 1 else "") 2902 | ) 2903 | for sec in sections: 2904 | sec_path = board_path + "/" + sec["slug"] 2905 | board = get_board_info( 2906 | sec_path, False, sec["slug"], board_path, proxies, cookies 2907 | ) # False not using bcoz sections not [] already 2908 | fetch_imgs( 2909 | board, 2910 | slash_path[-2], 2911 | slash_path[-1], 2912 | sec["slug"], 2913 | False, 2914 | arg_board_timestamp, 2915 | arg_log_timestamp, 2916 | url_path, 2917 | arg_force, 2918 | arg_rescrape, 2919 | arg_img_only, 2920 | arg_v_only, 2921 | arg_dir, 2922 | arg_thread_max, 2923 | IMGS_SESSION, 2924 | IMG_SESSION, 2925 | V_SESSION, 2926 | PIN_SESSION, 2927 | proxies, 2928 | cookies, 2929 | arg_cut, 2930 | arg_el, 2931 | fs_f_max, 2932 | ) 2933 | 2934 | except KeyError: 2935 | return quit(traceback.format_exc()) 2936 | 2937 | elif len(slash_path) == 1: 2938 | print("[i] Job is download all boards by username: {}".format(slash_path[-1])) 2939 | if slash_path[-1] in ("search", "categories", "topics"): 2940 | return quit( 2941 | "{}".format( 2942 | "\n[" + x_tag + "] Search, Categories and Topics not supported.\n" 2943 | ) 2944 | ) 2945 | try: 2946 | boards = fetch_boards(slash_path[-1], proxies, cookies) 2947 | PIN_SESSION = get_session(0, proxies, cookies) 2948 | IMGS_SESSION = get_session(2, proxies, cookies) 2949 | IMG_SESSION = get_session(3, proxies, cookies) 2950 | V_SESSION = get_session(4, proxies, cookies) 2951 | # Multiple logs saved inside relevant board dir 2952 | for index, board in enumerate(boards): 2953 | if "name" not in board: 2954 | print("Skip no name") 2955 | continue 2956 | 2957 | # dj(board) 2958 | # E.g. /example/commodore-computers/ need trim to example/commodore-computers 2959 | board_path = board["url"].strip("/") 2960 | # fetch_imgs() should use url style `A-B`` instead of Title `A B``(board['name']) 2961 | # print(board_path) 2962 | if "/" in board_path: 2963 | board_slug = board_path.split("/")[1] 2964 | is_main_board = False 2965 | else: # username main board 2966 | board_slug = board_path 2967 | is_main_board = True 2968 | board["owner"]["id"] = board["id"] # hole: [todo:0] remove this 2969 | 2970 | fetch_imgs( 2971 | board, 2972 | slash_path[-1], 2973 | board_slug, 2974 | None, 2975 | is_main_board, 2976 | arg_board_timestamp, 2977 | arg_log_timestamp, 2978 | url_path, 2979 | arg_force, 2980 | arg_rescrape, 2981 | arg_img_only, 2982 | arg_v_only, 2983 | arg_dir, 2984 | arg_thread_max, 2985 | IMGS_SESSION, 2986 | IMG_SESSION, 2987 | V_SESSION, 2988 | PIN_SESSION, 2989 | proxies, 2990 | cookies, 2991 | arg_cut, 2992 | arg_el, 2993 | fs_f_max, 2994 | ) 2995 | if (not arg_exclude_section) and (board["section_count"] > 0): 2996 | sec_c = board["section_count"] 2997 | print( 2998 | "[i] Trying to get " 2999 | + str(sec_c) 3000 | + " section{}".format("s" if sec_c > 1 else "") 3001 | ) 3002 | # ags.es placeholder below always False bcoz above already check (not arg_exclude_section) 3003 | board, sections = get_board_info( 3004 | board_path, False, None, None, proxies, cookies 3005 | ) 3006 | for sec in sections: 3007 | sec_path = board_path + "/" + sec["slug"] 3008 | board = get_board_info( 3009 | sec_path, False, sec["slug"], board_path, proxies, cookies 3010 | ) 3011 | sec_uname, sec_bname = board_path.split("/") 3012 | fetch_imgs( 3013 | board, 3014 | sec_uname, 3015 | sec_bname, 3016 | sec["slug"], 3017 | False, 3018 | arg_board_timestamp, 3019 | arg_log_timestamp, 3020 | url_path, 3021 | arg_force, 3022 | arg_rescrape, 3023 | arg_img_only, 3024 | arg_v_only, 3025 | arg_dir, 3026 | arg_thread_max, 3027 | IMGS_SESSION, 3028 | IMG_SESSION, 3029 | V_SESSION, 3030 | PIN_SESSION, 3031 | proxies, 3032 | cookies, 3033 | arg_cut, 3034 | arg_el, 3035 | fs_f_max, 3036 | ) 3037 | 3038 | except KeyError: 3039 | return quit(traceback.format_exc()) 3040 | 3041 | end_time = int(time.time()) 3042 | try: 3043 | print("[i] Time Spent: " + str(timedelta(seconds=end_time - start_time))) 3044 | except OverflowError: 3045 | # after 999999999 days OR ~2,739,726 years, test case: str(timedelta(seconds= 86400000000000)) 3046 | print("Can you revive me please? Thanks.") 3047 | 3048 | 3049 | def run_direct_main(): 3050 | 3051 | arg_parser = argparse.ArgumentParser( 3052 | description="Download ALL board/section from " 3053 | + pinterest_logo 3054 | + "interest by username, username/boardname, username/boardname/section or link. Support image and video.\n\ 3055 | Filename compose of PinId_Title_Description_Date.Ext. PinId always there while the rest is optional.\n\ 3056 | If filename too long will endswith ... and you can check details in log-pinterest-downloader.log file." 3057 | ) 3058 | arg_parser.add_argument( 3059 | "path", 3060 | nargs="?", 3061 | help="Pinterest username, or username/boardname, or username/boardname/section, or relevant link( /pin/ may include created time ).", 3062 | ) 3063 | arg_parser.add_argument( 3064 | "-d", 3065 | "--dir", 3066 | dest="dir", 3067 | type=str, 3068 | default="images", 3069 | help='Specify folder path/name to store. Default is "images".', 3070 | ) 3071 | arg_parser.add_argument( 3072 | "-j", 3073 | "--job", 3074 | dest="thread_max", 3075 | type=int, 3076 | default=0, 3077 | help="Specify maximum threads when downloading images. Default is number of processors on the machine, multiplied by 5.", 3078 | ) 3079 | # Username or Boardname might longer than 255 bytes 3080 | # Username max is 100(not allow 3 bytes unicode) 3081 | # Section/Boardname(Title) max are 50(count as singe char(i.e. 3 bytes unicode same as 1 byte ASCII), not bytes) 3082 | # Board not possible create 4 bytes UTF-8 (become empty // or trim, lolr) 3083 | # Description is 500, source_url(link) is 2048 (but not able save even though no error) 3084 | # Pin Title is max 100 , which emoji count as 2 bytes per glyph (Chinese char still count as 1 per glyph) 3085 | # [UPDATE] now --cut is per glyph, not byte, which is most users expected 3086 | # , whereas bytes should detect by program (255/242/143) or raise by simply use -c to solve 3087 | arg_parser.add_argument( 3088 | "-c", 3089 | "--cut", 3090 | type=int, 3091 | default=-1, 3092 | help='Specify maximum length of "_TITLE_DESCRIPTION_DATE"(exclude ...) in filename.', 3093 | ) 3094 | # Disable since better become default (so no more calc full path for 259(-el is exclude \\?\ = 255), instead only single path 259): 3095 | # arg_parser.add_argument('-el', '--extended-length', dest='extended_len', type=int, default=-1, help='Specify Windows extended-length by prefix \\\\?\\ in output path. E.g. 339 work in my system.') 3096 | arg_parser.add_argument( 3097 | "-bt", 3098 | "--board-timestamp", 3099 | dest="board_timestamp", 3100 | action="store_true", 3101 | help="Suffix board directory name with unique timestamp.", 3102 | ) 3103 | arg_parser.add_argument( 3104 | "-lt", 3105 | "--log-timestamp", 3106 | dest="log_timestamp", 3107 | action="store_true", 3108 | help="Suffix log-pinterest-downloader.log filename with unique timestamp. Default filename is log-pinterest-downloader.log.\n\ 3109 | Note: Pin id without Title/Description/Link/Metadata/Created_at will not write to log.", 3110 | ) 3111 | arg_parser.add_argument( 3112 | "-co", 3113 | "--cookies", 3114 | help="Set the cookies file to be used to login into Pinterest. Useful for personal secret boards.", 3115 | ) 3116 | arg_parser.add_argument( 3117 | "-f", 3118 | "--force", 3119 | action="store_true", 3120 | help="Force re-download even if image already exist. Normally used with -rs", 3121 | ) 3122 | # Need reverse images order(previously is latest to oldest) to avoid abort this need re-download in-between missing images. 3123 | arg_parser.add_argument( 3124 | "-rs", 3125 | "--re-scrape", 3126 | dest="rescrape", 3127 | action="store_true", 3128 | help="Default is only fetch new images since latest(highest) Pin ID local image to speed up update process.\n\ 3129 | This option disable that behavior and re-scrape all, use it when you feel missing images somewhere or incomplete download.\n\ 3130 | This issue is because Pinterest only lists reordered as you see in the webpage which possible newer images reorder below local highest Pin ID image and missed unless fetch all pages.", 3131 | ) 3132 | arg_parser.add_argument( 3133 | "-ua", 3134 | "--update-all", 3135 | dest="update_all", 3136 | action="store_true", 3137 | help="Update all folders in current directory recursively based on theirs urls-pinterest-downloader.urls.\n\ 3138 | New section will not download. New board may download if previously download by username.\n\ 3139 | Options other than -c, -j, -rs, -io/vo, -ps/p will ignore.\n\ 3140 | -c must same if provided previously or else filename not same will re-download. Not recommend to use -c at all.", 3141 | ) 3142 | arg_parser.add_argument( 3143 | "-es", 3144 | "--exclude-section", 3145 | dest="exclude_section", 3146 | action="store_true", 3147 | help="Exclude sections if download from username or board.", 3148 | ) 3149 | arg_parser.add_argument( 3150 | "-io", 3151 | "--image-only", 3152 | dest="img_only", 3153 | action="store_true", 3154 | help="Download image only. Assumed -rs", 3155 | ) 3156 | arg_parser.add_argument( 3157 | "-vo", 3158 | "--video-only", 3159 | dest="v_only", 3160 | action="store_true", 3161 | help="Download video only. Assumed -rs", 3162 | ) 3163 | arg_parser.add_argument("-ps", "--https-proxy", help="Set proxy for https.") 3164 | arg_parser.add_argument("-p", "--http-proxy", help="Set proxy for http.") 3165 | try: 3166 | args, remaining = arg_parser.parse_known_args() 3167 | except SystemExit: # Normal if --help, catch here to avoid main() global ex catch it 3168 | return 3169 | if remaining: 3170 | return quit( 3171 | [ 3172 | "You type redundant options: " + " ".join(remaining), 3173 | "Please check your command or --help to see options manual.", 3174 | ] 3175 | ) 3176 | 3177 | if not args.update_all and not args.path: 3178 | args.path = input("Username/Boardname/Section or Link: ").strip() 3179 | 3180 | return run_library_main( 3181 | args.path, 3182 | args.dir, 3183 | args.thread_max, 3184 | args.cut, 3185 | args.board_timestamp, 3186 | args.log_timestamp, 3187 | args.force, 3188 | args.exclude_section, 3189 | args.rescrape, 3190 | args.img_only, 3191 | args.v_only, 3192 | args.update_all, 3193 | args.https_proxy, 3194 | args.http_proxy, 3195 | args.cookies, 3196 | ) 3197 | 3198 | 3199 | if __name__ == "__main__": 3200 | try: 3201 | run_direct_main() 3202 | except requests.exceptions.ReadTimeout: 3203 | cprint( 3204 | "".join( 3205 | [ 3206 | HIGHER_RED, 3207 | "{}".format( 3208 | "\n[" 3209 | + x_tag 3210 | + "] Suddenly not able to connect. Please check your network.\n" 3211 | ), 3212 | ] 3213 | ), 3214 | attrs=BOLD_ONLY, 3215 | end="", 3216 | ) 3217 | quit("") 3218 | except requests.exceptions.ConnectionError: 3219 | cprint( 3220 | "".join( 3221 | [ 3222 | HIGHER_RED, 3223 | "{}".format( 3224 | "\n[" 3225 | + x_tag 3226 | + "] Not able to connect. Please check your network.\n" 3227 | ), 3228 | ] 3229 | ), 3230 | attrs=BOLD_ONLY, 3231 | end="", 3232 | ) 3233 | quit("") 3234 | except: 3235 | quit(traceback.format_exc()) 3236 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | telethon==1.29.3 2 | telethon-tgcrypto 3 | hachoir 4 | pyquery 5 | urllib3 6 | requests 7 | pymongo 8 | dnspython 9 | aiohttp 10 | brotli 11 | colorama 12 | lxml 13 | requests 14 | termcolor 15 | pysocks 16 | fake-useragent 17 | termcolor 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /runtime.txt: -------------------------------------------------------------------------------- 1 | python-3.10.4 2 | --------------------------------------------------------------------------------