├── csv converter.png ├── .gitignore ├── package.json ├── README.md └── index.js /csv converter.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Poudlardo/Allocine2Letterboxd/HEAD/csv converter.png -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | allocine-films-critiques.csv 3 | allocine-films-a-voir.csv 4 | allocine-films.csv 5 | .vscode -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "allocine-to-letterboxd", 3 | "version": "1.0.0", 4 | "description": "tool to export lists from allocine to letterboxd app", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1" 8 | }, 9 | "author": "", 10 | "license": "ISC", 11 | "dependencies": { 12 | "csv-writer": "^1.6.0", 13 | "fast-csv": "^4.3.6", 14 | "puppeteer": "^24.1.0", 15 | "readline-sync": "^1.4.10" 16 | }, 17 | "type": "module", 18 | "devDependencies": { 19 | "nodemon": "^3.0.1" 20 | }, 21 | "keywords": [] 22 | } 23 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Allocine2Letterboxd 2 | 3 | Script qui permet d'importer sur Letterboxd sa liste de films vus, notés et critiqués sur Allociné, ainsi que sa liste des films à voir (watchlist). Tous les films, leurs notes, et leurs éventuelles critiques écrites seront sauvegardés dans un fichier .csv à importer sur Letterboxd. 4 | 5 |

6 | 7 |

8 | 9 | ## Utilisation du script 10 | 11 | 1. Installe les programmes ci-dessous 12 | 13 | - [Git](https://git-scm.com/downloads) 14 | - [Node.js](https://nodejs.org/en/download/) 15 | 16 | 2. Ouvre un terminal et copie ces lignes de commande à l'endroit où tu souhaites télécharger le dossier du script 17 | ``` 18 | git clone https://github.com/Poudlardo/Allocine2Letterboxd.git 19 | cd Allocine2Letterboxd/ 20 | npm install 21 | node index.js 22 | ``` 23 | 24 | 3. Rends-toi sur ton profil [Allociné](https://mon.allocine.fr/mes-films/envie-de-voir/) > Bouton 'Partager' > Copie le lien (de type : [https://www.allocine.fr/membre-Z20220820103049710645480/films/](https://www.allocine.fr/membre-Z20220820103049710645480/films/)) 25 | 26 | 4. Entre le lien dans le terminal une fois la question posée, et attends la fin du script. 27 | 28 | 5. Après quelques secondes, le fichier `allocine-films-critiques.csv` (ou `allocine-films.csv` si aucune critique n'a été trouvée) est généré dans le dossier /Allocine2Letterboxd, ainsi que `allocine-films-a-voir.csv` pour la liste de films à voir. Rends-toi sur la [page d'import Letterboxd](https://letterboxd.com/import/) des films vus, ou la [page d'import des films à voir](https://letterboxd.com/watchlist/), pour charger le fichier sur ton profil. 29 | 30 | ## Tu rencontres un problème ? 31 | 32 | [Ouvre un ticket](https://github.com/Poudlardo/Allocine2Letterboxd/issues/new/choose) et décris ton problème en quelques lignes avec une capture d'écran. 
// index.js — Allocine2Letterboxd
//
// Scrapes an Allociné member profile (rated films, written reviews and the
// "envie de voir" wishlist) with Puppeteer and writes CSV files:
//   - allocine-films-critiques.csv (Title, Rating, Review) when reviews exist
//   - allocine-films.csv           (Title, Rating) otherwise
//   - allocine-films-a-voir.csv    (Title) for the wishlist

import puppeteer from 'puppeteer';
import { createObjectCsvWriter } from 'csv-writer';
import readlineSync from 'readline-sync';
import { execSync } from 'child_process';
import fs from 'fs';
import path from 'path';
import os from 'os';

// CSS selectors used on the Allociné pages.
const SELECTORS = Object.freeze({
  filmItem: '.card.entity-card-simple.userprofile-entity-card-simple',
  filmTitle: '.meta-title.meta-title-link',
  filmRating: '.rating-mdl',
  tabReview: 'span.roller-item[title="Critiques"]',
  filmReviewBlock: '.review-card',
  filmReview: '.content-txt.review-card-content',
  filmReviewLirePlus: '.blue-link.link-more',
  filmTitleOnReview: 'a.xXx',
  nextPage: '.button.button-md.button-primary-full.button-right',
  nextPageAlt: 'button[title="Page suivante"]',
  pagination: '.pagination-item-holder',
  popupAcceptCookies: '.jad_cmp_paywall_button',
});

/**
 * Resolve after `ms` milliseconds.
 * @param {number} ms
 * @returns {Promise<void>}
 */
const delay = (ms) => new Promise((resolve) => setTimeout(resolve, ms));

/**
 * Return the directory in which Puppeteer caches downloaded browsers.
 *
 * Puppeteer uses `PUPPETEER_CACHE_DIR` when set and otherwise
 * `<home>/.cache/puppeteer` on every operating system, so no per-platform
 * branching is needed.
 * NOTE(review): a project-level `.puppeteerrc` `cacheDirectory` override is
 * not taken into account here.
 * @returns {string}
 */
function getPuppeteerCacheDir() {
  return process.env.PUPPETEER_CACHE_DIR || path.join(os.homedir(), '.cache', 'puppeteer');
}

/**
 * Check whether the Puppeteer cache contains a folder whose name includes
 * `browserName` (case-insensitive).
 * @param {string} browserName e.g. 'firefox' or 'chrome'
 * @returns {boolean}
 */
function isBrowserInstalled(browserName) {
  const cacheDir = getPuppeteerCacheDir();

  if (!fs.existsSync(cacheDir)) {
    return false;
  }

  try {
    const wanted = browserName.toLowerCase();
    return fs.readdirSync(cacheDir).some((folder) => folder.toLowerCase().includes(wanted));
  } catch (error) {
    console.log(`⚠️ Could not read cache directory: ${error.message}`);
    return false;
  }
}

/**
 * Install a browser through `npx puppeteer browsers install`.
 * Only call this with trusted, hard-coded browser names: the name is
 * interpolated into a shell command.
 * @param {string} browserName
 * @returns {Promise<boolean>} true on success, false when the command failed
 */
async function installBrowser(browserName) {
  console.log(`📦 Installing ${browserName} for Puppeteer...`);
  try {
    execSync(`npx puppeteer browsers install ${browserName}`, {
      stdio: 'inherit',
      shell: true,
      env: { ...process.env, PUPPETEER_SKIP_DOWNLOAD: 'false' },
    });
    console.log(`✅ ${browserName} installed successfully!`);
    return true;
  } catch (error) {
    console.error(`❌ Failed to install ${browserName}:`, error.message);
    return false;
  }
}

/**
 * Launch a headless browser: Firefox first, Chrome as a fallback. Missing
 * browsers are installed on the fly.
 * @returns {Promise<import('puppeteer').Browser>}
 * @throws {Error} when Chrome can be neither installed nor launched
 */
async function launchBrowser() {
  const commonArgs = [
    '--no-sandbox',
    '--disable-setuid-sandbox',
    '--disable-dev-shm-usage',
    '--disable-gpu',
  ];

  console.log('🦊 Checking Firefox availability...');

  let firefoxAvailable = isBrowserInstalled('firefox');
  if (!firefoxAvailable) {
    console.log('📥 Firefox not found in cache, installing...');
    firefoxAvailable = await installBrowser('firefox');
  }

  if (firefoxAvailable) {
    try {
      const browser = await puppeteer.launch({
        browser: 'firefox',
        headless: true,
        args: commonArgs,
      });
      console.log('✅ Firefox launched successfully');
      return browser;
    } catch (error) {
      // Not fatal: fall through to Chrome.
      console.log('⚠️ Firefox launch failed:', error.message);
    }
  }

  console.log('🔵 Switching to Chrome...');

  if (!isBrowserInstalled('chrome')) {
    console.log('📥 Chrome not found in cache, installing...');
    const chromeInstalled = await installBrowser('chrome');

    if (!chromeInstalled) {
      throw new Error('Failed to install any browser. Please check your internet connection and permissions.');
    }
  }

  try {
    // `headless: true` is the documented value for Puppeteer 24
    // (`boolean | 'shell'`); the legacy 'new' literal is no longer listed.
    const browser = await puppeteer.launch({ headless: true, args: commonArgs });
    console.log('✅ Chrome launched successfully');
    return browser;
  } catch (error) {
    try {
      const browser = await puppeteer.launch({ headless: true, args: ['--no-sandbox'] });
      console.log('✅ Chrome launched successfully (minimal args)');
      return browser;
    } catch (fallbackError) {
      // Report the first failure, keep the second one reachable via `cause`.
      throw new Error(`Failed to launch Chrome: ${error.message}`, { cause: fallbackError });
    }
  }
}

/**
 * Click the cookie-consent button when it is present, then wait `waitMs`.
 * @param {import('puppeteer').Page} page
 * @param {number} waitMs pause after the click, in milliseconds
 */
async function dismissCookiePopup(page, waitMs) {
  if (await page.$(SELECTORS.popupAcceptCookies)) {
    await page.click(SELECTORS.popupAcceptCookies);
    await delay(waitMs);
  }
}

/**
 * Resolve `href` against `baseUrl` and return the absolute URL, or null when
 * `href` is empty, unparsable, or not an http(s) link.
 *
 * Resolving against the full page URL (not just its origin) is required for
 * query-only hrefs such as "?page=2".
 * @param {string|null|undefined} href
 * @param {string} baseUrl
 * @returns {string|null}
 */
function resolveHttpUrl(href, baseUrl) {
  if (!href) {
    return null;
  }
  try {
    const resolved = new URL(href, baseUrl);
    return resolved.protocol === 'http:' || resolved.protocol === 'https:' ? resolved.href : null;
  } catch {
    return null;
  }
}

/**
 * Open the "critiques" tab of a profile and wait (best effort) for the first
 * review card.
 * @param {import('puppeteer').Page} page
 * @param {string} url profile URL ending in /films/
 */
async function gotoTabCritiques(page, url) {
  const reviewUrl = url.replace(/\/films\/?$/, '/critiques/films/');
  await page.goto(reviewUrl, {
    waitUntil: 'domcontentloaded',
    timeout: 15000,
  });
  await dismissCookiePopup(page, 600);
  await page.waitForSelector(SELECTORS.filmReviewBlock, { timeout: 8000 }).catch(() => {
    console.log(` ℹ️ Aucune critique trouvée pour ce profil`);
  });
}

/**
 * Extract `{ title, rating }` for every film card on the current page.
 * For each card the first matching selector of `titleSelectors` /
 * `ratingSelectors` is used. The rating is read from a class of the form
 * `nXY` and returned as "X.Y" ("" when absent).
 * @param {import('puppeteer').Page} page
 * @param {{itemSelector: string, titleSelectors: string[], ratingSelectors: string[], errorLabel: string}} options
 * @returns {Promise<Array<{title: string, rating: string}>>}
 */
async function extractFilmsFromPage(page, options) {
  // The callback runs inside the browser page: it must be self-contained and
  // only use the serialised `options` argument.
  return page.evaluate(({ itemSelector, titleSelectors, ratingSelectors, errorLabel }) => {
    const firstMatch = (root, selectors) => {
      for (const selector of selectors) {
        const found = root.querySelector(selector);
        if (found) {
          return found;
        }
      }
      return null;
    };

    const filmList = [];
    for (const el of document.querySelectorAll(itemSelector)) {
      try {
        const titleEl = firstMatch(el, titleSelectors);
        const title = titleEl?.title?.trim() || titleEl?.textContent?.trim() || '';

        let rating = '';
        const ratingEl = firstMatch(el, ratingSelectors);
        if (ratingEl) {
          const match = ratingEl.className.match(/n(\d{2})/);
          if (match) {
            rating = `${match[1][0]}.${match[1][1]}`;
          }
        }

        if (title) {
          filmList.push({ title, rating });
        }
      } catch (err) {
        console.error(errorLabel, err);
      }
    }
    return filmList;
  }, options);
}

/**
 * Scrape every page of the "films" tab of a profile.
 * @param {import('puppeteer').Page} page
 * @param {string} profileUrl profile URL ending in /films/
 * @returns {Promise<Array<{title: string, rating: string}>>}
 */
async function scrapeAllFilms(page, profileUrl) {
  let films = [];
  let url = profileUrl;
  const visitedUrls = new Set();
  let pageNumber = 1;
  let consecutiveErrors = 0;

  while (!visitedUrls.has(url)) {
    visitedUrls.add(url);

    console.log(`📄 Scraping films page ${pageNumber}: ${url}`);

    try {
      await page.goto(url, {
        waitUntil: 'domcontentloaded',
        timeout: 15000,
      });

      await delay(1000);

      try {
        await page.waitForSelector(SELECTORS.filmItem, { timeout: 5000 });
      } catch {
        console.log(` ⚠️ Sélecteur principal non trouvé, recherche d'alternatives...`);

        try {
          await page.waitForSelector('.card', { timeout: 3000 });
        } catch {
          console.log(` ⚠️ Aucun film trouvé sur cette page`);
          break;
        }
      }

      await dismissCookiePopup(page, 1000);
      await delay(1500);

      consecutiveErrors = 0;
    } catch (error) {
      console.log(` ⚠️ Erreur de navigation: ${error.message}`);
      consecutiveErrors++;

      if (consecutiveErrors >= 2) {
        console.log(` ❌ Trop d'erreurs consécutives, arrêt du scraping des films`);
        break;
      }

      // Skip the failing page and try the next one by page number.
      pageNumber++;
      url = `${profileUrl}?page=${pageNumber}`;
      continue;
    }

    let pageFilms = [];
    try {
      pageFilms = await extractFilmsFromPage(page, {
        itemSelector: SELECTORS.filmItem,
        titleSelectors: [SELECTORS.filmTitle],
        ratingSelectors: [SELECTORS.filmRating],
        errorLabel: 'Erreur extraction film:',
      });

      if (pageFilms.length === 0) {
        console.log(` 🔄 Essai avec sélecteur alternatif...`);
        pageFilms = await extractFilmsFromPage(page, {
          itemSelector: '.card',
          titleSelectors: ['.meta-title-link', '[class*="title"]'],
          ratingSelectors: [SELECTORS.filmRating, '[class*="rating"]'],
          errorLabel: 'Erreur extraction film alt:',
        });
      }
    } catch (error) {
      console.log(` ❌ Erreur lors de l'extraction: ${error.message}`);
    }

    films = films.concat(pageFilms);
    console.log(` ✓ ${pageFilms.length} films trouvés sur cette page (total: ${films.length})`);

    console.log(` 🔍 Recherche du bouton page suivante...`);

    const nextPage =
      (await page.$(SELECTORS.nextPage)) ||
      (await page.$(SELECTORS.nextPageAlt)) ||
      (await page.$('a[href*="?page="]'));

    if (!nextPage) {
      console.log(` 📄 Pas de bouton page suivante trouvé - fin de la pagination`);
      break;
    }

    const nextHref = await page
      .evaluate((el) => el.getAttribute('href') || el.href, nextPage)
      .catch(() => null);

    const finalUrl = resolveHttpUrl(nextHref, page.url());
    if (!finalUrl) {
      console.log(` 📄 Plus de pages suivantes - fin de la pagination`);
      break;
    }

    if (visitedUrls.has(finalUrl)) {
      console.log(` 📄 Page déjà visitée - fin de la pagination`);
      break;
    }

    url = finalUrl;
    pageNumber++;
  }

  console.log(`🎬 Total: ${films.length} films extraits.`);
  return films;
}

/**
 * Scrape every page of the "critiques" tab of a profile. Truncated reviews
 * (with a "lire plus" link) are fetched from their dedicated page; when that
 * fails the truncated text is kept.
 * @param {import('puppeteer').Page} page
 * @param {string} profileUrl profile URL ending in /films/
 * @returns {Promise<Array<{title: string, review: string}>>}
 */
async function scrapeAllReviews(page, profileUrl) {
  const reviews = [];
  await gotoTabCritiques(page, profileUrl);

  let currentUrl = page.url();
  const visitedUrls = new Set();
  let pageNum = 1;

  while (true) {
    if (visitedUrls.has(currentUrl)) {
      console.log(`⚠️ URL déjà visitée, arrêt: ${currentUrl}`);
      break;
    }
    visitedUrls.add(currentUrl);

    console.log(`📝 Scraping critiques page ${pageNum}: ${currentUrl}`);

    try {
      await page.waitForSelector(SELECTORS.filmReviewBlock, { timeout: 5000 });
    } catch {
      console.log(`⚠️ Aucune critique trouvée sur la page ${pageNum}`);
      break;
    }

    let pageReviews = [];
    try {
      pageReviews = await page.evaluate(
        ({ blockSelector, titleSelector, textSelector, moreSelector }) =>
          Array.from(document.querySelectorAll(blockSelector), (block) => {
            const titleEl = block.querySelector(titleSelector);
            const reviewEl = block.querySelector(textSelector);
            const lirePlusEl = block.querySelector(moreSelector);
            return {
              filmTitle: titleEl ? titleEl.textContent.trim() : '',
              reviewText: reviewEl ? reviewEl.textContent.trim() : '',
              hasLirePlus: Boolean(lirePlusEl),
              moreUrl: lirePlusEl ? lirePlusEl.href : '',
            };
          }),
        {
          blockSelector: SELECTORS.filmReviewBlock,
          titleSelector: `.review-card-title ${SELECTORS.filmTitleOnReview}`,
          textSelector: SELECTORS.filmReview,
          moreSelector: SELECTORS.filmReviewLirePlus,
        },
      );
    } catch (error) {
      console.log(` ⚠️ Erreur lors de l'extraction des critiques: ${error.message}`);
    }

    if (pageReviews.length === 0) {
      console.log(`⚠️ Aucune critique trouvée sur la page ${pageNum}`);
      break;
    }

    console.log(` ✓ ${pageReviews.length} critiques trouvées sur cette page`);

    for (const { filmTitle, reviewText: truncatedText, hasLirePlus, moreUrl } of pageReviews) {
      let reviewText = truncatedText;

      console.log(` 🎬 ${filmTitle}`);

      if (hasLirePlus && moreUrl) {
        try {
          const originalUrl = page.url();
          await page.goto(moreUrl, { waitUntil: 'domcontentloaded' });
          await page.waitForSelector(SELECTORS.filmReview, { timeout: 2500 }).catch(() => {});
          reviewText = await page
            .$eval(SELECTORS.filmReview, (el) => el.textContent.trim())
            .catch(() => reviewText);
          // Return to the listing so the "next page" button can be found.
          await page.goto(originalUrl, { waitUntil: 'domcontentloaded' });
          await page.waitForSelector(SELECTORS.filmReviewBlock, { timeout: 3000 }).catch(() => {});
        } catch (e) {
          console.log(` ⚠️ Erreur sur 'lire plus', texte tronqué conservé: ${e.message}`);
        }
      }

      reviews.push({
        title: filmTitle,
        // Collapse every whitespace run (newlines included) into one space so
        // that words separated only by a line break are not glued together.
        review: (reviewText ?? '').replace(/\s+/g, ' ').trim(),
      });
    }

    const nextPageButton = await page.$(SELECTORS.nextPage);
    if (!nextPageButton) {
      console.log(`📄 Pas de page suivante trouvée, fin du scraping`);
      break;
    }

    const isClickable = await page.evaluate(
      (button) => button && !button.disabled && button.offsetParent !== null,
      nextPageButton,
    );

    if (!isClickable) {
      console.log(`📄 Bouton page suivante non cliquable, fin du scraping`);
      break;
    }

    try {
      await nextPageButton.click();
      await delay(2000);

      const newUrl = page.url();
      if (newUrl === currentUrl) {
        console.log(`📄 URL inchangée après clic, fin du scraping`);
        break;
      }

      currentUrl = newUrl;
      pageNum++;
    } catch (e) {
      console.log(`❌ Erreur lors du clic sur page suivante: ${e.message}`);
      break;
    }
  }

  return reviews;
}

/**
 * Scrape every page of the "envie de voir" wishlist of a profile.
 * @param {import('puppeteer').Page} page
 * @param {string} profileUrl profile URL ending in /films/
 * @returns {Promise<Array<{Title: string}>>}
 */
async function scrapeWishlist(page, profileUrl) {
  let url = profileUrl.replace(/\/films\/?$/, '/films/envie-de-voir/');
  let wishlistFilms = [];
  const visitedUrls = new Set();

  while (!visitedUrls.has(url)) {
    visitedUrls.add(url);

    console.log(`📋 Scraping wishlist page: ${url}`);
    await page.goto(url, { waitUntil: 'domcontentloaded' });
    await dismissCookiePopup(page, 600);

    const films = await page.evaluate(
      ({ itemSelector, titleSelector }) => {
        const filmList = [];
        for (const el of document.querySelectorAll(itemSelector)) {
          const titleEl = el.querySelector(titleSelector);
          // Same fallback as the films tab: use the link text when the
          // `title` attribute is missing.
          const title = titleEl?.title?.trim() || titleEl?.textContent?.trim() || '';
          if (title) {
            filmList.push({ Title: title });
          }
        }
        return filmList;
      },
      { itemSelector: SELECTORS.filmItem, titleSelector: SELECTORS.filmTitle },
    );

    wishlistFilms = wishlistFilms.concat(films);
    console.log(` ✓ ${films.length} films wishlist trouvés`);

    const nextPage = await page.$(SELECTORS.nextPage);
    if (!nextPage) {
      break;
    }
    const nextHref = await page.evaluate((el) => el.getAttribute('href') || el.href, nextPage);

    const finalUrl = resolveHttpUrl(nextHref, page.url());
    if (!finalUrl || visitedUrls.has(finalUrl)) {
      break;
    }
    url = finalUrl;
  }
  return wishlistFilms;
}

/**
 * Build the lookup key for a title: diacritics stripped, lower-cased.
 * @param {string} title
 * @returns {string}
 */
function normalizeTitle(title) {
  return title.normalize('NFD').replace(/\p{Diacritic}/gu, '').toLowerCase();
}

/**
 * Attach each review to the film with the same normalised title.
 * A Map is used instead of a plain object so that titles such as
 * "constructor" never resolve to inherited Object.prototype members.
 * When several reviews share a title, the last one wins.
 * @param {Array<{title: string, rating: string}>} films
 * @param {Array<{title: string, review: string}>} reviews
 * @returns {Array<{Title: string, Rating: string, Review: string}>}
 */
function mergeFilmsAndReviews(films, reviews) {
  const reviewByTitle = new Map(reviews.map((r) => [normalizeTitle(r.title), r.review]));
  return films.map((f) => ({
    Title: f.title,
    Rating: f.rating,
    Review: reviewByTitle.get(normalizeTitle(f.title)) ?? '',
  }));
}

/**
 * Write `data` to `filename` as a fully-quoted CSV whose columns are `headers`.
 * @param {string} filename
 * @param {string[]} headers column names, also used as record keys
 * @param {Array<Object>} data
 */
async function exportToCsv(filename, headers, data) {
  const csvWriter = createObjectCsvWriter({
    path: filename,
    header: headers.map((h) => ({ id: h, title: h })),
    alwaysQuote: true,
  });
  await csvWriter.writeRecords(data);
}

/**
 * Accept only https://www.allocine.fr/membre-<id>/films[/] URLs.
 * @param {string} url
 * @returns {boolean}
 */
function isValidAllocineProfileUrl(url) {
  return /^https:\/\/www\.allocine\.fr\/membre-\w+\/films\/?$/i.test(url);
}

/** Print platform, Node.js version and the Puppeteer cache directory. */
function displayPlatformInfo() {
  console.log(`📊 Platform: ${os.platform()} (${os.arch()})`);
  console.log(`📦 Node.js: ${process.version}`);
  console.log(`📁 Cache directory: ${getPuppeteerCacheDir()}`);
}

// Entry point: ask for the profile URL, scrape, export.
(async () => {
  console.log('🎬 Allocine2Letterboxd Scraper');
  console.log('=============================');
  displayPlatformInfo();
  console.log('=============================\n');

  // Trim the answer: pasted links often carry stray whitespace that would
  // otherwise fail validation.
  const url = readlineSync
    .question('Copie-colle ici le lien de ton profil Allociné (format : https://www.allocine.fr/membre-.../films/) :\n> ')
    .trim();
  if (!isValidAllocineProfileUrl(url)) {
    console.error('❌ Lien Allociné invalide !');
    process.exit(1);
  }

  console.log('⏳ Scraping en cours, merci de patienter...');

  let browser;
  try {
    browser = await launchBrowser();
  } catch (error) {
    console.error('❌ Impossible de lancer un navigateur:', error.message);
    console.error('💡 Suggestion: Essayez d\'installer manuellement avec:');
    console.error(' npx puppeteer browsers install chrome');
    process.exit(1);
  }

  try {
    // Created inside the try so the browser is closed even if this fails.
    const page = await browser.newPage();

    console.log('\n📖 === SCRAPING DES FILMS ===');
    const films = await scrapeAllFilms(page, url);
    console.log(`\n✅ Scraping des films terminé.`);

    console.log('\n📝 === SCRAPING DES CRITIQUES ===');
    let reviews = [];
    try {
      reviews = await scrapeAllReviews(page, url);
      if (reviews.length) {
        console.log(`📝 ${reviews.length} critiques extraites au total.`);
      } else {
        console.log("⚠️ Aucune critique trouvée sur ce profil.");
      }
    } catch (error) {
      console.log(`⚠️ Erreur lors du scraping des critiques: ${error.message}`);
      console.log(" Continuation avec les films uniquement...");
    }

    console.log('\n📋 === SCRAPING DE LA WISHLIST ===');
    try {
      const wishlistFilms = await scrapeWishlist(page, url);
      if (wishlistFilms.length) {
        console.log(`📋 ${wishlistFilms.length} films "à voir" extraits.`);
        await exportToCsv('allocine-films-a-voir.csv', ['Title'], wishlistFilms);
        console.log('✅ Export wishlist : allocine-films-a-voir.csv');
      } else {
        console.log('⚠️ Aucun film dans la wishlist.');
      }
    } catch (error) {
      console.log(`⚠️ Erreur lors du scraping de la wishlist: ${error.message}`);
    }

    console.log('\n💾 === EXPORT DES DONNÉES ===');
    if (reviews.length) {
      const entries = mergeFilmsAndReviews(films, reviews);
      await exportToCsv('allocine-films-critiques.csv', ['Title', 'Rating', 'Review'], entries);
      console.log('✅ Export films+critiques : allocine-films-critiques.csv');
    } else {
      const entries = films.map((x) => ({ Title: x.title, Rating: x.rating }));
      await exportToCsv('allocine-films.csv', ['Title', 'Rating'], entries);
      console.log('✅ Export films seuls : allocine-films.csv');
    }
  } catch (error) {
    console.error('❌ Erreur lors du scraping:', error);
    // Signal the failure to the calling shell without skipping `finally`.
    process.exitCode = 1;
  } finally {
    await browser.close();
    console.log('🎉 Fini !');
  }
})();