/**
 * Scrape quotes from a page and print them to the console.
 *
 * Launches a headless browser through the module-level `puppeteer` binding,
 * navigates to `url`, and collects every `.quote` element that has both a
 * `.text` and an `.author` child. The browser is always closed, including
 * when navigation or extraction fails.
 *
 * @param {string} [url='http://quotes.toscrape.com/'] - Page to scrape.
 * @returns {Promise<Array<{text: string, author: string, tags: string[]}>>}
 *   The scraped quotes, in document order.
 * @throws Rethrows any error raised while launching, navigating or
 *   evaluating, after the browser has been closed.
 */
async function getQuotes(url = 'http://quotes.toscrape.com/') {
  const browser = await puppeteer.launch({
    headless: true,
    // '--disable-extensions' turns browser extensions off.
    // NOTE(review): '--no-sandbox' switches off Chromium's sandbox; confirm it
    // is really required in the environment this script runs in.
    args: ['--no-sandbox', '--disable-extensions'],
  });

  try {
    const page = await browser.newPage();
    await page.goto(url, { waitUntil: 'domcontentloaded' });

    // The callback handed to page.evaluate reads the page's own `document`,
    // so everything it needs is defined inside it.
    const quotes = await page.evaluate(() => {
      const readText = (root, selector) =>
        root.querySelector(selector)?.innerText ?? null;

      const collected = [];
      for (const quote of document.querySelectorAll('.quote')) {
        const text = readText(quote, '.text');
        const author = readText(quote, '.author');
        // Tags are only read when the quote has a `.tags` container.
        const tags = quote.querySelector('.tags')
          ? Array.from(quote.querySelectorAll('.tag'), (tag) => tag.innerText)
          : [];

        // Skip entries missing either the quote text or the author.
        if (text && author) {
          collected.push({ text, author, tags });
        }
      }
      return collected;
    });

    console.log(quotes);
    return quotes;
  } finally {
    // Runs on success and on failure so no browser process is left behind.
    await browser.close();
  }
}