├── .gitignore ├── Dockerfile ├── package.json ├── index.js └── scrapeLogic.js /.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules 2 | .env -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ghcr.io/puppeteer/puppeteer:19.7.2 2 | 3 | ENV PUPPETEER_SKIP_CHROMIUM_DOWNLOAD=true \ 4 | PUPPETEER_EXECUTABLE_PATH=/usr/bin/google-chrome-stable 5 | 6 | WORKDIR /usr/src/app 7 | 8 | COPY package*.json ./ 9 | RUN npm ci 10 | COPY . . 11 | CMD [ "node", "index.js" ] 12 | 13 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "puppeteer-render", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "test": "echo \"Error: no test specified\" && exit 1", 8 | "start": "node index" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "license": "ISC", 13 | "dependencies": { 14 | "dotenv": "^16.0.3", 15 | "express": "^4.18.2", 16 | "puppeteer": "^19.7.2" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /index.js: -------------------------------------------------------------------------------- 1 | const express = require("express"); 2 | const { scrapeLogic } = require("./scrapeLogic"); 3 | const app = express(); 4 | 5 | const PORT = process.env.PORT || 4000; 6 | 7 | app.get("/scrape", (req, res) => { 8 | scrapeLogic(res); 9 | }); 10 | 11 | app.get("/", (req, res) => { 12 | res.send("Render Puppeteer server is up and running!"); 13 | }); 14 | 15 | app.listen(PORT, () => { 16 | console.log(`Listening on port ${PORT}`); 17 | }); 18 | -------------------------------------------------------------------------------- /scrapeLogic.js: 
const puppeteer = require("puppeteer");
require("dotenv").config();

/**
 * Drives a headless browser through the Chrome developer site, reads the
 * title of the first search result for "automate beyond recorder", and writes
 * the outcome to the given HTTP response.
 *
 * The function always tries to answer the request: the title on success, or
 * an error message with status 500 on failure. The browser is closed in every
 * case where it was started.
 *
 * @param {object} res - Response object; `send` is always used, `status` and
 *   `headersSent` only on the failure path (an Express response fits).
 * @returns {Promise<void>} Resolves once the response was written and the
 *   browser was shut down.
 */
const scrapeLogic = async (res) => {
  // Declared outside the try so the finally block can tell whether the
  // launch got far enough to leave a browser that needs closing.
  let browser;

  try {
    // Launching inside the try means a launch failure is reported to the
    // client instead of rejecting with no response sent.
    browser = await puppeteer.launch({
      args: [
        "--disable-setuid-sandbox",
        "--no-sandbox",
        "--single-process",
        "--no-zygote",
      ],
      // In production the executable path is taken from the environment;
      // otherwise the path Puppeteer itself reports is used.
      executablePath:
        process.env.NODE_ENV === "production"
          ? process.env.PUPPETEER_EXECUTABLE_PATH
          : puppeteer.executablePath(),
    });

    const page = await browser.newPage();

    // Set screen size before navigating so the page is laid out once at the
    // intended size instead of being resized after load.
    await page.setViewport({ width: 1080, height: 1024 });

    await page.goto("https://developer.chrome.com/");

    // Type into search box
    await page.type(".search-box__input", "automate beyond recorder");

    // Wait and click on first result
    const searchResultSelector = ".search-box__link";
    await page.waitForSelector(searchResultSelector);
    await page.click(searchResultSelector);

    // Locate the full title with a unique string
    const textSelector = await page.waitForSelector(
      "text/Customize and automate"
    );
    const fullTitle = await textSelector.evaluate((el) => el.textContent);

    // Print the full title
    const logStatement = `The title of this blog post is ${fullTitle}`;
    console.log(logStatement);
    res.send(logStatement);
  } catch (e) {
    console.error(e);
    // Report the failure with an error status, unless a response was
    // already started (for example if the failure happened after send).
    if (!res.headersSent) {
      res
        .status(500)
        .send(`Something went wrong while running Puppeteer: ${e}`);
    }
  } finally {
    if (browser) {
      try {
        await browser.close();
      } catch (closeError) {
        // The response has already been written at this point; a failed
        // shutdown is logged rather than turned into a rejection.
        console.error(closeError);
      }
    }
  }
};

module.exports = { scrapeLogic };