├── .gitignore ├── LICENSE.md ├── README.md ├── api ├── dom-simple.js ├── dom.js ├── image.js ├── imageBrowser.js ├── meta.js ├── page.js └── screenshot.ts ├── config └── config.js ├── lib └── helpers.js ├── package.json ├── pnpm-lock.yaml ├── public └── index.html ├── test ├── app.js └── server.js └── vercel.json /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | 3 | cli/input.txt 4 | cli/output.tsv 5 | .vercel 6 | -------------------------------------------------------------------------------- /LICENSE.md: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2017 Weld Your Own App AB 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Scraping Service (serverless) 2 | 3 | **Scraping Service** is a REST API for scraping dynamic websites using Node.js, Puppeteer and Cheerio. It works in serverless environments such as Vercel. 4 | 5 | ---------- 6 | 7 | Made by the team at **Weld** ([www.weldyourownapp.com](https://www.weldyourownapp.com?utm_source=github-scraping-service)), the #codefree web/app creation tool: 8 | 9 | [![Weld](https://s3-eu-west-1.amazonaws.com/weld-social-and-blog/gif/weld_explained.gif?v2)](https://www.weldyourownapp.com?utm_source=github-scraping-service) 10 | 11 | 12 | ## How to Run 13 | 14 | Start Scraping Service in development mode: 15 | 16 | API=dom yarn dev 17 | # you can replace `dom` with: dom-simple (just fetch, no Chromium), image, meta, page 18 | 19 | or in production mode: 20 | 21 | yarn start 22 | 23 | Server will default to **http://localhost:3036** 24 | 25 | 26 | ## Environment variables 27 | 28 | * `API`: dom-simple/dom/image/meta/page – for testing only. See the `/api` folder 29 | * `MAX_BROWSER_THREADS`: default 3 Puppeteer browsers 30 | * `RENDER_TIMEOUT`: default 20000 milliseconds 31 | * `PORT`: server port 32 | * `NODE_ENV`: Node.js environment 33 | 34 | 35 | ## How to Test 36 | 37 | yarn test 38 | 39 | 40 | ## How to Use 41 | 42 | ### Scrape DOM 43 | 44 | Do an HTTP GET: 45 | 46 | http://localhost:3036/api/dom?url=https://news.ycombinator.com&selector=.title+a 47 | 48 | or use the simple variant, which uses just Fetch (no Chromium): 49 | 50 | http://localhost:3036/api/dom-simple?url=https://news.ycombinator.com&selector=.title+a 51 | 52 | Results: 53 | 54 | { 55 | "time": 792, 56 | "results": [ 57 | { 58 | "selector": ".title a", 59 | "count": 61, 60 | "items": [ 61 | "Ask a Female Engineer: Thoughts on the Google Memo", 62 | (more items...) 
63 | ] 64 | } 65 | ] 66 | } 67 | 68 | Parameters: 69 | 70 | * `url` (required) 71 | * `selector` is a jQuery-style selector, defaults to `body`. You can use multiple selectors separated by comma, which leads to more items in the `results` array. Use `$` instead of `#` for element ID selectors. 72 | * `time` e.g. `time=2000` adds extra loading time before accessing DOM. Use `time=networkidle0` to wait until network requests are idle. 73 | * `deep` set to `true` to get recursive object trees, not just first-level text contents. 74 | * `complete` set to `true` to get complete HTML tags, not just text contents. 75 | * `useIndex` set to `true` to use element index instead of class/id. 76 | 77 | ### Scrape page content 78 | 79 | http://localhost:3036/api/page?url=https://www.weldyourownapp.com 80 | 81 | Results: 82 | 83 | { 84 | "url": "https://www.weldyourownapp.com", 85 | "length": 13560, 86 | "content": "..." 87 | } 88 | 89 | Parameters: 90 | 91 | * `url` (required) 92 | * `time` e.g. `&time=2000` adds extra loading time before accessing page content. Default is 1000. 
93 | * `bodyOnly=true` skips the head of the page 94 | 95 | ### Scrape metadata 96 | 97 | http://localhost:3036/api/meta?url=https://www.weldyourownapp.com 98 | 99 | Results: 100 | 101 | { 102 | "url":"https://www.weldyourownapp.com", 103 | "general":{ 104 | "appleTouchIcons":[ 105 | { 106 | "href":"/images/apple-touch-icon.png" 107 | } 108 | ], 109 | "icons":[ 110 | { 111 | "href":"/images/apple-touch-icon.png" 112 | } 113 | ], 114 | "canonical":"http://www.weldyourownapp.com/", 115 | "description":"Create visual, animated, interactive content on your existing web/e-commerce platform.", 116 | "title":"Weld - The Visual CMS" 117 | }, 118 | "openGraph":{ 119 | "site_name":"Weld - The Visual CMS", 120 | "title":"Weld - The Visual CMS", 121 | "description":"Create visual, animated, interactive content on your existing web/e-commerce platform.", 122 | "locale":"en_US", 123 | "url":"http://www.weldyourownapp.com/", 124 | "image":{ 125 | "url":"https://s3-eu-west-1.amazonaws.com/weld-design-kit/weld-logo-square.png" 126 | } 127 | }, 128 | "twitter":{ 129 | "title":"Weld - The Visual CMS", 130 | "description":"Create visual, animated, interactive content on your existing web/e-commerce platform.", 131 | "card":"summary", 132 | "url":"http://www.weldyourownapp.com/", 133 | "site":"@Weld_io", 134 | "creator":"@Weld_io", 135 | "image":"https://s3-eu-west-1.amazonaws.com/weld-design-kit/weld-logo-square.png" 136 | } 137 | } 138 | 139 | ### Get image 140 | 141 | http://localhost:3036/api/image?url=https://www.weldyourownapp.com 142 | 143 | * `url` (required) 144 | * `format`: `jpeg` (default) or `png` 145 | * `width`: default 800 146 | * `height`: default 450 147 | * `dpr`: deviceScaleFactor, default is 1.0. Note you can use this as a zoom factor; the browser canvas has the same size, but the output image has different size. 
148 | * `time`: milliseconds or `networkidle0` 149 | 150 | 151 | ## Implementation 152 | 153 | Built on Node.js, Express, Puppeteer, Cheerio, html-metadata. 154 | 155 | ## Deploying on Vercel 156 | 157 | See `vercel.json` – set up as serverless API controllers. 158 | 159 | ## Older: Deploying on Heroku 160 | 161 | Stack: **Heroku-18** 162 | 163 | Buildpacks: 164 | 165 | 1. https://buildpack-registry.s3.amazonaws.com/buildpacks/jontewks/puppeteer.tgz 166 | 2. heroku/nodejs 167 | 168 | ### Heroku set-up 169 | 170 | # Set up and configure app 171 | heroku create MYAPPNAME 172 | heroku config:set NODE_ENV=production 173 | 174 | # Stack and Buildpacks 175 | heroku buildpacks:add --index 1 https://buildpack-registry.s3.amazonaws.com/buildpacks/jontewks/puppeteer.tgz 176 | -------------------------------------------------------------------------------- /api/dom-simple.js: -------------------------------------------------------------------------------- 1 | // 2 | // Name: dom-simple.js 3 | // Purpose: Controller for pure-fetch (no-Puppeteer) scraping 4 | // Creator: Tom Söderlund 5 | // 6 | 7 | 'use strict' 8 | 9 | const fetch = (...args) => import('node-fetch').then(({default: fetch}) => fetch(...args)) 10 | const { parseRequestQuery, parseDOM } = require('../lib/helpers') 11 | 12 | const scrapePage = async function (req, res) { 13 | try { 14 | const query = parseRequestQuery(req.url) 15 | if (!query.url) throw new Error(`No "url" specified: ${req.url}`) 16 | const pageUrl = decodeURIComponent(query.url) 17 | // Use $ instead of # to allow for easier URL parsing 18 | const pageSelector = decodeURIComponent(query.selector || 'body').replace(/\$/g, '#') 19 | const loadExtraTime = query.time || 3000 20 | const deepResults = query.deep || false 21 | const completeResults = query.complete || false 22 | const useIndex = query.useIndex || false 23 | const timeStart = Date.now() 24 | 25 | console.log(`Scrape DOM (simple): "${pageUrl}"`, { pageSelector, loadExtraTime }) 26 | 27 | 
const documentResponse = await fetch(pageUrl) 28 | const documentHTML = await documentResponse.text() 29 | const selectorsArray = pageSelector.split(',') 30 | const resultsObj = selectorsArray.map(selector => { 31 | const items = parseDOM(documentHTML, selector, completeResults, deepResults, useIndex) 32 | return { selector, count: items.length, items } 33 | }) 34 | const timeFinish = Date.now() 35 | res.setHeader('Content-Type', 'application/json') 36 | res.end(JSON.stringify({ time: (timeFinish - timeStart), results: resultsObj })) 37 | } catch (err) { 38 | res.statusCode = 500 39 | res.setHeader('Content-Type', 'application/json') 40 | res.end(JSON.stringify({ code: res.statusCode, message: err.message })) 41 | console.error(err.message) 42 | } 43 | } 44 | 45 | // Routes 46 | 47 | module.exports = scrapePage 48 | -------------------------------------------------------------------------------- /api/dom.js: -------------------------------------------------------------------------------- 1 | // 2 | // Name: scrape.js 3 | // Purpose: Controller and routing for scraping 4 | // Creator: Tom Söderlund 5 | // 6 | 7 | 'use strict' 8 | 9 | const { parseRequestQuery, parseDOM, fetchPageWithPuppeteer } = require('../lib/helpers') 10 | 11 | const scrapePage = async function (req, res) { 12 | try { 13 | const query = parseRequestQuery(req.url) 14 | if (!query.url) throw new Error(`No "url" specified: ${req.url}`) 15 | const pageUrl = decodeURIComponent(query.url) 16 | // Use $ instead of # to allow for easier URL parsing 17 | const pageSelector = decodeURIComponent(query.selector || 'body').replace(/\$/g, '#') 18 | const loadExtraTime = query.time || 3000 19 | const deepResults = query.deep || false 20 | const completeResults = query.complete || false 21 | const useIndex = query.useIndex || false 22 | const timeStart = Date.now() 23 | 24 | console.log(`Scrape DOM: "${pageUrl}"`, { pageSelector, loadExtraTime }) 25 | 26 | const documentHTML = await 
fetchPageWithPuppeteer(pageUrl, { waitForSelector: pageSelector, loadExtraTime, bodyOnly: true }) 27 | const selectorsArray = pageSelector.split(',') 28 | const resultsObj = selectorsArray.map(selector => { 29 | const items = parseDOM(documentHTML, selector, completeResults, deepResults, useIndex) 30 | return { selector, count: items.length, items } 31 | }) 32 | const timeFinish = Date.now() 33 | res.setHeader('Content-Type', 'application/json') 34 | res.end(JSON.stringify({ time: (timeFinish - timeStart), results: resultsObj })) 35 | } catch (err) { 36 | res.statusCode = 500 37 | res.setHeader('Content-Type', 'application/json') 38 | res.end(JSON.stringify({ code: res.statusCode, message: err.message })) 39 | console.error(err.message) 40 | } 41 | } 42 | 43 | // Routes 44 | 45 | module.exports = scrapePage 46 | -------------------------------------------------------------------------------- /api/image.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const { createReadStream, createWriteStream, unlink } = require('fs') 4 | const { promisify } = require('util') 5 | const { pipeline } = require('stream') 6 | const { resolve } = require('path') 7 | const { tmpdir } = require('os') 8 | const { randomBytes } = require('crypto') 9 | 10 | const streamPipeline = promisify(pipeline) 11 | 12 | const getCachedImage = async function (req, res) { 13 | try { 14 | const fetch = (await import('node-fetch')).default; 15 | const requestUrl = new URL(req.url, `http://${req.headers.host}`) 16 | const imageUrl = requestUrl.searchParams.get('url') 17 | if (!imageUrl) throw new Error('No "url" specified') 18 | 19 | const decodedImageUrl = decodeURIComponent(imageUrl) 20 | 21 | // Generate a unique filename for the cached image 22 | const randomFileName = randomBytes(16).toString('hex') 23 | const tempFilePath = resolve(tmpdir(), randomFileName) 24 | 25 | // Download the image and save it to the temporary file 26 | const 
response = await fetch(decodedImageUrl) 27 | if (!response.ok) throw new Error(`Failed to fetch image: ${response.statusText}`) 28 | 29 | await streamPipeline(response.body, createWriteStream(tempFilePath)) 30 | 31 | // Set cache headers 32 | res.setHeader('Cache-Control', 'public, max-age=31536000, immutable') 33 | 34 | // Stream the cached image file back to the client 35 | const readStream = createReadStream(tempFilePath) 36 | readStream.pipe(res) 37 | 38 | readStream.on('end', () => { 39 | // Clean up the temporary file after serving it 40 | unlink(tempFilePath, (err) => { 41 | if (err) console.error(`Failed to delete temporary file: ${tempFilePath}`) 42 | }) 43 | }) 44 | } catch (error) { 45 | res.status(500).send(error.message) 46 | } 47 | } 48 | 49 | module.exports = getCachedImage -------------------------------------------------------------------------------- /api/imageBrowser.js: -------------------------------------------------------------------------------- 1 | // 2 | // Name: image.js 3 | // Purpose: Controller and routing for full image 4 | // Creator: Tom Söderlund 5 | // 6 | 7 | 'use strict' 8 | 9 | const { parseRequestQuery, fetchImageWithPuppeteer } = require('../lib/helpers') 10 | 11 | const getImage = async function (req, res) { 12 | try { 13 | const query = parseRequestQuery(req.url) 14 | if (!query.url) throw new Error(`No "url" specified: ${req.url}`) 15 | const pageUrl = decodeURIComponent(query.url) 16 | const options = { 17 | ...query, 18 | width: query.width ? parseInt(query.width) : undefined, 19 | height: query.height ? parseInt(query.height) : undefined, 20 | loadExtraTime: query.time || 0 21 | } 22 | // Get image 23 | const image = await fetchImageWithPuppeteer(pageUrl, options) 24 | res.setHeader('content-type', 'image/' + (options.format || 'jpeg')) 25 | res.end(image) 26 | } catch (err) { 27 | res.statusCode = 500 28 | res.setHeader('Content-Type', 'text/html') 29 | res.end(`

Server Error

Sorry, there was a problem: ${err.message}

`) 30 | console.error(err.message) 31 | } 32 | } 33 | 34 | // Routes 35 | 36 | module.exports = getImage 37 | -------------------------------------------------------------------------------- /api/meta.js: -------------------------------------------------------------------------------- 1 | // 2 | // Name: meta.js 3 | // Purpose: Controller and routing for metadata 4 | // Creator: Tom Söderlund 5 | // 6 | 7 | 'use strict' 8 | 9 | const { includes, merge } = require('lodash') 10 | const htmlMetadata = require('html-metadata') 11 | const { parseRequestQuery } = require('../lib/helpers') 12 | 13 | const scrapeMetaData = async function (req, res) { 14 | try { 15 | const query = parseRequestQuery(req.url) 16 | if (!query.url) throw new Error(`No "url" specified: ${req.url}`) 17 | const pageUrl = decodeURIComponent(query.url) 18 | const protocol = includes(pageUrl, 'https:') ? 'https' : 'http' 19 | 20 | console.log(`Scrape metadata: "${pageUrl}"`) 21 | let metadata 22 | try { 23 | metadata = await htmlMetadata(pageUrl) 24 | } catch (getErr) { 25 | if (getErr.status === 504 && protocol === 'https') { 26 | const pageUrlWithHttp = pageUrl.replace('https:', 'http:') 27 | metadata = await htmlMetadata(pageUrlWithHttp) 28 | } 29 | } 30 | const metadataAndUrl = merge({}, { url: pageUrl }, metadata) 31 | res.setHeader('Content-Type', 'application/json') 32 | res.end(JSON.stringify(metadataAndUrl)) 33 | } catch (err) { 34 | res.statusCode = 500 35 | res.setHeader('Content-Type', 'application/json') 36 | res.end(JSON.stringify({ code: res.statusCode, message: err.message })) 37 | console.error(err.message) 38 | } 39 | } 40 | 41 | // Routes 42 | 43 | module.exports = scrapeMetaData 44 | -------------------------------------------------------------------------------- /api/page.js: -------------------------------------------------------------------------------- 1 | // 2 | // Name: page.js 3 | // Purpose: Controller and routing for full page text 4 | // Creator: Tom Söderlund 5 | // 6 | 
7 | 'use strict' 8 | 9 | const { parseRequestQuery, fetchPageWithPuppeteer } = require('../lib/helpers') 10 | 11 | const scrapePageContent = async function (req, res) { 12 | try { 13 | const query = parseRequestQuery(req.url) 14 | if (!query.url) throw new Error(`No "url" specified: ${req.url}`) 15 | const pageUrl = decodeURIComponent(query.url) 16 | const loadExtraTime = query.time || 1000 17 | const bodyOnly = query.bodyOnly 18 | 19 | console.log(`Scrape text: "${pageUrl}", ${loadExtraTime} ms`) 20 | 21 | const documentHTML = await fetchPageWithPuppeteer(pageUrl, { loadExtraTime, bodyOnly }) 22 | res.setHeader('Content-Type', 'application/json') 23 | res.end(JSON.stringify({ 24 | url: pageUrl, 25 | length: documentHTML.length, 26 | content: documentHTML 27 | })) 28 | } catch (err) { 29 | res.statusCode = 500 30 | res.setHeader('Content-Type', 'application/json') 31 | res.end(JSON.stringify({ code: res.statusCode, message: err.message })) 32 | console.error(err.message) 33 | } 34 | } 35 | 36 | module.exports = scrapePageContent 37 | -------------------------------------------------------------------------------- /api/screenshot.ts: -------------------------------------------------------------------------------- 1 | 2 | const chrome = require('@sparticuz/chromium') 3 | const puppeteer = require('puppeteer-core') 4 | 5 | // const getAbsoluteURL = (hash: string, path?: string) => { 6 | // if (!process.env.NODE_ENV) { 7 | // return `http://localhost:3000/${hash}` 8 | // } 9 | 10 | // return `https://image.w.kodadot.xyz/ipfs/${path}/${hash}` 11 | // } 12 | 13 | // type ScreenshotRequest = { 14 | // url: string 15 | // settings?: Settings 16 | // } 17 | 18 | // export type Settings = { 19 | // delay?: number; 20 | // width?: number; 21 | // height?: number; 22 | // } 23 | 24 | 25 | const performCanvasCapture = async (page: any, canvasSelector: string) => { 26 | try { 27 | // get the base64 image from the CANVAS targetted 28 | const base64 = await 
page.$eval(canvasSelector, el => { 29 | if (!el || el.tagName !== "CANVAS") return null 30 | return el.toDataURL() 31 | }) 32 | if (!base64) throw new Error("No canvas found") 33 | // remove the base64 mimetype at the beginning of the string 34 | const pureBase64 = base64.replace(/^data:image\/png;base64,/, "") 35 | return Buffer.from(pureBase64, "base64") 36 | } catch (err) { 37 | return null 38 | } 39 | } 40 | 41 | export default async (req: any, res: any) => { 42 | let { 43 | // query: { hash, path, resolution }, 44 | body, 45 | method 46 | } = req 47 | 48 | if (method !== 'POST') { 49 | // CORS https://vercel.com/guides/how-to-enable-cors 50 | res.setHeader('Access-Control-Allow-Credentials', true) 51 | res.setHeader('Access-Control-Allow-Origin', '*') 52 | res.setHeader('Access-Control-Allow-Methods', 'GET,OPTIONS,PATCH,DELETE,POST,PUT') 53 | res.setHeader( 54 | 'Access-Control-Allow-Headers', 55 | 'X-CSRF-Token, X-Requested-With, Accept, Accept-Version, Content-Length, Content-MD5, Content-Type, Date, X-Api-Version' 56 | ) 57 | return res.status(200).end() 58 | } 59 | 60 | if (!body) return res.status(400).end(`No body provided`) 61 | 62 | if (typeof body === 'object' && !body.url) return res.status(400).end(`No url provided`) 63 | 64 | const isProd = process.env.NODE_ENV === 'production' 65 | 66 | let browser 67 | 68 | if (isProd) { 69 | browser = await puppeteer.launch({ 70 | args: chrome.args, 71 | defaultViewport: chrome.defaultViewport, 72 | executablePath: await chrome.executablePath(), 73 | headless: 'new', 74 | ignoreHTTPSErrors: true 75 | }) 76 | } else { 77 | browser = await puppeteer.launch({ 78 | headless: 'new', 79 | executablePath: '/Applications/Google Chrome.app/Contents/MacOS/Google Chrome', 80 | }) 81 | } 82 | 83 | const page = await browser.newPage() 84 | 85 | await page.setViewport({ width: 600, height: 600 }) 86 | 87 | // const url = getAbsoluteURL(`?hash=${hash}`, path) 88 | const url = body.url 89 | 90 | console.log('url', url) 91 | 92 
| await page.goto(url); 93 | 94 | const selector = 'canvas'; 95 | 96 | await page.waitForSelector(selector); 97 | 98 | const element = await performCanvasCapture(page, selector) // const element = page.$(selector) 99 | 100 | // const data = await page.screenshot({ 101 | // type: 'png' 102 | // }) 103 | 104 | const data = element 105 | 106 | await browser.close() 107 | // Set the s-maxage property which caches the images then on the Vercel edge 108 | res.setHeader('Cache-Control', 's-maxage=3600, stale-while-revalidate') 109 | res.setHeader('Content-Type', 'image/png') 110 | // CORS 111 | // res.setHeader('Access-Control-Allow-Headers', '*') 112 | res.setHeader('Access-Control-Allow-Credentials', true) 113 | res.setHeader('Access-Control-Allow-Origin', '*') 114 | res.setHeader('Access-Control-Allow-Methods', 'GET,OPTIONS,PATCH,DELETE,POST,PUT') 115 | res.setHeader( 116 | 'Access-Control-Allow-Headers', 117 | 'X-CSRF-Token, X-Requested-With, Accept, Accept-Version, Content-Length, Content-MD5, Content-Type, Date, X-Api-Version' 118 | ) 119 | res.end(data) 120 | } 121 | -------------------------------------------------------------------------------- /config/config.js: -------------------------------------------------------------------------------- 1 | const path = require('path') 2 | const rootPath = path.join(__dirname, '/..') 3 | const env = process.env.NODE_ENV || 'development' 4 | 5 | const config = { 6 | 7 | development: { 8 | root: rootPath, 9 | app: { 10 | name: 'scraping-service' 11 | }, 12 | port: 3036 13 | }, 14 | 15 | test: { 16 | root: rootPath, 17 | app: { 18 | name: 'scraping-service' 19 | }, 20 | port: 3000 21 | }, 22 | 23 | production: { 24 | root: rootPath, 25 | app: { 26 | name: 'scraping-service' 27 | }, 28 | port: 3000 29 | } 30 | 31 | } 32 | 33 | module.exports = config[env] 34 | -------------------------------------------------------------------------------- /lib/helpers.js: 
-------------------------------------------------------------------------------- 1 | // 2 | // Name: helpers.js 3 | // Purpose: Library for helper functions 4 | // Creator: Tom Söderlund 5 | // 6 | 7 | 'use strict' 8 | 9 | const chromium = require('@sparticuz/chromium') 10 | const puppeteer = process.env.NODE_ENV === 'production' ? require('puppeteer-core') : require('puppeteer') 11 | const cheerio = require('cheerio') 12 | const { get, compact } = require('lodash') 13 | 14 | const parseRequestParams = url => (url.split('?')[0] || '/').substr(1).split('/') 15 | 16 | const parseRequestQuery = url => (url.split('?')[1] || '') 17 | .split('&') 18 | .reduce((result, propValue) => { 19 | const key = propValue.split('=')[0] 20 | if (key) result[key] = propValue.split('=')[1] 21 | return result 22 | }, {}) 23 | 24 | const compactString = str => str.replace(/[\n\t]/g, '').replace(/\s+/g, ' ').trim() 25 | 26 | /** 27 | * Parses the given DOM string and extracts information based on the provided options. 28 | * 29 | * @param {string} domString - The DOM string to parse. 30 | * @param {string} pageSel - The selector to filter the elements to be parsed. 31 | * @param {boolean} complete - Whether to return complete DOM nodes as strings. 32 | * @param {boolean} deep - Whether to traverse the DOM tree deeply and return nested objects. 33 | * @param {boolean} useIndex - Whether element index instead of class/id. 34 | * @returns {Array|string|Object} - The parsed result based on the provided options. 35 | */ 36 | const parseDOM = (domString, pageSel, complete = false, deep = false, useIndex = false) => { 37 | // Use _ instead of . and $ instead of # to allow for easier JavaScript parsing 38 | const getElementReference = ($element, index = 0) => { 39 | if (useIndex) { 40 | return `${$element[0].name}_${index}` 41 | } else { 42 | return $element[0].name 43 | + ($element.attr('class') ? '_' + $element.attr('class').replace(/ /g, '_') : '') 44 | + ($element.attr('id') ? 
'$' + $element.attr('id') : '') 45 | } 46 | } 47 | 48 | const traverseChildren = function (parentObj, obj, index, elem) { 49 | const $node = $(elem) 50 | const nodeRef = getElementReference($node, index) 51 | // Has children and is not a text node 52 | if (deep || ($node.children().length > 0 && typeof (obj[nodeRef]) !== 'string')) { 53 | obj[nodeRef] = obj[nodeRef] || {} 54 | // Attributes 55 | obj[nodeRef].attributes = ['href', 'src'].reduce((result, attr) => { 56 | if ($node.attr(attr)) result[attr] = $node.attr(attr) 57 | return result 58 | }, {}) 59 | if (Object.keys(obj[nodeRef].attributes).length === 0) delete obj[nodeRef].attributes 60 | // Has children AND text: use '.$text=' 61 | if ($node.text().length > 0) { 62 | obj[nodeRef].$text = compactString($node.text()) 63 | } 64 | // Traverse the children 65 | $node.children().each(traverseChildren.bind(undefined, obj, obj[nodeRef])) 66 | } else { 67 | // Has only text 68 | obj[nodeRef] = compactString($node.text()) 69 | } 70 | // Delete parent.$text if same as this 71 | if ($node.text() === get(parentObj, '$text')) { 72 | delete parentObj.$text 73 | } 74 | } 75 | 76 | const $ = cheerio.load(domString) 77 | const resultArray = $(pageSel).map(function (i, el) { 78 | // this === el 79 | if (complete) { 80 | // Complete DOM nodes 81 | return compactString($(this).toString()) 82 | } else if (deep) { 83 | // Deep objects 84 | let deepObj = {} 85 | traverseChildren(undefined, deepObj, undefined, this) 86 | return deepObj 87 | } else { 88 | // Shallow text 89 | return compactString($(this).text()) 90 | } 91 | }).get() 92 | return compact(resultArray) 93 | } 94 | 95 | const fetchPageWithPuppeteer = async function (pageUrl, { waitForSelector, loadExtraTime, bodyOnly }) { 96 | console.log(`Fetch page with Puppeteer: "${pageUrl}"`, { waitForSelector, loadExtraTime, bodyOnly }) 97 | 98 | let browser 99 | if (process.env.NODE_ENV === 'production') { 100 | browser = await puppeteer.launch({ 101 | args: chromium.args, 102 | 
defaultViewport: chromium.defaultViewport, 103 | executablePath: await chromium.executablePath(), 104 | headless: 'new', 105 | ignoreHTTPSErrors: true 106 | }) 107 | } else { 108 | browser = await puppeteer.launch({ 109 | args: [ 110 | '--disable-dev-shm-usage', 111 | '--disable-gpu', 112 | '--disable-setuid-sandbox', 113 | '--headless', 114 | '--no-sandbox', 115 | '--single-process' 116 | ], 117 | ignoreHTTPSErrors: true 118 | }) 119 | } 120 | 121 | const page = await browser.newPage() 122 | 123 | if (waitForSelector !== undefined) { 124 | await page.goto(pageUrl) 125 | await page.waitForSelector(waitForSelector) 126 | } else if (['networkidle0'].includes(loadExtraTime)) { 127 | await page.goto(pageUrl, { waitUntil: loadExtraTime }) 128 | } else { 129 | await page.goto(pageUrl) 130 | await page.waitFor(parseInt((loadExtraTime))) 131 | } 132 | 133 | // await page.content(), document.body.innerHTML, document.documentElement.outerHTML 134 | const documentHTML = bodyOnly 135 | ? await page.evaluate(() => document.body.outerHTML) 136 | : await page.evaluate(() => document.documentElement.outerHTML) 137 | 138 | await browser.close() 139 | return documentHTML 140 | } 141 | 142 | const fetchImageWithPuppeteer = async function (pageUrl, { loadExtraTime, format = 'jpeg', width = 800, height = 450, dpr = 1.0 }) { 143 | height = height || width 144 | dpr = parseFloat(dpr) 145 | 146 | console.log(`Fetch image with Puppeteer: "${pageUrl}"`, { loadExtraTime, format, width, height, dpr }) 147 | 148 | const browser = await puppeteer.launch({ 149 | args: chromium.args, 150 | executablePath: await chromium.executablePath, 151 | headless: chromium.headless 152 | }) 153 | const page = await browser.newPage() 154 | await page.setViewport({ width, height, deviceScaleFactor: dpr, isMobile: false }) 155 | if (['networkidle0'].includes(loadExtraTime)) { 156 | await page.goto(pageUrl, { waitUntil: loadExtraTime }) 157 | } else { 158 | // Wait milliseconds 159 | await page.goto(pageUrl) 160 
| await page.waitFor(parseInt((loadExtraTime))) 161 | } 162 | const screenshot = await page.screenshot({ type: format, fullPage: false }) 163 | await browser.close() 164 | 165 | return screenshot 166 | } 167 | 168 | // Public API 169 | 170 | module.exports = { 171 | 172 | parseRequestParams, 173 | parseRequestQuery, 174 | 175 | parseDOM, 176 | 177 | fetchPageWithPuppeteer, 178 | fetchImageWithPuppeteer 179 | 180 | } 181 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "scraping-service", 3 | "version": "2.0.0", 4 | "description": "", 5 | "scripts": { 6 | "dev": "echo '\n** NOTE: using PNPM not Yarn/NPM **\n'; node test/server" 7 | }, 8 | "keywords": [], 9 | "author": "Tom Söderlund", 10 | "license": "MIT", 11 | "engines": { 12 | "node": ">=18.0.0" 13 | }, 14 | "dependencies": { 15 | "@sparticuz/chromium": "^119.0.2", 16 | "cheerio": "1.0.0-rc.12", 17 | "lodash": "^4.17.21", 18 | "node-fetch": "^3.3.2", 19 | "puppeteer-core": "^21.5.1" 20 | }, 21 | "devDependencies": { 22 | "@types/node": "^20.10.6", 23 | "puppeteer": "^22.1.0", 24 | "typescript": "^5.3.3" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /pnpm-lock.yaml: -------------------------------------------------------------------------------- 1 | lockfileVersion: '6.0' 2 | 3 | settings: 4 | autoInstallPeers: true 5 | excludeLinksFromLockfile: false 6 | 7 | dependencies: 8 | '@sparticuz/chromium': 9 | specifier: ^119.0.2 10 | version: 119.0.2 11 | cheerio: 12 | specifier: 1.0.0-rc.12 13 | version: 1.0.0-rc.12 14 | lodash: 15 | specifier: ^4.17.21 16 | version: 4.17.21 17 | node-fetch: 18 | specifier: ^3.3.2 19 | version: 3.3.2 20 | puppeteer-core: 21 | specifier: ^21.5.1 22 | version: 21.6.1 23 | 24 | devDependencies: 25 | '@types/node': 26 | specifier: ^20.10.6 27 | version: 20.10.6 28 | puppeteer: 29 | 
specifier: ^22.1.0 30 | version: 22.1.0(typescript@5.3.3) 31 | typescript: 32 | specifier: ^5.3.3 33 | version: 5.3.3 34 | 35 | packages: 36 | 37 | /@babel/code-frame@7.23.5: 38 | resolution: {integrity: sha512-CgH3s1a96LipHCmSUmYFPwY7MNx8C3avkq7i4Wl3cfa662ldtUe4VM1TPXX70pfmrlWTb6jLqTYrZyT2ZTJBgA==} 39 | engines: {node: '>=6.9.0'} 40 | dependencies: 41 | '@babel/highlight': 7.23.4 42 | chalk: 2.4.2 43 | dev: true 44 | 45 | /@babel/helper-validator-identifier@7.22.20: 46 | resolution: {integrity: sha512-Y4OZ+ytlatR8AI+8KZfKuL5urKp7qey08ha31L8b3BwewJAoJamTzyvxPR/5D+KkdJCGPq/+8TukHBlY10FX9A==} 47 | engines: {node: '>=6.9.0'} 48 | dev: true 49 | 50 | /@babel/highlight@7.23.4: 51 | resolution: {integrity: sha512-acGdbYSfp2WheJoJm/EBBBLh/ID8KDc64ISZ9DYtBmC8/Q204PZJLHyzeB5qMzJ5trcOkybd78M4x2KWsUq++A==} 52 | engines: {node: '>=6.9.0'} 53 | dependencies: 54 | '@babel/helper-validator-identifier': 7.22.20 55 | chalk: 2.4.2 56 | js-tokens: 4.0.0 57 | dev: true 58 | 59 | /@puppeteer/browsers@1.9.0: 60 | resolution: {integrity: sha512-QwguOLy44YBGC8vuPP2nmpX4MUN2FzWbsnvZJtiCzecU3lHmVZkaC1tq6rToi9a200m8RzlVtDyxCS0UIDrxUg==} 61 | engines: {node: '>=16.3.0'} 62 | hasBin: true 63 | dependencies: 64 | debug: 4.3.4 65 | extract-zip: 2.0.1 66 | progress: 2.0.3 67 | proxy-agent: 6.3.1 68 | tar-fs: 3.0.4 69 | unbzip2-stream: 1.4.3 70 | yargs: 17.7.2 71 | transitivePeerDependencies: 72 | - supports-color 73 | dev: false 74 | 75 | /@puppeteer/browsers@2.0.1: 76 | resolution: {integrity: sha512-IQj/rJY1MNfZ6Z2ERu+6S0LkIPBSXRGddgmvODqjm1afHy04aJIiWmoohuFtL78SPSlbjpIMuFVfhyqsR5Ng4A==} 77 | engines: {node: '>=18'} 78 | hasBin: true 79 | dependencies: 80 | debug: 4.3.4 81 | extract-zip: 2.0.1 82 | progress: 2.0.3 83 | proxy-agent: 6.4.0 84 | tar-fs: 3.0.5 85 | unbzip2-stream: 1.4.3 86 | yargs: 17.7.2 87 | transitivePeerDependencies: 88 | - supports-color 89 | dev: true 90 | 91 | /@sparticuz/chromium@119.0.2: 92 | resolution: {integrity: 
sha512-VB6tHW13YpQ7HqyhKG1KU8Z1RIb/kfHMg8j/4ft5rk8mMvEm2YkBYFCHnj+yb0P/RPLcdgX3+VC58GuJilOtww==} 93 | engines: {node: '>= 16'} 94 | dependencies: 95 | follow-redirects: 1.15.4 96 | tar-fs: 3.0.4 97 | transitivePeerDependencies: 98 | - debug 99 | dev: false 100 | 101 | /@tootallnate/quickjs-emscripten@0.23.0: 102 | resolution: {integrity: sha512-C5Mc6rdnsaJDjO3UpGW/CQTHtCKaYlScZTly4JIu97Jxo/odCiH0ITnDXSJPTOrEKk/ycSZ0AOgTmkDtkOsvIA==} 103 | 104 | /@types/node@20.10.6: 105 | resolution: {integrity: sha512-Vac8H+NlRNNlAmDfGUP7b5h/KA+AtWIzuXy0E6OyP8f1tCLYAtPvKRRDJjAPqhpCb0t6U2j7/xqAuLEebW2kiw==} 106 | requiresBuild: true 107 | dependencies: 108 | undici-types: 5.26.5 109 | 110 | /@types/yauzl@2.10.3: 111 | resolution: {integrity: sha512-oJoftv0LSuaDZE3Le4DbKX+KS9G36NzOeSap90UIK0yMA/NhKJhqlSGtNDORNRaIbQfzjXDrQa0ytJ6mNRGz/Q==} 112 | requiresBuild: true 113 | dependencies: 114 | '@types/node': 20.10.6 115 | optional: true 116 | 117 | /agent-base@7.1.0: 118 | resolution: {integrity: sha512-o/zjMZRhJxny7OyEF+Op8X+efiELC7k7yOjMzgfzVqOzXqkBkWI79YoTdOtsuWd5BWhAGAuOY/Xa6xpiaWXiNg==} 119 | engines: {node: '>= 14'} 120 | dependencies: 121 | debug: 4.3.4 122 | transitivePeerDependencies: 123 | - supports-color 124 | 125 | /ansi-regex@5.0.1: 126 | resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==} 127 | engines: {node: '>=8'} 128 | 129 | /ansi-styles@3.2.1: 130 | resolution: {integrity: sha512-VT0ZI6kZRdTh8YyJw3SMbYm/u+NqfsAxEpWO0Pf9sq8/e94WxxOpPKx9FR1FlyCtOVDNOQ+8ntlqFxiRc+r5qA==} 131 | engines: {node: '>=4'} 132 | dependencies: 133 | color-convert: 1.9.3 134 | dev: true 135 | 136 | /ansi-styles@4.3.0: 137 | resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==} 138 | engines: {node: '>=8'} 139 | dependencies: 140 | color-convert: 2.0.1 141 | 142 | /argparse@2.0.1: 143 | resolution: {integrity: 
sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==} 144 | dev: true 145 | 146 | /ast-types@0.13.4: 147 | resolution: {integrity: sha512-x1FCFnFifvYDDzTaLII71vG5uvDwgtmDTEVWAxrgeiR8VjMONcCXJx7E+USjDtHlwFmt9MysbqgF9b9Vjr6w+w==} 148 | engines: {node: '>=4'} 149 | dependencies: 150 | tslib: 2.6.2 151 | 152 | /b4a@1.6.4: 153 | resolution: {integrity: sha512-fpWrvyVHEKyeEvbKZTVOeZF3VSKKWtJxFIxX/jaVPf+cLbGUSitjb49pHLqPV2BUNNZ0LcoeEGfE/YCpyDYHIw==} 154 | 155 | /bare-events@2.2.0: 156 | resolution: {integrity: sha512-Yyyqff4PIFfSuthCZqLlPISTWHmnQxoPuAvkmgzsJEmG3CesdIv6Xweayl0JkCZJSB2yYIdJyEz97tpxNhgjbg==} 157 | requiresBuild: true 158 | dev: true 159 | optional: true 160 | 161 | /bare-fs@2.1.5: 162 | resolution: {integrity: sha512-5t0nlecX+N2uJqdxe9d18A98cp2u9BETelbjKpiVgQqzzmVNFYWEAjQHqS+2Khgto1vcwhik9cXucaj5ve2WWA==} 163 | requiresBuild: true 164 | dependencies: 165 | bare-events: 2.2.0 166 | bare-os: 2.2.0 167 | bare-path: 2.1.0 168 | streamx: 2.15.6 169 | dev: true 170 | optional: true 171 | 172 | /bare-os@2.2.0: 173 | resolution: {integrity: sha512-hD0rOPfYWOMpVirTACt4/nK8mC55La12K5fY1ij8HAdfQakD62M+H4o4tpfKzVGLgRDTuk3vjA4GqGXXCeFbag==} 174 | requiresBuild: true 175 | dev: true 176 | optional: true 177 | 178 | /bare-path@2.1.0: 179 | resolution: {integrity: sha512-DIIg7ts8bdRKwJRJrUMy/PICEaQZaPGZ26lsSx9MJSwIhSrcdHn7/C8W+XmnG/rKi6BaRcz+JO00CjZteybDtw==} 180 | requiresBuild: true 181 | dependencies: 182 | bare-os: 2.2.0 183 | dev: true 184 | optional: true 185 | 186 | /base64-js@1.5.1: 187 | resolution: {integrity: sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==} 188 | 189 | /basic-ftp@5.0.4: 190 | resolution: {integrity: sha512-8PzkB0arJFV4jJWSGOYR+OEic6aeKMu/osRhBULN6RY0ykby6LKhbmuQ5ublvaas5BOwboah5D87nrHyuh8PPA==} 191 | engines: {node: '>=10.0.0'} 192 | 193 | /boolbase@1.0.0: 194 | resolution: {integrity: 
sha512-JZOSA7Mo9sNGB8+UjSgzdLtokWAky1zbztM3WRLCbZ70/3cTANmQmOdR7y2g+J0e2WXywy1yS468tY+IruqEww==} 195 | dev: false 196 | 197 | /buffer-crc32@0.2.13: 198 | resolution: {integrity: sha512-VO9Ht/+p3SN7SKWqcrgEzjGbRSJYTx+Q1pTQC0wrWqHx0vpJraQ6GtHx8tvcg1rlK1byhU5gccxgOgj7B0TDkQ==} 199 | 200 | /buffer@5.7.1: 201 | resolution: {integrity: sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ==} 202 | dependencies: 203 | base64-js: 1.5.1 204 | ieee754: 1.2.1 205 | 206 | /callsites@3.1.0: 207 | resolution: {integrity: sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ==} 208 | engines: {node: '>=6'} 209 | dev: true 210 | 211 | /chalk@2.4.2: 212 | resolution: {integrity: sha512-Mti+f9lpJNcwF4tWV8/OrTTtF1gZi+f8FqlyAdouralcFWFQWF2+NgCHShjkCb+IFBLq9buZwE1xckQU4peSuQ==} 213 | engines: {node: '>=4'} 214 | dependencies: 215 | ansi-styles: 3.2.1 216 | escape-string-regexp: 1.0.5 217 | supports-color: 5.5.0 218 | dev: true 219 | 220 | /cheerio-select@2.1.0: 221 | resolution: {integrity: sha512-9v9kG0LvzrlcungtnJtpGNxY+fzECQKhK4EGJX2vByejiMX84MFNQw4UxPJl3bFbTMw+Dfs37XaIkCwTZfLh4g==} 222 | dependencies: 223 | boolbase: 1.0.0 224 | css-select: 5.1.0 225 | css-what: 6.1.0 226 | domelementtype: 2.3.0 227 | domhandler: 5.0.3 228 | domutils: 3.1.0 229 | dev: false 230 | 231 | /cheerio@1.0.0-rc.12: 232 | resolution: {integrity: sha512-VqR8m68vM46BNnuZ5NtnGBKIE/DfN0cRIzg9n40EIq9NOv90ayxLBXA8fXC5gquFRGJSTRqBq25Jt2ECLR431Q==} 233 | engines: {node: '>= 6'} 234 | dependencies: 235 | cheerio-select: 2.1.0 236 | dom-serializer: 2.0.0 237 | domhandler: 5.0.3 238 | domutils: 3.1.0 239 | htmlparser2: 8.0.2 240 | parse5: 7.1.2 241 | parse5-htmlparser2-tree-adapter: 7.0.0 242 | dev: false 243 | 244 | /chromium-bidi@0.5.1(devtools-protocol@0.0.1203626): 245 | resolution: {integrity: sha512-dcCqOgq9fHKExc2R4JZs/oKbOghWpUNFAJODS8WKRtLhp3avtIH5UDCBrutdqZdh3pARogH8y1ObXm87emwb3g==} 246 | peerDependencies: 247 | 
devtools-protocol: '*' 248 | dependencies: 249 | devtools-protocol: 0.0.1203626 250 | mitt: 3.0.1 251 | urlpattern-polyfill: 9.0.0 252 | dev: false 253 | 254 | /chromium-bidi@0.5.9(devtools-protocol@0.0.1232444): 255 | resolution: {integrity: sha512-wOTX3m2zuHX0zRX4h7Ol1DAGz0cqHzo2IrAPvOqBxdd4ZR32vxg4FKNjmBihi1oP9b1QGSBBG5VNUUXUCsxDfg==} 256 | peerDependencies: 257 | devtools-protocol: '*' 258 | dependencies: 259 | devtools-protocol: 0.0.1232444 260 | mitt: 3.0.1 261 | urlpattern-polyfill: 10.0.0 262 | dev: true 263 | 264 | /cliui@8.0.1: 265 | resolution: {integrity: sha512-BSeNnyus75C4//NQ9gQt1/csTXyo/8Sb+afLAkzAptFuMsod9HFokGNudZpi/oQV73hnVK+sR+5PVRMd+Dr7YQ==} 266 | engines: {node: '>=12'} 267 | dependencies: 268 | string-width: 4.2.3 269 | strip-ansi: 6.0.1 270 | wrap-ansi: 7.0.0 271 | 272 | /color-convert@1.9.3: 273 | resolution: {integrity: sha512-QfAUtd+vFdAtFQcC8CCyYt1fYWxSqAiK2cSD6zDB8N3cpsEBAvRxp9zOGg6G/SHHJYAT88/az/IuDGALsNVbGg==} 274 | dependencies: 275 | color-name: 1.1.3 276 | dev: true 277 | 278 | /color-convert@2.0.1: 279 | resolution: {integrity: sha512-RRECPsj7iu/xb5oKYcsFHSppFNnsj/52OVTRKb4zP5onXwVF3zVmmToNcOfGC+CRDpfK/U584fMg38ZHCaElKQ==} 280 | engines: {node: '>=7.0.0'} 281 | dependencies: 282 | color-name: 1.1.4 283 | 284 | /color-name@1.1.3: 285 | resolution: {integrity: sha512-72fSenhMw2HZMTVHeCA9KCmpEIbzWiQsjN+BHcBbS9vr1mtt+vJjPdksIBNUmKAW8TFUDPJK5SUU3QhE9NEXDw==} 286 | dev: true 287 | 288 | /color-name@1.1.4: 289 | resolution: {integrity: sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA==} 290 | 291 | /cosmiconfig@9.0.0(typescript@5.3.3): 292 | resolution: {integrity: sha512-itvL5h8RETACmOTFc4UfIyB2RfEHi71Ax6E/PivVxq9NseKbOWpeyHEOIbmAw1rs8Ak0VursQNww7lf7YtUwzg==} 293 | engines: {node: '>=14'} 294 | peerDependencies: 295 | typescript: '>=4.9.5' 296 | peerDependenciesMeta: 297 | typescript: 298 | optional: true 299 | dependencies: 300 | env-paths: 2.2.1 301 | import-fresh: 3.3.0 302 | js-yaml: 
4.1.0 303 | parse-json: 5.2.0 304 | typescript: 5.3.3 305 | dev: true 306 | 307 | /cross-fetch@4.0.0: 308 | resolution: {integrity: sha512-e4a5N8lVvuLgAWgnCrLr2PP0YyDOTHa9H/Rj54dirp61qXnNq46m82bRhNqIA5VccJtWBvPTFRV3TtvHUKPB1g==} 309 | dependencies: 310 | node-fetch: 2.7.0 311 | transitivePeerDependencies: 312 | - encoding 313 | 314 | /css-select@5.1.0: 315 | resolution: {integrity: sha512-nwoRF1rvRRnnCqqY7updORDsuqKzqYJ28+oSMaJMMgOauh3fvwHqMS7EZpIPqK8GL+g9mKxF1vP/ZjSeNjEVHg==} 316 | dependencies: 317 | boolbase: 1.0.0 318 | css-what: 6.1.0 319 | domhandler: 5.0.3 320 | domutils: 3.1.0 321 | nth-check: 2.1.1 322 | dev: false 323 | 324 | /css-what@6.1.0: 325 | resolution: {integrity: sha512-HTUrgRJ7r4dsZKU6GjmpfRK1O76h97Z8MfS1G0FozR+oF2kG6Vfe8JE6zwrkbxigziPHinCJ+gCPjA9EaBDtRw==} 326 | engines: {node: '>= 6'} 327 | dev: false 328 | 329 | /data-uri-to-buffer@4.0.1: 330 | resolution: {integrity: sha512-0R9ikRb668HB7QDxT1vkpuUBtqc53YyAwMwGeUFKRojY/NWKvdZ+9UYtRfGmhqNbRkTSVpMbmyhXipFFv2cb/A==} 331 | engines: {node: '>= 12'} 332 | dev: false 333 | 334 | /data-uri-to-buffer@6.0.1: 335 | resolution: {integrity: sha512-MZd3VlchQkp8rdend6vrx7MmVDJzSNTBvghvKjirLkD+WTChA3KUf0jkE68Q4UyctNqI11zZO9/x2Yx+ub5Cvg==} 336 | engines: {node: '>= 14'} 337 | 338 | /debug@4.3.4: 339 | resolution: {integrity: sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==} 340 | engines: {node: '>=6.0'} 341 | peerDependencies: 342 | supports-color: '*' 343 | peerDependenciesMeta: 344 | supports-color: 345 | optional: true 346 | dependencies: 347 | ms: 2.1.2 348 | 349 | /degenerator@5.0.1: 350 | resolution: {integrity: sha512-TllpMR/t0M5sqCXfj85i4XaAzxmS5tVA16dqvdkMwGmzI+dXLXnw3J+3Vdv7VKw+ThlTMboK6i9rnZ6Nntj5CQ==} 351 | engines: {node: '>= 14'} 352 | dependencies: 353 | ast-types: 0.13.4 354 | escodegen: 2.1.0 355 | esprima: 4.0.1 356 | 357 | /devtools-protocol@0.0.1203626: 358 | resolution: {integrity: 
sha512-nEzHZteIUZfGCZtTiS1fRpC8UZmsfD1SiyPvaUNvS13dvKf666OAm8YTi0+Ca3n1nLEyu49Cy4+dPWpaHFJk9g==} 359 | dev: false 360 | 361 | /devtools-protocol@0.0.1232444: 362 | resolution: {integrity: sha512-pM27vqEfxSxRkTMnF+XCmxSEb6duO5R+t8A9DEEJgy4Wz2RVanje2mmj99B6A3zv2r/qGfYlOvYznUhuokizmg==} 363 | dev: true 364 | 365 | /dom-serializer@2.0.0: 366 | resolution: {integrity: sha512-wIkAryiqt/nV5EQKqQpo3SToSOV9J0DnbJqwK7Wv/Trc92zIAYZ4FlMu+JPFW1DfGFt81ZTCGgDEabffXeLyJg==} 367 | dependencies: 368 | domelementtype: 2.3.0 369 | domhandler: 5.0.3 370 | entities: 4.5.0 371 | dev: false 372 | 373 | /domelementtype@2.3.0: 374 | resolution: {integrity: sha512-OLETBj6w0OsagBwdXnPdN0cnMfF9opN69co+7ZrbfPGrdpPVNBUj02spi6B1N7wChLQiPn4CSH/zJvXw56gmHw==} 375 | dev: false 376 | 377 | /domhandler@5.0.3: 378 | resolution: {integrity: sha512-cgwlv/1iFQiFnU96XXgROh8xTeetsnJiDsTc7TYCLFd9+/WNkIqPTxiM/8pSd8VIrhXGTf1Ny1q1hquVqDJB5w==} 379 | engines: {node: '>= 4'} 380 | dependencies: 381 | domelementtype: 2.3.0 382 | dev: false 383 | 384 | /domutils@3.1.0: 385 | resolution: {integrity: sha512-H78uMmQtI2AhgDJjWeQmHwJJ2bLPD3GMmO7Zja/ZZh84wkm+4ut+IUnUdRa8uCGX88DiVx1j6FRe1XfxEgjEZA==} 386 | dependencies: 387 | dom-serializer: 2.0.0 388 | domelementtype: 2.3.0 389 | domhandler: 5.0.3 390 | dev: false 391 | 392 | /emoji-regex@8.0.0: 393 | resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==} 394 | 395 | /end-of-stream@1.4.4: 396 | resolution: {integrity: sha512-+uw1inIHVPQoaVuHzRyXd21icM+cnt4CzD5rW+NC1wjOUSTOs+Te7FOv7AhN7vS9x/oIyhLP5PR1H+phQAHu5Q==} 397 | dependencies: 398 | once: 1.4.0 399 | 400 | /entities@4.5.0: 401 | resolution: {integrity: sha512-V0hjH4dGPh9Ao5p0MoRY6BVqtwCjhz6vI5LT8AJ55H+4g9/4vbHx1I54fS0XuclLhDHArPQCiMjDxjaL8fPxhw==} 402 | engines: {node: '>=0.12'} 403 | dev: false 404 | 405 | /env-paths@2.2.1: 406 | resolution: {integrity: sha512-+h1lkLKhZMTYjog1VEpJNG7NZJWcuc2DDk/qsqSTRRCOXiLjeQ1d1/udrUGhqMxUgAlwKNZ0cf2uqan5GLuS2A==} 
407 | engines: {node: '>=6'} 408 | dev: true 409 | 410 | /error-ex@1.3.2: 411 | resolution: {integrity: sha512-7dFHNmqeFSEt2ZBsCriorKnn3Z2pj+fd9kmI6QoWw4//DL+icEBfc0U7qJCisqrTsKTjw4fNFy2pW9OqStD84g==} 412 | dependencies: 413 | is-arrayish: 0.2.1 414 | dev: true 415 | 416 | /escalade@3.1.1: 417 | resolution: {integrity: sha512-k0er2gUkLf8O0zKJiAhmkTnJlTvINGv7ygDNPbeIsX/TJjGJZHuh9B2UxbsaEkmlEo9MfhrSzmhIlhRlI2GXnw==} 418 | engines: {node: '>=6'} 419 | 420 | /escape-string-regexp@1.0.5: 421 | resolution: {integrity: sha512-vbRorB5FUQWvla16U8R/qgaFIya2qGzwDrNmCZuYKrbdSUMG6I1ZCGQRefkRVhuOkIGVne7BQ35DSfo1qvJqFg==} 422 | engines: {node: '>=0.8.0'} 423 | dev: true 424 | 425 | /escodegen@2.1.0: 426 | resolution: {integrity: sha512-2NlIDTwUWJN0mRPQOdtQBzbUHvdGY2P1VXSyU83Q3xKxM7WHX2Ql8dKq782Q9TgQUNOLEzEYu9bzLNj1q88I5w==} 427 | engines: {node: '>=6.0'} 428 | hasBin: true 429 | dependencies: 430 | esprima: 4.0.1 431 | estraverse: 5.3.0 432 | esutils: 2.0.3 433 | optionalDependencies: 434 | source-map: 0.6.1 435 | 436 | /esprima@4.0.1: 437 | resolution: {integrity: sha512-eGuFFw7Upda+g4p+QHvnW0RyTX/SVeJBDM/gCtMARO0cLuT2HcEKnTPvhjV6aGeqrCB/sbNop0Kszm0jsaWU4A==} 438 | engines: {node: '>=4'} 439 | hasBin: true 440 | 441 | /estraverse@5.3.0: 442 | resolution: {integrity: sha512-MMdARuVEQziNTeJD8DgMqmhwR11BRQ/cBP+pLtYdSTnf3MIO8fFeiINEbX36ZdNlfU/7A9f3gUw49B3oQsvwBA==} 443 | engines: {node: '>=4.0'} 444 | 445 | /esutils@2.0.3: 446 | resolution: {integrity: sha512-kVscqXk4OCp68SZ0dkgEKVi6/8ij300KBWTJq32P/dYeWTSwK41WyTxalN1eRmA5Z9UU/LX9D7FWSmV9SAYx6g==} 447 | engines: {node: '>=0.10.0'} 448 | 449 | /extract-zip@2.0.1: 450 | resolution: {integrity: sha512-GDhU9ntwuKyGXdZBUgTIe+vXnWj0fppUEtMDL0+idd5Sta8TGpHssn/eusA9mrPr9qNDym6SxAYZjNvCn/9RBg==} 451 | engines: {node: '>= 10.17.0'} 452 | hasBin: true 453 | dependencies: 454 | debug: 4.3.4 455 | get-stream: 5.2.0 456 | yauzl: 2.10.0 457 | optionalDependencies: 458 | '@types/yauzl': 2.10.3 459 | transitivePeerDependencies: 460 | - 
supports-color 461 | 462 | /fast-fifo@1.3.2: 463 | resolution: {integrity: sha512-/d9sfos4yxzpwkDkuN7k2SqFKtYNmCTzgfEpz82x34IM9/zc8KGxQoXg1liNC/izpRM/MBdt44Nmx41ZWqk+FQ==} 464 | 465 | /fd-slicer@1.1.0: 466 | resolution: {integrity: sha512-cE1qsB/VwyQozZ+q1dGxR8LBYNZeofhEdUNGSMbQD3Gw2lAzX9Zb3uIU6Ebc/Fmyjo9AWWfnn0AUCHqtevs/8g==} 467 | dependencies: 468 | pend: 1.2.0 469 | 470 | /fetch-blob@3.2.0: 471 | resolution: {integrity: sha512-7yAQpD2UMJzLi1Dqv7qFYnPbaPx7ZfFK6PiIxQ4PfkGPyNyl2Ugx+a/umUonmKqjhM4DnfbMvdX6otXq83soQQ==} 472 | engines: {node: ^12.20 || >= 14.13} 473 | dependencies: 474 | node-domexception: 1.0.0 475 | web-streams-polyfill: 3.3.3 476 | dev: false 477 | 478 | /follow-redirects@1.15.4: 479 | resolution: {integrity: sha512-Cr4D/5wlrb0z9dgERpUL3LrmPKVDsETIJhaCMeDfuFYcqa5bldGV6wBsAN6X/vxlXQtFBMrXdXxdL8CbDTGniw==} 480 | engines: {node: '>=4.0'} 481 | peerDependencies: 482 | debug: '*' 483 | peerDependenciesMeta: 484 | debug: 485 | optional: true 486 | dev: false 487 | 488 | /formdata-polyfill@4.0.10: 489 | resolution: {integrity: sha512-buewHzMvYL29jdeQTVILecSaZKnt/RJWjoZCF5OW60Z67/GmSLBkOFM7qh1PI3zFNtJbaZL5eQu1vLfazOwj4g==} 490 | engines: {node: '>=12.20.0'} 491 | dependencies: 492 | fetch-blob: 3.2.0 493 | dev: false 494 | 495 | /fs-extra@8.1.0: 496 | resolution: {integrity: sha512-yhlQgA6mnOJUKOsRUFsgJdQCvkKhcz8tlZG5HBQfReYZy46OwLcY+Zia0mtdHsOo9y/hP+CxMN0TU9QxoOtG4g==} 497 | engines: {node: '>=6 <7 || >=8'} 498 | dependencies: 499 | graceful-fs: 4.2.11 500 | jsonfile: 4.0.0 501 | universalify: 0.1.2 502 | 503 | /get-caller-file@2.0.5: 504 | resolution: {integrity: sha512-DyFP3BM/3YHTQOCUL/w0OZHR0lpKeGrxotcHWcqNEdnltqFwXVfhEBQ94eIo34AfQpo0rGki4cyIiftY06h2Fg==} 505 | engines: {node: 6.* || 8.* || >= 10.*} 506 | 507 | /get-stream@5.2.0: 508 | resolution: {integrity: sha512-nBF+F1rAZVCu/p7rjzgA+Yb4lfYXrpl7a6VmJrU8wF9I1CKvP/QwPNZHnOlwbTkY6dvtFIzFMSyQXbLoTQPRpA==} 509 | engines: {node: '>=8'} 510 | dependencies: 511 | pump: 3.0.0 512 | 513 | /get-uri@6.0.2: 
514 | resolution: {integrity: sha512-5KLucCJobh8vBY1K07EFV4+cPZH3mrV9YeAruUseCQKHB58SGjjT2l9/eA9LD082IiuMjSlFJEcdJ27TXvbZNw==} 515 | engines: {node: '>= 14'} 516 | dependencies: 517 | basic-ftp: 5.0.4 518 | data-uri-to-buffer: 6.0.1 519 | debug: 4.3.4 520 | fs-extra: 8.1.0 521 | transitivePeerDependencies: 522 | - supports-color 523 | 524 | /graceful-fs@4.2.11: 525 | resolution: {integrity: sha512-RbJ5/jmFcNNCcDV5o9eTnBLJ/HszWV0P73bc+Ff4nS/rJj+YaS6IGyiOL0VoBYX+l1Wrl3k63h/KrH+nhJ0XvQ==} 526 | 527 | /has-flag@3.0.0: 528 | resolution: {integrity: sha512-sKJf1+ceQBr4SMkvQnBDNDtf4TXpVhVGateu0t918bl30FnbE2m4vNLX+VWe/dpjlb+HugGYzW7uQXH98HPEYw==} 529 | engines: {node: '>=4'} 530 | dev: true 531 | 532 | /htmlparser2@8.0.2: 533 | resolution: {integrity: sha512-GYdjWKDkbRLkZ5geuHs5NY1puJ+PXwP7+fHPRz06Eirsb9ugf6d8kkXav6ADhcODhFFPMIXyxkxSuMf3D6NCFA==} 534 | dependencies: 535 | domelementtype: 2.3.0 536 | domhandler: 5.0.3 537 | domutils: 3.1.0 538 | entities: 4.5.0 539 | dev: false 540 | 541 | /http-proxy-agent@7.0.0: 542 | resolution: {integrity: sha512-+ZT+iBxVUQ1asugqnD6oWoRiS25AkjNfG085dKJGtGxkdwLQrMKU5wJr2bOOFAXzKcTuqq+7fZlTMgG3SRfIYQ==} 543 | engines: {node: '>= 14'} 544 | dependencies: 545 | agent-base: 7.1.0 546 | debug: 4.3.4 547 | transitivePeerDependencies: 548 | - supports-color 549 | 550 | /http-proxy-agent@7.0.2: 551 | resolution: {integrity: sha512-T1gkAiYYDWYx3V5Bmyu7HcfcvL7mUrTWiM6yOfa3PIphViJ/gFPbvidQ+veqSOHci/PxBcDabeUNCzpOODJZig==} 552 | engines: {node: '>= 14'} 553 | dependencies: 554 | agent-base: 7.1.0 555 | debug: 4.3.4 556 | transitivePeerDependencies: 557 | - supports-color 558 | dev: true 559 | 560 | /https-proxy-agent@7.0.2: 561 | resolution: {integrity: sha512-NmLNjm6ucYwtcUmL7JQC1ZQ57LmHP4lT15FQ8D61nak1rO6DH+fz5qNK2Ap5UN4ZapYICE3/0KodcLYSPsPbaA==} 562 | engines: {node: '>= 14'} 563 | dependencies: 564 | agent-base: 7.1.0 565 | debug: 4.3.4 566 | transitivePeerDependencies: 567 | - supports-color 568 | 569 | /https-proxy-agent@7.0.4: 570 | 
resolution: {integrity: sha512-wlwpilI7YdjSkWaQ/7omYBMTliDcmCN8OLihO6I9B86g06lMyAoqgoDpV0XqoaPOKj+0DIdAvnsWfyAAhmimcg==} 571 | engines: {node: '>= 14'} 572 | dependencies: 573 | agent-base: 7.1.0 574 | debug: 4.3.4 575 | transitivePeerDependencies: 576 | - supports-color 577 | dev: true 578 | 579 | /ieee754@1.2.1: 580 | resolution: {integrity: sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA==} 581 | 582 | /import-fresh@3.3.0: 583 | resolution: {integrity: sha512-veYYhQa+D1QBKznvhUHxb8faxlrwUnxseDAbAp457E0wLNio2bOSKnjYDhMj+YiAq61xrMGhQk9iXVk5FzgQMw==} 584 | engines: {node: '>=6'} 585 | dependencies: 586 | parent-module: 1.0.1 587 | resolve-from: 4.0.0 588 | dev: true 589 | 590 | /ip@1.1.8: 591 | resolution: {integrity: sha512-PuExPYUiu6qMBQb4l06ecm6T6ujzhmh+MeJcW9wa89PoAz5pvd4zPgN5WJV104mb6S2T1AwNIAaB70JNrLQWhg==} 592 | 593 | /ip@2.0.0: 594 | resolution: {integrity: sha512-WKa+XuLG1A1R0UWhl2+1XQSi+fZWMsYKffMZTTYsiZaUD8k2yDAj5atimTUD2TZkyCkNEeYE5NhFZmupOGtjYQ==} 595 | 596 | /is-arrayish@0.2.1: 597 | resolution: {integrity: sha512-zz06S8t0ozoDXMG+ube26zeCTNXcKIPJZJi8hBrF4idCLms4CG9QtK7qBl1boi5ODzFpjswb5JPmHCbMpjaYzg==} 598 | dev: true 599 | 600 | /is-fullwidth-code-point@3.0.0: 601 | resolution: {integrity: sha512-zymm5+u+sCsSWyD9qNaejV3DFvhCKclKdizYaJUuHA83RLjb7nSuGnddCHGv0hk+KY7BMAlsWeK4Ueg6EV6XQg==} 602 | engines: {node: '>=8'} 603 | 604 | /js-tokens@4.0.0: 605 | resolution: {integrity: sha512-RdJUflcE3cUzKiMqQgsCu06FPu9UdIJO0beYbPhHN4k6apgJtifcoCtT9bcxOpYBtpD2kCM6Sbzg4CausW/PKQ==} 606 | dev: true 607 | 608 | /js-yaml@4.1.0: 609 | resolution: {integrity: sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA==} 610 | hasBin: true 611 | dependencies: 612 | argparse: 2.0.1 613 | dev: true 614 | 615 | /json-parse-even-better-errors@2.3.1: 616 | resolution: {integrity: sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w==} 617 | dev: true 618 | 
619 | /jsonfile@4.0.0: 620 | resolution: {integrity: sha512-m6F1R3z8jjlf2imQHS2Qez5sjKWQzbuuhuJ/FKYFRZvPE3PuHcSMVZzfsLhGVOkfd20obL5SWEBew5ShlquNxg==} 621 | optionalDependencies: 622 | graceful-fs: 4.2.11 623 | 624 | /lines-and-columns@1.2.4: 625 | resolution: {integrity: sha512-7ylylesZQ/PV29jhEDl3Ufjo6ZX7gCqJr5F7PKrqc93v7fzSymt1BpwEU8nAUXs8qzzvqhbjhK5QZg6Mt/HkBg==} 626 | dev: true 627 | 628 | /lodash@4.17.21: 629 | resolution: {integrity: sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg==} 630 | dev: false 631 | 632 | /lru-cache@7.18.3: 633 | resolution: {integrity: sha512-jumlc0BIUrS3qJGgIkWZsyfAM7NCWiBcCDhnd+3NNM5KbBmLTgHVfWBcg6W+rLUsIpzpERPsvwUP7CckAQSOoA==} 634 | engines: {node: '>=12'} 635 | 636 | /mitt@3.0.1: 637 | resolution: {integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==} 638 | 639 | /mkdirp-classic@0.5.3: 640 | resolution: {integrity: sha512-gKLcREMhtuZRwRAfqP3RFW+TK4JqApVBtOIftVgjuABpAtpxhPGaDcfvbhNvD0B8iD1oUr/txX35NjcaY6Ns/A==} 641 | dev: false 642 | 643 | /ms@2.1.2: 644 | resolution: {integrity: sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==} 645 | 646 | /netmask@2.0.2: 647 | resolution: {integrity: sha512-dBpDMdxv9Irdq66304OLfEmQ9tbNRFnFTuZiLo+bD+r332bBmMJ8GBLXklIXXgxd3+v9+KUnZaUR5PJMa75Gsg==} 648 | engines: {node: '>= 0.4.0'} 649 | 650 | /node-domexception@1.0.0: 651 | resolution: {integrity: sha512-/jKZoMpw0F8GRwl4/eLROPA3cfcXtLApP0QzLmUT/HuPCZWyB7IY9ZrMeKw2O/nFIqPQB3PVM9aYm0F312AXDQ==} 652 | engines: {node: '>=10.5.0'} 653 | dev: false 654 | 655 | /node-fetch@2.7.0: 656 | resolution: {integrity: sha512-c4FRfUm/dbcWZ7U+1Wq0AwCyFL+3nt2bEw05wfxSz+DWpWsitgmSgYmy2dQdWyKC1694ELPqMs/YzUSNozLt8A==} 657 | engines: {node: 4.x || >=6.0.0} 658 | peerDependencies: 659 | encoding: ^0.1.0 660 | peerDependenciesMeta: 661 | encoding: 662 | optional: true 663 | dependencies: 664 | whatwg-url: 5.0.0 665 | 666 | 
/node-fetch@3.3.2: 667 | resolution: {integrity: sha512-dRB78srN/l6gqWulah9SrxeYnxeddIG30+GOqK/9OlLVyLg3HPnr6SqOWTWOXKRwC2eGYCkZ59NNuSgvSrpgOA==} 668 | engines: {node: ^12.20.0 || ^14.13.1 || >=16.0.0} 669 | dependencies: 670 | data-uri-to-buffer: 4.0.1 671 | fetch-blob: 3.2.0 672 | formdata-polyfill: 4.0.10 673 | dev: false 674 | 675 | /nth-check@2.1.1: 676 | resolution: {integrity: sha512-lqjrjmaOoAnWfMmBPL+XNnynZh2+swxiX3WUE0s4yEHI6m+AwrK2UZOimIRl3X/4QctVqS8AiZjFqyOGrMXb/w==} 677 | dependencies: 678 | boolbase: 1.0.0 679 | dev: false 680 | 681 | /once@1.4.0: 682 | resolution: {integrity: sha512-lNaJgI+2Q5URQBkccEKHTQOPaXdUxnZZElQTZY0MFUAuaEqe1E+Nyvgdz/aIyNi6Z9MzO5dv1H8n58/GELp3+w==} 683 | dependencies: 684 | wrappy: 1.0.2 685 | 686 | /pac-proxy-agent@7.0.1: 687 | resolution: {integrity: sha512-ASV8yU4LLKBAjqIPMbrgtaKIvxQri/yh2OpI+S6hVa9JRkUI3Y3NPFbfngDtY7oFtSMD3w31Xns89mDa3Feo5A==} 688 | engines: {node: '>= 14'} 689 | dependencies: 690 | '@tootallnate/quickjs-emscripten': 0.23.0 691 | agent-base: 7.1.0 692 | debug: 4.3.4 693 | get-uri: 6.0.2 694 | http-proxy-agent: 7.0.0 695 | https-proxy-agent: 7.0.2 696 | pac-resolver: 7.0.0 697 | socks-proxy-agent: 8.0.2 698 | transitivePeerDependencies: 699 | - supports-color 700 | 701 | /pac-resolver@7.0.0: 702 | resolution: {integrity: sha512-Fd9lT9vJbHYRACT8OhCbZBbxr6KRSawSovFpy8nDGshaK99S/EBhVIHp9+crhxrsZOuvLpgL1n23iyPg6Rl2hg==} 703 | engines: {node: '>= 14'} 704 | dependencies: 705 | degenerator: 5.0.1 706 | ip: 1.1.8 707 | netmask: 2.0.2 708 | 709 | /parent-module@1.0.1: 710 | resolution: {integrity: sha512-GQ2EWRpQV8/o+Aw8YqtfZZPfNRWZYkbidE9k5rpl/hC3vtHHBfGm2Ifi6qWV+coDGkrUKZAxE3Lot5kcsRlh+g==} 711 | engines: {node: '>=6'} 712 | dependencies: 713 | callsites: 3.1.0 714 | dev: true 715 | 716 | /parse-json@5.2.0: 717 | resolution: {integrity: sha512-ayCKvm/phCGxOkYRSCM82iDwct8/EonSEgCSxWxD7ve6jHggsFl4fZVQBPRNgQoKiuV/odhFrGzQXZwbifC8Rg==} 718 | engines: {node: '>=8'} 719 | dependencies: 720 | '@babel/code-frame': 7.23.5 
721 | error-ex: 1.3.2 722 | json-parse-even-better-errors: 2.3.1 723 | lines-and-columns: 1.2.4 724 | dev: true 725 | 726 | /parse5-htmlparser2-tree-adapter@7.0.0: 727 | resolution: {integrity: sha512-B77tOZrqqfUfnVcOrUvfdLbz4pu4RopLD/4vmu3HUPswwTA8OH0EMW9BlWR2B0RCoiZRAHEUu7IxeP1Pd1UU+g==} 728 | dependencies: 729 | domhandler: 5.0.3 730 | parse5: 7.1.2 731 | dev: false 732 | 733 | /parse5@7.1.2: 734 | resolution: {integrity: sha512-Czj1WaSVpaoj0wbhMzLmWD69anp2WH7FXMB9n1Sy8/ZFF9jolSQVMu1Ij5WIyGmcBmhk7EOndpO4mIpihVqAXw==} 735 | dependencies: 736 | entities: 4.5.0 737 | dev: false 738 | 739 | /pend@1.2.0: 740 | resolution: {integrity: sha512-F3asv42UuXchdzt+xXqfW1OGlVBe+mxa2mqI0pg5yAHZPvFmY3Y6drSf/GQ1A86WgWEN9Kzh/WrgKa6iGcHXLg==} 741 | 742 | /progress@2.0.3: 743 | resolution: {integrity: sha512-7PiHtLll5LdnKIMw100I+8xJXR5gW2QwWYkT6iJva0bXitZKa/XMrSbdmg3r2Xnaidz9Qumd0VPaMrZlF9V9sA==} 744 | engines: {node: '>=0.4.0'} 745 | 746 | /proxy-agent@6.3.1: 747 | resolution: {integrity: sha512-Rb5RVBy1iyqOtNl15Cw/llpeLH8bsb37gM1FUfKQ+Wck6xHlbAhWGUFiTRHtkjqGTA5pSHz6+0hrPW/oECihPQ==} 748 | engines: {node: '>= 14'} 749 | dependencies: 750 | agent-base: 7.1.0 751 | debug: 4.3.4 752 | http-proxy-agent: 7.0.0 753 | https-proxy-agent: 7.0.2 754 | lru-cache: 7.18.3 755 | pac-proxy-agent: 7.0.1 756 | proxy-from-env: 1.1.0 757 | socks-proxy-agent: 8.0.2 758 | transitivePeerDependencies: 759 | - supports-color 760 | dev: false 761 | 762 | /proxy-agent@6.4.0: 763 | resolution: {integrity: sha512-u0piLU+nCOHMgGjRbimiXmA9kM/L9EHh3zL81xCdp7m+Y2pHIsnmbdDoEDoAz5geaonNR6q6+yOPQs6n4T6sBQ==} 764 | engines: {node: '>= 14'} 765 | dependencies: 766 | agent-base: 7.1.0 767 | debug: 4.3.4 768 | http-proxy-agent: 7.0.2 769 | https-proxy-agent: 7.0.4 770 | lru-cache: 7.18.3 771 | pac-proxy-agent: 7.0.1 772 | proxy-from-env: 1.1.0 773 | socks-proxy-agent: 8.0.2 774 | transitivePeerDependencies: 775 | - supports-color 776 | dev: true 777 | 778 | /proxy-from-env@1.1.0: 779 | resolution: {integrity: 
sha512-D+zkORCbA9f1tdWRK0RaCR3GPv50cMxcrz4X8k5LTSUD1Dkw47mKJEZQNunItRTkWwgtaUSo1RVFRIG9ZXiFYg==} 780 | 781 | /pump@3.0.0: 782 | resolution: {integrity: sha512-LwZy+p3SFs1Pytd/jYct4wpv49HiYCqd9Rlc5ZVdk0V+8Yzv6jR5Blk3TRmPL1ft69TxP0IMZGJ+WPFU2BFhww==} 783 | dependencies: 784 | end-of-stream: 1.4.4 785 | once: 1.4.0 786 | 787 | /puppeteer-core@21.6.1: 788 | resolution: {integrity: sha512-0chaaK/RL9S1U3bsyR4fUeUfoj51vNnjWvXgG6DcsyMjwYNpLcAThv187i1rZCo7QhJP0wZN8plQkjNyrq2h+A==} 789 | engines: {node: '>=16.13.2'} 790 | dependencies: 791 | '@puppeteer/browsers': 1.9.0 792 | chromium-bidi: 0.5.1(devtools-protocol@0.0.1203626) 793 | cross-fetch: 4.0.0 794 | debug: 4.3.4 795 | devtools-protocol: 0.0.1203626 796 | ws: 8.15.1 797 | transitivePeerDependencies: 798 | - bufferutil 799 | - encoding 800 | - supports-color 801 | - utf-8-validate 802 | dev: false 803 | 804 | /puppeteer-core@22.1.0: 805 | resolution: {integrity: sha512-LdsQxslPf0Rpk6gLvkyyrraad2S4PUjGCT2CAKS2EnrRPpzIb6fsrFnoPNZxLlMkU7apU1g4Nf5wYePdSjxLkQ==} 806 | engines: {node: '>=18'} 807 | dependencies: 808 | '@puppeteer/browsers': 2.0.1 809 | chromium-bidi: 0.5.9(devtools-protocol@0.0.1232444) 810 | cross-fetch: 4.0.0 811 | debug: 4.3.4 812 | devtools-protocol: 0.0.1232444 813 | ws: 8.16.0 814 | transitivePeerDependencies: 815 | - bufferutil 816 | - encoding 817 | - supports-color 818 | - utf-8-validate 819 | dev: true 820 | 821 | /puppeteer@22.1.0(typescript@5.3.3): 822 | resolution: {integrity: sha512-suatHy6A48YkoykjrJNkJaixWVrvnPtzIgngK17V/P0MvgSyJzuu21PyR+0lWIK0cfZqKqmR8CHZvHzOCd4MIg==} 823 | engines: {node: '>=18'} 824 | hasBin: true 825 | requiresBuild: true 826 | dependencies: 827 | '@puppeteer/browsers': 2.0.1 828 | cosmiconfig: 9.0.0(typescript@5.3.3) 829 | puppeteer-core: 22.1.0 830 | transitivePeerDependencies: 831 | - bufferutil 832 | - encoding 833 | - supports-color 834 | - typescript 835 | - utf-8-validate 836 | dev: true 837 | 838 | /queue-tick@1.0.1: 839 | resolution: {integrity: 
sha512-kJt5qhMxoszgU/62PLP1CJytzd2NKetjSRnyuj31fDd3Rlcz3fzlFdFLD1SItunPwyqEOkca6GbV612BWfaBag==} 840 | 841 | /require-directory@2.1.1: 842 | resolution: {integrity: sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q==} 843 | engines: {node: '>=0.10.0'} 844 | 845 | /resolve-from@4.0.0: 846 | resolution: {integrity: sha512-pb/MYmXstAkysRFx8piNI1tGFNQIFA3vkE3Gq4EuA1dF6gHp/+vgZqsCGJapvy8N3Q+4o7FwvquPJcnZ7RYy4g==} 847 | engines: {node: '>=4'} 848 | dev: true 849 | 850 | /smart-buffer@4.2.0: 851 | resolution: {integrity: sha512-94hK0Hh8rPqQl2xXc3HsaBoOXKV20MToPkcXvwbISWLEs+64sBq5kFgn2kJDHb1Pry9yrP0dxrCI9RRci7RXKg==} 852 | engines: {node: '>= 6.0.0', npm: '>= 3.0.0'} 853 | 854 | /socks-proxy-agent@8.0.2: 855 | resolution: {integrity: sha512-8zuqoLv1aP/66PHF5TqwJ7Czm3Yv32urJQHrVyhD7mmA6d61Zv8cIXQYPTWwmg6qlupnPvs/QKDmfa4P/qct2g==} 856 | engines: {node: '>= 14'} 857 | dependencies: 858 | agent-base: 7.1.0 859 | debug: 4.3.4 860 | socks: 2.7.1 861 | transitivePeerDependencies: 862 | - supports-color 863 | 864 | /socks@2.7.1: 865 | resolution: {integrity: sha512-7maUZy1N7uo6+WVEX6psASxtNlKaNVMlGQKkG/63nEDdLOWNbiUMoLK7X4uYoLhQstau72mLgfEWcXcwsaHbYQ==} 866 | engines: {node: '>= 10.13.0', npm: '>= 3.0.0'} 867 | dependencies: 868 | ip: 2.0.0 869 | smart-buffer: 4.2.0 870 | 871 | /source-map@0.6.1: 872 | resolution: {integrity: sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g==} 873 | engines: {node: '>=0.10.0'} 874 | requiresBuild: true 875 | optional: true 876 | 877 | /streamx@2.15.6: 878 | resolution: {integrity: sha512-q+vQL4AAz+FdfT137VF69Cc/APqUbxy+MDOImRrMvchJpigHj9GksgDU2LYbO9rx7RX6osWgxJB2WxhYv4SZAw==} 879 | dependencies: 880 | fast-fifo: 1.3.2 881 | queue-tick: 1.0.1 882 | 883 | /string-width@4.2.3: 884 | resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==} 885 | engines: {node: '>=8'} 886 | dependencies: 887 | 
emoji-regex: 8.0.0 888 | is-fullwidth-code-point: 3.0.0 889 | strip-ansi: 6.0.1 890 | 891 | /strip-ansi@6.0.1: 892 | resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==} 893 | engines: {node: '>=8'} 894 | dependencies: 895 | ansi-regex: 5.0.1 896 | 897 | /supports-color@5.5.0: 898 | resolution: {integrity: sha512-QjVjwdXIt408MIiAqCX4oUKsgU2EqAGzs2Ppkm4aQYbjm+ZEWEcW4SfFNTr4uMNZma0ey4f5lgLrkB0aX0QMow==} 899 | engines: {node: '>=4'} 900 | dependencies: 901 | has-flag: 3.0.0 902 | dev: true 903 | 904 | /tar-fs@3.0.4: 905 | resolution: {integrity: sha512-5AFQU8b9qLfZCX9zp2duONhPmZv0hGYiBPJsyUdqMjzq/mqVpy/rEUSeHk1+YitmxugaptgBh5oDGU3VsAJq4w==} 906 | dependencies: 907 | mkdirp-classic: 0.5.3 908 | pump: 3.0.0 909 | tar-stream: 3.1.6 910 | dev: false 911 | 912 | /tar-fs@3.0.5: 913 | resolution: {integrity: sha512-JOgGAmZyMgbqpLwct7ZV8VzkEB6pxXFBVErLtb+XCOqzc6w1xiWKI9GVd6bwk68EX7eJ4DWmfXVmq8K2ziZTGg==} 914 | dependencies: 915 | pump: 3.0.0 916 | tar-stream: 3.1.6 917 | optionalDependencies: 918 | bare-fs: 2.1.5 919 | bare-path: 2.1.0 920 | dev: true 921 | 922 | /tar-stream@3.1.6: 923 | resolution: {integrity: sha512-B/UyjYwPpMBv+PaFSWAmtYjwdrlEaZQEhMIBFNC5oEG8lpiW8XjcSdmEaClj28ArfKScKHs2nshz3k2le6crsg==} 924 | dependencies: 925 | b4a: 1.6.4 926 | fast-fifo: 1.3.2 927 | streamx: 2.15.6 928 | 929 | /through@2.3.8: 930 | resolution: {integrity: sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg==} 931 | 932 | /tr46@0.0.3: 933 | resolution: {integrity: sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==} 934 | 935 | /tslib@2.6.2: 936 | resolution: {integrity: sha512-AEYxH93jGFPn/a2iVAwW87VuUIkR1FVUKB77NwMF7nBTDkDrrT/Hpt/IrCJ0QXhW27jTBDcf5ZY7w6RiqTMw2Q==} 937 | 938 | /typescript@5.3.3: 939 | resolution: {integrity: sha512-pXWcraxM0uxAS+tN0AG/BF2TyqmHO014Z070UsJ+pFvYuRSq8KH8DmWpnbXe0pEPDHXZV3FcAbJkijJ5oNEnWw==} 940 | engines: 
{node: '>=14.17'} 941 | hasBin: true 942 | dev: true 943 | 944 | /unbzip2-stream@1.4.3: 945 | resolution: {integrity: sha512-mlExGW4w71ebDJviH16lQLtZS32VKqsSfk80GCfUlwT/4/hNRFsoscrF/c++9xinkMzECL1uL9DDwXqFWkruPg==} 946 | dependencies: 947 | buffer: 5.7.1 948 | through: 2.3.8 949 | 950 | /undici-types@5.26.5: 951 | resolution: {integrity: sha512-JlCMO+ehdEIKqlFxk6IfVoAUVmgz7cU7zD/h9XZ0qzeosSHmUJVOzSQvvYSYWXkFXC+IfLKSIffhv0sVZup6pA==} 952 | requiresBuild: true 953 | 954 | /universalify@0.1.2: 955 | resolution: {integrity: sha512-rBJeI5CXAlmy1pV+617WB9J63U6XcazHHF2f2dbJix4XzpUF0RS3Zbj0FGIOCAva5P/d/GBOYaACQ1w+0azUkg==} 956 | engines: {node: '>= 4.0.0'} 957 | 958 | /urlpattern-polyfill@10.0.0: 959 | resolution: {integrity: sha512-H/A06tKD7sS1O1X2SshBVeA5FLycRpjqiBeqGKmBwBDBy28EnRjORxTNe269KSSr5un5qyWi1iL61wLxpd+ZOg==} 960 | dev: true 961 | 962 | /urlpattern-polyfill@9.0.0: 963 | resolution: {integrity: sha512-WHN8KDQblxd32odxeIgo83rdVDE2bvdkb86it7bMhYZwWKJz0+O0RK/eZiHYnM+zgt/U7hAHOlCQGfjjvSkw2g==} 964 | dev: false 965 | 966 | /web-streams-polyfill@3.3.3: 967 | resolution: {integrity: sha512-d2JWLCivmZYTSIoge9MsgFCZrt571BikcWGYkjC1khllbTeDlGqZ2D8vD8E/lJa8WGWbb7Plm8/XJYV7IJHZZw==} 968 | engines: {node: '>= 8'} 969 | dev: false 970 | 971 | /webidl-conversions@3.0.1: 972 | resolution: {integrity: sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==} 973 | 974 | /whatwg-url@5.0.0: 975 | resolution: {integrity: sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==} 976 | dependencies: 977 | tr46: 0.0.3 978 | webidl-conversions: 3.0.1 979 | 980 | /wrap-ansi@7.0.0: 981 | resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==} 982 | engines: {node: '>=10'} 983 | dependencies: 984 | ansi-styles: 4.3.0 985 | string-width: 4.2.3 986 | strip-ansi: 6.0.1 987 | 988 | /wrappy@1.0.2: 989 | resolution: {integrity: 
sha512-l4Sp/DRseor9wL6EvV2+TuQn63dMkPjZ/sp9XkghTEbV9KlPS1xUsZ3u7/IQO4wxtcFB4bgpQPRcR3QCvezPcQ==} 990 | 991 | /ws@8.15.1: 992 | resolution: {integrity: sha512-W5OZiCjXEmk0yZ66ZN82beM5Sz7l7coYxpRkzS+p9PP+ToQry8szKh+61eNktr7EA9DOwvFGhfC605jDHbP6QQ==} 993 | engines: {node: '>=10.0.0'} 994 | peerDependencies: 995 | bufferutil: ^4.0.1 996 | utf-8-validate: '>=5.0.2' 997 | peerDependenciesMeta: 998 | bufferutil: 999 | optional: true 1000 | utf-8-validate: 1001 | optional: true 1002 | dev: false 1003 | 1004 | /ws@8.16.0: 1005 | resolution: {integrity: sha512-HS0c//TP7Ina87TfiPUz1rQzMhHrl/SG2guqRcTOIUYD2q8uhUdNHZYJUaQ8aTGPzCh+c6oawMKW35nFl1dxyQ==} 1006 | engines: {node: '>=10.0.0'} 1007 | peerDependencies: 1008 | bufferutil: ^4.0.1 1009 | utf-8-validate: '>=5.0.2' 1010 | peerDependenciesMeta: 1011 | bufferutil: 1012 | optional: true 1013 | utf-8-validate: 1014 | optional: true 1015 | dev: true 1016 | 1017 | /y18n@5.0.8: 1018 | resolution: {integrity: sha512-0pfFzegeDWJHJIAmTLRP2DwHjdF5s7jo9tuztdQxAhINCdvS+3nGINqPd00AphqJR/0LhANUS6/+7SCb98YOfA==} 1019 | engines: {node: '>=10'} 1020 | 1021 | /yargs-parser@21.1.1: 1022 | resolution: {integrity: sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw==} 1023 | engines: {node: '>=12'} 1024 | 1025 | /yargs@17.7.2: 1026 | resolution: {integrity: sha512-7dSzzRQ++CKnNI/krKnYRV7JKKPUXMEh61soaHKg9mrWEhzFWhFnxPxGl+69cD1Ou63C13NUPCnmIcrvqCuM6w==} 1027 | engines: {node: '>=12'} 1028 | dependencies: 1029 | cliui: 8.0.1 1030 | escalade: 3.1.1 1031 | get-caller-file: 2.0.5 1032 | require-directory: 2.1.1 1033 | string-width: 4.2.3 1034 | y18n: 5.0.8 1035 | yargs-parser: 21.1.1 1036 | 1037 | /yauzl@2.10.0: 1038 | resolution: {integrity: sha512-p4a9I6X6nu6IhoGmBqAcbJy1mlC4j27vEPZX9F4L4/vZT3Lyq1VkFHw/V/PUcB9Buo+DG3iHkT0x3Qya58zc3g==} 1039 | dependencies: 1040 | buffer-crc32: 0.2.13 1041 | fd-slicer: 1.1.0 1042 | -------------------------------------------------------------------------------- 
/public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | Scraping Service 5 | 6 | 7 | 8 | Scraping Service 9 | 10 | 11 | -------------------------------------------------------------------------------- /test/app.js: -------------------------------------------------------------------------------- 1 | 'use strict' 2 | 3 | const test = require('tape') 4 | const request = require('supertest') 5 | const async = require('async') 6 | 7 | test('Test /api/dom', function (assert) { 8 | const app = require('../app/app') 9 | async.waterfall([ 10 | (cb) => request(app).get('/api/dom?url=https://news.ycombinator.com&selector=.title+a').expect(200, cb), 11 | (results, cb) => { assert.ok(results.body, 'Returned list'); cb(null, results) }, 12 | (results, cb) => { assert.ok(results.body.results[0].count > 0, '1+ items returned'); cb(null, results) } 13 | ], 14 | (err, results) => { 15 | if (err) console.error(err) 16 | assert.end() 17 | } 18 | ) 19 | }) 20 | 21 | test('Test /api/metadata', function (assert) { 22 | const app = require('../app/app') 23 | async.waterfall([ 24 | (cb) => request(app).get('/api/meta?url=https://www.weld.io').expect(200, cb), 25 | (results, cb) => { assert.ok(results.body, 'Returned results'); cb(null, results) }, 26 | (results, cb) => { assert.ok(results.body.general.title.indexOf('Weld') !== -1, 'Page contains “Weld”'); cb(null, results) } 27 | ], 28 | (err, results) => { 29 | if (err) console.error(err) 30 | assert.end() 31 | } 32 | ) 33 | }) 34 | 35 | test('Test /api/page', function (assert) { 36 | const app = require('../app/app') 37 | async.waterfall([ 38 | (cb) => request(app).get('/api/page?url=http://www.tomsoderlund.com').expect(200, cb), 39 | (results, cb) => { assert.ok(results.body, 'Returned results'); cb(null, results) }, 40 | (results, cb) => { assert.ok(results.body.length > 12000, 'Page size is more than 12000 characters'); cb(null, results) }, 41 | (results, cb) => { 
assert.ok(results.body.content.indexOf('Tom Söderlund\'s website') !== -1, 'Page content contains “Tom Söderlund\'s website”'); cb(null, results) } 42 | ], 43 | (err, results) => { 44 | if (err) console.error(err) 45 | assert.end() 46 | } 47 | ) 48 | }) 49 | 50 | // https://github.com/substack/tape 51 | 52 | /* 53 | test.skip(name, cb) 54 | test.onFinish(fn) 55 | test.only(name, cb) 56 | test.createStream().pipe(process.stdout); 57 | test.createStream({ objectMode: true }).on('data', function (row) { console.log(JSON.stringify(row)) }); 58 | 59 | assert.plan(n) 60 | assert.end(err) 61 | assert.fail(msg) 62 | assert.pass(msg) 63 | assert.timeoutAfter(ms) 64 | assert.skip(msg) 65 | assert.ok(value, msg) 66 | assert.notOk(value, msg) 67 | assert.error(err, msg) 68 | assert.equal(actual, expected, msg) 69 | assert.notEqual(actual, expected, msg) 70 | assert.deepEqual(actual, expected, msg) 71 | assert.notDeepEqual(actual, expected, msg) 72 | assert.deepLooseEqual(actual, expected, msg) 73 | assert.notDeepLooseEqual(actual, expected, msg) 74 | assert.throws(fn, expected, msg) 75 | assert.doesNotThrow(fn, expected, msg) 76 | assert.test(name, [opts], cb) 77 | assert.comment(message) 78 | */ 79 | -------------------------------------------------------------------------------- /test/server.js: -------------------------------------------------------------------------------- 1 | // Just a test server for development, not used by Vercel hosting 2 | const { createServer } = require('http') 3 | 4 | const PORT = process.env.PORT || 3036 5 | 6 | // dom-simple/dom/image/meta/page – See /app/controllers/api folder 7 | const controller = process.env.API || 'dom-simple' 8 | createServer(require(`../api/${controller}`)).listen(PORT, () => console.log(`scraping-service:${controller} running on http://localhost:${PORT}, NODE_ENV: ${process.env.NODE_ENV}`)) 9 | -------------------------------------------------------------------------------- /vercel.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "headers": [ 3 | { 4 | "source": "/api/(.*)", 5 | "headers": [ 6 | { "key": "Access-Control-Allow-Credentials", "value": "true" }, 7 | { "key": "Access-Control-Allow-Origin", "value": "*" }, 8 | { "key": "Access-Control-Allow-Methods", "value": "GET,OPTIONS,PATCH,DELETE,POST,PUT" }, 9 | { "key": "Access-Control-Allow-Headers", "value": "X-CSRF-Token, X-Requested-With, Accept, Accept-Version, Content-Length, Content-MD5, Content-Type, Date, X-Api-Version" } 10 | ] 11 | } 12 | ], 13 | "functions": { 14 | "api/**/*": { 15 | "maxDuration": 60 16 | } 17 | } 18 | } --------------------------------------------------------------------------------