├── .editorconfig ├── .github └── workflows │ └── publish.yml ├── .gitignore ├── LICENSE ├── README.md ├── extension ├── background.ts ├── content.ts ├── eval.ts ├── manifest.json ├── popup.html └── popup.ts ├── nodejs ├── Browser.ts ├── PuppeteerNode.ts └── index.ts ├── package.json ├── tsconfig.ext.json ├── tsconfig.json └── tsconfig.node.json /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | indent_style = tab 5 | 6 | [*.md] 7 | indent_style = space 8 | indent_size = 2 9 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | publish: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@v2 13 | - uses: actions/setup-node@v2 14 | with: 15 | node-version: '15.x' 16 | registry-url: 'https://registry.npmjs.org' 17 | 18 | - run: npm install 19 | - run: npm run build 20 | 21 | - name: Bump package.json 22 | run: | 23 | sudo apt-get install jq 24 | jq ". + {version: \"${GITHUB_REF##*/v}\"}" package.json > dist/nodejs/package.json 25 | 26 | - name: Publish on NPM 27 | run: | 28 | cd dist/nodejs 29 | npm publish 30 | env: 31 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 32 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | /dist/ 2 | /node_modules/ 3 | package-lock.json 4 | yarn.lock 5 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2021 maxwrlr 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Puppeteer Extension 2 | 3 | [![npm puppeteer-extension package](https://img.shields.io/npm/v/puppeteer-extension.svg)](https://npmjs.org/package/puppeteer-extension) 4 | 5 | Puppeteer Extension is software used for browser automation. Its main goal is to support the API 6 | of [puppeteer](https://github.com/puppeteer/puppeteer), while 3rd party websites cannot detect browser automation ( 7 | especially with Chromium). It works by using the default browser with GUI (that normal users use), which has the browser 8 | extension of this repository installed, that communicates to the controller - a NodeJS/express app. 9 | 10 | Since the browser will be run without telling it, that it will be automated, a real headless mode isn't supported. I 11 | personally use it with Xvfb and Chromium on a RaspberryPI. 12 | 13 | ## Installation 14 | 15 | ### Browser Extension 16 | 17 | > **Note:** Since there is no UI yet, you first might want to configure the URL of the middleware 18 | in [extension/background.ts](extension/background.ts) to connect to the NodeJS app. 19 | 20 | 1. Compile the extension: `npm run build:extension` 21 | 2. Open [chrome://extensions/](chrome://extensions/). 22 | 3. Enable developer mode. 23 | 4. Click "Load unpacked" (recommended for Chrome) or "Pack extension" (recommended for Chromium). 24 | 5. Choose the `dist/extension` directory that was created in step 1 as extension root. 25 | 6. *If extension was packed in step 4:* Drag and Drop the created `.crx` file into Chromium. 26 | 27 | ### NodeJS 28 | 29 | `npm install puppeteer-extension` 30 | 31 | ## Usage 32 | 33 | Create the browser connector and add the middleware to an express server. Not perfect at all, but a quick solution. 34 | 35 | ```typescript 36 | import puppeteer from 'puppeteer-extension'; 37 | import * as express from 'express'; 38 | 39 | // server for communication between browser extension and NodeJS 40 | const app = express(); 41 | app.use(express.json()); 42 | const server = app.listen(8088); 43 | 44 | puppeteer.launch({ 45 | executablePath: '/path/to/chrome-or-chromium' 46 | }).then(async browser => { 47 | // Register communication endpoint 48 | app.use('/api/bridge/puppeteer', browser.middleware()); 49 | 50 | // do something with puppeteer 51 | const page = await browser.newPage(); 52 | await page.goto('https://example.com'); 53 | 54 | const content = await page.evaluate('document.documentElement.innerHTML'); 55 | if(content.includes(' { 8 | if(msg.topic === '@/polls') { 9 | sendResponse('Last poll: ' + time); 10 | } 11 | }); 12 | 13 | async function main() { 14 | let data = null; 15 | for(; ;) { 16 | try { 17 | time = Date.now(); 18 | data = await pollTask(data); 19 | if(data === false) { 20 | break; 21 | } 22 | } catch(err) { 23 | data = { error: err }; 24 | console.debug(err) 25 | await new Promise(r => setTimeout(r, 10_000)); 26 | } 27 | } 28 | } 29 | 30 | async function pollTask(data: any) { 31 | const task = await fetch(url, { 32 | method: 'POST', 33 | headers: { 34 | 'Content-Type': 'application/json' 35 | }, 36 | body: JSON.stringify(data || {}) 37 | }).then(r => r.json()); 38 | 39 | if(!task) { 40 | return false; 41 | } 42 | 43 | switch(task.name) { 44 | case 'Browser.newPage': { 45 | const tab = await new Promise(r => chrome.tabs.create({}, r)); 46 | return { 47 | payload: tab.id 48 | }; 49 | } 50 | case 'Browser.pages': { 51 | const window = await new Promise(r => chrome.windows.getLastFocused(r)); 52 | const tabs = await new Promise(r => chrome.tabs.getAllInWindow(window.id, r as any)); 53 | return { 54 | payload: tabs.filter(t => typeof t.id === 'number').map(t => t.id) 55 | }; 56 | } 57 | 58 | case 'Page.goto': { 59 | await chrome.tabs.update(task.ref, { 60 | url: task.args[0] 61 | }); 62 | return {}; 63 | } 64 | case 'Page.screenshot': { 65 | const tab = await new Promise(r => 66 | chrome.tabs.update(task.ref, { active: true }, r) 67 | ); 68 | if(!tab) { 69 | return { 70 | error: `Failed to execute ${task.name}: Tab was not found.` 71 | }; 72 | } 73 | 74 | const dataURL = await new Promise(r => 75 | chrome.tabs.captureVisibleTab(tab.windowId, { 76 | format: task.args[0]?.type, 77 | quality: task.args[0]?.quality 78 | }, r) 79 | ); 80 | return { 81 | payload: dataURL.replace(/^.*?,/, '') 82 | }; 83 | } 84 | case 'Page.close': { 85 | await new Promise(r => setTimeout(r, 1000)); 86 | await chrome.tabs.remove(task.ref); 87 | return {}; 88 | } 89 | default: { 90 | let i = 0, response; 91 | await new Promise(r => setTimeout(r, 2500)); 92 | while(response === undefined) { 93 | const tab = await new Promise(r => chrome.tabs.get(task.ref, r)); 94 | response = tab.status === 'complete' ? await new Promise(res => { 95 | chrome.tabs.sendMessage(task.ref, { 96 | topic: 'execute', 97 | payload: task 98 | }, res); 99 | }) : undefined; 100 | 101 | if(chrome.runtime.lastError) { 102 | return {}; 103 | } 104 | 105 | if(response === undefined) { 106 | if(++i === 10) { 107 | break; 108 | } else { 109 | await new Promise(r => setTimeout(r, 500)); 110 | } 111 | } 112 | } 113 | return response; 114 | } 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /extension/content.ts: -------------------------------------------------------------------------------- 1 | chrome.runtime.onMessage.addListener((msg, sender, sendResponse) => { 2 | if(msg.topic !== 'execute') { 3 | return; 4 | } 5 | 6 | const task = msg.payload; 7 | switch(task.name) { 8 | case 'Page.click': { 9 | const element = document.querySelector(task.args[0]); 10 | if(element) { 11 | element.click(); 12 | sendResponse(); 13 | } else { 14 | sendResponse({ 15 | error: 'Failed to execute Page.click: No element found for "' + task.args[0] + '"' 16 | }); 17 | } 18 | return; 19 | } 20 | 21 | case 'Page.evaluate': { 22 | const div = document.createElement('div'); 23 | div.innerText = task.args[0]; 24 | document.body.appendChild(div); 25 | 26 | const script = document.createElement('script'); 27 | script.src = chrome.runtime.getURL('eval.js'); 28 | script.addEventListener('load', () => { 29 | sendResponse({ 30 | payload: JSON.parse(div.innerText) 31 | }); 32 | 33 | script.remove(); 34 | div.remove(); 35 | }); 36 | 37 | document.head.appendChild(script); 38 | return true; 39 | } 40 | 41 | case 'Page.waitForNavigation': { 42 | sendResponse(); 43 | return; 44 | } 45 | 46 | default: { 47 | sendResponse({ 48 | error: `${task.name} is not Implemented!` 49 | }); 50 | return; 51 | } 52 | } 53 | }); 54 | -------------------------------------------------------------------------------- /extension/eval.ts: -------------------------------------------------------------------------------- 1 | (function() { 2 | const div = document.body.lastElementChild! as HTMLElement; 3 | div.innerText = JSON.stringify(eval(div.innerText)); 4 | }()); 5 | -------------------------------------------------------------------------------- /extension/manifest.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "Puppeteer Extension", 3 | "description": "Automate your browser without allowing websites to detect automated control.", 4 | "version": "0.3.0", 5 | "manifest_version": 2, 6 | "browser_action": { 7 | "default_popup": "popup.html" 8 | }, 9 | "background": { 10 | "scripts": [ 11 | "background.js" 12 | ] 13 | }, 14 | "content_scripts": [ 15 | { 16 | "run_at": "document_end", 17 | "matches": [ 18 | "*://*/*" 19 | ], 20 | "js": [ 21 | "content.js" 22 | ] 23 | } 24 | ], 25 | "web_accessible_resources": [ 26 | "eval.js" 27 | ], 28 | "permissions": [ 29 | "", 30 | "activeTab", 31 | "storage" 32 | ] 33 | } 34 | -------------------------------------------------------------------------------- /extension/popup.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Puppeteer Extension 6 | 7 | 8 | 9 | 10 | 11 |
12 | 13 | 14 | -------------------------------------------------------------------------------- /extension/popup.ts: -------------------------------------------------------------------------------- 1 | document.addEventListener('DOMContentLoaded', () => { 2 | chrome.runtime.sendMessage({ topic: '@/polls' }, msg => { 3 | (document.body.lastElementChild! as HTMLElement).innerText = msg; 4 | }); 5 | }); 6 | -------------------------------------------------------------------------------- /nodejs/Browser.ts: -------------------------------------------------------------------------------- 1 | import * as express from 'express'; 2 | import {Page} from 'puppeteer'; 3 | import {ChildProcess} from 'child_process'; 4 | 5 | interface IPoll { 6 | timeout: NodeJS.Timeout; 7 | request: express.Request; 8 | response: express.Response; 9 | } 10 | 11 | interface ITask { 12 | /** 13 | * An ID of an object, that should be the target of the action. 14 | */ 15 | ref?: any; 16 | 17 | name: string; 18 | args: any[]; 19 | resolve: (value: any) => void; 20 | reject: (reason?: any) => void; 21 | } 22 | 23 | export class Browser { 24 | private readonly _process?: ChildProcess; 25 | private readonly _pageProxy: ProxyHandler<{ id: number }>; 26 | 27 | // required for communication 28 | private _scheduledTasks: ITask[] = []; 29 | private _activeTask: ITask | null = null; 30 | private _activePoll: IPoll | null = null; 31 | 32 | constructor(process?: ChildProcess) { 33 | this._process = process; 34 | this._pageProxy = { 35 | get: (target, prop: string) => { 36 | if(prop === 'then') { 37 | return undefined; 38 | } else { 39 | return (...args: any) => this._execute(target.id, `Page.${prop}`, args); 40 | } 41 | } 42 | }; 43 | } 44 | 45 | middleware(): express.Handler { 46 | return (request, response) => { 47 | if(request.headers['content-type'] !== 'application/json') { 48 | return response.status(400).send(`Invalid 'Content-Type': expected 'application/json'.`); 49 | } 50 | 51 | // only one poller is allowed 52 | if(this._activePoll) { 53 | return response.status(503).send('Somebody is already polling.'); 54 | } 55 | 56 | // handle response of active task 57 | const msg = request.body; 58 | if(this._activeTask) { 59 | if(msg.error) { 60 | this._activeTask.reject(new Error(msg.error)); 61 | } else { 62 | this._activeTask.resolve(msg.payload); 63 | } 64 | this._activeTask = null; 65 | } 66 | 67 | // check if another task is planned 68 | if(this._scheduledTasks.length) { 69 | // mark task as active and send back for handling 70 | const task = this._scheduledTasks.shift()!; 71 | this._activeTask = task; 72 | response.send(task); 73 | return; 74 | } 75 | 76 | // if no task is planned yet, keep browser in line for notification. 77 | this._activePoll = { 78 | request, 79 | response, 80 | timeout: setTimeout(() => { 81 | response.send(null); 82 | this._activePoll = null; 83 | }, 30_000) 84 | }; 85 | }; 86 | } 87 | 88 | private _execute(ref: any, name: string, args: any[] = []): Promise { 89 | return new Promise((resolve, reject) => { 90 | const task = { 91 | ref, 92 | name, 93 | args, 94 | resolve, 95 | reject 96 | }; 97 | 98 | // schedule or start task 99 | if(this._activePoll) { 100 | clearTimeout(this._activePoll.timeout); 101 | this._activePoll.response.send(task); 102 | this._activeTask = task; 103 | this._activePoll = null; 104 | } else { 105 | this._scheduledTasks.push(task); 106 | } 107 | }); 108 | } 109 | 110 | private _createPageProxy(id: number): Page { 111 | return new Proxy({ id }, this._pageProxy) as any; 112 | } 113 | 114 | async newPage(): Promise { 115 | const pageId = await this._execute(null, 'Browser.newPage'); 116 | return this._createPageProxy(pageId); 117 | } 118 | 119 | async pages(): Promise { 120 | const pages = await this._execute(null, 'Browser.pages'); 121 | return pages.map(id => this._createPageProxy(id)); 122 | } 123 | 124 | close() { 125 | if(this._process) { 126 | this._process.kill(); 127 | } 128 | } 129 | } 130 | -------------------------------------------------------------------------------- /nodejs/PuppeteerNode.ts: -------------------------------------------------------------------------------- 1 | import {Browser} from './Browser'; 2 | import {ChildProcess, execFile} from 'child_process'; 3 | import type {LaunchOptions} from 'puppeteer'; 4 | 5 | /** 6 | * Promise-based function for starting the process. 7 | */ 8 | function execBrowserAsync(path: string): Promise { 9 | return new Promise((resolve, reject) => { 10 | const process = execFile(path) 11 | .once('spawn', () => { 12 | // started successfully. don't care about runtime errors 13 | process.off('error', reject); 14 | resolve(process); 15 | }) 16 | .once('error', reject); 17 | }); 18 | } 19 | 20 | export class PuppeteerNode { 21 | /** 22 | * Create a browser instance. A browser process will only be started if `options.executablePath` was defined. 23 | * @param options - Only `options.executablePath` works. 24 | */ 25 | async launch(options?: LaunchOptions): Promise { 26 | let process: ChildProcess | undefined = undefined; 27 | if(options?.executablePath) { 28 | process = await execBrowserAsync(options.executablePath); 29 | } 30 | 31 | return new Browser(process); 32 | } 33 | } 34 | -------------------------------------------------------------------------------- /nodejs/index.ts: -------------------------------------------------------------------------------- 1 | import {PuppeteerNode} from './PuppeteerNode'; 2 | 3 | export default new PuppeteerNode(); 4 | export {Browser} from './Browser'; 5 | export type {Page} from 'puppeteer'; 6 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "puppeteer-extension", 3 | "description": "Automate your browser without allowing websites to detect automated control.", 4 | "version": "", 5 | "author": "maxwrlr", 6 | "license": "MIT", 7 | "main": "index.js", 8 | "types": "index.d.ts", 9 | "repository": "https://github.com/maxwrlr/puppeteer-extension", 10 | "scripts": { 11 | "build": "npm run build:extension && npm run build:node", 12 | "build:extension": "tsc --project tsconfig.ext.json && cp extension/manifest.json dist/extension/ && cp extension/popup.html dist/extension/", 13 | "build:node": "tsc --project tsconfig.node.json && cp package.json dist/nodejs/ && cp README.md dist/nodejs/ && cp LICENSE dist/nodejs/", 14 | "watch:extension": "tsc --project tsconfig.ext.json --watch", 15 | "test": "ts-node test/test.ts" 16 | }, 17 | "keywords": [ 18 | "automation", 19 | "chrome", 20 | "chromium", 21 | "puppeteer", 22 | "scraper" 23 | ], 24 | "devDependencies": { 25 | "@types/chrome": "0.0.136", 26 | "@types/express": "^4.17.13", 27 | "express": "^4.17.1", 28 | "puppeteer": "^10.1.0", 29 | "ts-node": "^10.1.0", 30 | "typescript": "^4.3.5" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /tsconfig.ext.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "types": [ 5 | "chrome" 6 | ], 7 | "lib": [ 8 | "ESNext", 9 | "DOM" 10 | ] 11 | }, 12 | "include": [ 13 | "extension/**/*" 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "CommonJS", 4 | "moduleResolution": "Node", 5 | "target": "ES6", 6 | "rootDir": ".", 7 | "outDir": "dist", 8 | "strict": true, 9 | "skipLibCheck": true, 10 | "lib": [ 11 | "ESNext" 12 | ] 13 | }, 14 | "references": [ 15 | { 16 | "path": "./tsconfig.ext.json" 17 | }, 18 | { 19 | "path": "./tsconfig.node.json" 20 | } 21 | ] 22 | } 23 | -------------------------------------------------------------------------------- /tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "declaration": true 5 | }, 6 | "include": [ 7 | "nodejs/**/*" 8 | ] 9 | } 10 | --------------------------------------------------------------------------------