├── .gitignore ├── test ├── test.env ├── expected-facebook-videos.log ├── expected-instagram.log ├── expected-facebook-page.log ├── expected-facebook-photos.log ├── expected-twitter.log └── expected-twitter-logged-in.log ├── index.ts ├── tsconfig.eslint.json ├── tsconfig.json ├── src ├── site │ ├── index.ts │ ├── youtube.ts │ ├── telegram.ts │ ├── tiktok.ts │ ├── instagram.ts │ ├── twitter.ts │ └── facebook.ts ├── autoclick.ts ├── lib │ ├── behavior.ts │ └── utils.ts ├── autoplay.ts ├── autoscroll.ts ├── autofetcher.ts └── index.ts ├── .github ├── workflows │ ├── npm-release.yaml │ ├── lint_and_build.yaml │ ├── make-draft-release.yaml │ ├── autoplay-vimeo.yaml │ ├── autoplay-youtube.yaml │ ├── autoscroll.yaml │ ├── twitter.yaml │ ├── instagram.yaml │ ├── facebook-page.yaml │ ├── facebook-photos.yaml │ ├── facebook-videos.yaml │ └── twitter-logged-in.yaml └── ISSUE_TEMPLATE │ ├── new-behavior-request.md │ └── behavior-bug.md ├── webpack.config.js ├── docs ├── dsl.ts └── TUTORIAL.md ├── package.json ├── scripts └── test-harness.mjs ├── .eslintrc.cjs ├── README.md └── LICENSE.md /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/ 2 | dist 3 | -------------------------------------------------------------------------------- /test/test.env: -------------------------------------------------------------------------------- 1 | BROWSERTRIX_IMAGE=webrecorder/browsertrix-crawler:latest 2 | -------------------------------------------------------------------------------- /index.ts: -------------------------------------------------------------------------------- 1 | import { BehaviorManager } from "./src"; 2 | export { BehaviorManager }; 3 | -------------------------------------------------------------------------------- /tsconfig.eslint.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "noEmit": true 4 | }, 5 | "extends": "./tsconfig.json", 6 | 
"include": ["**/*.ts", "**/*.js", ".*.js", "**/*.cjs"], 7 | "exclude": ["__generated__", "__mocks__", "dist"] 8 | } 9 | -------------------------------------------------------------------------------- /test/expected-facebook-videos.log: -------------------------------------------------------------------------------- 1 | {"state":{},"msg":"Starting..."} 2 | {"state":{"videos":1,"comments":0},"msg":"Viewing video: https://www.facebook.com/105559135004406/videos/117689630323556"} 3 | {"state":{"videos":1,"comments":0},"msg":"done!"} 4 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "module": "esnext", 4 | "moduleResolution": "node", 5 | "removeComments": true, 6 | "preserveConstEnums": true, 7 | "allowJs": true, 8 | "checkJs": true, 9 | "target": "es2022", 10 | "lib": ["es2022", "dom", "dom.iterable"], 11 | "outDir": "./dist/" 12 | }, 13 | "files": ["index.ts"], 14 | "include": ["src/**/*"], 15 | "exclude": ["node_modules", "**/*.spec.ts"] 16 | } 17 | -------------------------------------------------------------------------------- /test/expected-instagram.log: -------------------------------------------------------------------------------- 1 | {"msg":"Loading single post view for first post: /p/CMtilvmsXzm/","state":{"posts":0,"slides":0,"rows":0,"comments":0}} 2 | {"msg":"Loading Row","state":{"posts":0,"slides":0,"rows":1,"comments":0}} 3 | {"msg":"Loading Post: https://www.instagram.com/capturesample/","state":{"posts":1,"slides":0,"rows":1,"comments":0}} 4 | {"msg":"Loaded Comments","state":{"posts":1,"slides":0,"rows":1,"comments":1}} 5 | {"msg":"done!","state":{"posts":1,"slides":0,"rows":1,"comments":1}} 6 | -------------------------------------------------------------------------------- /src/site/index.ts: -------------------------------------------------------------------------------- 1 | import 
{ FacebookTimelineBehavior } from "./facebook"; 2 | import { InstagramPostsBehavior } from "./instagram"; 3 | import { TelegramBehavior } from "./telegram"; 4 | import { TwitterTimelineBehavior } from "./twitter"; 5 | import { TikTokVideoBehavior, TikTokProfileBehavior } from "./tiktok"; 6 | 7 | const siteBehaviors = [ 8 | InstagramPostsBehavior, 9 | TwitterTimelineBehavior, 10 | FacebookTimelineBehavior, 11 | TelegramBehavior, 12 | TikTokVideoBehavior, 13 | TikTokProfileBehavior, 14 | ]; 15 | 16 | export default siteBehaviors; 17 | -------------------------------------------------------------------------------- /src/site/youtube.ts: -------------------------------------------------------------------------------- 1 | import { AutoScroll } from "../autoscroll"; 2 | 3 | /** AutoScroll variant whose page-load hook checks that the page's video element is not paused and that the markup does not contain the text "not a bot". */ export class YoutubeBehavior extends AutoScroll { 4 | /** Sleeps via ctx.Lib.sleep(10), then hands a validity predicate and the tag "no_video_playing" to ctx.Lib.assertContentValid (presumably the reason reported when the predicate returns false; confirm in the lib). */ override async awaitPageLoad(ctx: any) { 5 | const { sleep, assertContentValid } = ctx.Lib; 6 | await sleep(10); 7 | assertContentValid(() => { 8 | const video = document.querySelector("video"); 9 | /* null when the page has no video element, so a missing player does not fail this check */ const paused = video && video.paused; 10 | if (paused) { 11 | return false; 12 | } 13 | /* valid only when the "not a bot" text is absent from the serialized document */ return document.documentElement.outerHTML.indexOf("not a bot") === -1; 14 | }, "no_video_playing"); 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /.github/workflows/npm-release.yaml: -------------------------------------------------------------------------------- 1 | name: Publish Package to npmjs 2 | on: 3 | release: 4 | types: [published] 5 | 6 | jobs: 7 | build-release: 8 | runs-on: ubuntu-latest 9 | steps: 10 | - uses: actions/checkout@v3 11 | - uses: actions/setup-node@v3 12 | with: 13 | node-version: "22.x" 14 | registry-url: "https://registry.npmjs.org" 15 | cache: "yarn" 16 | 17 | - name: Yarn Install 18 | run: yarn install --frozen-lockfile 19 | 20 | - name: Yarn Build 21 | run: yarn run build 22 | 23 | - run: npm publish 24 | env: 25 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 26 | 
-------------------------------------------------------------------------------- /.github/workflows/lint_and_build.yaml: -------------------------------------------------------------------------------- 1 | name: Lint Check 2 | 3 | on: [push] 4 | 5 | jobs: 6 | lint: 7 | runs-on: ubuntu-latest 8 | 9 | strategy: 10 | matrix: 11 | node-version: [22.x] 12 | 13 | steps: 14 | - uses: actions/checkout@v3 15 | - name: Use Node.js ${{ matrix.node-version }} 16 | uses: actions/setup-node@v3 17 | with: 18 | node-version: ${{ matrix.node-version }} 19 | - name: Install Requirements 20 | run: yarn install 21 | - name: Lint 22 | run: yarn lint:check 23 | - name: Check Formatting 24 | run: yarn format:check 25 | - name: Build 26 | run: yarn build 27 | -------------------------------------------------------------------------------- /.github/workflows/make-draft-release.yaml: -------------------------------------------------------------------------------- 1 | name: Generate Draft Release 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | - "*-release" 8 | 9 | jobs: 10 | package_chart: 11 | runs-on: ubuntu-latest 12 | 13 | steps: 14 | - name: Check out Git repository 15 | uses: actions/checkout@v3 16 | 17 | - name: Get Version 18 | run: | 19 | echo "version=$(jq -r .version package.json)" >> "$GITHUB_ENV" 20 | 21 | - name: Make Draft Release 22 | uses: softprops/action-gh-release@v1 23 | with: 24 | name: "Browsertrix Behaviors v${{ env.version }}" 25 | tag_name: v${{ env.version }} 26 | draft: true 27 | -------------------------------------------------------------------------------- /test/expected-facebook-page.log: -------------------------------------------------------------------------------- 1 | {"msg":"Starting...","state":{}} 2 | {"msg":"Viewing post ","state":{"posts":1,"comments":0,"videos":0}} 3 | {"msg":"Loading comments","state":{"posts":1,"comments":1,"videos":0}} 4 | {"msg":"Loading comments","state":{"posts":1,"comments":2,"videos":0}} 5 | {"msg":"Viewing post 
","state":{"posts":2,"comments":2,"videos":0}} 6 | {"msg":"Playing inline video","state":{"posts":2,"comments":2,"videos":1}} 7 | {"msg":"Viewing post ","state":{"posts":3,"comments":2,"videos":1}} 8 | {"msg":"Loading comments","state":{"posts":3,"comments":3,"videos":1}} 9 | {"msg":"Loading comments","state":{"posts":3,"comments":4,"videos":1}} 10 | {"msg":"done!","state":{"posts":3,"comments":4,"videos":1}} 11 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/new-behavior-request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: New Behavior Request 3 | about: Suggest a New Behavior 4 | title: "New Behavior Request For: " 5 | labels: enhancement 6 | assignees: "" 7 | --- 8 | 9 | **What website should this behavior be applied to?** 10 | 11 | **Is this a well-known site that often needs to be archived?** 12 | If not, please explain who may benefit from automation for this site. 13 | 14 | **Describe in detail what the automated behavior should do. Please be as specific as possible.** 15 | 16 | **Do you have the ability/bandwidth to contribute a pull request to implement this behavior?** 17 | If you are able to help in other ways, please describe here as well. 18 | 19 | **Additional context** 20 | Add any other context or screenshots about the new behavior request here. 21 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/behavior-bug.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Behavior Bug 3 | about: Let us know when a behavior isn't working as expected. 4 | title: Behavior Bug 5 | labels: bug 6 | assignees: "" 7 | --- 8 | 9 | **URL** 10 | Enter URL where you're running the behavior. 11 | 12 | **Describe the bug** 13 | Describe what is happening when you run Autopilot. 
14 | 15 | Note: The expected functionality can be found on [Behaviors Overview](https://archiveweb.page/guide/features/behaviors). 16 | If the status of one of the tests is failing, that may be a related or similar issue. 17 | 18 | **What tool do you see this issue in?** 19 | 20 | - [ ] ArchiveWeb.page Extension 21 | - [ ] ArchiveWeb.page App 22 | - [ ] Browsertrix Crawler 23 | - [ ] Other/custom 24 | 25 | **Screenshots** 26 | If applicable, add screenshots to help explain your problem. 27 | 28 | **Additional context** 29 | Add any other context about the problem here. 30 | -------------------------------------------------------------------------------- /test/expected-facebook-photos.log: -------------------------------------------------------------------------------- 1 | {"state":{},"msg":"Starting..."} 2 | {"state":{"photos":1,"comments":0},"msg":"Viewing photo https://www.facebook.com/Capture-Sample-105559135004406/photos/105560261670960"} 3 | {"state":{"photos":1,"comments":1},"msg":"Loading comments"} 4 | {"state":{"photos":1,"comments":2},"msg":"Loading comments"} 5 | {"state":{"photos":2,"comments":2},"msg":"Viewing photo https://www.facebook.com/Capture-Sample-105559135004406/photos/105560188337634"} 6 | {"state":{"photos":3,"comments":2},"msg":"Viewing photo https://www.facebook.com/Capture-Sample-105559135004406/photos/105560165004303"} 7 | {"state":{"photos":3,"comments":3},"msg":"Loading comments"} 8 | {"state":{"photos":3,"comments":4},"msg":"Loading comments"} 9 | {"state":{"photos":4,"comments":4},"msg":"Viewing photo https://www.facebook.com/Capture-Sample-105559135004406/photos/105560131670973"} 10 | {"state":{"photos":4,"comments":4},"msg":"done!"} 11 | -------------------------------------------------------------------------------- /.github/workflows/autoplay-vimeo.yaml: -------------------------------------------------------------------------------- 1 | name: "Autoplay Behavior: Vimeo Embed" 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 
7 | - cron: "12 16 * * *" 8 | 9 | jobs: 10 | autoscroll_test: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | node-version: [22.x] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Use Node.js ${{ matrix.node-version }} 20 | uses: actions/setup-node@v3 21 | with: 22 | node-version: ${{ matrix.node-version }} 23 | 24 | - name: install requirements 25 | run: yarn install 26 | 27 | - name: build behaviors 28 | run: yarn run build 29 | 30 | - uses: c-py/action-dotenv-to-setenv@v2 31 | with: 32 | env-file: test/test.env 33 | 34 | - name: get browsertrix image 35 | run: docker pull $BROWSERTRIX_IMAGE 36 | 37 | - name: run crawl 38 | run: docker run -v $PWD/dist/behaviors.js:/app/node_modules/browsertrix-behaviors/dist/behaviors.js -v $PWD/crawls:/crawls $BROWSERTRIX_IMAGE crawl --url https://oembed.link/https://vimeo.com/1084537 --limit 1 --generateCDX --collection test 39 | 40 | - name: check mp4 capture 41 | run: grep '"video/mp4"' ./crawls/collections/test/indexes/index.cdxj 42 | -------------------------------------------------------------------------------- /.github/workflows/autoplay-youtube.yaml: -------------------------------------------------------------------------------- 1 | name: "Autoplay Behavior: YouTube Embed" 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "10 16 * * *" 8 | 9 | jobs: 10 | autoscroll_test: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | node-version: [22.x] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Use Node.js ${{ matrix.node-version }} 20 | uses: actions/setup-node@v3 21 | with: 22 | node-version: ${{ matrix.node-version }} 23 | 24 | - name: install requirements 25 | run: yarn install 26 | 27 | - name: build behaviors 28 | run: yarn run build 29 | 30 | - uses: c-py/action-dotenv-to-setenv@v2 31 | with: 32 | env-file: test/test.env 33 | 34 | - name: get browsertrix image 35 | run: docker pull $BROWSERTRIX_IMAGE 36 | 37 | - name: run crawl 38 | run: 
docker run -v $PWD/dist/behaviors.js:/app/node_modules/browsertrix-behaviors/dist/behaviors.js -v $PWD/crawls:/crawls $BROWSERTRIX_IMAGE crawl --url https://oembed.link/https://www.youtube.com/watch?v=aT-Up5Y4uRI --limit 1 --generateCDX --collection test 39 | 40 | - name: check mp4 capture 41 | run: grep '"video/mp4"' ./crawls/collections/test/indexes/index.cdxj 42 | -------------------------------------------------------------------------------- /webpack.config.js: -------------------------------------------------------------------------------- 1 | /*eslint-env node */ 2 | 3 | const webpack = require("webpack"); 4 | const TerserPlugin = require("terser-webpack-plugin"); 5 | 6 | const path = require("path"); 7 | 8 | /** Webpack config factory: bundles ./index.ts through ts-loader into one minified dist/behaviors.js; argv.mode is used as the webpack mode and _env is ignored. */ const tsConfig = (_env, argv) => { 9 | return { 10 | mode: argv.mode, 11 | plugins: [ 12 | /* prepends a copyright banner whose end year is computed at build time */ new webpack.BannerPlugin( 13 | `behaviors.js is part of Webrecorder project. Copyright (C) 2021-${new Date().getFullYear()}, Webrecorder Software. Licensed under the Affero General Public License v3.`, 14 | ), 15 | new webpack.ProgressPlugin(), 16 | /* cap the output at a single chunk */ new webpack.optimize.LimitChunkCountPlugin({ maxChunks: 1 }), 17 | ], 18 | entry: "./index.ts", 19 | module: { 20 | rules: [ 21 | { 22 | test: /\.tsx?$/, 23 | use: "ts-loader", 24 | exclude: /node_modules/, 25 | }, 26 | ], 27 | }, 28 | resolve: { 29 | extensions: [".tsx", ".ts", ".js"], 30 | }, 31 | output: { 32 | filename: "behaviors.js", 33 | path: path.resolve(__dirname, "dist"), 34 | }, 35 | optimization: { 36 | minimize: true, 37 | minimizer: [ 38 | new TerserPlugin({ 39 | /* presumably keeps comments from being split into a separate file; confirm against terser-webpack-plugin docs */ extractComments: false, 40 | }), 41 | ], 42 | }, 43 | }; 44 | }; 45 | 46 | // module.exports = jsConfig; 47 | module.exports = tsConfig; 48 | -------------------------------------------------------------------------------- /test/expected-twitter.log: -------------------------------------------------------------------------------- 1 | {"msg":"Capturing thread: 
https://twitter.com/CaptureSample","state":{"tweets":0,"images":0,"videos":0,"threads":1}} 2 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369531770962550788/photo/1","state":{"tweets":1,"images":1,"videos":0,"threads":1}} 3 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369531770962550788/photo/2","state":{"tweets":1,"images":2,"videos":0,"threads":1}} 4 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369531770962550788/photo/3","state":{"tweets":1,"images":3,"videos":0,"threads":1}} 5 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369531770962550788/photo/4","state":{"tweets":1,"images":4,"videos":0,"threads":1}} 6 | {"msg":"Waiting for media playback for https://twitter.com/CaptureSample/status/1369492279585501184 to finish","state":{"tweets":4,"images":4,"videos":1,"threads":1}} 7 | {"msg":"Loading video for https://twitter.com/CaptureSample/status/1369487726710431750","state":{"tweets":7,"images":4,"videos":2,"threads":1}} 8 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369486018714345477/photo/1","state":{"tweets":9,"images":5,"videos":2,"threads":1}} 9 | {"msg":"Loading video for https://twitter.com/CaptureSample/status/1369487726710431750","state":{"tweets":15,"images":5,"videos":3,"threads":1}} 10 | -------------------------------------------------------------------------------- /test/expected-twitter-logged-in.log: -------------------------------------------------------------------------------- 1 | {"msg":"Capturing thread: https://twitter.com/CaptureSample","state":{"tweets":0,"images":0,"videos":0,"threads":1}} 2 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369531770962550788/photo/1","state":{"tweets":1,"images":1,"videos":0,"threads":1}} 3 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369531770962550788/photo/2","state":{"tweets":1,"images":2,"videos":0,"threads":1}} 4 | {"msg":"Loading Image: 
https://twitter.com/CaptureSample/status/1369531770962550788/photo/3","state":{"tweets":1,"images":3,"videos":0,"threads":1}} 5 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369531770962550788/photo/4","state":{"tweets":1,"images":4,"videos":0,"threads":1}} 6 | {"msg":"Waiting for media playback for https://twitter.com/CaptureSample/status/1369492279585501184 to finish","state":{"tweets":4,"images":4,"videos":1,"threads":1}} 7 | {"msg":"Loading video for https://twitter.com/CaptureSample/status/1369487726710431750","state":{"tweets":7,"images":4,"videos":2,"threads":1}} 8 | {"msg":"Loading Image: https://twitter.com/CaptureSample/status/1369486018714345477/photo/1","state":{"tweets":9,"images":5,"videos":2,"threads":1}} 9 | {"msg":"Loading video for https://twitter.com/CaptureSample/status/1369487726710431750","state":{"tweets":15,"images":5,"videos":3,"threads":1}} 10 | -------------------------------------------------------------------------------- /.github/workflows/autoscroll.yaml: -------------------------------------------------------------------------------- 1 | name: Autoscroll Behavior 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "10 16 * * *" 8 | 9 | jobs: 10 | autoscroll_test: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | node-version: [22.x] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Use Node.js ${{ matrix.node-version }} 20 | uses: actions/setup-node@v3 21 | with: 22 | node-version: ${{ matrix.node-version }} 23 | 24 | - name: install requirements 25 | run: yarn install 26 | 27 | - name: build behaviors 28 | run: yarn run build 29 | 30 | - uses: c-py/action-dotenv-to-setenv@v2 31 | with: 32 | env-file: test/test.env 33 | 34 | - name: get browsertrix image 35 | run: docker pull $BROWSERTRIX_IMAGE 36 | 37 | - name: run crawl 38 | run: docker run -v $PWD/dist/behaviors.js:/app/node_modules/browsertrix-behaviors/dist/behaviors.js $BROWSERTRIX_IMAGE crawl --url 
https://www.iana.org/numbers --limit 1 --logging debug --context behaviorScript --behaviors autoscroll > ./log 39 | 40 | - name: check for autoscroll debug log line 41 | run: grep 'Skipping autoscroll, page seems to not be responsive to scrolling events' ./log 42 | 43 | - name: check that state is logged as well 44 | run: grep '{"state":{"segments":1}' ./log 45 | -------------------------------------------------------------------------------- /docs/dsl.ts: -------------------------------------------------------------------------------- 1 | import { XPath, Behavior } from "../lib/dsl"; 2 | 3 | // Queries 4 | 5 | /** Query: descendant div whose class attribute contains "CommentListContainer". */ const CommentListContainer = (q: XPath) => 6 | q.descendant("div").where(q.attr("class").contains("CommentListContainer")); 7 | 8 | /** Query: descendant div whose class attribute contains "CommentItemContainer". */ const CommentItemContainer = (q: XPath) => 9 | q.descendant("div").where(q.attr("class").contains("CommentItemContainer")); 10 | 11 | /** Query: descendant p whose class attribute contains "ReplyActionText". */ const ViewMoreReplies = (q: XPath) => 12 | q.descendant("p").where(q.attr("class").contains("ReplyActionText")); 13 | 14 | /** Query: descendant p whose data-e2e attribute starts with "view-more". */ const ViewMoreThread = (q: XPath) => 15 | q.descendant("p").where(q.attr("data-e2e").startsWith("view-more")); 16 | 17 | // Behaviors 18 | 19 | /** Scrolls the current match into view and yields "View thread" with the key "threads" (presumably a state counter name; confirm against the DSL). */ const viewThread = (bx: Behavior) => 20 | bx.scrollIntoView().yield("View thread", "threads"); 21 | 22 | /** Finds one ViewMoreReplies match (timeout 1000), yields "Expand thread", scrolls to it, waits 500, then clicks it. */ const expandThread = (bx: Behavior) => 23 | bx 24 | .findOne(ViewMoreReplies, { timeout: 1000 }) 25 | .yield("Expand thread", "expandedThreads") 26 | .scrollIntoView() 27 | .wait(500) 28 | .click(); 29 | 30 | /** Same find/yield/scroll/wait/click sequence for ViewMoreThread, then chains into itself; presumably this repeats until findOne stops matching (confirm DSL semantics). */ const crawlThread = (bx: Behavior) => 31 | bx 32 | .findOne(ViewMoreThread, { timeout: 1000 }) 33 | .yield("View more replies", "replies") 34 | .scrollIntoView() 35 | .wait(500) 36 | .click() 37 | .chain(crawlThread); 38 | 39 | /** Entry point: finds the comment list, then applies viewThread, expandThread and crawlThread to every CommentItemContainer match, with waitForMore set to 2000. */ export default (bx: Behavior) => 40 | bx 41 | .findOne(CommentListContainer) 42 | .yield("Iterating Commentlist") 43 | .iterateMatches( 44 | CommentItemContainer, 45 | (bx: Behavior) => bx.chain(viewThread, expandThread, crawlThread), 46 | { waitForMore: 2000 }, 47 | ); 48 | 
-------------------------------------------------------------------------------- /.github/workflows/twitter.yaml: -------------------------------------------------------------------------------- 1 | name: Twitter Behavior 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "10 16 * * *" 8 | 9 | jobs: 10 | twitter_test: 11 | runs-on: ubuntu-latest 12 | 13 | strategy: 14 | matrix: 15 | node-version: [22.x] 16 | 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Use Node.js ${{ matrix.node-version }} 20 | uses: actions/setup-node@v3 21 | with: 22 | node-version: ${{ matrix.node-version }} 23 | 24 | - name: install requirements 25 | run: yarn install 26 | 27 | - name: build behaviors 28 | run: yarn run build 29 | 30 | - uses: c-py/action-dotenv-to-setenv@v2 31 | with: 32 | env-file: test/test.env 33 | 34 | - name: get browsertrix image 35 | run: docker pull $BROWSERTRIX_IMAGE 36 | 37 | - name: run crawl 38 | run: docker run -v $PWD/dist/behaviors.js:/app/node_modules/browsertrix-behaviors/dist/behaviors.js $BROWSERTRIX_IMAGE crawl --url https://twitter.com/CaptureSample --limit 1 --logging behaviors > ./log 39 | 40 | - name: cat log 41 | run: cat ./log 42 | 43 | - uses: sergeysova/jq-action@v2 44 | name: compare crawl log to expected 45 | with: 46 | cmd: cat log 47 | | jq -Rr -c 'fromjson? 
| .details | select(type == "object") | {msg,state} | select(.msg != null) | select(.msg | contains("?cxt") | not)' 48 | | diff - ./test/expected-twitter.log 49 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "browsertrix-behaviors", 3 | "version": "0.9.7", 4 | "main": "index.js", 5 | "author": "Webrecorder Software", 6 | "license": "AGPL-3.0-or-later", 7 | "devDependencies": { 8 | "@typescript-eslint/eslint-plugin": "^8.28.0", 9 | "@typescript-eslint/parser": "^8.28.0", 10 | "@webpack-cli/init": "^1.1.3", 11 | "eslint": "^8.56.0", 12 | "eslint-config-prettier": "^9.1.0", 13 | "memfs": "^4.17.0", 14 | "prettier": "^3.6.2", 15 | "puppeteer": "^24.7.2", 16 | "ts-loader": "^9.4.2", 17 | "typescript": "^5.7.3", 18 | "webpack": "^5.75.0", 19 | "webpack-cli": "^4.5.0", 20 | "webpack-dev-server": "^3.11.2" 21 | }, 22 | "scripts": { 23 | "build": "webpack --mode production", 24 | "build-dev": "webpack --mode development", 25 | "build-dev-copy": "webpack --mode development && cat ./dist/behaviors.js | pbcopy", 26 | "watch": "webpack watch --mode production", 27 | "watch-dev": "webpack watch --mode development", 28 | "lint": "eslint --fix index.ts ./src/**/*.ts webpack.config.js", 29 | "lint:check": "eslint index.ts ./src/**/*.ts webpack.config.js", 30 | "format": "prettier --write . webpack.config.js", 31 | "format:check": "prettier --check . 
webpack.config.js", 32 | "test": "node ./scripts/test-harness.mjs" 33 | }, 34 | "description": "Browsertrix Behaviors", 35 | "files": [ 36 | "src", 37 | "index.js", 38 | "dist/" 39 | ], 40 | "dependencies": { 41 | "query-selector-shadow-dom": "^1.0.1" 42 | } 43 | } 44 | -------------------------------------------------------------------------------- /scripts/test-harness.mjs: -------------------------------------------------------------------------------- 1 | import puppeteer from "puppeteer"; 2 | import Webpack from "webpack"; 3 | import { fs } from "memfs"; 4 | 5 | import webpackConfig from "../webpack.config.js"; 6 | 7 | /** 8 | * Validate a URL 9 | * @param {URL} url 10 | * @returns {boolean} 11 | */ 12 | const validateUrl = (url) => { 13 | try { 14 | return new URL(url); 15 | } catch (_e) { 16 | return false; 17 | } 18 | }; 19 | 20 | if (!process.argv[2]) { 21 | console.error("Usage: yarn test ''"); 22 | process.exit(1); 23 | } 24 | 25 | if (!validateUrl(process.argv[2])) { 26 | console.error("Invalid URL (hint: include http:// or https://)"); 27 | process.exit(1); 28 | } 29 | 30 | const config = webpackConfig({}, { mode: "development" }); 31 | 32 | const compiler = Webpack(config); 33 | compiler.outputFileSystem = fs; 34 | 35 | const browser = await puppeteer.launch({ headless: false, devtools: true }); 36 | const page = await browser.newPage(); 37 | 38 | const _watching = compiler.watch({}, async (err, stats) => { 39 | if (err) { 40 | console.error(err); 41 | console.error("Not opening browser"); 42 | return; 43 | } 44 | console.log( 45 | stats.toString({ 46 | colors: true, 47 | preset: "summary", 48 | }), 49 | ); 50 | const behaviorScript = fs.readFileSync("dist/behaviors.js", "utf8"); 51 | 52 | await page.goto(validateUrl(process.argv[2])); 53 | 54 | await page.evaluate( 55 | behaviorScript + 56 | ` 57 | self.__bx_behaviors.init({ 58 | autofetch: true, 59 | autoplay: true, 60 | autoscroll: true, 61 | siteSpecific: true, 62 | }); 63 | `, 64 | ); 65 | 66 | 
/** XPath queries used to walk a Telegram channel preview page (`t.me/s/<channel>`). */
const Q = {
  telegramContainer:
    "//main//section[@class='tgme_channel_history js-message_history']",
  postId: "string(./div[@data-post]/@data-post)",
  linkExternal:
    "string(.//a[@class='tgme_widget_message_link_preview' and @href]/@href)",
};

/** Link-preview URL suffixes that are loaded as images while walking messages. */
const IMAGE_SUFFIXES = [".jpg", ".png"];

/**
 * Site-specific behavior for Telegram channel preview pages.
 *
 * Starts at the last child of the message-history container and walks
 * backwards through its previous siblings, scrolling each one into view and
 * yielding a state update per message.
 */
export class TelegramBehavior {
  static id = "Telegram";

  /** True when the current URL looks like `https://t.me/s/<channel>`. */
  static isMatch() {
    // The dot is escaped so that only the literal host `t.me` matches.
    return !!window.location.href.match(/https:\/\/t\.me\/s\/\w[\w]+/);
  }

  /** Initial state: number of messages visited so far. */
  static init() {
    return {
      state: { messages: 0 },
    };
  }

  /**
   * Sleeps for five wait units, then returns the element preceding `child`.
   *
   * @param ctx behavior context; only `ctx.Lib.sleep` and `ctx.Lib.waitUnit` are used
   * @param child the message element that was just processed
   * @returns the previous sibling element, or `null` when there is none
   */
  async waitForPrev(ctx, child) {
    if (!child) {
      return null;
    }

    // The sibling is read only after the pause
    // (presumably so the page can insert earlier messages -- TODO confirm).
    await ctx.Lib.sleep(ctx.Lib.waitUnit * 5);

    return child.previousElementSibling || null;
  }

  /**
   * Walks the channel history from the last message backwards.
   *
   * For each message: scrolls it into view, loads a linked `.jpg`/`.png`
   * preview by briefly attaching an `<img>` to the document, then yields a
   * "Loading Message" state that increments the `messages` counter.
   */
  async *run(ctx) {
    const {
      getState,
      scrollIntoView,
      sleep,
      waitUnit,
      xpathNode,
      xpathString,
    } = ctx.Lib;

    const root = xpathNode(Q.telegramContainer);
    if (!root) {
      return;
    }

    let child = root.lastElementChild;

    while (child) {
      scrollIntoView(child);

      const postId = xpathString(Q.postId, child) || "unknown";
      const linkUrl = xpathString(Q.linkExternal, child);

      // Guard the whole test: previously only the ".jpg" check was
      // null-safe, so a missing link string threw on the ".png" check.
      const isImageLink =
        typeof linkUrl === "string" &&
        IMAGE_SUFFIXES.some((suffix) => linkUrl.endsWith(suffix));

      if (isImageLink) {
        yield getState(ctx, "Loading External Image: " + linkUrl);
        const image = new Image();
        image.src = linkUrl;
        document.body.appendChild(image);
        await sleep(waitUnit * 2.5);
        document.body.removeChild(image);
      }

      yield getState(ctx, "Loading Message: " + postId, "messages");

      child = await this.waitForPrev(ctx, child);
    }
  }
}
| .details | select(type == "object") | {msg,state} | select(.msg != null)' 58 | | diff - ./test/expected-instagram.log 59 | -------------------------------------------------------------------------------- /.github/workflows/facebook-page.yaml: -------------------------------------------------------------------------------- 1 | name: "Facebook Behavior: Page (Owner Logged In)" 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "20 16 * * *" 8 | 9 | jobs: 10 | facebook_page: 11 | environment: DigitalOcean 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [22.x] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Use Node.js ${{ matrix.node-version }} 21 | uses: actions/setup-node@v3 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | 25 | - uses: prewk/s3-cp-action@v1 26 | env: 27 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 28 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 29 | AWS_S3_ENDPOINT: ${{ secrets.AWS_S3_ENDPOINT }} 30 | SOURCE: ${{ secrets.PROFILE_URL }} 31 | DEST: "./profile.tar.gz" 32 | 33 | - name: install requirements 34 | run: yarn install 35 | 36 | - name: build behaviors 37 | run: yarn run build 38 | 39 | - uses: c-py/action-dotenv-to-setenv@v2 40 | with: 41 | env-file: test/test.env 42 | 43 | - name: get browsertrix image 44 | run: docker pull $BROWSERTRIX_IMAGE 45 | 46 | - name: run crawl 47 | run: docker run -v $PWD/dist/behaviors.js:/app/node_modules/browsertrix-behaviors/dist/behaviors.js -v $PWD/profile.tar.gz:/profile.tar.gz $BROWSERTRIX_IMAGE crawl --url https://www.facebook.com/Capture-Sample-105559135004406 --limit 1 --logging behaviors --profile /profile.tar.gz > ./log 48 | 49 | - name: cat log 50 | run: cat ./log 51 | 52 | - uses: sergeysova/jq-action@v2 53 | name: compare crawl log to expected 54 | with: 55 | cmd: > 56 | cat log 57 | | jq -Rr -c 'fromjson? 
| .details | select(type == "object") | {msg,state} | select(.msg != null)' 58 | | diff - ./test/expected-facebook-page.log 59 | -------------------------------------------------------------------------------- /.github/workflows/facebook-photos.yaml: -------------------------------------------------------------------------------- 1 | name: "Facebook Behavior: Page Photos (Owner Logged In)" 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "29 16 * * *" 8 | 9 | jobs: 10 | facebook_test: 11 | environment: DigitalOcean 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [22.x] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Use Node.js ${{ matrix.node-version }} 21 | uses: actions/setup-node@v3 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | 25 | - uses: prewk/s3-cp-action@v1 26 | env: 27 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 28 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 29 | AWS_S3_ENDPOINT: ${{ secrets.AWS_S3_ENDPOINT }} 30 | SOURCE: ${{ secrets.PROFILE_URL }} 31 | DEST: "./profile.tar.gz" 32 | 33 | - name: install requirements 34 | run: yarn install 35 | 36 | - name: build behaviors 37 | run: yarn run build 38 | 39 | - uses: c-py/action-dotenv-to-setenv@v2 40 | with: 41 | env-file: test/test.env 42 | 43 | - name: get browsertrix image 44 | run: docker pull $BROWSERTRIX_IMAGE 45 | 46 | - name: run crawl 47 | run: docker run -v $PWD/dist/behaviors.js:/app/node_modules/browsertrix-behaviors/dist/behaviors.js -v $PWD/profile.tar.gz:/profile.tar.gz $BROWSERTRIX_IMAGE crawl --url https://www.facebook.com/Capture-Sample-105559135004406/photos --limit 1 --logging behaviors --profile /profile.tar.gz > ./log 48 | 49 | - name: cat log 50 | run: cat ./log 51 | 52 | - uses: sergeysova/jq-action@v2 53 | name: compare crawl log to expected 54 | with: 55 | cmd: > 56 | cat log 57 | | jq -Rr -c 'fromjson? 
| .details | select(type == "object") | {msg,state} | select(.msg != null)' 58 | | diff - ./test/expected-facebook-photos.log 59 | -------------------------------------------------------------------------------- /.github/workflows/facebook-videos.yaml: -------------------------------------------------------------------------------- 1 | name: "Facebook Behavior: Page Videos (Owner Logged In)" 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "33 16 * * *" 8 | 9 | jobs: 10 | facebook_test: 11 | environment: DigitalOcean 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [22.x] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Use Node.js ${{ matrix.node-version }} 21 | uses: actions/setup-node@v3 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | 25 | - uses: prewk/s3-cp-action@v1 26 | env: 27 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 28 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 29 | AWS_S3_ENDPOINT: ${{ secrets.AWS_S3_ENDPOINT }} 30 | SOURCE: ${{ secrets.PROFILE_URL }} 31 | DEST: "./profile.tar.gz" 32 | 33 | - name: install requirements 34 | run: yarn install 35 | 36 | - name: build behaviors 37 | run: yarn run build 38 | 39 | - uses: c-py/action-dotenv-to-setenv@v2 40 | with: 41 | env-file: test/test.env 42 | 43 | - name: get browsertrix image 44 | run: docker pull $BROWSERTRIX_IMAGE 45 | 46 | - name: run crawl 47 | run: docker run -v $PWD/dist/behaviors.js:/app/node_modules/browsertrix-behaviors/dist/behaviors.js -v $PWD/profile.tar.gz:/profile.tar.gz $BROWSERTRIX_IMAGE crawl --url https://www.facebook.com/Capture-Sample-105559135004406/videos --limit 1 --logging behaviors --profile /profile.tar.gz > ./log 48 | 49 | - name: cat log 50 | run: cat ./log 51 | 52 | - uses: sergeysova/jq-action@v2 53 | name: compare crawl log to expected 54 | with: 55 | cmd: > 56 | cat log 57 | | jq -Rr -c 'fromjson? 
| .details | select(type == "object") | {msg,state} | select(.msg != null)' 58 | | diff - ./test/expected-facebook-videos.log 59 | -------------------------------------------------------------------------------- /.github/workflows/twitter-logged-in.yaml: -------------------------------------------------------------------------------- 1 | name: "Twitter Behavior (Logged In)" 2 | 3 | on: 4 | push: 5 | pull_request: 6 | schedule: 7 | - cron: "14 16 * * *" 8 | 9 | jobs: 10 | twitter_logged_in_test: 11 | environment: DigitalOcean 12 | runs-on: ubuntu-latest 13 | 14 | strategy: 15 | matrix: 16 | node-version: [22.x] 17 | 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Use Node.js ${{ matrix.node-version }} 21 | uses: actions/setup-node@v3 22 | with: 23 | node-version: ${{ matrix.node-version }} 24 | 25 | - uses: prewk/s3-cp-action@v1 26 | env: 27 | AWS_ACCESS_KEY_ID: ${{ secrets.AWS_ACCESS_KEY_ID }} 28 | AWS_SECRET_ACCESS_KEY: ${{ secrets.AWS_SECRET_ACCESS_KEY }} 29 | AWS_S3_ENDPOINT: ${{ secrets.AWS_S3_ENDPOINT }} 30 | SOURCE: ${{ secrets.PROFILE_URL }} 31 | DEST: "./profile.tar.gz" 32 | 33 | - name: install requirements 34 | run: yarn install 35 | 36 | - name: build behaviors 37 | run: yarn run build 38 | 39 | - uses: c-py/action-dotenv-to-setenv@v2 40 | with: 41 | env-file: test/test.env 42 | 43 | - name: get browsertrix image 44 | run: docker pull $BROWSERTRIX_IMAGE 45 | 46 | - name: run crawl 47 | run: docker run -v $PWD/dist/behaviors.js:/app/node_modules/browsertrix-behaviors/dist/behaviors.js -v $PWD/profile.tar.gz:/profile.tar.gz $BROWSERTRIX_IMAGE crawl --url https://twitter.com/CaptureSample --limit 1 --logging behaviors --profile /profile.tar.gz > ./log 48 | 49 | - name: cat log 50 | run: cat ./log 51 | 52 | - uses: sergeysova/jq-action@v2 53 | name: compare crawl log to expected 54 | with: 55 | cmd: > 56 | cat log 57 | | jq -Rr -c 'fromjson? 
import { BackgroundBehavior } from "./lib/behavior";
import { addToExternalSet, sleep } from "./lib/utils";

/**
 * Background behavior that clicks every visible same-origin element matched
 * by `selector` (default: all `<a>` elements), one at a time. If a click
 * pushed a new history entry and changed the location, it navigates back.
 */
export class AutoClick extends BackgroundBehavior {
  _donePromise: Promise<void>;
  _markDone: () => void;
  selector: string;
  /** Elements already returned by `nextSameOriginLink()`, so each is handled once. */
  seenElem = new WeakSet<Element>();

  static id = "Autoclick";

  constructor(selector = "a") {
    super();
    this.selector = selector;
    this._donePromise = new Promise<void>(
      (resolve) => (this._markDone = resolve),
    );
  }

  /**
   * True when `href` resolves to exactly the page's origin.
   *
   * Origins are compared after URL parsing; a plain prefix test would also
   * accept hosts such as `https://site.com.evil.org` for `https://site.com`.
   * Opaque origins (serialized as "null") never match, as before.
   */
  private isSameOrigin(href: string): boolean {
    try {
      const { origin } = new URL(href, self.location.href);
      return origin !== "null" && origin === self.location.origin;
    } catch {
      return false;
    }
  }

  /**
   * Returns the next matching element that has not been returned before, is
   * attached to the document, is visible, and either has no `href` or has a
   * same-origin one. Returns `null` when nothing is left.
   */
  nextSameOriginLink(): HTMLAnchorElement | null {
    try {
      const allLinks = document.querySelectorAll(this.selector);
      for (const el of allLinks) {
        const elem = el as HTMLAnchorElement;

        // skip URLs to different origin as they won't be handled dynamically, most likely just regular navigation
        if (elem.href && !this.isSameOrigin(elem.href)) {
          continue;
        }
        if (!elem.isConnected || !elem.checkVisibility()) {
          continue;
        }
        if (this.seenElem.has(elem)) {
          continue;
        }
        this.seenElem.add(elem);
        return elem;
      }
    } catch (e) {
      this.debug(String(e));
    }

    return null;
  }

  /**
   * Clicks links until `nextSameOriginLink()` runs dry, then resolves the
   * promise returned by `done()`.
   *
   * A `beforeunload` handler is installed for the duration; it is removed and
   * the behavior marked done in a `finally`, so a failure cannot leave
   * `done()` pending forever.
   */
  async start() {
    const beforeUnload = (event: BeforeUnloadEvent) => {
      event.preventDefault();
      return false;
    };

    // process all links (except hash links) which could result in attempted navigation
    window.addEventListener("beforeunload", beforeUnload);

    try {
      // process external links on current origin
      let elem = this.nextSameOriginLink();
      while (elem) {
        await this.processElem(elem);
        elem = this.nextSameOriginLink();
      }
    } finally {
      window.removeEventListener("beforeunload", beforeUnload);
      this._markDone();
    }
  }

  /**
   * Clicks a single element and, if that pushed a history entry and changed
   * the location, goes back and waits for the `popstate` event.
   *
   * Elements with a `target` are skipped. Elements with an `href` are skipped
   * when `addToExternalSet` reports the URL was already recorded. Errors are
   * logged at debug level rather than propagated, so one bad element does not
   * abort the whole run.
   */
  async processElem(elem: HTMLAnchorElement) {
    try {
      if (elem.target) {
        return;
      }

      if (elem.href) {
        // skip if already clicked this URL, tracked in external state
        if (!(await addToExternalSet(elem.href))) {
          return;
        }

        this.debug("Clicking on link: " + elem.href);
      } else {
        this.debug("Click empty link");
      }

      const origHref = self.location.href;
      const origHistoryLen = self.history.length;

      // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
      if (elem.click) {
        elem.click();
        // eslint-disable-next-line @typescript-eslint/no-unnecessary-condition
      } else if (elem.dispatchEvent) {
        elem.dispatchEvent(new MouseEvent("click"));
      }

      await sleep(250);

      // only attempt to go back if history stack updated (pushState, not replaceState) and location changed
      const pushedNewEntry = self.history.length === origHistoryLen + 1;
      if (pushedNewEntry && self.location.href !== origHref) {
        await new Promise<void>((resolve) => {
          window.addEventListener("popstate", () => resolve(), { once: true });
          window.history.back();
        });
      }
    } catch (e) {
      this.debug(String(e));
    }
  }

  /**
   * @deprecated This was a `catch` clause that ended up outside
   * `processElem` and so became a method. The error handling now lives in
   * `processElem`; the method is kept only so the class surface is unchanged.
   */
  catch(e) {
    this.debug(String(e));
  }

  /** Resolves once `start()` has finished processing all links. */
  done() {
    return this._donePromise;
  }
}
[ 10 | "eslint:recommended", 11 | "plugin:@typescript-eslint/eslint-recommended", 12 | "prettier", 13 | ], 14 | plugins: ["@typescript-eslint"], 15 | parserOptions: { 16 | project: ["./tsconfig.eslint.json"], 17 | tsconfigRootDir: __dirname, 18 | }, 19 | root: true, 20 | rules: { 21 | /* start stylistic rules */ 22 | "@typescript-eslint/adjacent-overload-signatures": "error", 23 | "@typescript-eslint/array-type": "error", 24 | "@typescript-eslint/consistent-type-imports": [ 25 | "error", 26 | { 27 | fixStyle: "inline-type-imports", 28 | }, 29 | ], 30 | "@typescript-eslint/consistent-type-exports": "error", 31 | "@typescript-eslint/prefer-readonly": "warn", 32 | "@typescript-eslint/class-literal-property-style": ["warn", "getters"], 33 | "@typescript-eslint/consistent-generic-constructors": "error", 34 | "@typescript-eslint/consistent-type-assertions": "error", 35 | "@typescript-eslint/no-confusing-non-null-assertion": "warn", 36 | "@typescript-eslint/no-inferrable-types": "warn", 37 | "@typescript-eslint/non-nullable-type-assertion-style": "warn", 38 | "@typescript-eslint/prefer-for-of": "warn", 39 | // "@typescript-eslint/prefer-nullish-coalescing": "warn", 40 | "@typescript-eslint/prefer-optional-chain": "warn", 41 | "@typescript-eslint/prefer-string-starts-ends-with": "error", 42 | "@typescript-eslint/no-meaningless-void-operator": "error", 43 | "@typescript-eslint/no-unnecessary-boolean-literal-compare": "warn", 44 | "@typescript-eslint/no-unnecessary-condition": "warn", 45 | "@typescript-eslint/no-unnecessary-qualifier": "warn", 46 | "@typescript-eslint/no-unnecessary-type-arguments": "warn", 47 | "@typescript-eslint/prefer-reduce-type-parameter": "warn", 48 | "@typescript-eslint/promise-function-async": "warn", 49 | /* end stylistic rules */ 50 | 51 | /* start recommended rules */ 52 | "no-restricted-globals": [2, "event", "error"], 53 | "@typescript-eslint/no-base-to-string": "warn", 54 | "@typescript-eslint/no-duplicate-enum-values": "error", 55 | 
"@typescript-eslint/no-duplicate-type-constituents": "warn", 56 | "@typescript-eslint/no-explicit-any": "warn", 57 | "@typescript-eslint/no-extra-non-null-assertion": "error", 58 | "@typescript-eslint/no-floating-promises": "warn", 59 | "@typescript-eslint/no-for-in-array": "warn", 60 | "no-unused-vars": "off", 61 | "@typescript-eslint/no-unused-vars": [ 62 | "error", 63 | { 64 | argsIgnorePattern: "^_", 65 | varsIgnorePattern: "^_", 66 | destructuredArrayIgnorePattern: "^_", 67 | caughtErrorsIgnorePattern: "^_", 68 | }, 69 | ], 70 | "no-implied-eval": "off", 71 | "@typescript-eslint/no-implied-eval": "error", 72 | "no-loss-of-precision": "off", 73 | "@typescript-eslint/no-loss-of-precision": "warn", 74 | "@typescript-eslint/no-misused-new": "error", 75 | "@typescript-eslint/no-misused-promises": [ 76 | "error", 77 | { checksVoidReturn: false }, 78 | ], 79 | "@typescript-eslint/no-non-null-asserted-nullish-coalescing": "error", 80 | "@typescript-eslint/no-non-null-asserted-optional-chain": "warn", 81 | "@typescript-eslint/no-redundant-type-constituents": "warn", 82 | "@typescript-eslint/no-this-alias": "warn", 83 | "@typescript-eslint/no-unnecessary-type-assertion": "warn", 84 | "@typescript-eslint/no-unnecessary-type-constraint": "warn", 85 | /* TODO eventually turn all these on */ 86 | "@typescript-eslint/no-unsafe-argument": "warn", 87 | // "@typescript-eslint/no-unsafe-assignment": "warn", 88 | // "@typescript-eslint/no-unsafe-call": "warn", 89 | "@typescript-eslint/no-unsafe-declaration-merging": "warn", 90 | "@typescript-eslint/no-unsafe-enum-comparison": "warn", 91 | // "@typescript-eslint/no-unsafe-member-access": "warn", 92 | "@typescript-eslint/no-unsafe-return": "warn", 93 | "@typescript-eslint/prefer-as-const": "warn", 94 | "require-await": "off", 95 | // "@typescript-eslint/require-await": "warn", 96 | "@typescript-eslint/restrict-template-expressions": "warn", 97 | "@typescript-eslint/unbound-method": "off", 98 | 
/** Selectors / XPath fragments used by the TikTok behaviors. */
const Q = {
  commentButton: "button[aria-label^='Read or add comments']",
  commentList: "//div[contains(@class, 'CommentListContainer')]",
  commentItem: "div[contains(@class, 'CommentItemContainer')]",
  viewMoreReplies: ".//p[contains(@class, 'ReplyActionText')]",
  viewMoreThread:
    ".//p[starts-with(@data-e2e, 'view-more') and string-length(text()) > 0]",
  profileVideoList: "//div[starts-with(@data-e2e, 'user-post-item-list')]",
  profileVideoItem: "div[contains(@class, 'DivItemContainerV2')]",
  backButton: "button[contains(@class, 'StyledCloseIconContainer')]",
  pageLoadWaitUntil: "//*[@role='dialog']",
};

/** Sentinel `breadth` option meaning "expand every thread without limit". */
export const BREADTH_ALL = Symbol("BREADTH_ALL");

/** Page-load handling shared by the TikTok video and profile behaviors. */
export class TikTokSharedBehavior {
  /**
   * Waits (up to 20000, per the last `waitUntilNode` argument) for a node
   * with `role='dialog'`, then asserts that an element labelled "Messages"
   * is present, reporting `not_logged_in` otherwise.
   */
  async awaitPageLoad(ctx: any) {
    const { assertContentValid, waitUntilNode } = ctx.Lib;
    await waitUntilNode(Q.pageLoadWaitUntil, document, null, 20000);

    assertContentValid(
      () => !!document.querySelector("*[aria-label='Messages']"),
      "not_logged_in",
    );
  }
}

/** Behavior for a single TikTok video page: opens and expands the comments. */
export class TikTokVideoBehavior extends TikTokSharedBehavior {
  static id = "TikTokVideo";

  static init() {
    return {
      state: { comments: 0 },
      opts: { breadth: BREADTH_ALL },
    };
  }

  static isMatch() {
    const pathRegex = /https:\/\/(www\.)?tiktok\.com\/@.+\/video\/\d+\/?.*/;
    return !!window.location.href.match(pathRegex);
  }

  /**
   * True when a numeric `breadth` limit is set and `iter` has reached it.
   * With `BREADTH_ALL` the limit is never reached.
   */
  breadthComplete({ opts: { breadth } }, iter) {
    return breadth !== BREADTH_ALL && breadth <= iter;
  }

  /**
   * Repeatedly clicks the "view more" control under `parentNode`, yielding a
   * state update after each click, until no further control appears or the
   * breadth limit is reached.
   *
   * Written as a loop: the earlier recursive `yield*` form nested one
   * generator per click, so every yield had to pass through all outer frames.
   */
  async *crawlThread(ctx, parentNode, prev = null, iter = 0) {
    const { waitUntilNode, scrollAndClick, getState } = ctx.Lib;

    let previous = prev;
    let depth = iter;

    for (;;) {
      const next = await waitUntilNode(Q.viewMoreThread, parentNode, previous);
      if (!next || this.breadthComplete(ctx, depth)) {
        return;
      }
      await scrollAndClick(next, 500);
      yield getState(ctx, "View more replies", "comments");
      previous = next;
      depth += 1;
    }
  }

  /** Clicks the "replies" control of one comment, then crawls its thread. */
  async *expandThread(ctx, item) {
    const { xpathNode, scrollAndClick, getState } = ctx.Lib;
    const viewMore = xpathNode(Q.viewMoreReplies, item);
    if (!viewMore) return;
    await scrollAndClick(viewMore, 500);
    yield getState(ctx, "View comment", "comments");
    yield* this.crawlThread(ctx, item, null, 1);
  }

  /**
   * Opens the comment panel if its button exists, then scrolls through each
   * comment item and expands its reply thread.
   *
   * A background loop checks for a captcha element every 0.5 seconds while
   * the behavior runs. It stops once this generator finishes or is closed via
   * `return()`. Previously it polled forever, leaking one loop per video
   * opened from a profile page.
   */
  async *run(ctx) {
    const {
      xpathNode,
      iterChildMatches,
      scrollIntoView,
      getState,
      assertContentValid,
      sleep,
    } = ctx.Lib;

    const showComments = document.querySelector(Q.commentButton);
    if (showComments) {
      (showComments as HTMLButtonElement).click();
      await sleep(10000);
    }

    let finished = false;

    // assert no captcha every 0.5 seconds, until this run is over
    void (async () => {
      while (!finished) {
        if (document.querySelector("div[class*=captcha]")) {
          assertContentValid(false, "not_logged_in");
          break;
        }
        await sleep(500);
      }
    })();

    try {
      const commentList = xpathNode(Q.commentList);
      const commentItems = iterChildMatches(Q.commentItem, commentList);
      for await (const item of commentItems) {
        scrollIntoView(item);
        yield getState(ctx, "View comment", "comments");
        if (this.breadthComplete(ctx, 0)) continue;
        yield* this.expandThread(ctx, item);
      }

      yield getState(ctx, "TikTok Video Behavior Complete");
    } finally {
      finished = true;
    }
  }
}

/** Behavior for a TikTok profile page: opens each listed video in turn. */
export class TikTokProfileBehavior extends TikTokSharedBehavior {
  static id = "TikTokProfile";

  static isMatch() {
    // NOTE(review): the handle part only allows [a-zA-Z0-9]; confirm whether
    // handles containing other characters (e.g. "." or "_") should match too.
    const pathRegex =
      /https:\/\/(www\.)?tiktok\.com\/@[a-zA-Z0-9]+(\/?$|\/\?.*)/;
    return !!window.location.href.match(pathRegex);
  }

  static init() {
    return {
      state: { videos: 0, comments: 0 },
      opts: { breadth: BREADTH_ALL },
    };
  }

  /**
   * Clicks the first link inside `item`. If that changed the history state,
   * runs the video behavior on the opened view and then goes back.
   */
  async *openVideo(ctx, item) {
    const { HistoryState, xpathNode, sleep } = ctx.Lib;
    const link = xpathNode(".//a", item);
    if (!link) return;
    const viewState = new HistoryState(() => link.click());
    await sleep(500);
    if (viewState.changed) {
      const videoBehavior = new TikTokVideoBehavior();
      yield* videoBehavior.run(ctx);
      await sleep(500);
      await viewState.goBack(Q.backButton);
    }
  }

  /** Scrolls to and opens each video item in the profile's video list. */
  async *run(ctx) {
    const { xpathNode, iterChildMatches, scrollIntoView, getState, sleep } =
      ctx.Lib;
    const profileVideoList = xpathNode(Q.profileVideoList);
    const profileVideos = iterChildMatches(
      Q.profileVideoItem,
      profileVideoList,
    );
    for await (const item of profileVideos) {
      scrollIntoView(item);
      yield getState(ctx, "View video", "videos");
      yield* this.openVideo(ctx, item);
      await sleep(500);
    }
    yield getState(ctx, "TikTok Profile Behavior Complete");
  }
}
import { behaviorLog } from "./utils";
import * as Lib from "./utils";

// ===========================================================================
/** Base class providing leveled logging helpers on top of `behaviorLog`. */
export class BackgroundBehavior {
  debug(msg) {
    behaviorLog(msg, "debug");
  }

  error(msg) {
    behaviorLog(msg, "error");
  }

  log(msg, type = "info") {
    behaviorLog(msg, type);
  }
}

// ===========================================================================
/**
 * Base class for behaviors that are themselves async-iterable: `run()`
 * iterates `this`, logs each yielded step at debug level, and honors
 * `pause()` / `unpause()` between steps.
 */
export class Behavior extends BackgroundBehavior {
  _running: any;
  paused: any;
  _unpause: any;
  state: any;
  scrollOpts: {
    behavior: string;
    block: string;
    inline: string;
  };

  constructor() {
    super();
    this._running = null;
    this.paused = null;
    this._unpause = null;
    this.state = {};

    this.scrollOpts = { behavior: "smooth", block: "center", inline: "center" };
  }

  /** Starts the run loop; its promise is available through `done()`. */
  start() {
    this._running = this.run();
  }

  /** Promise for the current run, or an already-resolved one if never started. */
  done() {
    return this._running ? this._running : Promise.resolve();
  }

  /** Iterates this behavior's steps, logging each and waiting while paused. */
  async run() {
    try {
      for await (const step of this) {
        this.debug(step);
        if (this.paused) {
          await this.paused;
        }
      }
      this.debug(this.getState("done!"));
    } catch (e) {
      // String() also copes with `null`/`undefined` being thrown, where
      // `e.toString()` would itself throw and hide the original failure.
      this.error(String(e));
    }
  }

  /** Makes the run loop wait after the current step until `unpause()` is called. */
  pause() {
    if (this.paused) {
      return;
    }
    this.paused = new Promise<void>((resolve) => {
      this._unpause = resolve;
    });
  }

  unpause() {
    if (this._unpause) {
      this._unpause();
      this.paused = null;
      this._unpause = null;
    }
  }

  /**
   * Returns `{ state, msg }`, first incrementing `state[incrValue]` when
   * `incrValue` is given (an undefined counter starts at 1).
   */
  getState(msg: string, incrValue?) {
    if (incrValue) {
      if (this.state[incrValue] === undefined) {
        this.state[incrValue] = 1;
      } else {
        this.state[incrValue]++;
      }
    }

    return { state: this.state, msg };
  }

  cleanup() {}

  async awaitPageLoad(_: any) {
    // wait for initial page load here
  }

  /** Registers this class with the global `__bx_behaviors` manager, if present. */
  static load() {
    if (self["__bx_behaviors"]) {
      self["__bx_behaviors"].load(this);
    } else {
      console.warn(
        `Could not load ${this.name} behavior: window.__bx_behaviors is not initialized`,
      );
    }
  }

  async *[Symbol.asyncIterator]() {
    yield;
  }
}

// WIP: BehaviorRunner class allows for arbitrary behaviors outside of the
// library to be run through the BehaviorManager

abstract class AbstractBehaviorInst {
  abstract run: (ctx: any) => AsyncIterable<any>;

  abstract awaitPageLoad?: (ctx: any) => Promise<void>;
}

interface StaticAbstractBehavior {
  // primitive `string`, not the boxed `String` wrapper type
  id: string;
  isMatch: () => boolean;
  init: () => any;
}

type AbstractBehavior = (new () => AbstractBehaviorInst) &
  StaticAbstractBehavior;

/**
 * Wraps a behavior class (static `id` / `isMatch` / `init`, instance
 * `async *run(ctx)`) so it can be started, paused and awaited like the
 * built-in behaviors. Each step yielded by `run` is logged with the
 * behavior's id attached.
 */
export class BehaviorRunner extends BackgroundBehavior {
  inst: AbstractBehaviorInst;
  behaviorProps: StaticAbstractBehavior;
  ctx: any;
  _running: any;
  paused: any;
  _unpause: any;

  /** The static `id` of the wrapped behavior's class. */
  get id() {
    // guard the whole chain, not just `inst`
    return (this.inst?.constructor as any)?.id;
  }

  /**
   * @param behavior behavior class to instantiate and run
   * @param mainOpts options that override the `opts` returned by `behavior.init()`
   * @throws Error when the instance has no async-generator `run` method
   */
  constructor(behavior: AbstractBehavior, mainOpts = {}) {
    super();
    this.behaviorProps = behavior;
    this.inst = new behavior();

    if (
      typeof this.inst.run !== "function" ||
      this.inst.run.constructor.name !== "AsyncGeneratorFunction"
    ) {
      throw Error("Invalid behavior: missing `async run*` instance method");
    }

    let { state, opts } = behavior.init();
    state = state || {};
    opts = opts ? { ...opts, ...mainOpts } : mainOpts;
    // eslint-disable-next-line @typescript-eslint/no-explicit-any
    const log = async (data: any, type: string) => this.wrappedLog(data, type);

    this.ctx = { Lib, state, opts, log };

    this._running = null;
    this.paused = null;
    this._unpause = null;
  }

  /**
   * Logs `data` (a string becomes `{ msg }`) tagged with this behavior's id
   * and `siteSpecific: true`.
   */
  // eslint-disable-next-line @typescript-eslint/no-explicit-any
  wrappedLog(data: any, type = "info") {
    let logData;
    if (typeof data === "string" || data instanceof String) {
      logData = { msg: data };
    } else {
      logData = data;
    }
    this.log(
      { ...logData, behavior: this.behaviorProps.id, siteSpecific: true },
      type,
    );
  }

  /** Starts the run loop; its promise is available through `done()`. */
  start() {
    this._running = this.run();
  }

  /** Promise for the current run, or an already-resolved one if never started. */
  done() {
    return this._running ? this._running : Promise.resolve();
  }

  /** Drives the wrapped behavior's generator, logging steps and honoring pause. */
  async run() {
    try {
      for await (const step of this.inst.run(this.ctx)) {
        if (step) {
          this.wrappedLog(step);
        }
        if (this.paused) {
          await this.paused;
        }
      }
      this.debug({ msg: "done!", behavior: this.behaviorProps.id });
    } catch (e) {
      // String() also copes with `null`/`undefined` being thrown.
      this.error({ msg: String(e), behavior: this.behaviorProps.id });
    }
  }

  /** Makes the run loop wait after the current step until `unpause()` is called. */
  pause() {
    if (this.paused) {
      return;
    }
    this.paused = new Promise<void>((resolve) => {
      this._unpause = resolve;
    });
  }

  unpause() {
    if (this._unpause) {
      this._unpause();
      this.paused = null;
      this._unpause = null;
    }
  }

  cleanup() {}

  /** Delegates to the wrapped behavior's `awaitPageLoad`, when it defines one. */
  async awaitPageLoad() {
    if (this.inst.awaitPageLoad) {
      await this.inst.awaitPageLoad(this.ctx);
    }
  }

  /** Registers this class with the global `__bx_behaviors` manager, if present. */
  static load() {
    if (self["__bx_behaviors"]) {
      self["__bx_behaviors"].load(this);
    } else {
      console.warn(
        `Could not load ${this.name} behavior: window.__bx_behaviors is not initialized`,
      );
    }
  }
}
fetchSrcUrl(source) { 76 | const url: string = source.src || source.currentSrc; 77 | 78 | if (!url) { 79 | return false; 80 | } 81 | 82 | if (!url.startsWith("http:") && !url.startsWith("https:")) { 83 | return false; 84 | } 85 | 86 | if (this.mediaSet.has(url)) { 87 | return true; 88 | } 89 | 90 | this.debug("fetch media source URL: " + url); 91 | this.mediaSet.add(url); 92 | this.autofetcher.queueUrl(url); 93 | 94 | return true; 95 | } 96 | 97 | processFetchableUrl(media) { 98 | let found = this.fetchSrcUrl(media); 99 | 100 | const sources = media.querySelectorAll("source"); 101 | 102 | for (const source of sources) { 103 | const foundSource = this.fetchSrcUrl(source); 104 | found = found || foundSource; 105 | } 106 | 107 | return found; 108 | } 109 | 110 | async loadMedia(media) { 111 | this.debug("processing media element: " + media.outerHTML); 112 | 113 | const found = this.processFetchableUrl(media); 114 | 115 | if (!media.play) { 116 | this.debug("media not playable, skipping"); 117 | return; 118 | } 119 | 120 | // if fixed URL found, stop playing 121 | if (found) { 122 | if (!media.paused) { 123 | media.pause(); 124 | this.debug("media URL found, pausing playback"); 125 | } 126 | return; 127 | } 128 | 129 | if (media.paused || media.currentTime) { 130 | if (media.paused) { 131 | this.debug( 132 | "no src url found, attempting to click or play: " + media.outerHTML, 133 | ); 134 | } else { 135 | this.debug( 136 | "media already playing, waiting for full playback to finish: " + 137 | media.outerHTML, 138 | ); 139 | } 140 | 141 | this.attemptMediaPlay(media).then( 142 | async (finished: Promise | null) => { 143 | let check = true; 144 | 145 | if (finished) { 146 | finished.then(() => (check = false)); 147 | } 148 | 149 | while (check) { 150 | if (this.processFetchableUrl(media)) { 151 | check = false; 152 | } 153 | this.debug( 154 | "Waiting for fixed URL or media to finish: " + media.currentSrc, 155 | ); 156 | await sleep(1000); 157 | } 158 | }, 159 | ); 160 | 
} else if (media.currentSrc) { 161 | this.debug("media playing from non-URL source: " + media.currentSrc); 162 | } 163 | } 164 | 165 | async attemptMediaPlay(media) { 166 | // finished promise 167 | let resolveFinished; 168 | 169 | const finished = new Promise((res) => { 170 | resolveFinished = res; 171 | }); 172 | 173 | // started promise 174 | let resolveStarted; 175 | 176 | const started = new Promise((res) => { 177 | resolveStarted = res; 178 | }); 179 | 180 | started.then(() => this.promises.push(finished)); 181 | 182 | // already started 183 | if (!media.paused && media.currentTime > 0) { 184 | resolveStarted(); 185 | } 186 | 187 | media.addEventListener("loadstart", () => { 188 | this.debug("media event: loadstart"); 189 | resolveStarted(true); 190 | }); 191 | media.addEventListener("playing", () => { 192 | this.debug("media event: playing"); 193 | resolveStarted(true); 194 | }); 195 | 196 | media.addEventListener("loadeddata", () => 197 | this.debug("media event: loadeddata"), 198 | ); 199 | 200 | media.addEventListener("ended", () => { 201 | this.debug("media event: ended"); 202 | resolveFinished(); 203 | }); 204 | media.addEventListener("pause", () => { 205 | this.debug("media event: pause"); 206 | resolveFinished(); 207 | }); 208 | media.addEventListener("abort", () => { 209 | this.debug("media event: abort"); 210 | resolveFinished(); 211 | }); 212 | media.addEventListener("error", () => { 213 | this.debug("media event: error"); 214 | resolveFinished(); 215 | }); 216 | media.addEventListener("stalled", () => { 217 | this.debug("media event: stalled"); 218 | resolveFinished(); 219 | }); 220 | media.addEventListener("suspend", () => { 221 | this.debug("media event: suspend"); 222 | resolveFinished(); 223 | }); 224 | 225 | media.muted = true; 226 | 227 | if (!media.paused && media.currentTime > 0) { 228 | return finished; 229 | } 230 | 231 | const hasA = media.closest("a"); 232 | 233 | // if contained in tag, clicking may navigate away, so avoid 234 | if 
(!hasA) { 235 | media.click(); 236 | 237 | if (await Promise.race([started, sleep(1000)])) { 238 | this.debug("play started after media.click()"); 239 | return finished; 240 | } 241 | } 242 | 243 | media.play(); 244 | 245 | if (await Promise.race([started, sleep(1000)])) { 246 | this.debug("play started after media.play()"); 247 | } 248 | 249 | return finished; 250 | } 251 | 252 | done() { 253 | return Promise.allSettled(this.promises); 254 | } 255 | } 256 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Browsertrix Behaviors 2 | 3 |
4 | Behavior Testing Results 5 | 6 | [![Autoscroll Behavior](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/autoscroll.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/autoscroll.yaml) 7 | 8 | [![Autoplay Behavior: Youtube](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/autoplay-youtube.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/autoplay-youtube.yaml) 9 | 10 | [![Autoplay Behavior: Vimeo](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/autoplay-vimeo.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/autoplay-vimeo.yaml) 11 | 12 | [![Instagram Behavior (Logged In)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/instagram.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/instagram.yaml) 13 | 14 | [![Twitter Behavior](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/twitter.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/twitter.yaml) 15 | 16 | [![Twitter Behavior (Logged In)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/twitter-logged-in.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/twitter-logged-in.yaml) 17 | 18 | [![Facebook Behavior: Page (Owner Logged In)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/facebook-page.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/facebook-page.yaml) 19 | 20 | [![Facebook Behavior: Page Photos (Owner Logged In)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/facebook-photos.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/facebook-photos.yaml) 21 | 22 | [![Facebook Behavior: Page Videos (Owner Logged 
In)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/facebook-videos.yaml/badge.svg)](https://github.com/webrecorder/browsertrix-behaviors/actions/workflows/facebook-videos.yaml) 23 | 24 |
25 | 26 | A set of behaviors injected into the browser to perform certain operations on a page, such as scrolling, fetching additional URLs, or performing 27 | customized actions for social-media sites. 28 | 29 | Additional information about behaviors and how they work can be found in the [Browser Behaviors section of the Browsertrix Crawler documentation](https://crawler.docs.browsertrix.com/user-guide/behaviors/). 30 | 31 | For an extensive walkthrough of creating your own custom behaviors to use with Browsertrix and Browsertrix Crawler, [follow the Tutorial](https://crawler.docs.browsertrix.com/user-guide/behaviors/#creating-custom-behaviors). 32 | 33 | If you are looking for advice on how to create or use Browsertrix Behaviors, there is a dedicated category for [custom behaviors](https://forum.webrecorder.net/c/behaviors/29) in the Webrecorder forum. 34 | 35 | ## Usage 36 | 37 | The behaviors are compiled into a single file, `dist/behaviors.js`, which can be injected into any modern browser to load the behavior system. 38 | 39 | No additional dependencies are required, and the behaviors file can be pasted directly into your browser. 40 | 41 | The file can be injected in a number of ways, using tools like puppeteer/playwright, a browser extension content script, a devtools Snippet, or even a regular 42 | `