├── .dockerignore ├── .eslintignore ├── .eslintrc.cjs ├── .github └── workflows │ ├── ci.yaml │ ├── deploy-dev-channel.yaml │ ├── docs-publish.yaml │ ├── make-draft-release.yaml │ └── release.yaml ├── .gitignore ├── .husky └── pre-commit ├── .pre-commit-config.yaml ├── .prettierignore ├── .prettierrc ├── CHANGES.md ├── Dockerfile ├── LICENSE ├── NOTICE ├── README.md ├── config └── policies │ ├── brave.json │ └── chromium.json ├── docker-compose.yml ├── docker-entrypoint.sh ├── docs ├── docs │ ├── CNAME │ ├── assets │ │ ├── brand │ │ │ ├── browsertrix-crawler-icon-color-dynamic.svg │ │ │ └── browsertrix-crawler-white.svg │ │ └── fonts │ │ │ ├── Inter-Italic.var.woff2 │ │ │ ├── Inter.var.woff2 │ │ │ └── Recursive_VF_1.084.woff2 │ ├── develop │ │ ├── docs.md │ │ └── index.md │ ├── index.md │ ├── overrides │ │ ├── .icons │ │ │ └── bootstrap │ │ │ │ ├── bug-fill.svg │ │ │ │ ├── chat-left-text-fill.svg │ │ │ │ ├── check-circle-fill.svg │ │ │ │ ├── check-circle.svg │ │ │ │ ├── dash-circle.svg │ │ │ │ ├── exclamation-circle-fill.svg │ │ │ │ ├── exclamation-diamond-fill.svg │ │ │ │ ├── exclamation-triangle-fill.svg │ │ │ │ ├── exclamation-triangle.svg │ │ │ │ ├── eye.svg │ │ │ │ ├── file-earmark-text-fill.svg │ │ │ │ ├── github.svg │ │ │ │ ├── globe.svg │ │ │ │ ├── info-circle-fill.svg │ │ │ │ ├── mastodon.svg │ │ │ │ ├── mortarboard-fill.svg │ │ │ │ ├── pencil-fill.svg │ │ │ │ ├── pencil.svg │ │ │ │ ├── question-circle-fill.svg │ │ │ │ ├── quote.svg │ │ │ │ ├── x-octagon-fill.svg │ │ │ │ ├── x-octagon.svg │ │ │ │ └── youtube.svg │ │ └── main.html │ ├── stylesheets │ │ └── extra.css │ └── user-guide │ │ ├── behaviors.md │ │ ├── browser-profiles.md │ │ ├── cli-options.md │ │ ├── common-options.md │ │ ├── crawl-scope.md │ │ ├── exit-codes.md │ │ ├── index.md │ │ ├── outputs.md │ │ ├── proxies.md │ │ ├── qa.md │ │ └── yaml-config.md ├── gen-cli.sh └── mkdocs.yml ├── html ├── createProfile.html ├── replay.html ├── screencast.html └── vnc_lite.html ├── package.json ├── requirements.txt ├── src ├── crawler.ts ├── create-login-profile.ts ├── main.ts ├── replaycrawler.ts └── util │ ├── argParser.ts │ ├── blockrules.ts │ ├── browser.ts │ ├── constants.ts │ ├── file_reader.ts │ ├── flowbehavior.ts │ ├── healthcheck.ts │ ├── logger.ts │ ├── originoverride.ts │ ├── proxy.ts │ ├── recorder.ts │ ├── redis.ts │ ├── replayserver.ts │ ├── reqresp.ts │ ├── robots.ts │ ├── screencaster.ts │ ├── screenshots.ts │ ├── seeds.ts │ ├── sitemapper.ts │ ├── state.ts │ ├── storage.ts │ ├── textextract.ts │ ├── timing.ts │ ├── wacz.ts │ ├── warcwriter.ts │ └── worker.ts ├── test-setup.js ├── tests ├── .DS_Store ├── adblockrules.test.js ├── add-exclusion.test.js ├── basic_crawl.test.js ├── blockrules.test.js ├── brave-query-redir.test.js ├── collection_name.test.js ├── config_file.test.js ├── config_stdin.test.js ├── crawl_overwrite.js ├── custom-behavior-flow.test.js ├── custom-behavior.test.js ├── custom-behaviors │ ├── custom-2.js │ ├── custom-flow.json │ └── custom.js ├── custom_driver.test.js ├── custom_selector.test.js ├── dryrun.test.js ├── exclude-redirected.test.js ├── extra_hops_depth.test.js ├── file_stats.test.js ├── fixtures │ ├── crawl-1.yaml │ ├── crawl-2.yaml │ ├── driver-1.mjs │ ├── pages.jsonl │ ├── proxies │ │ ├── proxy-key │ │ ├── proxy-key.pub │ │ ├── proxy-test-bad-auth.pac │ │ └── proxy-test-good-auth.pac │ ├── sample-profile.tar.gz │ └── urlSeedFile.txt ├── http-auth.test.js ├── invalid-behaviors │ └── invalid-export.js ├── lang-code.test.js ├── limit_reached.test.js ├── log_filtering.test.js ├── mult_url_crawl_with_favicon.test.js ├── multi-instance-crawl.test.js ├── non-html-crawl.test.js ├── pageinfo-records.test.js ├── profiles.test.js ├── proxy.test.js ├── qa_compare.test.js ├── retry-failed.test.js ├── robots_txt.test.js ├── rollover-writer.test.js ├── saved-state.test.js ├── scopes.test.js ├── screenshot.test.js ├── seeds.test.js ├── sitemap-parse.test.js ├── storage.test.js ├── text-extract.test.js ├── upload-wacz.test.js ├── url-normalize.test.js ├── url_file_list.test.js └── warcinfo.test.js ├── tsconfig.eslint.json ├── tsconfig.json └── yarn.lock /.dockerignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.dockerignore -------------------------------------------------------------------------------- /.eslintignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.eslintignore -------------------------------------------------------------------------------- /.eslintrc.cjs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.eslintrc.cjs -------------------------------------------------------------------------------- /.github/workflows/ci.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.github/workflows/ci.yaml -------------------------------------------------------------------------------- /.github/workflows/deploy-dev-channel.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.github/workflows/deploy-dev-channel.yaml -------------------------------------------------------------------------------- /.github/workflows/docs-publish.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.github/workflows/docs-publish.yaml -------------------------------------------------------------------------------- /.github/workflows/make-draft-release.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.github/workflows/make-draft-release.yaml -------------------------------------------------------------------------------- /.github/workflows/release.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.github/workflows/release.yaml -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.gitignore -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env sh 2 | . "$(dirname -- "$0")/_/husky.sh" 3 | yarn lint:fix 4 | -------------------------------------------------------------------------------- /.pre-commit-config.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.pre-commit-config.yaml -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/.prettierignore -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /CHANGES.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/CHANGES.md -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/Dockerfile -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/LICENSE -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/NOTICE -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/README.md -------------------------------------------------------------------------------- /config/policies/brave.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/config/policies/brave.json -------------------------------------------------------------------------------- /config/policies/chromium.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/config/policies/chromium.json -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docker-compose.yml -------------------------------------------------------------------------------- /docker-entrypoint.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docker-entrypoint.sh -------------------------------------------------------------------------------- /docs/docs/CNAME: -------------------------------------------------------------------------------- 1 | crawler.docs.browsertrix.com 2 | -------------------------------------------------------------------------------- /docs/docs/assets/brand/browsertrix-crawler-icon-color-dynamic.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/assets/brand/browsertrix-crawler-icon-color-dynamic.svg -------------------------------------------------------------------------------- /docs/docs/assets/brand/browsertrix-crawler-white.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/assets/brand/browsertrix-crawler-white.svg -------------------------------------------------------------------------------- /docs/docs/assets/fonts/Inter-Italic.var.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/assets/fonts/Inter-Italic.var.woff2 -------------------------------------------------------------------------------- /docs/docs/assets/fonts/Inter.var.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/assets/fonts/Inter.var.woff2 -------------------------------------------------------------------------------- /docs/docs/assets/fonts/Recursive_VF_1.084.woff2: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/assets/fonts/Recursive_VF_1.084.woff2 -------------------------------------------------------------------------------- /docs/docs/develop/docs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/develop/docs.md -------------------------------------------------------------------------------- /docs/docs/develop/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/develop/index.md -------------------------------------------------------------------------------- /docs/docs/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/index.md -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/bug-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/bug-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/chat-left-text-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/chat-left-text-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/check-circle-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/check-circle-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/check-circle.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/check-circle.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/dash-circle.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/dash-circle.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/exclamation-circle-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/exclamation-circle-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/exclamation-diamond-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/exclamation-diamond-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/exclamation-triangle-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/exclamation-triangle-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/exclamation-triangle.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/exclamation-triangle.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/eye.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/eye.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/file-earmark-text-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/file-earmark-text-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/github.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/github.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/globe.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/globe.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/info-circle-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/info-circle-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/mastodon.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/mastodon.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/mortarboard-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/mortarboard-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/pencil-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/pencil-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/pencil.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/pencil.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/question-circle-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/question-circle-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/quote.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/quote.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/x-octagon-fill.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/x-octagon-fill.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/x-octagon.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/x-octagon.svg -------------------------------------------------------------------------------- /docs/docs/overrides/.icons/bootstrap/youtube.svg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/.icons/bootstrap/youtube.svg -------------------------------------------------------------------------------- /docs/docs/overrides/main.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/overrides/main.html -------------------------------------------------------------------------------- /docs/docs/stylesheets/extra.css: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/stylesheets/extra.css -------------------------------------------------------------------------------- /docs/docs/user-guide/behaviors.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/behaviors.md -------------------------------------------------------------------------------- /docs/docs/user-guide/browser-profiles.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/browser-profiles.md -------------------------------------------------------------------------------- /docs/docs/user-guide/cli-options.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/cli-options.md -------------------------------------------------------------------------------- /docs/docs/user-guide/common-options.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/common-options.md -------------------------------------------------------------------------------- /docs/docs/user-guide/crawl-scope.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/crawl-scope.md -------------------------------------------------------------------------------- /docs/docs/user-guide/exit-codes.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/exit-codes.md -------------------------------------------------------------------------------- /docs/docs/user-guide/index.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/index.md -------------------------------------------------------------------------------- /docs/docs/user-guide/outputs.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/outputs.md -------------------------------------------------------------------------------- /docs/docs/user-guide/proxies.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/proxies.md -------------------------------------------------------------------------------- /docs/docs/user-guide/qa.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/qa.md -------------------------------------------------------------------------------- /docs/docs/user-guide/yaml-config.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/docs/user-guide/yaml-config.md -------------------------------------------------------------------------------- /docs/gen-cli.sh: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/gen-cli.sh -------------------------------------------------------------------------------- /docs/mkdocs.yml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/docs/mkdocs.yml -------------------------------------------------------------------------------- /html/createProfile.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/html/createProfile.html -------------------------------------------------------------------------------- /html/replay.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/html/replay.html -------------------------------------------------------------------------------- /html/screencast.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/html/screencast.html -------------------------------------------------------------------------------- /html/vnc_lite.html: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/html/vnc_lite.html -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/package.json -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | wacz>=0.5.0 2 | -------------------------------------------------------------------------------- /src/crawler.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/crawler.ts -------------------------------------------------------------------------------- /src/create-login-profile.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/create-login-profile.ts -------------------------------------------------------------------------------- /src/main.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/main.ts -------------------------------------------------------------------------------- /src/replaycrawler.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/replaycrawler.ts -------------------------------------------------------------------------------- /src/util/argParser.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/argParser.ts -------------------------------------------------------------------------------- /src/util/blockrules.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/blockrules.ts -------------------------------------------------------------------------------- /src/util/browser.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/browser.ts -------------------------------------------------------------------------------- /src/util/constants.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/constants.ts -------------------------------------------------------------------------------- /src/util/file_reader.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/file_reader.ts -------------------------------------------------------------------------------- /src/util/flowbehavior.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/flowbehavior.ts -------------------------------------------------------------------------------- /src/util/healthcheck.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/healthcheck.ts -------------------------------------------------------------------------------- /src/util/logger.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/logger.ts -------------------------------------------------------------------------------- /src/util/originoverride.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/originoverride.ts -------------------------------------------------------------------------------- /src/util/proxy.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/proxy.ts -------------------------------------------------------------------------------- /src/util/recorder.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/recorder.ts -------------------------------------------------------------------------------- /src/util/redis.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/redis.ts -------------------------------------------------------------------------------- /src/util/replayserver.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/replayserver.ts -------------------------------------------------------------------------------- /src/util/reqresp.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/reqresp.ts -------------------------------------------------------------------------------- /src/util/robots.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/robots.ts -------------------------------------------------------------------------------- /src/util/screencaster.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/screencaster.ts -------------------------------------------------------------------------------- /src/util/screenshots.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/screenshots.ts -------------------------------------------------------------------------------- /src/util/seeds.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/seeds.ts -------------------------------------------------------------------------------- /src/util/sitemapper.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/sitemapper.ts -------------------------------------------------------------------------------- /src/util/state.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/state.ts -------------------------------------------------------------------------------- /src/util/storage.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/storage.ts -------------------------------------------------------------------------------- /src/util/textextract.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/textextract.ts -------------------------------------------------------------------------------- /src/util/timing.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/timing.ts -------------------------------------------------------------------------------- /src/util/wacz.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/wacz.ts -------------------------------------------------------------------------------- /src/util/warcwriter.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/warcwriter.ts -------------------------------------------------------------------------------- /src/util/worker.ts: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/src/util/worker.ts -------------------------------------------------------------------------------- /test-setup.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/test-setup.js -------------------------------------------------------------------------------- /tests/.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/.DS_Store -------------------------------------------------------------------------------- /tests/adblockrules.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/adblockrules.test.js -------------------------------------------------------------------------------- /tests/add-exclusion.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/add-exclusion.test.js -------------------------------------------------------------------------------- /tests/basic_crawl.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/basic_crawl.test.js -------------------------------------------------------------------------------- /tests/blockrules.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/blockrules.test.js -------------------------------------------------------------------------------- /tests/brave-query-redir.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/brave-query-redir.test.js -------------------------------------------------------------------------------- /tests/collection_name.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/collection_name.test.js -------------------------------------------------------------------------------- /tests/config_file.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/config_file.test.js -------------------------------------------------------------------------------- /tests/config_stdin.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/config_stdin.test.js -------------------------------------------------------------------------------- /tests/crawl_overwrite.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/crawl_overwrite.js -------------------------------------------------------------------------------- /tests/custom-behavior-flow.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/custom-behavior-flow.test.js -------------------------------------------------------------------------------- /tests/custom-behavior.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/custom-behavior.test.js -------------------------------------------------------------------------------- /tests/custom-behaviors/custom-2.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/custom-behaviors/custom-2.js -------------------------------------------------------------------------------- /tests/custom-behaviors/custom-flow.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/custom-behaviors/custom-flow.json -------------------------------------------------------------------------------- /tests/custom-behaviors/custom.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/custom-behaviors/custom.js -------------------------------------------------------------------------------- /tests/custom_driver.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/custom_driver.test.js -------------------------------------------------------------------------------- /tests/custom_selector.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/custom_selector.test.js -------------------------------------------------------------------------------- /tests/dryrun.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/dryrun.test.js -------------------------------------------------------------------------------- /tests/exclude-redirected.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/exclude-redirected.test.js -------------------------------------------------------------------------------- /tests/extra_hops_depth.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/extra_hops_depth.test.js -------------------------------------------------------------------------------- /tests/file_stats.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/file_stats.test.js -------------------------------------------------------------------------------- /tests/fixtures/crawl-1.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/crawl-1.yaml -------------------------------------------------------------------------------- /tests/fixtures/crawl-2.yaml: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/crawl-2.yaml -------------------------------------------------------------------------------- /tests/fixtures/driver-1.mjs: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/driver-1.mjs -------------------------------------------------------------------------------- /tests/fixtures/pages.jsonl: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/pages.jsonl -------------------------------------------------------------------------------- /tests/fixtures/proxies/proxy-key: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/proxies/proxy-key -------------------------------------------------------------------------------- /tests/fixtures/proxies/proxy-key.pub: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/proxies/proxy-key.pub -------------------------------------------------------------------------------- /tests/fixtures/proxies/proxy-test-bad-auth.pac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/proxies/proxy-test-bad-auth.pac -------------------------------------------------------------------------------- /tests/fixtures/proxies/proxy-test-good-auth.pac: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/proxies/proxy-test-good-auth.pac -------------------------------------------------------------------------------- /tests/fixtures/sample-profile.tar.gz: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/sample-profile.tar.gz -------------------------------------------------------------------------------- /tests/fixtures/urlSeedFile.txt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/fixtures/urlSeedFile.txt -------------------------------------------------------------------------------- /tests/http-auth.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/http-auth.test.js -------------------------------------------------------------------------------- /tests/invalid-behaviors/invalid-export.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/invalid-behaviors/invalid-export.js -------------------------------------------------------------------------------- /tests/lang-code.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/lang-code.test.js -------------------------------------------------------------------------------- /tests/limit_reached.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/limit_reached.test.js -------------------------------------------------------------------------------- /tests/log_filtering.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/log_filtering.test.js -------------------------------------------------------------------------------- /tests/mult_url_crawl_with_favicon.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/mult_url_crawl_with_favicon.test.js -------------------------------------------------------------------------------- /tests/multi-instance-crawl.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/multi-instance-crawl.test.js -------------------------------------------------------------------------------- /tests/non-html-crawl.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/non-html-crawl.test.js -------------------------------------------------------------------------------- /tests/pageinfo-records.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/pageinfo-records.test.js -------------------------------------------------------------------------------- /tests/profiles.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/profiles.test.js -------------------------------------------------------------------------------- /tests/proxy.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/proxy.test.js -------------------------------------------------------------------------------- /tests/qa_compare.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/qa_compare.test.js -------------------------------------------------------------------------------- /tests/retry-failed.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/retry-failed.test.js -------------------------------------------------------------------------------- /tests/robots_txt.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/robots_txt.test.js -------------------------------------------------------------------------------- /tests/rollover-writer.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/rollover-writer.test.js -------------------------------------------------------------------------------- /tests/saved-state.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/saved-state.test.js -------------------------------------------------------------------------------- /tests/scopes.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/scopes.test.js -------------------------------------------------------------------------------- /tests/screenshot.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/screenshot.test.js -------------------------------------------------------------------------------- /tests/seeds.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/seeds.test.js -------------------------------------------------------------------------------- /tests/sitemap-parse.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/sitemap-parse.test.js -------------------------------------------------------------------------------- /tests/storage.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/storage.test.js -------------------------------------------------------------------------------- /tests/text-extract.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/text-extract.test.js -------------------------------------------------------------------------------- /tests/upload-wacz.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/upload-wacz.test.js -------------------------------------------------------------------------------- /tests/url-normalize.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/url-normalize.test.js -------------------------------------------------------------------------------- /tests/url_file_list.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/url_file_list.test.js -------------------------------------------------------------------------------- /tests/warcinfo.test.js: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tests/warcinfo.test.js -------------------------------------------------------------------------------- /tsconfig.eslint.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tsconfig.eslint.json -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/tsconfig.json -------------------------------------------------------------------------------- /yarn.lock: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/webrecorder/browsertrix-crawler/HEAD/yarn.lock --------------------------------------------------------------------------------