├── apps ├── test-suite │ ├── jest.setup.js │ ├── .env.example │ ├── jest.config.js │ ├── load-test-results │ │ ├── tests-1-5 │ │ │ └── assets │ │ │ │ ├── metrics-test-2.png │ │ │ │ ├── metrics-test-3.png │ │ │ │ ├── metrics-test-4.png │ │ │ │ ├── metrics-test-5.png │ │ │ │ ├── CPU-utilization-report-test-1.png │ │ │ │ └── memory-utilization-report-test-1.png │ │ └── tests-6-7 │ │ │ └── assets │ │ │ ├── metrics-test-6.png │ │ │ ├── metrics-test-7.png │ │ │ ├── metrics-test-8.png │ │ │ ├── metrics-fire-engine-test-7.png │ │ │ ├── metrics-fire-engine-test-8.png │ │ │ └── metrics-fire-engine-test-7-2.png │ ├── utils │ │ ├── types.ts │ │ ├── log.ts │ │ └── tokens.ts │ └── audit-ci.jsonc ├── rust-sdk │ ├── .gitignore │ ├── tests │ │ └── .env.example │ ├── CHANGELOG.md │ ├── examples │ │ └── cancel_crawl_example.rs │ └── Cargo.toml ├── api │ ├── native │ │ ├── rustfmt.toml │ │ ├── src │ │ │ ├── document │ │ │ │ ├── renderers │ │ │ │ │ └── mod.rs │ │ │ │ ├── providers │ │ │ │ │ ├── mod.rs │ │ │ │ │ └── factory.rs │ │ │ │ └── mod.rs │ │ │ ├── utils.rs │ │ │ └── lib.rs │ │ ├── build.rs │ │ ├── .cargo │ │ │ └── config.toml │ │ ├── .yarnrc.yml │ │ ├── .prettierignore │ │ ├── .taplo.toml │ │ ├── .editorconfig │ │ ├── tsconfig.json │ │ ├── .gitattributes │ │ ├── Cargo.toml │ │ └── wasi-worker-browser.mjs │ ├── src │ │ ├── scraper │ │ │ ├── scrapeURL │ │ │ │ ├── .gitignore │ │ │ │ ├── lib │ │ │ │ │ └── cacheableLookup.ts │ │ │ │ ├── postprocessors │ │ │ │ │ └── index.ts │ │ │ │ ├── transformers │ │ │ │ │ ├── removeBase64Images.ts │ │ │ │ │ └── uploadScreenshot.ts │ │ │ │ ├── engines │ │ │ │ │ └── fire-engine │ │ │ │ │ │ └── delete.ts │ │ │ │ └── README.md │ │ │ └── WebScraper │ │ │ │ ├── utils │ │ │ │ └── maxDepthUtils.ts │ │ │ │ └── __tests__ │ │ │ │ └── dns.test.ts │ │ ├── controllers │ │ │ ├── v0 │ │ │ │ ├── readiness.ts │ │ │ │ ├── liveness.ts │ │ │ │ └── admin │ │ │ │ │ ├── precrawl.ts │ │ │ │ │ ├── acuc-cache-clear.ts │ │ │ │ │ └── index-queue-prometheus.ts │ │ │ ├── v1 │ 
│ │ │ ├── concurrency-check.ts │ │ │ │ ├── crawl-ongoing.ts │ │ │ │ ├── credit-usage.ts │ │ │ │ ├── token-usage.ts │ │ │ │ └── generate-llmstxt-status.ts │ │ │ └── v2 │ │ │ │ ├── crawl-ongoing.ts │ │ │ │ ├── credit-usage.ts │ │ │ │ └── token-usage.ts │ │ ├── lib │ │ │ ├── extract │ │ │ │ ├── helpers │ │ │ │ │ ├── dereference-schema.ts │ │ │ │ │ └── deduplicate-objs-array.ts │ │ │ │ ├── fire-0 │ │ │ │ │ ├── helpers │ │ │ │ │ │ ├── dereference-schema-f0.ts │ │ │ │ │ │ └── deduplicate-objs-array-f0.ts │ │ │ │ │ └── build-document-f0.ts │ │ │ │ ├── config.ts │ │ │ │ ├── build-document.ts │ │ │ │ └── team-id-sync.ts │ │ │ ├── strings.ts │ │ │ ├── canonical-url.ts │ │ │ ├── custom-error.ts │ │ │ ├── default-values.ts │ │ │ ├── deployment.ts │ │ │ ├── parseApi.ts │ │ │ ├── withAuth.ts │ │ │ └── permissions.ts │ │ ├── services │ │ │ ├── notification │ │ │ │ ├── notification-check.ts │ │ │ │ └── notification_string.ts │ │ │ ├── billing │ │ │ │ └── issue_credits.ts │ │ │ ├── idempotency │ │ │ │ ├── create.ts │ │ │ │ └── validate.ts │ │ │ ├── alerts │ │ │ │ └── slack.ts │ │ │ ├── webhook │ │ │ │ ├── index.ts │ │ │ │ └── schema.ts │ │ │ ├── redlock.ts │ │ │ ├── subscription │ │ │ │ └── enterprise-check.ts │ │ │ └── rate-limiter.ts │ │ ├── __tests__ │ │ │ └── snips │ │ │ │ ├── utils │ │ │ │ └── collect-mocks.js │ │ │ │ ├── v2 │ │ │ │ └── crawl-prompt.test.ts │ │ │ │ └── v0 │ │ │ │ └── lib.ts │ │ ├── natives.ts │ │ ├── utils │ │ │ └── integration.ts │ │ ├── types │ │ │ ├── parse-diff.d.ts │ │ │ └── x402.d.ts │ │ └── routes │ │ │ └── v0.ts │ ├── .husky │ │ └── pre-commit │ ├── .dockerignore │ ├── .gitattributes │ ├── samples │ │ ├── sample.odt │ │ ├── sample.docx │ │ └── sample.xlsx │ ├── sharedLibs │ │ └── go-html-to-md │ │ │ ├── .gitignore │ │ │ ├── README.md │ │ │ └── go.mod │ ├── pnpm-workspace.yaml │ ├── .gitignore │ ├── .prettierrc │ ├── .env.local │ ├── utils │ │ ├── urldump-redis.js │ │ ├── find_uncovered_files.sh │ │ └── urldump.js │ ├── knip.config.ts │ ├── 
jest.config.ts │ ├── requests │ │ ├── v2 │ │ │ ├── map.requests.http │ │ │ ├── search.requests.http │ │ │ └── scrape.requests.http │ │ └── branding.requests.http │ └── tsconfig.json ├── redis │ ├── .dockerignore │ ├── Procfile │ ├── Dockerfile │ ├── scripts │ │ └── version.sh │ ├── fly.toml │ └── start-redis-server.sh ├── test-site │ ├── README.md │ ├── .npmrc │ ├── public │ │ ├── example.pdf │ │ ├── favicon.png │ │ ├── example-long.pdf │ │ ├── example.json │ │ ├── firecrawl-logo.png │ │ ├── firecrawl-wordmark.png │ │ └── fonts │ │ │ ├── atkinson-bold.woff │ │ │ └── atkinson-regular.woff │ ├── src │ │ ├── assets │ │ │ ├── blog │ │ │ │ ├── lw3-d2-3.webp │ │ │ │ ├── lw3-d3-2.webp │ │ │ │ ├── lw3-d4-2.webp │ │ │ │ ├── lw3-d5-2.webp │ │ │ │ ├── lw3-d5-3.webp │ │ │ │ ├── lw3-d6-2.webp │ │ │ │ ├── lw3-d7-2.webp │ │ │ │ ├── series-a.webp │ │ │ │ ├── changeTracking.jpg │ │ │ │ ├── or_firecrawl.webp │ │ │ │ ├── search-endpoint.jpg │ │ │ │ └── templateslaunch.webp │ │ │ ├── firecrawl-icon.png │ │ │ ├── firecrawl-logo.png │ │ │ ├── blog-placeholder.jpg │ │ │ ├── firecrawl-app-icon.png │ │ │ ├── firecrawl-wordmark.png │ │ │ ├── firecrawl-light-logo.png │ │ │ └── firecrawl-light-wordmark.png │ │ ├── consts.ts │ │ ├── components │ │ │ ├── FormattedDate.astro │ │ │ └── HeaderLink.astro │ │ ├── pages │ │ │ ├── robots.txt.ts │ │ │ └── blog │ │ │ │ └── [...slug].astro │ │ └── content.config.ts │ ├── tsconfig.json │ ├── audit-ci.jsonc │ ├── .gitignore │ ├── .prettierrc │ ├── astro.config.mjs │ └── package.json ├── python-sdk │ ├── .pylintrc │ ├── firecrawl │ │ ├── v2 │ │ │ ├── methods │ │ │ │ └── aio │ │ │ │ │ └── __init__.py │ │ │ ├── __init__.py │ │ │ └── utils │ │ │ │ ├── __init__.py │ │ │ │ └── get_version.py │ │ ├── __tests__ │ │ │ ├── e2e │ │ │ │ └── v2 │ │ │ │ │ ├── .env.example │ │ │ │ │ ├── test_usage.py │ │ │ │ │ └── aio │ │ │ │ │ └── test_aio_extract.py │ │ │ └── unit │ │ │ │ └── v2 │ │ │ │ ├── methods │ │ │ │ ├── aio │ │ │ │ │ ├── test_aio_crawl_validation.py │ │ │ │ │ 
├── test_aio_crawl_params.py │ │ │ │ │ ├── test_aio_map_request_preparation.py │ │ │ │ │ └── test_batch_request_preparation_async.py │ │ │ │ └── test_usage_types.py │ │ │ │ └── utils │ │ │ │ └── test_metadata_extras_multivalue.py │ │ └── v1 │ │ │ └── __init__.py │ ├── .gitignore │ ├── .env.example │ ├── requirements.txt │ └── LICENSE ├── playwright-service-ts │ ├── .dockerignore │ ├── audit-ci.jsonc │ ├── Dockerfile │ ├── package.json │ └── README.md ├── ui │ └── ingestion-ui │ │ ├── src │ │ ├── vite-env.d.ts │ │ ├── lib │ │ │ └── utils.ts │ │ ├── main.tsx │ │ ├── components │ │ │ └── ui │ │ │ │ ├── collapsible.tsx │ │ │ │ ├── label.tsx │ │ │ │ ├── input.tsx │ │ │ │ └── checkbox.tsx │ │ └── App.tsx │ │ ├── public │ │ └── favicon.ico │ │ ├── audit-ci.jsonc │ │ ├── postcss.config.js │ │ ├── vite.config.ts │ │ ├── tsconfig.json │ │ ├── .gitignore │ │ ├── tsconfig.node.json │ │ ├── components.json │ │ ├── index.html │ │ ├── .eslintrc.cjs │ │ ├── tsconfig.app.json │ │ └── LICENSE ├── js-sdk │ ├── .env.example │ ├── audit-ci.jsonc │ ├── firecrawl │ │ ├── audit-ci.jsonc │ │ ├── .env.example │ │ ├── src │ │ │ ├── types │ │ │ │ └── node-undici.d.ts │ │ │ ├── __tests__ │ │ │ │ └── unit │ │ │ │ │ └── v2 │ │ │ │ │ ├── scrape.unit.test.ts │ │ │ │ │ └── errorHandler.test.ts │ │ │ └── v2 │ │ │ │ ├── utils │ │ │ │ ├── getVersion.ts │ │ │ │ └── errorHandler.ts │ │ │ │ └── methods │ │ │ │ └── scrape.ts │ │ ├── jest.config.js │ │ ├── tsup.config.ts │ │ ├── tsconfig.json │ │ └── LICENSE │ ├── example_v1.js │ ├── package.json │ ├── example_v1.ts │ ├── LICENSE │ └── example_watcher.ts ├── go-html-to-md-service │ ├── .dockerignore │ ├── .gitignore │ ├── docker-compose.yml │ ├── go.mod │ └── Dockerfile └── nuq-postgres │ └── Dockerfile ├── examples ├── llama-4-maverick-web-extractor │ ├── .gitignore │ ├── requirements.txt │ └── .env.example ├── openai_swarm_firecrawl │ ├── requirements.txt │ ├── .env.example │ └── README.md ├── groq_web_crawler │ └── requirements.txt ├── 
scrape_and_analyze_airbnb_data_e2b │ ├── .prettierignore │ ├── airbnb_prices_chart.png │ ├── .env.template │ ├── prettier.config.mjs │ ├── package.json │ ├── README.md │ └── codeInterpreter.ts ├── o3-web-crawler │ ├── requirements.txt │ ├── .env.example │ └── .gitignore ├── sales_web_crawler │ ├── .env.example │ └── requirements.txt ├── gpt-4.1-web-crawler │ ├── requirements.txt │ ├── .env.example │ └── .gitignore ├── o4-mini-web-crawler │ ├── requirements.txt │ └── .env.example ├── deepseek-v3-crawler │ ├── requirements.txt │ └── .gitignore ├── kubernetes │ ├── firecrawl-helm │ │ ├── overlays │ │ │ ├── dev │ │ │ │ └── values.yaml │ │ │ └── prod │ │ │ │ └── values.yaml │ │ ├── Chart.yaml │ │ ├── templates │ │ │ ├── playwright-configmap.yaml │ │ │ ├── service.yaml │ │ │ ├── redis-service.yaml │ │ │ ├── playwright-service.yaml │ │ │ ├── _helpers.tpl │ │ │ ├── configmap.yaml │ │ │ ├── secret.yaml │ │ │ ├── worker-deployment.yaml │ │ │ └── redis-deployment.yaml │ │ └── .helmignore │ └── cluster-install │ │ ├── secret.yaml │ │ ├── configmap.yaml │ │ ├── redis.yaml │ │ ├── README.md │ │ ├── playwright-service.yaml │ │ └── nuq-postgres.yaml ├── llama-4-maverick-web-crawler │ ├── requirements.txt │ ├── .env.example │ └── .gitignore ├── openai_swarm_firecrawl_web_extractor │ ├── .env.example │ └── requirements.txt ├── deep-research-apartment-finder │ ├── requirements.txt │ └── .env.example ├── gemini-2.5-web-extractor │ ├── requirements.txt │ ├── .env.example │ └── .gitignore ├── hacker_news_scraper │ └── requirements.txt ├── full_example_apps │ └── README.md ├── gemini-2.5-screenshot-editor │ ├── requirements.txt │ └── .env.example ├── gpt-4.1-company-researcher │ ├── requirements.txt │ └── .env.example ├── deepseek-v3-company-researcher │ ├── requirements.txt │ └── .gitignore ├── gemini-2.5-crawler │ ├── requirements.txt │ └── .env.example ├── blog-articles │ ├── scheduling_scrapers │ │ ├── scheduling-scrapers-images │ │ │ ├── actions.png │ │ │ ├── output.png │ │ │ ├── 
finished_actions.png │ │ │ └── hacker_news_homepage.png │ │ └── scripts │ │ │ ├── scrape_scheduler.py │ │ │ └── cron_scraper.py │ ├── amazon-price-tracking │ │ └── amazon-price-tracking-images │ │ │ ├── alert.png │ │ │ ├── actions.png │ │ │ ├── discord.png │ │ │ ├── finished.png │ │ │ ├── linechart.png │ │ │ ├── new-alert.png │ │ │ ├── webhook.png │ │ │ ├── new-server.png │ │ │ ├── sneak-peek.png │ │ │ └── supabase_connect.png │ ├── deploying_web_scrapers │ │ └── deploying-web-scrapers-images │ │ │ ├── meme.png │ │ │ ├── ph-sample.png │ │ │ ├── workflow.png │ │ │ ├── pa-console.png │ │ │ ├── pa-scheduler.png │ │ │ ├── ph-homepage.png │ │ │ └── heroku-scheduler.png │ └── github-actions-tutorial │ │ └── github-actions-tutorial-images │ │ └── cron-syntax.png ├── aginews-ai-newsletter │ └── README.md ├── ai-podcast-generator │ └── README.md └── openai-realtime-firecrawl │ └── README.md ├── .github ├── scripts │ ├── requirements.txt │ └── eval_run.py ├── workflows │ ├── ghcr-clean.yml │ ├── deploy-image-staging.yml │ ├── deploy-nuq-postgres.yml │ ├── test-js-sdk.yml │ ├── deploy-redis.yml │ ├── deploy-playwright.yml │ ├── publish-js-sdk.yml │ ├── deploy-image.yml │ ├── eval-prod.yml │ └── deploy-go-service.yaml ├── ISSUE_TEMPLATE │ ├── feature_request.md │ └── bug_report.md ├── archive │ └── publish-rust-sdk.yml └── dependabot.yml ├── .gitattributes ├── img ├── firecrawl_logo.png ├── firecrawl_logo_v1.png └── open-source-cloud.png ├── .gitmodules └── .gitignore /apps/test-suite/jest.setup.js: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/rust-sdk/.gitignore: -------------------------------------------------------------------------------- 1 | target/ 2 | -------------------------------------------------------------------------------- /apps/api/native/rustfmt.toml: -------------------------------------------------------------------------------- 1 | 
tab_spaces = 2 2 | -------------------------------------------------------------------------------- /apps/api/src/scraper/scrapeURL/.gitignore: -------------------------------------------------------------------------------- 1 | /mocks -------------------------------------------------------------------------------- /apps/redis/.dockerignore: -------------------------------------------------------------------------------- 1 | .git 2 | fly.toml 3 | -------------------------------------------------------------------------------- /apps/test-site/README.md: -------------------------------------------------------------------------------- 1 | # Firecrawl Test Site 2 | -------------------------------------------------------------------------------- /examples/llama-4-maverick-web-extractor/.gitignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /apps/test-site/.npmrc: -------------------------------------------------------------------------------- 1 | minimum-release-age=720 2 | 3 | -------------------------------------------------------------------------------- /.github/scripts/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | packaging 3 | toml -------------------------------------------------------------------------------- /apps/api/native/src/document/renderers/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod html; 2 | -------------------------------------------------------------------------------- /apps/python-sdk/.pylintrc: -------------------------------------------------------------------------------- 1 | [FORMAT] 2 | max-line-length = 120 -------------------------------------------------------------------------------- /apps/api/native/build.rs: -------------------------------------------------------------------------------- 1 | 
fn main() { 2 | napi_build::setup(); 3 | } 4 | -------------------------------------------------------------------------------- /examples/openai_swarm_firecrawl/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl-py 2 | openai -------------------------------------------------------------------------------- /apps/api/.husky/pre-commit: -------------------------------------------------------------------------------- 1 | cd apps/api && pnpm knip --cache && pnpm lint-staged -------------------------------------------------------------------------------- /apps/playwright-service-ts/.dockerignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | /dist/ 3 | .env 4 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/src/vite-env.d.ts: -------------------------------------------------------------------------------- 1 | /// 2 | -------------------------------------------------------------------------------- /examples/openai_swarm_firecrawl/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | FIRECRAWL_API_KEY= -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/v2/methods/aio/__init__.py: -------------------------------------------------------------------------------- 1 | # Async (aio) method modules for v2 -------------------------------------------------------------------------------- /examples/groq_web_crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl-py 2 | groq 3 
| python-dotenv 4 | -------------------------------------------------------------------------------- /apps/api/.dockerignore: -------------------------------------------------------------------------------- 1 | **/node_modules/ 2 | **/dist/ 3 | **/target/ 4 | .env 5 | *.csv 6 | -------------------------------------------------------------------------------- /apps/api/.gitattributes: -------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | -------------------------------------------------------------------------------- /examples/scrape_and_analyze_airbnb_data_e2b/.prettierignore: -------------------------------------------------------------------------------- 1 | # Ignore artifacts: 2 | node_modules -------------------------------------------------------------------------------- /apps/rust-sdk/tests/.env.example: -------------------------------------------------------------------------------- 1 | API_URL=http://localhost:3002 2 | TEST_API_KEY=fc-YOUR_API_KEY 3 | -------------------------------------------------------------------------------- /examples/o3-web-crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl>=0.1.0 2 | openai>=1.0.0 3 | python-dotenv>=0.19.0 -------------------------------------------------------------------------------- /examples/sales_web_crawler/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | FIRECRAWL_API_KEY= 3 | SERP_API_KEY= 4 | -------------------------------------------------------------------------------- /img/firecrawl_logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/img/firecrawl_logo.png -------------------------------------------------------------------------------- /apps/js-sdk/.env.example: 
-------------------------------------------------------------------------------- 1 | FIRECRAWL_API_KEY= 2 | FIRECRAWL_API_URL=https://api.firecrawl.dev -------------------------------------------------------------------------------- /apps/python-sdk/.gitignore: -------------------------------------------------------------------------------- 1 | bin 2 | build 3 | include 4 | lib 5 | # local venv 6 | apps/python-sdk/.venv/ 7 | -------------------------------------------------------------------------------- /examples/gpt-4.1-web-crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl==0.11.0 2 | openai==1.14.0 3 | python-dotenv==1.0.0 -------------------------------------------------------------------------------- /examples/o4-mini-web-crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl==1.0.0 2 | openai==1.16.0 3 | python-dotenv==1.0.0 -------------------------------------------------------------------------------- /apps/api/samples/sample.odt: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/api/samples/sample.odt -------------------------------------------------------------------------------- /apps/api/sharedLibs/go-html-to-md/.gitignore: -------------------------------------------------------------------------------- 1 | html-to-markdown.* 2 | !html-to-markdown.go 3 | libhtml-to-markdown.* -------------------------------------------------------------------------------- /img/firecrawl_logo_v1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/img/firecrawl_logo_v1.png -------------------------------------------------------------------------------- /img/open-source-cloud.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/img/open-source-cloud.png -------------------------------------------------------------------------------- /apps/api/samples/sample.docx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/api/samples/sample.docx -------------------------------------------------------------------------------- /apps/api/samples/sample.xlsx: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/api/samples/sample.xlsx -------------------------------------------------------------------------------- /apps/rust-sdk/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | ## CHANGELOG 2 | 3 | ## [0.1] 4 | 5 | ### Added 6 | 7 | - [feat] Firecrawl rust sdk. 
8 | -------------------------------------------------------------------------------- /examples/deepseek-v3-crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl==1.13.5 2 | python-dotenv==1.0.1 3 | huggingface-hub>=0.20.0 -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/overlays/dev/values.yaml: -------------------------------------------------------------------------------- 1 | # Override the default values for the dev environment 2 | -------------------------------------------------------------------------------- /examples/llama-4-maverick-web-crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl>=0.1.0 2 | together>=0.2.0 3 | python-dotenv>=0.19.0 -------------------------------------------------------------------------------- /examples/openai_swarm_firecrawl_web_extractor/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | FIRECRAWL_API_KEY= 3 | SERP_API_KEY= -------------------------------------------------------------------------------- /apps/api/native/.cargo/config.toml: -------------------------------------------------------------------------------- 1 | [target.x86_64-pc-windows-msvc] 2 | rustflags = ["-C", "target-feature=+crt-static"] 3 | -------------------------------------------------------------------------------- /examples/deep-research-apartment-finder/requirements.txt: -------------------------------------------------------------------------------- 1 | anthropic==0.18.0 2 | firecrawl==0.2.0 3 | python-dotenv==1.0.0 -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/overlays/prod/values.yaml: -------------------------------------------------------------------------------- 1 | # Override the default values for the prod 
environment 2 | -------------------------------------------------------------------------------- /apps/test-site/public/example.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/public/example.pdf -------------------------------------------------------------------------------- /apps/test-site/public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/public/favicon.png -------------------------------------------------------------------------------- /apps/test-suite/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY= 2 | TEST_API_KEY= 3 | TEST_URL=http://localhost:3002 4 | ANTHROPIC_API_KEY= 5 | ENV= -------------------------------------------------------------------------------- /apps/api/pnpm-workspace.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - "native" 3 | injectWorkspacePackages: true 4 | minimumReleaseAge: 720 # 12 hours 5 | -------------------------------------------------------------------------------- /apps/js-sdk/audit-ci.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://github.com/IBM/audit-ci/raw/main/docs/schema.json", 3 | "low": true 4 | } -------------------------------------------------------------------------------- /examples/o4-mini-web-crawler/.env.example: -------------------------------------------------------------------------------- 1 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 2 | OPENAI_API_KEY=your_openai_api_key_here -------------------------------------------------------------------------------- /apps/test-site/public/example-long.pdf: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/public/example-long.pdf -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/ui/ingestion-ui/public/favicon.ico -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/audit-ci.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://github.com/IBM/audit-ci/raw/main/docs/schema.json", 3 | "low": true 4 | } -------------------------------------------------------------------------------- /apps/test-site/public/example.json: -------------------------------------------------------------------------------- 1 | { 2 | "userId": 1, 3 | "id": 1, 4 | "title": "delectus aut autem", 5 | "completed": false 6 | } 7 | -------------------------------------------------------------------------------- /apps/test-site/public/firecrawl-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/public/firecrawl-logo.png -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/audit-ci.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://github.com/IBM/audit-ci/raw/main/docs/schema.json", 3 | "low": true 4 | } -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/postcss.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | tailwindcss: {}, 4 | autoprefixer: {}, 5 | }, 6 | } 7 | 
-------------------------------------------------------------------------------- /examples/gemini-2.5-web-extractor/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv==1.0.0 2 | google-generativeai==0.3.2 3 | requests==2.32.4 4 | serpapi==0.1.5 -------------------------------------------------------------------------------- /examples/hacker_news_scraper/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | beautifulsoup4 3 | firecrawl 4 | pydantic 5 | python-dotenv 6 | firecrawl-py 7 | -------------------------------------------------------------------------------- /examples/o3-web-crawler/.env.example: -------------------------------------------------------------------------------- 1 | # API Keys 2 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 3 | OPENAI_API_KEY=your_openai_api_key_here -------------------------------------------------------------------------------- /examples/sales_web_crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl-py 2 | openai 3 | google-search-results 4 | git+https://github.com/openai/swarm.git 5 | -------------------------------------------------------------------------------- /apps/playwright-service-ts/audit-ci.jsonc: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://github.com/IBM/audit-ci/raw/main/docs/schema.json", 3 | "low": true 4 | } -------------------------------------------------------------------------------- /apps/test-site/public/firecrawl-wordmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/public/firecrawl-wordmark.png -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/lw3-d2-3.webp: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/lw3-d2-3.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/lw3-d3-2.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/lw3-d3-2.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/lw3-d4-2.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/lw3-d4-2.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/lw3-d5-2.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/lw3-d5-2.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/lw3-d5-3.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/lw3-d5-3.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/lw3-d6-2.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/lw3-d6-2.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/lw3-d7-2.webp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/lw3-d7-2.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/series-a.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/series-a.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/firecrawl-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/firecrawl-icon.png -------------------------------------------------------------------------------- /apps/test-site/src/assets/firecrawl-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/firecrawl-logo.png -------------------------------------------------------------------------------- /apps/python-sdk/.env.example: -------------------------------------------------------------------------------- 1 | FIRECRAWL_API_KEY= 2 | FIRECRAWL_API_URL=https://api.firecrawl.dev 3 | 4 | # TESTING PURPOSE: 5 | IDMUX_URL= -------------------------------------------------------------------------------- /apps/test-site/public/fonts/atkinson-bold.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/public/fonts/atkinson-bold.woff -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog-placeholder.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog-placeholder.jpg -------------------------------------------------------------------------------- /examples/llama-4-maverick-web-extractor/requirements.txt: -------------------------------------------------------------------------------- 1 | together>=0.2.5 2 | python-dotenv>=1.0.0 3 | requests>=2.31.0 4 | google-search-results>=2.4.2 -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/.env.example: -------------------------------------------------------------------------------- 1 | FIRECRAWL_API_KEY= 2 | FIRECRAWL_API_URL=https://api.firecrawl.dev 3 | 4 | # TESTING PURPOSE: 5 | IDMUX_URL= -------------------------------------------------------------------------------- /apps/test-site/public/fonts/atkinson-regular.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/public/fonts/atkinson-regular.woff -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/changeTracking.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/changeTracking.jpg -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/or_firecrawl.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/or_firecrawl.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/firecrawl-app-icon.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/firecrawl-app-icon.png -------------------------------------------------------------------------------- /apps/test-site/src/assets/firecrawl-wordmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/firecrawl-wordmark.png -------------------------------------------------------------------------------- /apps/test-suite/jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: "ts-jest", 3 | testEnvironment: "node", 4 | setupFiles: ["./jest.setup.js"], 5 | }; 6 | -------------------------------------------------------------------------------- /apps/api/native/.yarnrc.yml: -------------------------------------------------------------------------------- 1 | nodeLinker: node-modules 2 | 3 | npmAuditRegistry: "https://registry.npmjs.org" 4 | 5 | yarnPath: .yarn/releases/yarn-4.9.4.cjs 6 | -------------------------------------------------------------------------------- /apps/redis/Procfile: -------------------------------------------------------------------------------- 1 | redis: /usr/bin/start-redis-server.sh 2 | metrics: /usr/local/bin/redis_exporter -redis.addr localhost:6379 -web.listen-address ":9091" 3 | -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/search-endpoint.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/search-endpoint.jpg -------------------------------------------------------------------------------- /apps/test-site/src/assets/blog/templateslaunch.webp: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/blog/templateslaunch.webp -------------------------------------------------------------------------------- /apps/test-site/src/assets/firecrawl-light-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/firecrawl-light-logo.png -------------------------------------------------------------------------------- /apps/test-site/src/consts.ts: -------------------------------------------------------------------------------- 1 | export const SITE_TITLE = "Firecrawl Test Website"; 2 | export const SITE_DESCRIPTION = "Welcome to the Firecrawl Test Website!"; 3 | -------------------------------------------------------------------------------- /examples/full_example_apps/README.md: -------------------------------------------------------------------------------- 1 | Full examples apps built with Firecrawl can be found at this repo: https://github.com/firecrawl/firecrawl-app-examples 2 | -------------------------------------------------------------------------------- /examples/gemini-2.5-screenshot-editor/requirements.txt: -------------------------------------------------------------------------------- 1 | firecrawl-py>=4.3.6 2 | google-genai>=1.29.0 3 | Pillow>=10.0.0 4 | requests>=2.31.0 5 | python-dotenv>=1.0.0 -------------------------------------------------------------------------------- /examples/gpt-4.1-company-researcher/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv==1.0.1 2 | requests==2.32.4 3 | serpapi-python==0.1.5 4 | openai==1.12.0 5 | firecrawl==0.1.2 -------------------------------------------------------------------------------- /examples/openai_swarm_firecrawl_web_extractor/requirements.txt: -------------------------------------------------------------------------------- 1 | 
firecrawl-py 2 | openai 3 | google-search-results 4 | git+https://github.com/openai/swarm.git 5 | -------------------------------------------------------------------------------- /apps/test-site/src/assets/firecrawl-light-wordmark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-site/src/assets/firecrawl-light-wordmark.png -------------------------------------------------------------------------------- /examples/deepseek-v3-company-researcher/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv>=1.0.0 2 | requests>=2.31.0 3 | openai>=1.12.0 4 | google-search-results>=2.4.2 5 | serpapi>=0.1.5 -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/__tests__/e2e/v2/.env.example: -------------------------------------------------------------------------------- 1 | FIRECRAWL_API_KEY= 2 | FIRECRAWL_API_URL=https://api.firecrawl.dev 3 | 4 | # TESTING PURPOSE: 5 | IDMUX_URL= -------------------------------------------------------------------------------- /apps/python-sdk/requirements.txt: -------------------------------------------------------------------------------- 1 | requests 2 | httpx 3 | pytest 4 | pytest-asyncio 5 | python-dotenv 6 | websockets 7 | nest-asyncio 8 | pydantic>=2.0 9 | aiohttp 10 | -------------------------------------------------------------------------------- /examples/gemini-2.5-crawler/requirements.txt: -------------------------------------------------------------------------------- 1 | google-cloud-aiplatform>=1.36.0 2 | google-generativeai>=0.3.2 3 | python-dotenv>=1.0.0 4 | requests>=2.31.0 5 | firecrawl>=0.1.0 -------------------------------------------------------------------------------- /examples/gpt-4.1-web-crawler/.env.example: -------------------------------------------------------------------------------- 1 | # 
Firecrawl API key 2 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 3 | 4 | # OpenAI API key 5 | OPENAI_API_KEY=your_openai_api_key_here -------------------------------------------------------------------------------- /apps/api/native/.prettierignore: -------------------------------------------------------------------------------- 1 | target 2 | .yarn 3 | index.js 4 | package-template.wasi-browser.js 5 | package-template.wasi.cjs 6 | wasi-worker-browser.mjs 7 | wasi-worker.mjs 8 | .yarnrc.yml -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/v2/__init__.py: -------------------------------------------------------------------------------- 1 | from .client import FirecrawlClient 2 | from .client_async import AsyncFirecrawlClient 3 | 4 | __all__ = ["FirecrawlClient", "AsyncFirecrawlClient"] -------------------------------------------------------------------------------- /examples/gpt-4.1-company-researcher/.env.example: -------------------------------------------------------------------------------- 1 | # API Keys 2 | OPENAI_API_KEY=your_openai_api_key_here 3 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 4 | SERP_API_KEY=your_serpapi_key_here -------------------------------------------------------------------------------- /examples/scrape_and_analyze_airbnb_data_e2b/airbnb_prices_chart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/scrape_and_analyze_airbnb_data_e2b/airbnb_prices_chart.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-1-5/assets/metrics-test-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-1-5/assets/metrics-test-2.png 
-------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-1-5/assets/metrics-test-3.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-1-5/assets/metrics-test-3.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-1-5/assets/metrics-test-4.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-1-5/assets/metrics-test-4.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-1-5/assets/metrics-test-5.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-1-5/assets/metrics-test-5.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-6-7/assets/metrics-test-6.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-6-7/assets/metrics-test-6.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-6-7/assets/metrics-test-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-6-7/assets/metrics-test-7.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-6-7/assets/metrics-test-8.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-6-7/assets/metrics-test-8.png -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/Chart.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v2 2 | name: firecrawl 3 | description: A Helm chart for deploying the Firecrawl application 4 | type: application 5 | version: 0.1.0 6 | -------------------------------------------------------------------------------- /apps/api/native/src/utils.rs: -------------------------------------------------------------------------------- 1 | use napi::bindgen_prelude::*; 2 | 3 | pub fn to_napi_err(error: E) -> Error { 4 | Error::new(Status::GenericFailure, error.to_string()) 5 | } 6 | -------------------------------------------------------------------------------- /examples/llama-4-maverick-web-crawler/.env.example: -------------------------------------------------------------------------------- 1 | # Firecrawl API Key 2 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 3 | 4 | # Together AI API Key 5 | TOGETHER_API_KEY=your_together_api_key_here -------------------------------------------------------------------------------- /apps/redis/Dockerfile: -------------------------------------------------------------------------------- 1 | ARG REDIS_VERSION=8.0.3 2 | FROM bitnami/redis:${REDIS_VERSION} 3 | 4 | COPY start-redis-server.sh /usr/bin/start-redis-server.sh 5 | 6 | CMD ["/usr/bin/start-redis-server.sh"] 7 | -------------------------------------------------------------------------------- /apps/test-suite/utils/types.ts: -------------------------------------------------------------------------------- 1 | export interface WebsiteScrapeError { 2 | website: string; 3 | prompt: string; 4 | expected_output: string; 5 | actual_output: string; 6 | 
error: string; 7 | } 8 | -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-6-7/assets/metrics-fire-engine-test-7.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-6-7/assets/metrics-fire-engine-test-7.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-6-7/assets/metrics-fire-engine-test-8.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-6-7/assets/metrics-fire-engine-test-8.png -------------------------------------------------------------------------------- /examples/blog-articles/scheduling_scrapers/scheduling-scrapers-images/actions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/scheduling_scrapers/scheduling-scrapers-images/actions.png -------------------------------------------------------------------------------- /examples/blog-articles/scheduling_scrapers/scheduling-scrapers-images/output.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/scheduling_scrapers/scheduling-scrapers-images/output.png -------------------------------------------------------------------------------- /apps/api/.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | /dist/ 3 | .env 4 | *.csv 5 | dump.rdb 6 | /mongo-data 7 | 8 | /.next/ 9 | 10 | .rdb 11 | .sentryclirc 12 | 13 | .env.* 14 | 15 | firecrawl.log 16 | 17 | test-results/ 18 | 
-------------------------------------------------------------------------------- /apps/test-site/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "astro/tsconfigs/strict", 3 | "include": [".astro/types.d.ts", "**/*"], 4 | "exclude": ["dist"], 5 | "compilerOptions": { 6 | "strictNullChecks": true 7 | } 8 | } 9 | -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-1-5/assets/CPU-utilization-report-test-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-1-5/assets/CPU-utilization-report-test-1.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-6-7/assets/metrics-fire-engine-test-7-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-6-7/assets/metrics-fire-engine-test-7-2.png -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/src/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { type ClassValue, clsx } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/alert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/alert.png 
-------------------------------------------------------------------------------- /examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/meme.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/meme.png -------------------------------------------------------------------------------- /apps/test-suite/load-test-results/tests-1-5/assets/memory-utilization-report-test-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/apps/test-suite/load-test-results/tests-1-5/assets/memory-utilization-report-test-1.png -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/actions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/actions.png -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/discord.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/discord.png -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/finished.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/finished.png 
-------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/linechart.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/linechart.png -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/new-alert.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/new-alert.png -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/webhook.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/webhook.png -------------------------------------------------------------------------------- /apps/api/native/.taplo.toml: -------------------------------------------------------------------------------- 1 | exclude = ["node_modules/**/*.toml"] 2 | 3 | # https://taplo.tamasfe.dev/configuration/formatter-options.html 4 | [formatting] 5 | align_entries = true 6 | indent_tables = true 7 | reorder_keys = true 8 | -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/new-server.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/new-server.png -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/sneak-peek.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/sneak-peek.png -------------------------------------------------------------------------------- /examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/ph-sample.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/ph-sample.png -------------------------------------------------------------------------------- /examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/workflow.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/workflow.png -------------------------------------------------------------------------------- /examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/pa-console.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/pa-console.png -------------------------------------------------------------------------------- /examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/pa-scheduler.png: 
-------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/pa-scheduler.png -------------------------------------------------------------------------------- /examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/ph-homepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/ph-homepage.png -------------------------------------------------------------------------------- /examples/blog-articles/scheduling_scrapers/scheduling-scrapers-images/finished_actions.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/scheduling_scrapers/scheduling-scrapers-images/finished_actions.png -------------------------------------------------------------------------------- /examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/supabase_connect.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/amazon-price-tracking/amazon-price-tracking-images/supabase_connect.png -------------------------------------------------------------------------------- /examples/blog-articles/github-actions-tutorial/github-actions-tutorial-images/cron-syntax.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/github-actions-tutorial/github-actions-tutorial-images/cron-syntax.png -------------------------------------------------------------------------------- 
/examples/blog-articles/scheduling_scrapers/scheduling-scrapers-images/hacker_news_homepage.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/scheduling_scrapers/scheduling-scrapers-images/hacker_news_homepage.png -------------------------------------------------------------------------------- /examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/heroku-scheduler.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/code/app-firecrawl-agpl/main/examples/blog-articles/deploying_web_scrapers/deploying-web-scrapers-images/heroku-scheduler.png -------------------------------------------------------------------------------- /examples/gemini-2.5-web-extractor/.env.example: -------------------------------------------------------------------------------- 1 | # Google Gemini API Key 2 | GOOGLE_API_KEY=your_google_api_key_here 3 | 4 | # Firecrawl API Key 5 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 6 | 7 | # SerpAPI Key 8 | SERP_API_KEY=your_serp_api_key_here -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/playwright-configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ include "firecrawl.fullname" . 
}}-playwright-config 5 | data: 6 | PORT: {{ .Values.playwrightConfig.PORT | quote }} 7 | -------------------------------------------------------------------------------- /apps/api/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "trailingComma": "all", 3 | "tabWidth": 2, 4 | "useTabs": false, 5 | "semi": true, 6 | "singleQuote": false, 7 | "printWidth": 80, 8 | "bracketSpacing": true, 9 | "arrowParens": "avoid", 10 | "endOfLine": "lf" 11 | } 12 | -------------------------------------------------------------------------------- /examples/deep-research-apartment-finder/.env.example: -------------------------------------------------------------------------------- 1 | # Firecrawl API key (get from https://firecrawl.dev) 2 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 3 | 4 | # Anthropic API key (get from https://console.anthropic.com) 5 | ANTHROPIC_API_KEY=your_anthropic_api_key_here -------------------------------------------------------------------------------- /apps/redis/scripts/version.sh: -------------------------------------------------------------------------------- 1 | ORIGIN=${ORIGIN:-origin} 2 | 3 | version=$(git fetch --tags "${ORIGIN}" &>/dev/null | git -c "versionsort.prereleasesuffix=-pre" tag -l --sort=version:refname | grep -v dev | grep -vE '^v2$' | grep -vE '^v1$' | tail -n1 | cut -c 2-) 4 | 5 | echo "$version" 6 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v0/readiness.ts: -------------------------------------------------------------------------------- 1 | import { Request, Response } from "express"; 2 | 3 | export async function readinessController(req: Request, res: Response) { 4 | // TODO: add checks when the application is ready to serve traffic 5 | res.status(200).json({ status: "ok" }); 6 | } 7 | -------------------------------------------------------------------------------- /apps/test-site/audit-ci.jsonc: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://github.com/IBM/audit-ci/raw/main/docs/schema.json", 3 | "low": true, 4 | "allowlist": [ 5 | "GHSA-4fh9-h7wg-q85m|@astrojs/mdx>@astrojs/markdown-remark>remark-rehype>mdast-util-to-hast" // not impacted by this 6 | ] 7 | } -------------------------------------------------------------------------------- /.gitmodules: -------------------------------------------------------------------------------- 1 | [submodule "apps/go-sdk/firecrawl-go"] 2 | path = apps/go-sdk/firecrawl-go 3 | url = https://github.com/firecrawl/firecrawl-go 4 | [submodule "apps/go-sdk/firecrawl-go-examples"] 5 | path = apps/go-sdk/firecrawl-go-examples 6 | url = https://github.com/firecrawl/firecrawl-go-examples 7 | -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/src/types/node-undici.d.ts: -------------------------------------------------------------------------------- 1 | type WebSocketCtor = typeof globalThis.WebSocket; 2 | 3 | declare module "node:undici" { 4 | export const WebSocket: WebSocketCtor; 5 | const _default: { 6 | WebSocket: WebSocketCtor; 7 | }; 8 | export default _default; 9 | } 10 | 11 | -------------------------------------------------------------------------------- /examples/gemini-2.5-crawler/.env.example: -------------------------------------------------------------------------------- 1 | # Firecrawl API key from your Firecrawl account 2 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 3 | 4 | # Google Cloud API key with Gemini API access 5 | # Get this from Google Cloud Console: https://console.cloud.google.com/ 6 | GEMINI_API_KEY=your_gemini_api_key_here -------------------------------------------------------------------------------- /apps/api/src/controllers/v0/liveness.ts: -------------------------------------------------------------------------------- 1 | import { Request, Response } from "express"; 2 | 3 | 
export async function livenessController(req: Request, res: Response) { 4 | //TODO: add checks if the application is live and healthy like checking the redis connection 5 | res.status(200).json({ status: "ok" }); 6 | } 7 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/src/main.tsx: -------------------------------------------------------------------------------- 1 | import React from 'react' 2 | import ReactDOM from 'react-dom/client' 3 | import App from './App.tsx' 4 | import './index.css' 5 | 6 | ReactDOM.createRoot(document.getElementById('root')!).render( 7 | 8 | 9 | , 10 | ) 11 | -------------------------------------------------------------------------------- /examples/aginews-ai-newsletter/README.md: -------------------------------------------------------------------------------- 1 | # AGI News ✨ 2 | AGI News is a daily AI newsletter that's completely sourced by autonomous AI agents. It is live at [https://www.aginews.io/](https://www.aginews.io/) 3 | 4 | Here is a link to the repo: 5 | 6 | [https://github.com/ericciarla/aginews](https://github.com/ericciarla/aginews) -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/vite.config.ts: -------------------------------------------------------------------------------- 1 | import path from "path" 2 | import react from "@vitejs/plugin-react" 3 | import { defineConfig } from "vite" 4 | 5 | export default defineConfig({ 6 | plugins: [react()], 7 | resolve: { 8 | alias: { 9 | "@": path.resolve(__dirname, "./src"), 10 | }, 11 | }, 12 | }) 13 | -------------------------------------------------------------------------------- /examples/ai-podcast-generator/README.md: -------------------------------------------------------------------------------- 1 | # Generate AI podcasts based on real time news 🎙️ 2 | 3 | This example crawls the web for interesting news stories then records a podcast with your own voice. 
4 | 5 | Here is a link to the repo: 6 | 7 | [https://github.com/ericciarla/aginews-podcast](https://github.com/ericciarla/aginews-podcast) -------------------------------------------------------------------------------- /apps/api/src/scraper/scrapeURL/lib/cacheableLookup.ts: -------------------------------------------------------------------------------- 1 | import CacheableLookup from "cacheable-lookup"; 2 | import dns from "dns"; 3 | 4 | import { config } from "../../../config"; 5 | export const cacheableLookup = 6 | config.SENTRY_ENVIRONMENT === "dev" 7 | ? { lookup: dns.lookup, install: () => {} } 8 | : new CacheableLookup({}); 9 | -------------------------------------------------------------------------------- /examples/openai-realtime-firecrawl/README.md: -------------------------------------------------------------------------------- 1 | # OpenAI Realtime API with Firecrawl 2 | 3 | This project is a demo of the OpenAI Realtime API with Firecrawl integrated. 4 | 5 | Here is a link to the Realtime console fork: 6 | 7 | [https://github.com/nickscamara/firecrawl-openai-realtime](https://github.com/nickscamara/firecrawl-openai-realtime) -------------------------------------------------------------------------------- /apps/api/src/controllers/v0/admin/precrawl.ts: -------------------------------------------------------------------------------- 1 | import type { Request, Response } from "express"; 2 | import { getPrecrawlQueue } from "../../../services/queue-service"; 3 | 4 | export async function triggerPrecrawl(_: Request, res: Response) { 5 | await getPrecrawlQueue().add(new Date().toISOString(), {}); 6 | res.json({ ok: true }); 7 | } 8 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "files": [], 3 | "references": [ 4 | { 5 | "path": "./tsconfig.app.json" 6 | }, 7 | { 8 | "path": "./tsconfig.node.json" 
9 | } 10 | ], 11 | "compilerOptions": { 12 | "baseUrl": ".", 13 | "paths": { 14 | "@/*": ["./src/*"] 15 | } 16 | } 17 | } 18 | -------------------------------------------------------------------------------- /apps/api/.env.local: -------------------------------------------------------------------------------- 1 | NUM_WORKERS_PER_QUEUE=8 2 | PORT= 3 | HOST= 4 | SUPABASE_ANON_TOKEN= 5 | SUPABASE_URL= 6 | SUPABASE_SERVICE_TOKEN= 7 | REDIS_URL= 8 | REDIS_RATE_LIMIT_URL= 9 | SCRAPING_BEE_API_KEY= 10 | OPENAI_API_KEY= 11 | ANTHROPIC_API_KEY= 12 | BULL_AUTH_KEY= 13 | LOGTAIL_KEY= 14 | PLAYWRIGHT_MICROSERVICE_URL= 15 | SEARCHAPI_API_KEY= 16 | -------------------------------------------------------------------------------- /examples/blog-articles/scheduling_scrapers/scripts/scrape_scheduler.py: -------------------------------------------------------------------------------- 1 | import schedule 2 | import time 3 | from firecrawl_scraper import save_firecrawl_news_data 4 | 5 | # Schedule the scraper to run every hour 6 | schedule.every().hour.do(save_firecrawl_news_data) 7 | 8 | while True: 9 | schedule.run_pending() 10 | time.sleep(1) 11 | -------------------------------------------------------------------------------- /examples/scrape_and_analyze_airbnb_data_e2b/.env.template: -------------------------------------------------------------------------------- 1 | # TODO: Get your E2B API key from https://e2b.dev/docs 2 | E2B_API_KEY="" 3 | 4 | # TODO: Get your Firecrawl API key from https://firecrawl.dev 5 | FIRECRAWL_API_KEY="" 6 | 7 | # TODO: Get your Anthropic API key from https://anthropic.com 8 | ANTHROPIC_API_KEY="" 9 | 10 | 11 | 12 | -------------------------------------------------------------------------------- /examples/scrape_and_analyze_airbnb_data_e2b/prettier.config.mjs: -------------------------------------------------------------------------------- 1 | // prettier.config.js, .prettierrc.js, prettier.config.mjs, or .prettierrc.mjs 2 | 3 | /** @type 
{import("prettier").Config} */ 4 | const config = { 5 | trailingComma: 'es5', 6 | tabWidth: 2, 7 | semi: false, 8 | singleQuote: true, 9 | } 10 | 11 | export default config 12 | -------------------------------------------------------------------------------- /apps/test-site/src/components/FormattedDate.astro: -------------------------------------------------------------------------------- 1 | --- 2 | interface Props { 3 | date: Date; 4 | } 5 | 6 | const { date } = Astro.props; 7 | --- 8 | 9 | 18 | -------------------------------------------------------------------------------- /apps/api/src/lib/extract/helpers/dereference-schema.ts: -------------------------------------------------------------------------------- 1 | import { dereference } from "@apidevtools/json-schema-ref-parser"; 2 | 3 | export async function dereferenceSchema(schema: any): Promise { 4 | try { 5 | return await dereference(schema); 6 | } catch (error) { 7 | console.error("Failed to dereference schema:", error); 8 | throw error; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /apps/api/src/lib/extract/fire-0/helpers/dereference-schema-f0.ts: -------------------------------------------------------------------------------- 1 | import { dereference } from "@apidevtools/json-schema-ref-parser"; 2 | 3 | export async function dereferenceSchema_F0(schema: any): Promise { 4 | try { 5 | return await dereference(schema); 6 | } catch (error) { 7 | console.error("Failed to dereference schema:", error); 8 | throw error; 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /apps/test-site/src/pages/robots.txt.ts: -------------------------------------------------------------------------------- 1 | import type { APIRoute } from "astro"; 2 | 3 | const getRobotsTxt = (sitemapURL: URL) => `\ 4 | User-agent: * 5 | Allow: / 6 | 7 | Sitemap: ${sitemapURL.href} 8 | `; 9 | 10 | export const GET: APIRoute = ({ site }) => { 11 | 
const sitemapURL = new URL("sitemap-index.xml", site); 12 | return new Response(getRobotsTxt(sitemapURL)); 13 | }; 14 | -------------------------------------------------------------------------------- /apps/test-site/.gitignore: -------------------------------------------------------------------------------- 1 | # build output 2 | dist/ 3 | # generated types 4 | .astro/ 5 | 6 | # dependencies 7 | node_modules/ 8 | 9 | # logs 10 | npm-debug.log* 11 | yarn-debug.log* 12 | yarn-error.log* 13 | pnpm-debug.log* 14 | 15 | # environment variables 16 | .env 17 | .env.production 18 | 19 | # macOS-specific files 20 | .DS_Store 21 | 22 | # jetbrains setting folder 23 | .idea/ 24 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/.gitignore: -------------------------------------------------------------------------------- 1 | # Logs 2 | logs 3 | *.log 4 | npm-debug.log* 5 | yarn-debug.log* 6 | yarn-error.log* 7 | pnpm-debug.log* 8 | lerna-debug.log* 9 | 10 | node_modules 11 | dist 12 | dist-ssr 13 | *.local 14 | 15 | # Editor directories and files 16 | .vscode/* 17 | !.vscode/extensions.json 18 | .idea 19 | .DS_Store 20 | *.suo 21 | *.ntvs* 22 | *.njsproj 23 | *.sln 24 | *.sw? 
25 | -------------------------------------------------------------------------------- /apps/go-html-to-md-service/.dockerignore: -------------------------------------------------------------------------------- 1 | # Ignore build artifacts 2 | *.dylib 3 | *.so 4 | *.dll 5 | *.exe 6 | 7 | # Ignore test files 8 | *_test.go 9 | 10 | # Ignore documentation 11 | README.md 12 | *.md 13 | 14 | # Ignore git 15 | .git 16 | .gitignore 17 | 18 | # Ignore IDE files 19 | .vscode 20 | .idea 21 | *.swp 22 | *.swo 23 | 24 | # Ignore temporary files 25 | *.tmp 26 | *.log 27 | 28 | -------------------------------------------------------------------------------- /apps/api/src/services/notification/notification-check.ts: -------------------------------------------------------------------------------- 1 | import { isEnterpriseTeamCreatedAfterRateLimitChange } from "../subscription/enterprise-check"; 2 | 3 | export async function shouldSendConcurrencyLimitNotification( 4 | team_id: string, 5 | ): Promise { 6 | const isEnterprise = 7 | await isEnterpriseTeamCreatedAfterRateLimitChange(team_id); 8 | return !isEnterprise; 9 | } 10 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/v2/utils/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Utility modules for v2 API client. 
3 | """ 4 | 5 | from .http_client import HttpClient 6 | from .error_handler import FirecrawlError, handle_response_error 7 | from .validation import validate_scrape_options, prepare_scrape_options 8 | 9 | __all__ = ['HttpClient', 'FirecrawlError', 'handle_response_error', 'validate_scrape_options', 'prepare_scrape_options'] -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/src/components/ui/collapsible.tsx: -------------------------------------------------------------------------------- 1 | import * as CollapsiblePrimitive from "@radix-ui/react-collapsible" 2 | 3 | const Collapsible = CollapsiblePrimitive.Root 4 | 5 | const CollapsibleTrigger = CollapsiblePrimitive.CollapsibleTrigger 6 | 7 | const CollapsibleContent = CollapsiblePrimitive.CollapsibleContent 8 | 9 | export { Collapsible, CollapsibleTrigger, CollapsibleContent } 10 | -------------------------------------------------------------------------------- /examples/kubernetes/cluster-install/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: firecrawl-secret 5 | type: Opaque 6 | data: 7 | OPENAI_API_KEY: "" 8 | SLACK_WEBHOOK_URL: "" 9 | LLAMAPARSE_API_KEY: "" 10 | BULL_AUTH_KEY: "" 11 | TEST_API_KEY: "" 12 | STRIPE_PRICE_ID_STANDARD: "" 13 | STRIPE_PRICE_ID_SCALE: "" 14 | FIRE_ENGINE_BETA_URL: "" 15 | REDIS_PASSWORD: "" 16 | -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/jest.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('ts-jest').JestConfigWithTsJest} **/ 2 | export default { 3 | testEnvironment: "node", 4 | "moduleNameMapper": { 5 | "^(\\.{1,2}/.*)\\.js$": "$1", 6 | }, 7 | "extensionsToTreatAsEsm": [".ts"], 8 | "transform": { 9 | "^.+\\.(mt|t|cj|j)s$": [ 10 | "ts-jest", 11 | { 12 | "useESM": true 13 | } 14 | ] 15 | }, 16 | }; 
-------------------------------------------------------------------------------- /examples/llama-4-maverick-web-extractor/.env.example: -------------------------------------------------------------------------------- 1 | # Together AI API Key (Required) 2 | # Get it from: https://www.together.ai/ 3 | TOGETHER_API_KEY=your_together_ai_key_here 4 | 5 | # SerpAPI Key (Required) 6 | # Get it from: https://serpapi.com/ 7 | SERP_API_KEY=your_serpapi_key_here 8 | 9 | # Firecrawl API Key (Required) 10 | # Get it from: https://firecrawl.dev/ 11 | FIRECRAWL_API_KEY=your_firecrawl_key_here -------------------------------------------------------------------------------- /apps/api/native/.editorconfig: -------------------------------------------------------------------------------- 1 | # EditorConfig helps developers define and maintain consistent 2 | # coding styles between different editors or IDEs 3 | # http://editorconfig.org 4 | root = true 5 | 6 | [*] 7 | indent_style = space 8 | indent_size = 2 9 | end_of_line = lf 10 | charset = utf-8 11 | trim_trailing_whitespace = true 12 | insert_final_newline = true 13 | 14 | [*.md] 15 | trim_trailing_whitespace = false 16 | -------------------------------------------------------------------------------- /apps/api/native/src/document/providers/mod.rs: -------------------------------------------------------------------------------- 1 | use crate::document::model::Document; 2 | use std::error::Error; 3 | 4 | pub mod docx; 5 | pub mod factory; 6 | pub mod odt; 7 | pub mod rtf; 8 | pub mod xlsx; 9 | 10 | pub trait DocumentProvider { 11 | fn parse_buffer(&self, data: &[u8]) -> Result>; 12 | 13 | #[allow(dead_code)] 14 | fn name(&self) -> &'static str; 15 | } 16 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/tsconfig.node.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | 
"tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo", 5 | "skipLibCheck": true, 6 | "module": "ESNext", 7 | "moduleResolution": "bundler", 8 | "allowSyntheticDefaultImports": true, 9 | "strict": true, 10 | "noEmit": true 11 | }, 12 | "include": ["vite.config.ts"] 13 | } 14 | -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "firecrawl.fullname" . }}-api 5 | spec: 6 | type: {{ .Values.service.api.type }} 7 | selector: 8 | app: {{ include "firecrawl.name" . }}-api 9 | ports: 10 | - protocol: TCP 11 | port: {{ .Values.service.api.port }} 12 | targetPort: {{ .Values.service.api.port }} 13 | -------------------------------------------------------------------------------- /apps/api/native/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ESNext", 4 | "strict": true, 5 | "moduleResolution": "node", 6 | "module": "CommonJS", 7 | "noUnusedLocals": true, 8 | "noUnusedParameters": true, 9 | "esModuleInterop": true, 10 | "allowSyntheticDefaultImports": true 11 | }, 12 | "include": ["."], 13 | "exclude": ["node_modules", "bench", "__test__"] 14 | } 15 | -------------------------------------------------------------------------------- /apps/api/sharedLibs/go-html-to-md/README.md: -------------------------------------------------------------------------------- 1 | To build the `go-html-to-md` library, run the following command: 2 | 3 | ```bash 4 | cd apps/api/sharedLibs/go-html-to-md 5 | go build -o <OUTPUT_FILE> -buildmode=c-shared html-to-markdown.go 6 | ``` 7 | 8 | Replace `<OUTPUT_FILE>` with the correct filename for your OS: 9 | 10 | - Windows → `html-to-markdown.dll` 11 | - Linux → `libhtml-to-markdown.so` 12 | - macOS → `libhtml-to-markdown.dylib` 13 |
-------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/redis-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "firecrawl.fullname" . }}-redis 5 | spec: 6 | type: {{ .Values.service.redis.type }} 7 | selector: 8 | app: {{ include "firecrawl.name" . }}-redis 9 | ports: 10 | - protocol: TCP 11 | port: {{ .Values.service.redis.port }} 12 | targetPort: {{ .Values.service.redis.port }} 13 | -------------------------------------------------------------------------------- /apps/api/src/lib/strings.ts: -------------------------------------------------------------------------------- 1 | import { isSelfHosted } from "./deployment"; 2 | 3 | export const BLOCKLISTED_URL_MESSAGE = isSelfHosted() 4 | ? "This website is not currently supported. Please check your server configuration and logs for more details." 5 | : "This website is not currently supported. 
If you are part of an enterprise, please reach out to help@firecrawl.com to discuss the possibility of getting it activated on your account."; 6 | -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/src/__tests__/unit/v2/scrape.unit.test.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Minimal unit test for v2 scrape (no mocking; sanity check payload path) 3 | */ 4 | import { FirecrawlClient } from "../../../v2/client"; 5 | 6 | describe("v2.scrape unit", () => { 7 | test("constructor requires apiKey", () => { 8 | expect(() => new FirecrawlClient({ apiKey: "", apiUrl: "https://api.firecrawl.dev" })).toThrow(); 9 | }); 10 | }); 11 | 12 | -------------------------------------------------------------------------------- /apps/playwright-service-ts/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:18-slim 2 | 3 | WORKDIR /usr/src/app 4 | COPY package*.json ./ 5 | RUN npm install 6 | 7 | COPY . . 
8 | 9 | ENV PLAYWRIGHT_BROWSERS_PATH=/usr/local/share/playwright 10 | 11 | # Install Playwright dependencies 12 | RUN npx playwright install chromium --with-deps 13 | 14 | RUN npm run build 15 | 16 | ARG PORT 17 | ENV PORT=${PORT} 18 | 19 | EXPOSE ${PORT} 20 | 21 | CMD [ "npm", "start" ] 22 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/components.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://ui.shadcn.com/schema.json", 3 | "style": "default", 4 | "rsc": false, 5 | "tsx": true, 6 | "tailwind": { 7 | "config": "tailwind.config.js", 8 | "css": "src/index.css", 9 | "baseColor": "slate", 10 | "cssVariables": true, 11 | "prefix": "" 12 | }, 13 | "aliases": { 14 | "components": "@/components", 15 | "utils": "@/lib/utils" 16 | } 17 | } -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | Firecrawl UI Template 9 | 10 | 11 | 12 |
13 | 14 | 15 | 16 | -------------------------------------------------------------------------------- /apps/redis/fly.toml: -------------------------------------------------------------------------------- 1 | app = 'firecrawl-dragonfly' 2 | primary_region = 'iad' 3 | 4 | [[mounts]] 5 | source = 'firecrawl_redis' 6 | destination = '/data' 7 | 8 | [[services]] 9 | protocol = 'tcp' 10 | internal_port = 6379 11 | 12 | [[services.tcp_checks]] 13 | interval = '10s' 14 | timeout = '2s' 15 | 16 | [[vm]] 17 | size = 'performance-4x' 18 | memory = '32gb' 19 | 20 | [[metrics]] 21 | port = 9091 22 | path = '/metrics' 23 | -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/playwright-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Service 3 | metadata: 4 | name: {{ include "firecrawl.fullname" . }}-playwright 5 | spec: 6 | type: {{ .Values.service.playwright.type }} 7 | selector: 8 | app: {{ include "firecrawl.name" . 
}}-playwright 9 | ports: 10 | - protocol: TCP 11 | port: {{ .Values.service.playwright.port }} 12 | targetPort: {{ .Values.service.playwright.port }} 13 | -------------------------------------------------------------------------------- /apps/api/native/src/lib.rs: -------------------------------------------------------------------------------- 1 | #![deny(clippy::all)] 2 | 3 | pub use crate::crawler::*; 4 | pub use crate::engpicker::*; 5 | pub use crate::html::*; 6 | pub use crate::pdf::*; 7 | pub use crate::utils::*; 8 | 9 | pub use crate::document::{DocumentConverter, DocumentType}; 10 | 11 | mod crawler; 12 | mod document; 13 | mod engpicker; 14 | mod html; 15 | mod pdf; 16 | mod utils; 17 | 18 | pub use napi::bindgen_prelude::*; 19 | pub use serde::{Deserialize, Serialize}; 20 | -------------------------------------------------------------------------------- /apps/test-site/.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": ["prettier-plugin-astro"], 3 | "overrides": [ 4 | { 5 | "files": "*.astro", 6 | "options": { 7 | "parser": "astro" 8 | } 9 | } 10 | ], 11 | "trailingComma": "all", 12 | "tabWidth": 2, 13 | "useTabs": false, 14 | "semi": true, 15 | "singleQuote": false, 16 | "printWidth": 80, 17 | "bracketSpacing": true, 18 | "arrowParens": "avoid", 19 | "endOfLine": "lf" 20 | } 21 | -------------------------------------------------------------------------------- /examples/gemini-2.5-web-extractor/.gitignore: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | .env 3 | 4 | # Python 5 | __pycache__/ 6 | *.py[cod] 7 | *$py.class 8 | *.so 9 | .Python 10 | build/ 11 | develop-eggs/ 12 | dist/ 13 | downloads/ 14 | eggs/ 15 | .eggs/ 16 | lib/ 17 | lib64/ 18 | parts/ 19 | sdist/ 20 | var/ 21 | wheels/ 22 | *.egg-info/ 23 | .installed.cfg 24 | *.egg 25 | 26 | # IDE 27 | .idea/ 28 | .vscode/ 29 | *.swp 30 | *.swo 31 | 32 | # OS 33 | .DS_Store 34 
| Thumbs.db -------------------------------------------------------------------------------- /apps/api/src/lib/extract/config.ts: -------------------------------------------------------------------------------- 1 | export const extractConfig = { 2 | RERANKING: { 3 | MAX_INITIAL_RANKING_LIMIT: 1000, 4 | MAX_RANKING_LIMIT_FOR_RELEVANCE: 100, 5 | INITIAL_SCORE_THRESHOLD_FOR_RELEVANCE: 0.00000001, 6 | FALLBACK_SCORE_THRESHOLD_FOR_RELEVANCE: 0.00000001, 7 | MIN_REQUIRED_LINKS: 1, 8 | }, 9 | DEDUPLICATION: { 10 | MAX_TOKENS: 4096, 11 | }, 12 | }; 13 | export const CUSTOM_U_TEAMS = ["874d40cc-a5c0-4e93-b661-9ddfbad5e51e"]; 14 | -------------------------------------------------------------------------------- /apps/go-html-to-md-service/.gitignore: -------------------------------------------------------------------------------- 1 | # Binaries for programs and plugins 2 | *.exe 3 | *.exe~ 4 | *.dll 5 | *.so 6 | *.dylib 7 | 8 | # Test binary, built with `go test -c` 9 | *.test 10 | 11 | # Output of the go coverage tool 12 | *.out 13 | 14 | # Dependency directories 15 | vendor/ 16 | 17 | # Go workspace file 18 | go.work 19 | 20 | # IDE specific files 21 | .vscode/ 22 | .idea/ 23 | *.swp 24 | *.swo 25 | 26 | # Logs 27 | *.log 28 | 29 | # Binary 30 | html-to-markdown-service 31 | 32 | -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/.helmignore: -------------------------------------------------------------------------------- 1 | # Patterns to ignore when building packages. 2 | # This supports shell glob matching, relative path matching, and 3 | # negation (prefixed with !). Only one pattern per line. 
4 | .DS_Store 5 | # Common VCS dirs 6 | .git/ 7 | .gitignore 8 | .bzr/ 9 | .bzrignore 10 | .hg/ 11 | .hgignore 12 | .svn/ 13 | # Common backup files 14 | *.swp 15 | *.bak 16 | *.tmp 17 | *.orig 18 | *~ 19 | # Various IDEs 20 | .project 21 | .idea/ 22 | *.tmproj 23 | .vscode/ 24 | -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/src/v2/utils/getVersion.ts: -------------------------------------------------------------------------------- 1 | export function getVersion(): string { 2 | try { 3 | if (typeof process !== "undefined" && process.env && process.env.npm_package_version) { 4 | return process.env.npm_package_version as string; 5 | } 6 | 7 | // eslint-disable-next-line @typescript-eslint/no-var-requires 8 | const pkg = require("../../../package.json"); 9 | return (pkg?.version as string) || "3.x.x"; 10 | } catch { 11 | return "3.x.x"; 12 | } 13 | } 14 | 15 | -------------------------------------------------------------------------------- /apps/api/src/__tests__/snips/utils/collect-mocks.js: -------------------------------------------------------------------------------- 1 | const path = require("path"); 2 | const fs = require("fs"); 3 | 4 | const mocksDirPath = path.join(__dirname, "../../../scraper/scrapeURL/mocks"); 5 | const files = fs.readdirSync(mocksDirPath); 6 | 7 | const contents = files.map(x => 8 | JSON.parse(fs.readFileSync(path.join(mocksDirPath, x), "utf8")), 9 | ); 10 | 11 | fs.writeFileSync( 12 | path.join(__dirname, "../mocks/" + process.argv[2] + ".json"), 13 | JSON.stringify(contents, undefined, 4), 14 | ); 15 | -------------------------------------------------------------------------------- /examples/gpt-4.1-web-crawler/.gitignore: -------------------------------------------------------------------------------- 1 | __pycache__/ 2 | *.py[cod] 3 | *$py.class 4 | *.so 5 | .Python 6 | env/ 7 | build/ 8 | develop-eggs/ 9 | dist/ 10 | downloads/ 11 | eggs/ 12 | .eggs/ 13 | lib/ 14 | lib64/ 15 | 
parts/ 16 | sdist/ 17 | var/ 18 | *.egg-info/ 19 | .installed.cfg 20 | *.egg 21 | 22 | # Virtual environment 23 | venv/ 24 | ENV/ 25 | .env 26 | 27 | # IDE specific files 28 | .idea/ 29 | .vscode/ 30 | *.swp 31 | *.swo 32 | 33 | # Logs 34 | *.log 35 | 36 | # OS specific files 37 | .DS_Store 38 | Thumbs.db -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_validation.py: -------------------------------------------------------------------------------- 1 | from firecrawl.v2.types import CrawlRequest, ScrapeOptions 2 | from firecrawl.v2.methods.aio.crawl import _prepare_crawl_request 3 | import pytest 4 | 5 | 6 | class TestAsyncCrawlValidation: 7 | def test_invalid_url(self): 8 | with pytest.raises(ValueError): 9 | _prepare_crawl_request(CrawlRequest(url="")) 10 | with pytest.raises(ValueError): 11 | _prepare_crawl_request(CrawlRequest(url=" ")) 12 | 13 | -------------------------------------------------------------------------------- /examples/gemini-2.5-screenshot-editor/.env.example: -------------------------------------------------------------------------------- 1 | # Firecrawl + Gemini Screenshot Editor Configuration 2 | # Copy this file to .env and fill in your API keys 3 | 4 | # Firecrawl API Key - Get yours at https://firecrawl.dev 5 | FIRECRAWL_API_KEY=your_firecrawl_api_key_here 6 | 7 | # Google Gemini API Key - Get yours at https://aistudio.google.com/ 8 | GEMINI_API_KEY=your_google_gemini_api_key_here 9 | 10 | # Optional: Custom Firecrawl API URL (for self-hosted instances) 11 | # FIRECRAWL_API_URL=https://your-firecrawl-instance.com -------------------------------------------------------------------------------- /apps/api/src/scraper/scrapeURL/postprocessors/index.ts: -------------------------------------------------------------------------------- 1 | import { Meta } from ".."; 2 | import { EngineScrapeResult } from "../engines"; 3 | import { 
youtubePostprocessor } from "./youtube"; 4 | 5 | export interface Postprocessor { 6 | name: string; 7 | shouldRun: (meta: Meta, url: URL, postProcessorsUsed?: string[]) => boolean; 8 | run: ( 9 | meta: Meta, 10 | engineResult: EngineScrapeResult, 11 | ) => Promise; 12 | } 13 | 14 | export const postprocessors: Postprocessor[] = [youtubePostprocessor]; 15 | -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from "tsup"; 2 | 3 | export default defineConfig({ 4 | entryPoints: ["src/index.ts"], 5 | format: ["cjs", "esm"], 6 | dts: true, 7 | outDir: "dist", 8 | clean: true, 9 | platform: "node", 10 | target: "node22", 11 | noExternal: ["typescript-event-target"], 12 | esbuildOptions(options) { 13 | options.define = { 14 | ...options.define, 15 | "process.env.NODE_ENV": JSON.stringify(process.env.NODE_ENV || "production"), 16 | }; 17 | }, 18 | }); -------------------------------------------------------------------------------- /apps/api/src/scraper/WebScraper/utils/maxDepthUtils.ts: -------------------------------------------------------------------------------- 1 | export function getAdjustedMaxDepth( 2 | url: string, 3 | maxCrawlDepth: number, 4 | ): number { 5 | const baseURLDepth = getURLDepth(url); 6 | const adjustedMaxDepth = maxCrawlDepth + baseURLDepth; 7 | return adjustedMaxDepth; 8 | } 9 | 10 | export function getURLDepth(url: string): number { 11 | const pathSplits = new URL(url).pathname 12 | .split("/") 13 | .filter(x => x !== "" && x !== "index.php" && x !== "index.html"); 14 | return pathSplits.length; 15 | } 16 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | root: true, 3 | env: { browser: true, es2020: 
true }, 4 | extends: [ 5 | 'eslint:recommended', 6 | 'plugin:@typescript-eslint/recommended', 7 | 'plugin:react-hooks/recommended', 8 | ], 9 | ignorePatterns: ['dist', '.eslintrc.cjs'], 10 | parser: '@typescript-eslint/parser', 11 | plugins: ['react-refresh'], 12 | rules: { 13 | 'react-refresh/only-export-components': [ 14 | 'warn', 15 | { allowConstantExport: true }, 16 | ], 17 | }, 18 | } 19 | -------------------------------------------------------------------------------- /.github/workflows/ghcr-clean.yml: -------------------------------------------------------------------------------- 1 | name: Cleanup Untagged Images 2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | delete-untagged-images: 8 | name: Delete Untagged Images 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: bots-house/ghcr-delete-image-action@v1.1.0 12 | with: 13 | owner: firecrawl 14 | name: firecrawl 15 | # NOTE: using Personal Access Token 16 | token: ${{secrets.GITHUB_TOKEN}} 17 | # Keep latest N untagged images 18 | untagged-keep-latest: 5 -------------------------------------------------------------------------------- /apps/api/src/scraper/scrapeURL/transformers/removeBase64Images.ts: -------------------------------------------------------------------------------- 1 | import { Meta } from ".."; 2 | import { Document } from "../../../controllers/v1/types"; 3 | 4 | const regex = /(!\[.*?\])\(data:image\/.*?;base64,.*?\)/g; 5 | 6 | export function removeBase64Images(meta: Meta, document: Document): Document { 7 | if (meta.options.removeBase64Images && document.markdown !== undefined) { 8 | document.markdown = document.markdown.replace( 9 | regex, 10 | "$1()", 11 | ); 12 | } 13 | return document; 14 | } 15 | -------------------------------------------------------------------------------- /apps/test-suite/utils/log.ts: -------------------------------------------------------------------------------- 1 | import { supabase_service } from "./supabase"; 2 | import { WebsiteScrapeError } from 
"./types"; 3 | 4 | export async function logErrors(dataError: WebsiteScrapeError[], time_taken: number, num_tokens:number, score: number, num_pages_tested: number,) { 5 | try { 6 | await supabase_service.from("test_suite_logs").insert([{log:dataError, time_taken, num_tokens, score, num_pages_tested, is_error: dataError.length > 0}]); 7 | } catch (error) { 8 | console.error(`Error logging to supabase: ${error}`); 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /apps/api/src/scraper/WebScraper/__tests__/dns.test.ts: -------------------------------------------------------------------------------- 1 | import CacheableLookup from "cacheable-lookup"; 2 | import https from "node:https"; 3 | import axios from "axios"; 4 | 5 | describe("DNS", () => { 6 | it("cached dns", async () => { 7 | const cachedDns = new CacheableLookup(); 8 | cachedDns.install(https.globalAgent); 9 | jest.spyOn(cachedDns, "lookupAsync"); 10 | 11 | const res = await axios.get("https://example.com"); 12 | expect(res.status).toBe(200); 13 | expect(cachedDns.lookupAsync).toHaveBeenCalled(); 14 | }); 15 | }); 16 | -------------------------------------------------------------------------------- /apps/api/utils/urldump-redis.js: -------------------------------------------------------------------------------- 1 | require("dotenv").config(); 2 | const Redis = require("ioredis"); 3 | 4 | const crawlId = process.argv[2]; 5 | 6 | const redisConnection = new Redis(process.env.REDIS_URL, { 7 | maxRetriesPerRequest: null, 8 | }); 9 | 10 | (async () => { 11 | const res = await redisConnection.sscan("crawl:" + crawlId + ":visited_unique", 0, "COUNT", 999); 12 | await require("fs/promises").writeFile(crawlId + "-visited.txt", res[1].map(x => x.split("://").slice(1).join("://")).sort().join("\n")); 13 | process.exit(0); 14 | })(); -------------------------------------------------------------------------------- /examples/kubernetes/cluster-install/configmap.yaml: 
-------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: firecrawl-config 5 | data: 6 | HOST: "0.0.0.0" 7 | REDIS_URL: "redis://redis:6379" 8 | REDIS_RATE_LIMIT_URL: "redis://redis:6379" 9 | PLAYWRIGHT_MICROSERVICE_URL: "http://playwright-service:3000" 10 | USE_DB_AUTHENTICATION: "false" 11 | SENTRY_ENVIRONMENT: "production" 12 | ENV: "production" 13 | LOGGING_LEVEL: "DEBUG" 14 | IS_KUBERNETES: "true" 15 | NUQ_DATABASE_URL: "postgresql://postgres:password@nuq-postgres:5432/postgres" 16 | -------------------------------------------------------------------------------- /apps/api/src/lib/canonical-url.ts: -------------------------------------------------------------------------------- 1 | export function normalizeUrl(url: string) { 2 | url = url.replace(/^https?:\/\//, "").replace(/^www\./, ""); 3 | if (url.endsWith("/")) { 4 | url = url.slice(0, -1); 5 | } 6 | return url; 7 | } 8 | 9 | export function normalizeUrlOnlyHostname(url: string) { 10 | try { 11 | const urlObj = new URL(url); 12 | return urlObj.hostname.replace(/^www\./, ""); 13 | } catch (error) { 14 | return url 15 | .replace(/^https?:\/\//, "") 16 | .replace(/^www\./, "") 17 | .split("/")[0]; 18 | } 19 | } 20 | -------------------------------------------------------------------------------- /apps/go-html-to-md-service/docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | html-to-markdown: 5 | build: 6 | context: . 
7 | dockerfile: Dockerfile 8 | container_name: html-to-markdown-service 9 | ports: 10 | - "8080:8080" 11 | environment: 12 | - PORT=8080 13 | restart: unless-stopped 14 | healthcheck: 15 | test: ["CMD", "wget", "--no-verbose", "--tries=1", "--spider", "http://localhost:8080/health"] 16 | interval: 30s 17 | timeout: 3s 18 | retries: 3 19 | start_period: 5s 20 | 21 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/v1/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Firecrawl v1 API (Legacy) 3 | 4 | This module provides the legacy v1 API for backward compatibility. 5 | 6 | Usage: 7 | from firecrawl.v1 import V1FirecrawlApp 8 | app = V1FirecrawlApp(api_key="your-api-key") 9 | result = app.scrape_url("https://example.com") 10 | """ 11 | 12 | from .client import V1FirecrawlApp, AsyncV1FirecrawlApp, V1JsonConfig, V1ScrapeOptions, V1ChangeTrackingOptions 13 | 14 | __all__ = ['V1FirecrawlApp', 'AsyncV1FirecrawlApp', 'V1JsonConfig', 'V1ScrapeOptions', 'V1ChangeTrackingOptions'] -------------------------------------------------------------------------------- /apps/test-site/astro.config.mjs: -------------------------------------------------------------------------------- 1 | // @ts-check 2 | import mdx from "@astrojs/mdx"; 3 | import sitemap from "@astrojs/sitemap"; 4 | import { defineConfig } from "astro/config"; 5 | 6 | // TEST_WEBSITE_URL required for deployment 7 | const SITE_URL = 8 | (process.env.VERCEL_URL && `https://${process.env.VERCEL_URL}`) || 9 | process.env.TEST_WEBSITE_URL || 10 | process.env.TEST_SUITE_WEBSITE || 11 | "http://127.0.0.1:4321"; 12 | 13 | export default defineConfig({ 14 | site: SITE_URL, 15 | output: 'static', 16 | integrations: [mdx(), sitemap()], 17 | }); 18 | -------------------------------------------------------------------------------- /apps/test-suite/audit-ci.jsonc: 
-------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://github.com/IBM/audit-ci/raw/main/docs/schema.json", 3 | "low": true, 4 | "allowlist": [ 5 | "GHSA-5j98-mcp5-4vw2|artillery>artillery-plugin-apdex>tap>@tapjs/fixture>rimraf>glob", // not impacted by this 6 | "GHSA-mh29-5h37-fv8m|@jest/globals>@jest/expect>jest-snapshot>@jest/transform>babel-plugin-istanbul>@istanbuljs/load-nyc-config>js-yaml", // not impacted by this 7 | "GHSA-5j98-mcp5-4vw2|jest>@jest/core>@jest/reporters>glob" // we do not use the glob CLI 8 | ] 9 | } 10 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/__tests__/unit/v2/methods/aio/test_aio_crawl_params.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from firecrawl.v2.types import CrawlParamsRequest 3 | from firecrawl.v2.methods.aio import crawl as aio_crawl 4 | 5 | 6 | @pytest.mark.asyncio 7 | async def test_crawl_params_request_validation(): 8 | with pytest.raises(ValueError): 9 | await aio_crawl.crawl_params_preview(None, CrawlParamsRequest(url="", prompt="x")) 10 | with pytest.raises(ValueError): 11 | await aio_crawl.crawl_params_preview(None, CrawlParamsRequest(url="https://x", prompt="")) 12 | 13 | -------------------------------------------------------------------------------- /apps/api/src/lib/custom-error.ts: -------------------------------------------------------------------------------- 1 | export class CustomError extends Error { 2 | statusCode: number; 3 | status: string; 4 | message: string; 5 | dataIngestionJob: any; 6 | 7 | constructor( 8 | statusCode: number, 9 | status: string, 10 | message: string = "", 11 | dataIngestionJob?: any, 12 | ) { 13 | super(message); 14 | this.statusCode = statusCode; 15 | this.status = status; 16 | this.message = message; 17 | this.dataIngestionJob = dataIngestionJob; 18 | 19 | Object.setPrototypeOf(this, 
CustomError.prototype); 20 | } 21 | } 22 | -------------------------------------------------------------------------------- /apps/api/knip.config.ts: -------------------------------------------------------------------------------- 1 | import type { KnipConfig } from "knip"; 2 | 3 | const config: KnipConfig = { 4 | workspaces: { 5 | ".": { 6 | entry: ["src/services/worker/**/*.ts", "src/services/**/*-worker.ts"], 7 | project: ["src/**/*.ts"], 8 | }, 9 | }, 10 | ignore: [ 11 | "native/**", 12 | "src/services/search-index-db.ts", // WIP 13 | "src/lib/search-index-client.ts", // WIP 14 | ], 15 | ignoreDependencies: [ 16 | "openai", 17 | "undici-types", 18 | "@pinecone-database/pinecone", // WIP 19 | ], 20 | }; 21 | 22 | export default config; 23 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/v2/utils/get_version.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from pathlib import Path 4 | 5 | def get_version(): 6 | try: 7 | package_path = Path(__file__).parents[2] 8 | version_file = (package_path / "__init__.py").read_text() 9 | version_match = re.search(r"^__version__ = ['\"]([^'\"]*)['\"]", version_file, re.M) 10 | if version_match: 11 | return version_match.group(1).strip() 12 | return "3.x.x" 13 | except Exception as e: 14 | print(f"Failed to get version from __init__.py: {e}") 15 | return "3.x.x" -------------------------------------------------------------------------------- /apps/test-site/src/pages/blog/[...slug].astro: -------------------------------------------------------------------------------- 1 | --- 2 | import { type CollectionEntry, getCollection, render } from "astro:content"; 3 | import BlogPost from "../../layouts/BlogPost.astro"; 4 | 5 | export async function getStaticPaths() { 6 | const posts = await getCollection("blog"); 7 | return posts.map(post => ({ 8 | params: { slug: post.id }, 9 | props: post, 10 | })); 11 | } 
12 | type Props = CollectionEntry<"blog">; 13 | 14 | const post = Astro.props; 15 | const { Content } = await render(post); 16 | --- 17 | 18 | 19 | 20 | 21 | -------------------------------------------------------------------------------- /apps/test-suite/utils/tokens.ts: -------------------------------------------------------------------------------- 1 | import { encoding_for_model } from "@dqbd/tiktoken"; 2 | import { TiktokenModel } from "@dqbd/tiktoken"; 3 | 4 | // This function calculates the number of tokens in a text string using the tokenizer for the given model 5 | export function numTokensFromString(message: string, model: string): number { 6 | const encoder = encoding_for_model(model as TiktokenModel); 7 | 8 | // Encode the message into tokens 9 | const tokens = encoder.encode(message); 10 | 11 | // Free the encoder resources after use 12 | encoder.free(); 13 | 14 | // Return the number of tokens 15 | return tokens.length; 16 | } 17 | -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/_helpers.tpl: -------------------------------------------------------------------------------- 1 | {{/* 2 | Return the name of the chart. 3 | */}} 4 | {{- define "firecrawl.name" -}} 5 | {{- default .Chart.Name .Values.nameOverride | trunc 63 | trimSuffix "-" -}} 6 | {{- end -}} 7 | 8 | {{/* 9 | Return the fully qualified name of the chart. 
10 | */}} 11 | {{- define "firecrawl.fullname" -}} 12 | {{- $name := default .Chart.Name .Values.nameOverride -}} 13 | {{- if .Values.fullnameOverride -}} 14 | {{- .Values.fullnameOverride | trunc 63 | trimSuffix "-" -}} 15 | {{- else -}} 16 | {{- printf "%s-%s" .Release.Name $name | trunc 63 | trimSuffix "-" -}} 17 | {{- end -}} 18 | {{- end -}} 19 | -------------------------------------------------------------------------------- /apps/test-site/src/content.config.ts: -------------------------------------------------------------------------------- 1 | import { defineCollection, z } from "astro:content"; 2 | import { glob } from "astro/loaders"; 3 | 4 | const blog = defineCollection({ 5 | loader: glob({ base: "./src/content/blog", pattern: "**/*.{md,mdx}" }), 6 | schema: ({ image }) => 7 | z.object({ 8 | title: z.string(), 9 | description: z.string(), 10 | pubDate: z.coerce.date(), 11 | updatedDate: z.coerce.date().optional(), 12 | heroImage: image().optional(), 13 | categories: z.array(z.string()).optional(), 14 | category: z.string().optional(), 15 | }), 16 | }); 17 | 18 | export const collections = { blog }; 19 | -------------------------------------------------------------------------------- /apps/api/sharedLibs/go-html-to-md/go.mod: -------------------------------------------------------------------------------- 1 | module html-to-markdown.go 2 | 3 | go 1.23.0 4 | 5 | toolchain go1.24.0 6 | 7 | require ( 8 | github.com/PuerkitoBio/goquery v1.10.3 9 | github.com/firecrawl/html-to-markdown v0.0.0-20250922154302-32a7ad4a22c3 10 | golang.org/x/net v0.41.0 11 | ) 12 | 13 | require ( 14 | github.com/andybalholm/cascadia v1.3.3 // indirect 15 | github.com/kr/pretty v0.3.0 // indirect 16 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 17 | gopkg.in/yaml.v2 v2.4.0 // indirect 18 | ) 19 | 20 | replace github.com/JohannesKaufmann/html-to-markdown => github.com/firecrawl/html-to-markdown v0.0.0-20250917145228-b6d0a75dfdba 21 | 
-------------------------------------------------------------------------------- /apps/js-sdk/example_v1.js: -------------------------------------------------------------------------------- 1 | import Firecrawl from 'firecrawl'; 2 | 3 | // Placeholder v1 example (JavaScript) 4 | // Mirrors the older SDK usage. Replace with your API key before running. 5 | 6 | async function main() { 7 | const app = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY || 'fc-YOUR_API_KEY' }); 8 | 9 | const scrape = await app.v1.scrapeUrl('firecrawl.dev'); 10 | if (scrape && scrape.success) console.log(scrape.markdown); 11 | 12 | const crawl = await app.v1.crawlUrl('mendable.ai', { excludePaths: ['blog/*'], limit: 3 }); 13 | console.log(crawl); 14 | } 15 | 16 | main().catch((e) => { 17 | console.error(e); 18 | process.exit(1); 19 | }); -------------------------------------------------------------------------------- /apps/js-sdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "js-example", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "example.js", 6 | "type": "module", 7 | "scripts": { 8 | "test": "echo \"Error: no test specified\" && exit 1" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "license": "MIT", 13 | "dependencies": { 14 | "@mendable/firecrawl-js": "^4.3.4", 15 | "axios": "^1.12.2", 16 | "firecrawl": "^4.3.4", 17 | "ts-node": "^10.9.2", 18 | "typescript": "^5.4.5", 19 | "uuid": "^10.0.0", 20 | "zod": "^3.23.8" 21 | }, 22 | "devDependencies": { 23 | "@types/node": "^24.3.0", 24 | "tsx": "^4.9.3" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /examples/deepseek-v3-crawler/.gitignore: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | .env 3 | .env.* 4 | 5 | # Python 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | *.so 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 
| dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # Virtual environments 28 | venv/ 29 | ENV/ 30 | env/ 31 | 32 | # Editor files 33 | .idea/ 34 | .vscode/ 35 | *.swp 36 | *.swo 37 | *~ 38 | 39 | # OS specific files 40 | .DS_Store 41 | .DS_Store? 42 | ._* 43 | .Spotlight-V100 44 | .Trashes 45 | ehthumbs.db 46 | Thumbs.db 47 | 48 | # Logs 49 | *.log 50 | logs/ -------------------------------------------------------------------------------- /examples/deepseek-v3-company-researcher/.gitignore: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | .env 3 | .env.* 4 | 5 | # Python 6 | __pycache__/ 7 | *.py[cod] 8 | *$py.class 9 | *.so 10 | .Python 11 | env/ 12 | build/ 13 | develop-eggs/ 14 | dist/ 15 | downloads/ 16 | eggs/ 17 | .eggs/ 18 | lib/ 19 | lib64/ 20 | parts/ 21 | sdist/ 22 | var/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | 27 | # Virtual environments 28 | venv/ 29 | ENV/ 30 | env/ 31 | 32 | # Editor files 33 | .idea/ 34 | .vscode/ 35 | *.swp 36 | *.swo 37 | *~ 38 | 39 | # OS specific files 40 | .DS_Store 41 | .DS_Store? 
42 | ._* 43 | .Spotlight-V100 44 | .Trashes 45 | ehthumbs.db 46 | Thumbs.db 47 | 48 | # Logs 49 | *.log 50 | logs/ -------------------------------------------------------------------------------- /apps/api/src/lib/default-values.ts: -------------------------------------------------------------------------------- 1 | export const defaultOrigin = "api"; 2 | 3 | export const defaultTimeout = 60000; // 60 seconds 4 | 5 | export const defaultPageOptions = { 6 | onlyMainContent: false, 7 | includeHtml: false, 8 | waitFor: 0, 9 | screenshot: false, 10 | fullPageScreenshot: false, 11 | parsePDF: true, 12 | }; 13 | 14 | export const defaultCrawlerOptions = { 15 | allowBackwardCrawling: false, 16 | limit: 10000, 17 | }; 18 | 19 | export const defaultCrawlPageOptions = { 20 | onlyMainContent: false, 21 | includeHtml: false, 22 | removeTags: [], 23 | parsePDF: true, 24 | }; 25 | 26 | export const defaultExtractorOptions = { 27 | mode: "markdown", 28 | }; 29 | -------------------------------------------------------------------------------- /apps/api/jest.config.ts: -------------------------------------------------------------------------------- 1 | import { createDefaultEsmPreset, type JestConfigWithTsJest } from "ts-jest"; 2 | 3 | const config: JestConfigWithTsJest = { 4 | ...createDefaultEsmPreset(), 5 | verbose: true, 6 | testPathIgnorePatterns: ["/dist/"], 7 | forceExit: true, 8 | detectOpenHandles: true, 9 | openHandlesTimeout: 120000, 10 | watchAll: false, 11 | reporters: [ 12 | "default", 13 | [ 14 | "jest-junit", 15 | { 16 | outputDirectory: "/test-results", 17 | outputName: "junit.xml", 18 | addFileAttribute: true, 19 | suiteNameTemplate: "{filepath}", 20 | }, 21 | ], 22 | ], 23 | }; 24 | 25 | export default config; 26 | -------------------------------------------------------------------------------- /apps/api/src/lib/extract/build-document.ts: -------------------------------------------------------------------------------- 1 | import { Document } from 
"../../controllers/v1/types"; 2 | 3 | export function buildDocument(document: Document): string { 4 | const metadata = document.metadata; 5 | const markdown = document.markdown; 6 | 7 | // for each key in the metadata allow up to 250 characters 8 | const metadataString = Object.entries(metadata) 9 | .map(([key, value]) => { 10 | return `${key}: ${value?.toString().slice(0, 250)}`; 11 | }) 12 | .join("\n"); 13 | 14 | const documentMetadataString = `\n- - - - - Page metadata - - - - -\n${metadataString}`; 15 | const documentString = `${markdown}${documentMetadataString}`; 16 | return documentString; 17 | } 18 | -------------------------------------------------------------------------------- /apps/api/src/services/billing/issue_credits.ts: -------------------------------------------------------------------------------- 1 | import { logger } from "../../lib/logger"; 2 | import { supabase_service } from "../supabase"; 3 | 4 | export async function issueCredits(team_id: string, credits: number) { 5 | // Add an entry to supabase coupons 6 | const { error } = await supabase_service.from("coupons").insert({ 7 | team_id: team_id, 8 | credits: credits, 9 | status: "active", 10 | // indicates that this coupon was issued from auto recharge 11 | from_auto_recharge: true, 12 | initial_credits: credits, 13 | }); 14 | 15 | if (error) { 16 | logger.error(`Error adding coupon: ${error}`); 17 | return false; 18 | } 19 | 20 | return true; 21 | } 22 | -------------------------------------------------------------------------------- /examples/llama-4-maverick-web-crawler/.gitignore: -------------------------------------------------------------------------------- 1 | # Dependencies 2 | node_modules/ 3 | venv/ 4 | .env 5 | .env.local 6 | .env.*.local 7 | 8 | # Build outputs 9 | dist/ 10 | build/ 11 | *.pyc 12 | __pycache__/ 13 | .cache/ 14 | .pytest_cache/ 15 | 16 | # IDE and editor files 17 | .idea/ 18 | .vscode/ 19 | *.swp 20 | *.swo 21 | .DS_Store 22 | Thumbs.db 23 | 24 | # Logs 25 | 
*.log 26 | npm-debug.log* 27 | yarn-debug.log* 28 | yarn-error.log* 29 | 30 | # Coverage and test reports 31 | coverage/ 32 | .coverage 33 | htmlcov/ 34 | 35 | # Temporary files 36 | *.tmp 37 | *.temp 38 | .tmp/ 39 | temp/ 40 | 41 | # System files 42 | .DS_Store 43 | .DS_Store? 44 | ._* 45 | .Spotlight-V100 46 | .Trashes 47 | ehthumbs.db 48 | Thumbs.db -------------------------------------------------------------------------------- /apps/api/src/lib/extract/fire-0/build-document-f0.ts: -------------------------------------------------------------------------------- 1 | import { Document } from "../../../controllers/v1/types"; 2 | 3 | export function buildDocument_F0(document: Document): string { 4 | const metadata = document.metadata; 5 | const markdown = document.markdown; 6 | 7 | // for each key in the metadata allow up to 250 characters 8 | const metadataString = Object.entries(metadata) 9 | .map(([key, value]) => { 10 | return `${key}: ${value?.toString().slice(0, 250)}`; 11 | }) 12 | .join("\n"); 13 | 14 | const documentMetadataString = `\n- - - - - Page metadata - - - - -\n${metadataString}`; 15 | const documentString = `${markdown}${documentMetadataString}`; 16 | return documentString; 17 | } 18 | -------------------------------------------------------------------------------- /apps/test-site/src/components/HeaderLink.astro: -------------------------------------------------------------------------------- 1 | --- 2 | import type { HTMLAttributes } from 'astro/types'; 3 | 4 | type Props = HTMLAttributes<'a'>; 5 | 6 | const { href, class: className, ...props } = Astro.props; 7 | const pathname = Astro.url.pathname.replace(import.meta.env.BASE_URL, ''); 8 | const subpath = pathname.match(/[^\/]+/g); 9 | const isActive = href === pathname || href === '/' + (subpath?.[0] || ''); 10 | --- 11 | 12 | 13 | 14 | 15 | 25 | -------------------------------------------------------------------------------- /apps/api/native/.gitattributes: 
-------------------------------------------------------------------------------- 1 | # Auto detect text files and perform LF normalization 2 | * text=auto 3 | 4 | 5 | *.ts text eol=lf merge=union 6 | *.tsx text eol=lf merge=union 7 | *.rs text eol=lf merge=union 8 | *.js text eol=lf merge=union 9 | *.json text eol=lf merge=union 10 | *.debug text eol=lf merge=union 11 | 12 | # Generated codes 13 | index.js linguist-detectable=false 14 | index.d.ts linguist-detectable=false 15 | firecrawl-rs.wasi-browser.js linguist-detectable=false 16 | firecrawl-rs.wasi.cjs linguist-detectable=false 17 | wasi-worker-browser.mjs linguist-detectable=false 18 | wasi-worker.mjs linguist-detectable=false 19 | -------------------------------------------------------------------------------- /apps/api/src/lib/deployment.ts: -------------------------------------------------------------------------------- 1 | import { config } from "../config"; 2 | export function isSelfHosted(): boolean { 3 | return config.USE_DB_AUTHENTICATION !== true; 4 | } 5 | 6 | export function getErrorContactMessage(errorId?: string): string { 7 | if (isSelfHosted()) { 8 | return errorId 9 | ? `An error occurred. Please check your logs for more details. Error ID: ${errorId}` 10 | : "An error occurred. Please check your logs for more details."; 11 | } else { 12 | return errorId 13 | ? `An unexpected error occurred. Please contact help@firecrawl.com for help. Your exception ID is ${errorId}` 14 | : "An unexpected error occurred. 
Please contact help@firecrawl.com for help."; 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /apps/api/src/lib/parseApi.ts: -------------------------------------------------------------------------------- 1 | export function parseApi(api: string) { 2 | // Handle older versions of the API that don't have the fc- prefix 3 | if (!api.startsWith("fc-")) { 4 | return api; 5 | } 6 | 7 | // remove the fc- prefix 8 | // re add all the dashes based on the uuidv4 format 9 | // 3d478a29-6e59-403e-85c7-94aba81ffd2a 10 | const uuid = api 11 | .replace(/^fc-/, "") 12 | .replace(/(.{8})(.{4})(.{4})(.{4})(.{12})/, "$1-$2-$3-$4-$5"); 13 | return uuid; 14 | } 15 | 16 | export function apiKeyToFcApiKey(apiKey: string | null | undefined) { 17 | if (!apiKey) { 18 | return null; 19 | } 20 | const uuidWithoutDashes = apiKey.replace(/-/g, ""); 21 | return `fc-${uuidWithoutDashes}`; 22 | } 23 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/__tests__/unit/v2/methods/test_usage_types.py: -------------------------------------------------------------------------------- 1 | from firecrawl.v2.types import ConcurrencyCheck, CreditUsage, TokenUsage 2 | 3 | 4 | class TestUsageTypes: 5 | def test_concurrency_check_model(self): 6 | cc = ConcurrencyCheck(concurrency=3, max_concurrency=10) 7 | assert cc.concurrency == 3 8 | assert cc.max_concurrency == 10 9 | 10 | def test_credit_usage_model(self): 11 | cu = CreditUsage(remaining_credits=123) 12 | assert isinstance(cu.remaining_credits, int) 13 | assert cu.remaining_credits == 123 14 | 15 | def test_token_usage_model(self): 16 | tu = TokenUsage(remaining_tokens=10) 17 | assert tu.remaining_tokens == 10 18 | 19 | -------------------------------------------------------------------------------- /examples/scrape_and_analyze_airbnb_data_e2b/package.json: -------------------------------------------------------------------------------- 1 | { 2 | 
"name": "hello-world", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "start": "tsx index.ts", 8 | "test": "echo \"Error: no test specified\" && exit 1" 9 | }, 10 | "keywords": [], 11 | "author": "", 12 | "license": "ISC", 13 | "devDependencies": { 14 | "@types/node": "^20.12.12", 15 | "prettier": "3.2.5", 16 | "tsx": "^4.7.3", 17 | "typescript": "^5.4.5" 18 | }, 19 | "dependencies": { 20 | "@anthropic-ai/sdk": "^0.20.7", 21 | "@e2b/code-interpreter": "^0.0.2", 22 | "@mendable/firecrawl-js": "^4.3.5", 23 | "buffer": "^6.0.3", 24 | "dotenv": "^16.4.5" 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /apps/api/src/__tests__/snips/v2/crawl-prompt.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "@jest/globals"; 2 | 3 | describe("V2 Crawl API with Prompt", () => { 4 | it("should accept prompt parameter in schema", () => { 5 | expect(true).toBe(true); 6 | }); 7 | 8 | it("should prioritize explicit options over prompt-generated options", () => { 9 | expect(true).toBe(true); 10 | }); 11 | 12 | it("should work without prompt parameter", () => { 13 | expect(true).toBe(true); 14 | }); 15 | 16 | it("should handle invalid prompt gracefully", () => { 17 | expect(true).toBe(true); 18 | }); 19 | 20 | it("should validate regex patterns in generated includePaths", () => { 21 | expect(true).toBe(true); 22 | }); 23 | }); 24 | -------------------------------------------------------------------------------- /apps/api/src/services/idempotency/create.ts: -------------------------------------------------------------------------------- 1 | import { Request } from "express"; 2 | import { supabase_service } from "../supabase"; 3 | import { logger } from "../../../src/lib/logger"; 4 | 5 | export async function createIdempotencyKey(req: Request): Promise { 6 | const idempotencyKey = req.headers["x-idempotency-key"] as 
string; 7 | if (!idempotencyKey) { 8 | throw new Error("No idempotency key provided in the request headers."); 9 | } 10 | 11 | const { data, error } = await supabase_service 12 | .from("idempotency_keys") 13 | .insert({ key: idempotencyKey }); 14 | 15 | if (error) { 16 | logger.error(`Failed to create idempotency key: ${error}`); 17 | throw error; 18 | } 19 | 20 | return idempotencyKey; 21 | } 22 | -------------------------------------------------------------------------------- /apps/api/src/natives.ts: -------------------------------------------------------------------------------- 1 | import { platform } from "os"; 2 | import { join } from "path"; 3 | 4 | const currentPlatform = platform(); 5 | const isWindows = currentPlatform === "win32"; 6 | 7 | const EXTENSIONS = { 8 | win32: ".dll", 9 | darwin: ".dylib", 10 | default: ".so", 11 | } as const; 12 | 13 | function createNativePath(subPath: string, filename: string): string { 14 | const extension = 15 | EXTENSIONS[currentPlatform as keyof typeof EXTENSIONS] ?? 16 | EXTENSIONS.default; 17 | const fullFilename = `${isWindows ? 
"" : "lib"}${filename}${extension}`; 18 | return join(process.cwd(), "sharedLibs", subPath, fullFilename); 19 | } 20 | 21 | export const HTML_TO_MARKDOWN_PATH = createNativePath( 22 | "go-html-to-md", 23 | "html-to-markdown", 24 | ); 25 | -------------------------------------------------------------------------------- /apps/api/src/lib/extract/team-id-sync.ts: -------------------------------------------------------------------------------- 1 | import { supabase_rr_service, supabase_service } from "../../services/supabase"; 2 | import { logger } from "../logger"; 3 | 4 | import { withAuth } from "../withAuth"; 5 | 6 | async function getTeamIdSyncBOriginal(teamId: string) { 7 | try { 8 | const { data, error } = await supabase_rr_service 9 | .from("eb-sync") 10 | .select("team_id") 11 | .eq("team_id", teamId) 12 | .limit(1); 13 | if (error) { 14 | throw new Error("Error getting team id (sync b)"); 15 | } 16 | return data[0] ?? null; 17 | } catch (error) { 18 | logger.error("Error getting team id (sync b)", error); 19 | return null; 20 | } 21 | } 22 | 23 | export const getTeamIdSyncB = withAuth(getTeamIdSyncBOriginal, null); 24 | -------------------------------------------------------------------------------- /apps/test-site/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "firecrawl-test-site", 3 | "type": "module", 4 | "version": "0.0.1", 5 | "scripts": { 6 | "dev": "astro dev", 7 | "build": "astro build", 8 | "preview": "astro preview", 9 | "test:prepare": "astro build && astro preview --host 127.0.0.1 --port 4321 --strictPort", 10 | "astro": "astro" 11 | }, 12 | "dependencies": { 13 | "@astrojs/mdx": "^4.3.12", 14 | "@astrojs/rss": "^4.0.12", 15 | "@astrojs/sitemap": "^3.6.0", 16 | "astro": "^5.16.0", 17 | "sharp": "^0.34.3" 18 | }, 19 | "devDependencies": { 20 | "prettier": "^3.6.2", 21 | "prettier-plugin-astro": "^0.14.1" 22 | }, 23 | "pnpm": { 24 | "onlyBuiltDependencies": [ 25 | "esbuild", 26 | 
"sharp" 27 | ] 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/configmap.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: {{ include "firecrawl.fullname" . }}-config 5 | data: 6 | NUM_WORKERS_PER_QUEUE: {{ .Values.config.NUM_WORKERS_PER_QUEUE | quote }} 7 | PORT: {{ .Values.config.PORT | quote }} 8 | HOST: {{ .Values.config.HOST | quote }} 9 | REDIS_URL: {{ .Values.config.REDIS_URL | quote }} 10 | REDIS_RATE_LIMIT_URL: {{ .Values.config.REDIS_RATE_LIMIT_URL | quote }} 11 | PLAYWRIGHT_MICROSERVICE_URL: {{ .Values.config.PLAYWRIGHT_MICROSERVICE_URL | quote }} 12 | USE_DB_AUTHENTICATION: {{ .Values.config.USE_DB_AUTHENTICATION | quote }} 13 | HDX_NODE_BETA_MODE: {{ .Values.config.HDX_NODE_BETA_MODE | quote }} 14 | NUQ_DATABASE_URL: {{ .Values.config.NUQ_DATABASE_URL | quote }} 15 | -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | // See https://www.totaltypescript.com/tsconfig-cheat-sheet 4 | /* Base Options: */ 5 | "esModuleInterop": true, 6 | "skipLibCheck": true, 7 | "target": "es2022", 8 | "allowJs": true, 9 | "resolveJsonModule": true, 10 | "moduleDetection": "force", 11 | "isolatedModules": true, 12 | "verbatimModuleSyntax": true, 13 | 14 | /* Strictness */ 15 | "strict": true, 16 | "noUncheckedIndexedAccess": true, 17 | "noImplicitOverride": true, 18 | 19 | /* If NOT transpiling with TypeScript: */ 20 | "module": "ESNext", 21 | "moduleResolution": "Bundler", 22 | "noEmit": true, 23 | }, 24 | "include": ["src/**/*"], 25 | "exclude": ["node_modules", "dist", "**/__tests__/*"] 26 | } 27 | -------------------------------------------------------------------------------- 
/apps/js-sdk/example_v1.ts: -------------------------------------------------------------------------------- 1 | // Placeholder v1 example (TypeScript) 2 | // Mirrors the older SDK usage. Replace with your API key before running. 3 | 4 | // import FirecrawlApp from 'firecrawl'; 5 | import Firecrawl from './firecrawl/src/index' 6 | 7 | async function main() { 8 | const app = new Firecrawl({ apiKey: process.env.FIRECRAWL_API_KEY || 'fc-YOUR_API_KEY' }); 9 | 10 | // Scrape a website (v1 style): 11 | const scrape = await app.v1.scrapeUrl('firecrawl.dev'); 12 | if ((scrape as any).success) console.log((scrape as any).markdown); 13 | 14 | // Crawl a website (v1 style): 15 | const crawl = await app.v1.crawlUrl('mendable.ai', { excludePaths: ['blog/*'], limit: 3 }); 16 | console.log(crawl); 17 | } 18 | 19 | main().catch((e) => { 20 | console.error(e); 21 | process.exit(1); 22 | }); 23 | 24 | -------------------------------------------------------------------------------- /apps/api/src/services/alerts/slack.ts: -------------------------------------------------------------------------------- 1 | import axios from "axios"; 2 | import { logger } from "../../../src/lib/logger"; 3 | 4 | import { config } from "../../config"; 5 | export async function sendSlackWebhook( 6 | message: string, 7 | alertEveryone: boolean = false, 8 | webhookUrl: string = config.SLACK_WEBHOOK_URL ?? "", 9 | ) { 10 | const messagePrefix = alertEveryone ? 
" " : ""; 11 | const payload = { 12 | text: `${messagePrefix} ${message}`, 13 | }; 14 | 15 | try { 16 | const response = await axios.post(webhookUrl, payload, { 17 | headers: { 18 | "Content-Type": "application/json", 19 | }, 20 | }); 21 | logger.info("Webhook sent successfully:", response.data); 22 | } catch (error) { 23 | logger.debug(`Error sending webhook: ${error}`); 24 | } 25 | } 26 | -------------------------------------------------------------------------------- /apps/playwright-service-ts/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "playwright-scraper-api", 3 | "version": "1.0.0", 4 | "description": "scraper api with playwright", 5 | "main": "api.ts", 6 | "scripts": { 7 | "start": "node dist/api.js", 8 | "build": "tsc", 9 | "dev": "ts-node api.ts" 10 | }, 11 | "keywords": [], 12 | "author": "Jeff Pereira", 13 | "license": "ISC", 14 | "dependencies": { 15 | "body-parser": "^1.20.2", 16 | "dotenv": "^16.4.5", 17 | "express": "4.22.0", 18 | "playwright": "^1.55.1", 19 | "user-agents": "^1.1.410" 20 | }, 21 | "devDependencies": { 22 | "@types/body-parser": "^1.19.5", 23 | "@types/express": "^4.17.21", 24 | "@types/node": "^20.14.9", 25 | "@types/user-agents": "^1.0.4", 26 | "ts-node": "^10.9.2", 27 | "typescript": "^5.5.2" 28 | } 29 | } 30 | -------------------------------------------------------------------------------- /apps/redis/start-redis-server.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | set -e 4 | 5 | sysctl vm.overcommit_memory=1 || true 6 | sysctl net.core.somaxconn=1024 || true 7 | 8 | PW_ARG="" 9 | if [[ ! 
-z "${REDIS_PASSWORD}" ]]; then 10 | PW_ARG="--requirepass $REDIS_PASSWORD" 11 | fi 12 | 13 | : ${MAXMEMORY_POLICY:="noeviction"} 14 | : ${APPENDONLY:="no"} 15 | : ${FLY_VM_MEMORY_MB:=$(($(grep MemTotal /proc/meminfo | awk '{print $2}') / 1024))} 16 | if [ "${NOSAVE}" = "" ] ; then 17 | : ${SAVE:="3600 1 300 100 60 10000"} 18 | fi 19 | # Set maxmemory to 80% of RAM 20 | MAXMEMORY=$(($FLY_VM_MEMORY_MB*80/100)) 21 | 22 | mkdir -p /data/redis 23 | 24 | redis-server $PW_ARG \ 25 | --dir /data/redis \ 26 | --maxmemory "${MAXMEMORY}mb" \ 27 | --maxmemory-policy $MAXMEMORY_POLICY \ 28 | --appendonly $APPENDONLY \ 29 | --save "$SAVE" \ 30 | --protected-mode no 31 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/src/components/ui/label.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | import * as LabelPrimitive from "@radix-ui/react-label" 3 | import { cva, type VariantProps } from "class-variance-authority" 4 | 5 | import { cn } from "@/lib/utils" 6 | 7 | const labelVariants = cva( 8 | "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70" 9 | ) 10 | 11 | const Label = React.forwardRef< 12 | React.ElementRef, 13 | React.ComponentPropsWithoutRef & 14 | VariantProps 15 | >(({ className, ...props }, ref) => ( 16 | 21 | )) 22 | Label.displayName = LabelPrimitive.Root.displayName 23 | 24 | export { Label } 25 | -------------------------------------------------------------------------------- /.github/workflows/deploy-image-staging.yml: -------------------------------------------------------------------------------- 1 | name: STAGING Deploy Images to GHCR 2 | 3 | env: 4 | DOTNET_VERSION: '6.0.x' 5 | 6 | on: 7 | workflow_dispatch: 8 | 9 | jobs: 10 | push-app-image: 11 | runs-on: ubuntu-latest 12 | defaults: 13 | run: 14 | working-directory: './apps/api' 15 | steps: 16 | - name: 'Checkout GitHub Action' 17 | uses: 
actions/checkout@main 18 | 19 | - name: 'Login to GitHub Container Registry' 20 | uses: docker/login-action@v3 21 | with: 22 | registry: ghcr.io 23 | username: ${{github.actor}} 24 | password: ${{secrets.GITHUB_TOKEN}} 25 | 26 | - name: 'Build Inventory Image' 27 | run: | 28 | docker build . --tag ghcr.io/firecrawl/firecrawl-staging:latest 29 | docker push ghcr.io/firecrawl/firecrawl-staging:latest -------------------------------------------------------------------------------- /apps/api/requests/v2/map.requests.http: -------------------------------------------------------------------------------- 1 | # Pick your baseUrl here: 2 | @baseUrl = http://localhost:3002 3 | # @baseUrl = https://api.firecrawl.dev 4 | 5 | 6 | ### Crawl 7 | # @name crawl 8 | POST {{baseUrl}}/v2/map HTTP/1.1 9 | Authorization: Bearer {{$dotenv TEST_API_KEY}} 10 | content-type: application/json 11 | 12 | { 13 | "url": "https://firecrawl.dev", 14 | "limit": 2 15 | } 16 | 17 | # { 18 | # "web": [ 19 | # { 20 | # "url": "https://firecrawl.dev", 21 | # "title": "Firecrawl", 22 | # "description": "Firecrawl is a platform for crawling and mapping websites." 23 | # }, 24 | # { 25 | # "url": "https://firecrawl.dev/blog", 26 | # "title": "Firecrawl Blog", 27 | # "description": "Firecrawl Blog is a blog about Firecrawl." 
28 | # } 29 | # ] 30 | # } -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/src/__tests__/unit/v2/errorHandler.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, test, expect } from "@jest/globals"; 2 | import { throwForBadResponse, normalizeAxiosError } from "../../../v2/utils/errorHandler"; 3 | 4 | describe("v2 utils: errorHandler", () => { 5 | test("throwForBadResponse: throws SdkError with message from body.error", () => { 6 | const resp: any = { status: 400, data: { error: "bad" } }; 7 | expect(() => throwForBadResponse(resp, "do thing")).toThrow(/bad/); 8 | }); 9 | 10 | test("normalizeAxiosError: prefers body.error then err.message", () => { 11 | const err: any = { 12 | isAxiosError: true, 13 | response: { status: 402, data: { error: "payment required" } }, 14 | message: "network", 15 | }; 16 | expect(() => normalizeAxiosError(err, "action")).toThrow(/payment required/); 17 | }); 18 | }); 19 | 20 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/__tests__/unit/v2/utils/test_metadata_extras_multivalue.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | 3 | from firecrawl.v2.types import Document 4 | from firecrawl.v2.utils.normalize import normalize_document_input 5 | 6 | 7 | class TestMetadataMultiValue: 8 | def test_article_tag_list_coerced_to_string(self): 9 | raw = { 10 | "markdown": "# Body", 11 | "metadata": { 12 | "title": "Page", 13 | "articleTag": ["one", "two"], 14 | }, 15 | } 16 | doc = Document(**normalize_document_input(raw)) 17 | # typed access works and is joined as string 18 | assert doc.metadata is not None 19 | assert doc.metadata.article_tag == "one, two" 20 | # dict view shows string 21 | md = doc.metadata_dict 22 | assert md["article_tag"] == "one, two" 23 | 
-------------------------------------------------------------------------------- /apps/api/src/utils/integration.ts: -------------------------------------------------------------------------------- 1 | import { z } from "zod"; 2 | 3 | enum IntegrationEnum { 4 | DIFY = "dify", 5 | ZAPIER = "zapier", 6 | PIPEDREAM = "pipedream", 7 | RAYCAST = "raycast", 8 | LANGCHAIN = "langchain", 9 | CREWAI = "crewai", 10 | LLAMAINDEX = "llamaindex", 11 | N8N = "n8n", 12 | CAMELAI = "camelai", 13 | MAKE = "make", 14 | FLOWISE = "flowise", 15 | METAGPT = "metagpt", 16 | RELEVANCEAI = "relevanceai", 17 | VIASOCKET = "viasocket", 18 | } 19 | 20 | export const integrationSchema = z 21 | .string() 22 | .refine( 23 | val => 24 | (typeof val === "string" && val.startsWith("_")) || 25 | Object.values(IntegrationEnum).includes(val as any), 26 | { 27 | message: `Invalid enum value. Expected ${Object.values(IntegrationEnum) 28 | .map(v => `'${v}'`) 29 | .join(" | ")}`, 30 | }, 31 | ); 32 | -------------------------------------------------------------------------------- /apps/go-html-to-md-service/go.mod: -------------------------------------------------------------------------------- 1 | module github.com/firecrawl/go-html-to-md-service 2 | 3 | go 1.23.0 4 | 5 | require ( 6 | github.com/PuerkitoBio/goquery v1.10.3 7 | github.com/firecrawl/html-to-markdown v0.0.0-20250922154302-32a7ad4a22c3 8 | github.com/gorilla/mux v1.8.1 9 | github.com/rs/zerolog v1.33.0 10 | golang.org/x/net v0.41.0 11 | ) 12 | 13 | require ( 14 | github.com/andybalholm/cascadia v1.3.3 // indirect 15 | github.com/kr/pretty v0.3.0 // indirect 16 | github.com/mattn/go-colorable v0.1.13 // indirect 17 | github.com/mattn/go-isatty v0.0.20 // indirect 18 | golang.org/x/sys v0.33.0 // indirect 19 | gopkg.in/check.v1 v1.0.0-20201130134442-10cb98267c6c // indirect 20 | gopkg.in/yaml.v2 v2.4.0 // indirect 21 | ) 22 | 23 | replace github.com/JohannesKaufmann/html-to-markdown => github.com/firecrawl/html-to-markdown 
v0.0.0-20250917145228-b6d0a75dfdba 24 | -------------------------------------------------------------------------------- /apps/api/src/__tests__/snips/v0/lib.ts: -------------------------------------------------------------------------------- 1 | import request from "supertest"; 2 | import { 3 | TEST_API_URL, 4 | scrapeTimeout, 5 | indexCooldown, 6 | Identity, 7 | idmux, 8 | } from "../lib"; 9 | 10 | // Re-export shared utilities for backwards compatibility 11 | export { scrapeTimeout, indexCooldown, Identity, idmux }; 12 | 13 | export interface V0ScrapeRequestInput { 14 | url: string; 15 | pageOptions?: any; 16 | extractorOptions?: any; 17 | crawlerOptions?: any; 18 | timeout?: number; 19 | origin?: string; 20 | integration?: string; 21 | } 22 | 23 | export async function scrapeRaw( 24 | body: V0ScrapeRequestInput, 25 | identity: Identity, 26 | ) { 27 | return await request(TEST_API_URL) 28 | .post("/v0/scrape") 29 | .set("Authorization", `Bearer ${identity.apiKey}`) 30 | .set("Content-Type", "application/json") 31 | .send(body); 32 | } 33 | -------------------------------------------------------------------------------- /apps/api/requests/v2/search.requests.http: -------------------------------------------------------------------------------- 1 | # Pick your baseUrl here: 2 | @baseUrl = http://localhost:3002 3 | # @baseUrl = https://api.firecrawl.dev 4 | 5 | 6 | ### Search - Simple string array format 7 | # @name searchSimple 8 | POST {{baseUrl}}/v2/search HTTP/1.1 9 | Authorization: Bearer {{$dotenv TEST_API_KEY}} 10 | content-type: application/json 11 | 12 | { 13 | "query": "firecrawl", 14 | "sources": ["web", "images", "news"], 15 | "limit": 5 16 | } 17 | 18 | ### Search - Object array format (with custom parameters per source) 19 | # @name searchAdvanced 20 | POST {{baseUrl}}/v2/search HTTP/1.1 21 | Authorization: Bearer {{$dotenv TEST_API_KEY}} 22 | content-type: application/json 23 | 24 | { 25 | "query": "firecrawl", 26 | "sources": [ 27 | { 28 | "type": 
"web" 29 | }, 30 | { 31 | "type": "images" 32 | } 33 | ], 34 | "limit": 5 35 | } 36 | -------------------------------------------------------------------------------- /apps/api/src/lib/withAuth.ts: -------------------------------------------------------------------------------- 1 | import { AuthResponse } from "../../src/types"; 2 | import { logger } from "./logger"; 3 | import * as Sentry from "@sentry/node"; 4 | import { configDotenv } from "dotenv"; 5 | import { config } from "../config"; 6 | configDotenv(); 7 | 8 | let warningCount = 0; 9 | 10 | export function withAuth( 11 | originalFunction: (...args: U) => Promise, 12 | mockSuccess: T, 13 | ) { 14 | return async function (...args: U): Promise { 15 | const useDbAuthentication = config.USE_DB_AUTHENTICATION; 16 | if (!useDbAuthentication) { 17 | if (warningCount < 5) { 18 | logger.warn("You're bypassing authentication"); 19 | warningCount++; 20 | } 21 | return { success: true, ...(mockSuccess || {}) } as T; 22 | } else { 23 | return await originalFunction(...args); 24 | } 25 | }; 26 | } 27 | -------------------------------------------------------------------------------- /examples/scrape_and_analyze_airbnb_data_e2b/README.md: -------------------------------------------------------------------------------- 1 | # Scrape and Analyze Airbnb Data with Firecrawl and E2B 2 | 3 | This example demonstrates how to scrape Airbnb data and analyze it using [Firecrawl](https://www.firecrawl.dev/) and the [Code Interpreter SDK](https://github.com/e2b-dev/code-interpreter) from E2B. 4 | 5 | ## Prerequisites 6 | 7 | - Node.js installed on your machine 8 | - An E2B API key 9 | - A Firecrawl API key 10 | - A Anthropic API key 11 | 12 | ## Setup & run 13 | 14 | ### 1. Install dependencies 15 | 16 | ``` 17 | npm install 18 | ``` 19 | 20 | ### 2. Set up `.env` 21 | 22 | 1. Copy `.env.template` to `.env` 23 | 2. Get [E2B API key](https://e2b.dev/docs/getting-started/api-key) 24 | 3. 
Get [Firecrawl API key](https://firecrawl.dev) 25 | 4. Get [Anthropic API key](https://anthropic.com) 26 | 27 | ### 3. Run the example 28 | 29 | ``` 30 | npm run start 31 | ``` -------------------------------------------------------------------------------- /.github/workflows/deploy-nuq-postgres.yml: -------------------------------------------------------------------------------- 1 | name: Deploy NuQ Postgres to GHCR 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - apps/nuq-postgres/** 9 | workflow_dispatch: 10 | 11 | jobs: 12 | push-app-image: 13 | runs-on: ubuntu-latest 14 | defaults: 15 | run: 16 | working-directory: './apps/nuq-postgres' 17 | steps: 18 | - name: 'Checkout GitHub Action' 19 | uses: actions/checkout@main 20 | 21 | - name: 'Login to GitHub Container Registry' 22 | uses: docker/login-action@v3 23 | with: 24 | registry: ghcr.io 25 | username: ${{github.actor}} 26 | password: ${{secrets.GITHUB_TOKEN}} 27 | 28 | - name: 'Build NuQ Postgres Image' 29 | run: | 30 | docker build . 
--tag ghcr.io/firecrawl/nuq-postgres:latest 31 | docker push ghcr.io/firecrawl/nuq-postgres:latest 32 | -------------------------------------------------------------------------------- /apps/api/native/Cargo.toml: -------------------------------------------------------------------------------- 1 | 2 | [package] 3 | authors = ["Firecrawl"] 4 | edition = "2021" 5 | name = "firecrawl_rs" 6 | version = "0.1.0" 7 | 8 | [lib] 9 | crate-type = ["cdylib"] 10 | 11 | [dependencies] 12 | chrono = { version = "0.4", features = ["serde"] } 13 | kuchikiki = "0.8.2" 14 | lol_html = "2.6.0" 15 | lopdf = "0.38.0" 16 | maud = "0.27.0" 17 | napi = { version = "3.0.0", features = ["serde-json", "tokio_rt"] } 18 | napi-derive = "3.0.0" 19 | nodesig = { git = "https://github.com/firecrawl/nodesig" } 20 | psl = "2.1.140" 21 | regex = "1.11.2" 22 | roxmltree = "0.20.0" 23 | serde = { version = "1.0.219", features = ["derive"] } 24 | serde_json = "1.0.143" 25 | strsim = "0.11" 26 | texting_robots = "0.2.2" 27 | url = "2.5.7" 28 | zip = "5.0.0" 29 | calamine = "0.26" 30 | tokio = "1.48.0" 31 | 32 | [build-dependencies] 33 | napi-build = "2" 34 | 35 | [profile.release] 36 | lto = true 37 | strip = "symbols" 38 | -------------------------------------------------------------------------------- /apps/js-sdk/firecrawl/src/v2/utils/errorHandler.ts: -------------------------------------------------------------------------------- 1 | import { type AxiosError, type AxiosResponse } from "axios"; 2 | import { SdkError } from "../types"; 3 | 4 | export function throwForBadResponse(resp: AxiosResponse, action: string): never { 5 | const status = resp.status; 6 | const body = resp.data || {}; 7 | const msg = body?.error || body?.message || `Request failed (${status}) while trying to ${action}`; 8 | throw new SdkError(msg, status, undefined, body?.details); 9 | } 10 | 11 | export function normalizeAxiosError(err: AxiosError, action: string): never { 12 | const status = err.response?.status; 13 | const 
body: any = err.response?.data; 14 | const message = body?.error || err.message || `Request failed${status ? ` (${status})` : ""} while trying to ${action}`; 15 | const code = (body?.code as string) || err.code; 16 | throw new SdkError(message, status, code, body?.details ?? body); 17 | } 18 | 19 | -------------------------------------------------------------------------------- /apps/api/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "rootDir": "./src", 4 | "lib": ["ES2022", "DOM"], 5 | 6 | // or higher 7 | "target": "ES2022", 8 | 9 | "module": "NodeNext", 10 | "esModuleInterop": true, 11 | "sourceMap": true, 12 | "outDir": "./dist/src", 13 | "moduleResolution": "NodeNext", 14 | "strictNullChecks": true, 15 | 16 | "inlineSources": true, 17 | 18 | "typeRoots": ["./src/types", "./node_modules/@types"], 19 | "paths": { 20 | "x402": ["./src/types/x402"], 21 | "x402/*": ["./src/types/x402"], 22 | "x402-express": ["./src/types/x402"] 23 | } 24 | }, 25 | "include": [ 26 | "src/", 27 | "src/**/*", 28 | "services/db/supabase.ts", 29 | "utils/utils.ts", 30 | "services/db/supabaseEmbeddings.ts", 31 | "utils/EventEmmitter.ts", 32 | "src/services/queue-service.ts", 33 | "src/types/x402.d.ts" 34 | ] 35 | } 36 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/tsconfig.app.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "composite": true, 4 | "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo", 5 | "target": "ES2020", 6 | "useDefineForClassFields": true, 7 | "lib": ["ES2020", "DOM", "DOM.Iterable"], 8 | "module": "ESNext", 9 | "skipLibCheck": true, 10 | 11 | /* Bundler mode */ 12 | "moduleResolution": "bundler", 13 | "allowImportingTsExtensions": true, 14 | "resolveJsonModule": true, 15 | "isolatedModules": true, 16 | "moduleDetection": "force", 17 | 
"noEmit": true, 18 | "jsx": "react-jsx", 19 | 20 | /* Linting */ 21 | "strict": true, 22 | "noUnusedLocals": true, 23 | "noUnusedParameters": true, 24 | "noFallthroughCasesInSwitch": true, 25 | "baseUrl": ".", 26 | "paths": { 27 | "@/*": [ 28 | "./src/*" 29 | ] 30 | } 31 | }, 32 | "include": ["src"] 33 | } 34 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/concurrency-check.ts: -------------------------------------------------------------------------------- 1 | import { 2 | ConcurrencyCheckParams, 3 | ConcurrencyCheckResponse, 4 | RequestWithAuth, 5 | } from "./types"; 6 | import { Response } from "express"; 7 | import { getRedisConnection } from "../../../src/services/queue-service"; 8 | 9 | // Basically just middleware and error wrapping 10 | export async function concurrencyCheckController( 11 | req: RequestWithAuth, 12 | res: Response, 13 | ) { 14 | const concurrencyLimiterKey = "concurrency-limiter:" + req.auth.team_id; 15 | const now = Date.now(); 16 | const activeJobsOfTeam = await getRedisConnection().zrangebyscore( 17 | concurrencyLimiterKey, 18 | now, 19 | Infinity, 20 | ); 21 | 22 | return res.status(200).json({ 23 | success: true, 24 | concurrency: activeJobsOfTeam.length, 25 | maxConcurrency: req.acuc?.concurrency ?? 
0, 26 | }); 27 | } 28 | -------------------------------------------------------------------------------- /.github/workflows/test-js-sdk.yml: -------------------------------------------------------------------------------- 1 | name: JS SDK Test Suite 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | paths: 8 | - apps/js-sdk/firecrawl/** 9 | 10 | env: 11 | TEST_API_KEY: ${{ secrets.TEST_API_KEY }} 12 | 13 | jobs: 14 | test: 15 | name: Run tests 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v5 19 | - name: Set up Node.js 20 | uses: actions/setup-node@v4 21 | with: 22 | node-version: "20" 23 | cache: "npm" 24 | cache-dependency-path: './apps/js-sdk/firecrawl/package-lock.json' 25 | - name: Install dependencies 26 | run: npm install 27 | working-directory: ./apps/js-sdk/firecrawl 28 | - name: Build 29 | run: npm run build 30 | working-directory: ./apps/js-sdk/firecrawl 31 | - name: Run tests 32 | run: npm run test 33 | working-directory: ./apps/js-sdk/firecrawl 34 | -------------------------------------------------------------------------------- /apps/api/src/services/notification/notification_string.ts: -------------------------------------------------------------------------------- 1 | import { NotificationType } from "../../types"; 2 | 3 | // depending on the notification type, return the appropriate string 4 | export function getNotificationString( 5 | notificationType: NotificationType, 6 | ): string { 7 | switch (notificationType) { 8 | case NotificationType.APPROACHING_LIMIT: 9 | return "Approaching the limit (80%)"; 10 | case NotificationType.LIMIT_REACHED: 11 | return "Limit reached (100%)"; 12 | case NotificationType.RATE_LIMIT_REACHED: 13 | return "Rate limit reached"; 14 | case NotificationType.AUTO_RECHARGE_SUCCESS: 15 | return "Auto-recharge successful"; 16 | case NotificationType.AUTO_RECHARGE_FAILED: 17 | return "Auto-recharge failed"; 18 | case NotificationType.CONCURRENCY_LIMIT_REACHED: 19 | return "Concurrency limit 
reached"; 20 | default: 21 | return "Unknown notification type"; 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/__tests__/e2e/v2/test_usage.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | from firecrawl import Firecrawl 4 | 5 | load_dotenv() 6 | 7 | 8 | class TestUsageE2E: 9 | def setup_method(self): 10 | # Environment is exported by conftest at import time 11 | self.client = Firecrawl(api_key=os.getenv("API_KEY"), api_url=os.getenv("API_URL")) 12 | 13 | def test_get_concurrency(self): 14 | resp = self.client.get_concurrency() 15 | # Shape assertions (endpoint not live yet, but types are defined) 16 | assert hasattr(resp, "concurrency") 17 | assert hasattr(resp, "max_concurrency") 18 | 19 | def test_get_credit_usage(self): 20 | resp = self.client.get_credit_usage() 21 | assert hasattr(resp, "remaining_credits") 22 | 23 | def test_get_token_usage(self): 24 | resp = self.client.get_token_usage() 25 | assert hasattr(resp, "remaining_tokens") 26 | 27 | -------------------------------------------------------------------------------- /examples/scrape_and_analyze_airbnb_data_e2b/codeInterpreter.ts: -------------------------------------------------------------------------------- 1 | import { CodeInterpreter } from '@e2b/code-interpreter' 2 | 3 | export async function codeInterpret( 4 | codeInterpreter: CodeInterpreter, 5 | code: string 6 | ) { 7 | console.log( 8 | `\n${'='.repeat(50)}\n> Running following AI-generated code:\n${code}\n${'='.repeat(50)}` 9 | ) 10 | 11 | const exec = await codeInterpreter.notebook.execCell(code, { 12 | // You can stream logs from the code interpreter 13 | // onStderr: (stderr: string) => console.log("\n[Code Interpreter stdout]", stderr), 14 | // onStdout: (stdout: string) => console.log("\n[Code Interpreter stderr]", stdout), 15 | // 16 | // You can also stream additional 
results like charts, images, etc. 17 | // onResult: ... 18 | }) 19 | 20 | if (exec.error) { 21 | console.log('[Code Interpreter error]', exec.error) // Runtime error 22 | return undefined 23 | } 24 | 25 | return exec 26 | } 27 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/src/components/ui/input.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | export interface InputProps 6 | extends React.InputHTMLAttributes {} 7 | 8 | const Input = React.forwardRef( 9 | ({ className, type, ...props }, ref) => { 10 | return ( 11 | 20 | ) 21 | } 22 | ) 23 | Input.displayName = "Input" 24 | 25 | export { Input } 26 | -------------------------------------------------------------------------------- /apps/python-sdk/firecrawl/__tests__/unit/v2/methods/aio/test_aio_map_request_preparation.py: -------------------------------------------------------------------------------- 1 | import pytest 2 | from firecrawl.v2.types import MapOptions 3 | from firecrawl.v2.methods.aio.map import _prepare_map_request 4 | 5 | 6 | class TestAsyncMapRequestPreparation: 7 | def test_basic(self): 8 | payload = _prepare_map_request("https://example.com") 9 | assert payload["url"] == "https://example.com" 10 | 11 | def test_fields(self): 12 | opts = MapOptions(search="docs", include_subdomains=True, limit=10, sitemap="only", timeout=15000, integration=" _unit-test ") 13 | payload = _prepare_map_request("https://example.com", opts) 14 | assert payload["search"] == "docs" 15 | assert payload["includeSubdomains"] is True 16 | assert payload["limit"] == 10 17 | assert payload["sitemap"] == "only" 18 | assert payload["timeout"] == 15000 19 | assert payload["integration"] == "_unit-test" 20 | 21 | -------------------------------------------------------------------------------- /apps/api/src/services/webhook/index.ts: 
-------------------------------------------------------------------------------- 1 | import { logger as _logger } from "../../lib/logger"; 2 | import { getWebhookConfig } from "./config"; 3 | import { WebhookConfig } from "./types"; 4 | import { WebhookSender } from "./delivery"; 5 | 6 | export async function createWebhookSender(params: { 7 | teamId: string; 8 | jobId: string; 9 | webhook?: WebhookConfig; 10 | v0: boolean; 11 | }): Promise { 12 | const config = await getWebhookConfig( 13 | params.teamId, 14 | params.jobId, 15 | params.webhook, 16 | ); 17 | if (!config) { 18 | return null; 19 | } 20 | 21 | return new WebhookSender(config.config, config.secret, { 22 | teamId: params.teamId, 23 | jobId: params.jobId, 24 | v0: params.v0, 25 | }); 26 | } 27 | 28 | export { 29 | getWebhookInsertQueueLength, 30 | processWebhookInsertJobs, 31 | } from "./delivery"; 32 | export { WebhookEvent } from "./types"; 33 | export { shutdownWebhookQueue } from "./queue"; 34 | -------------------------------------------------------------------------------- /apps/nuq-postgres/Dockerfile: -------------------------------------------------------------------------------- 1 | # Build a Postgres image that runs nuq.sql during initdb 2 | 3 | ARG PG_MAJOR=17 4 | FROM postgres:${PG_MAJOR} 5 | 6 | # Install pg_cron for the specified Postgres major version 7 | RUN set -eux; \ 8 | apt-get update; \ 9 | apt-get install -y --no-install-recommends \ 10 | postgresql-${PG_MAJOR}-cron; \ 11 | rm -rf /var/lib/apt/lists/* 12 | 13 | # Ensure pg_cron is preloaded on first startup by modifying the initdb template 14 | # This must be set before the first server start (init scripts run after start) 15 | RUN set -eux; \ 16 | conf_sample="/usr/share/postgresql/${PG_MAJOR}/postgresql.conf.sample"; \ 17 | sed -ri "s/^#?shared_preload_libraries\s*=.*/shared_preload_libraries = 'pg_cron'/" "$conf_sample"; \ 18 | printf "\n# Added for pg_cron\ncron.database_name = 'postgres'\n" >> "$conf_sample" 19 | 20 | # Copy 
nuq.sql so it is executed as part of the initdb sequence 21 | COPY nuq.sql /docker-entrypoint-initdb.d/010-nuq.sql -------------------------------------------------------------------------------- /examples/blog-articles/scheduling_scrapers/scripts/cron_scraper.py: -------------------------------------------------------------------------------- 1 | # cron_scraper.py 2 | import sys 3 | import logging 4 | from datetime import datetime 5 | from pathlib import Path 6 | from firecrawl_scraper import save_firecrawl_news_data 7 | 8 | # Set up logging 9 | log_dir = Path("logs") 10 | log_dir.mkdir(exist_ok=True) 11 | log_file = log_dir / f"scraper_{datetime.now().strftime('%Y_%m')}.log" 12 | 13 | logging.basicConfig( 14 | level=logging.INFO, 15 | format="%(asctime)s - %(levelname)s - %(message)s", 16 | handlers=[logging.FileHandler(log_file), logging.StreamHandler(sys.stdout)], 17 | ) 18 | 19 | 20 | def main(): 21 | try: 22 | logging.info("Starting scraping job") 23 | filename = save_firecrawl_news_data() 24 | logging.info(f"Successfully saved data to {filename}") 25 | except Exception as e: 26 | logging.error(f"Scraping failed: {str(e)}", exc_info=True) 27 | 28 | 29 | if __name__ == "__main__": 30 | main() 31 | -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/secret.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: Secret 3 | metadata: 4 | name: {{ include "firecrawl.fullname" . 
}}-secret 5 | type: Opaque 6 | data: 7 | OPENAI_API_KEY: {{ .Values.secret.OPENAI_API_KEY | b64enc | quote }} 8 | SLACK_WEBHOOK_URL: {{ .Values.secret.SLACK_WEBHOOK_URL | b64enc | quote }} 9 | LLAMAPARSE_API_KEY: {{ .Values.secret.LLAMAPARSE_API_KEY | b64enc | quote }} 10 | BULL_AUTH_KEY: {{ .Values.secret.BULL_AUTH_KEY | b64enc | quote }} 11 | TEST_API_KEY: {{ .Values.secret.TEST_API_KEY | b64enc | quote }} 12 | SCRAPING_BEE_API_KEY: {{ .Values.secret.SCRAPING_BEE_API_KEY | b64enc | quote }} 13 | STRIPE_PRICE_ID_STANDARD: {{ .Values.secret.STRIPE_PRICE_ID_STANDARD | b64enc | quote }} 14 | STRIPE_PRICE_ID_SCALE: {{ .Values.secret.STRIPE_PRICE_ID_SCALE | b64enc | quote }} 15 | FIRE_ENGINE_BETA_URL: {{ .Values.secret.FIRE_ENGINE_BETA_URL | b64enc | quote }} 16 | REDIS_PASSWORD: {{ .Values.secret.REDIS_PASSWORD | b64enc | quote }} 17 | -------------------------------------------------------------------------------- /apps/api/src/scraper/scrapeURL/engines/fire-engine/delete.ts: -------------------------------------------------------------------------------- 1 | import { Logger } from "winston"; 2 | 3 | import { robustFetch } from "../../lib/fetch"; 4 | import { MockState } from "../../lib/mock"; 5 | import { fireEngineStagingURL, fireEngineURL } from "./scrape"; 6 | 7 | export async function fireEngineDelete( 8 | logger: Logger, 9 | jobId: string | undefined, 10 | mock: MockState | null, 11 | abort?: AbortSignal, 12 | production = true, 13 | ) { 14 | // jobId only supplied if we need to defer deletion 15 | if (!jobId) { 16 | logger.debug("Fire Engine job id not supplied, skipping delete"); 17 | return; 18 | } 19 | 20 | await robustFetch({ 21 | url: `${production ? 
fireEngineURL : fireEngineStagingURL}/scrape/${jobId}`, 22 | method: "DELETE", 23 | headers: {}, 24 | logger: logger.child({ method: "fireEngineDelete/robustFetch", jobId }), 25 | mock, 26 | abort, 27 | }); 28 | 29 | logger.debug("Deleted job from Fire Engine", { jobId }); 30 | } 31 | -------------------------------------------------------------------------------- /.github/workflows/deploy-redis.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Redis to GHCR 2 | 3 | env: 4 | DOTNET_VERSION: '6.0.x' 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | paths: 11 | - apps/redis/** 12 | workflow_dispatch: 13 | 14 | jobs: 15 | push-app-image: 16 | runs-on: ubuntu-latest 17 | defaults: 18 | run: 19 | working-directory: './apps/redis' 20 | steps: 21 | - name: 'Checkout GitHub Action' 22 | uses: actions/checkout@main 23 | 24 | - name: 'Login to GitHub Container Registry' 25 | uses: docker/login-action@v3 26 | with: 27 | registry: ghcr.io 28 | username: ${{github.actor}} 29 | password: ${{secrets.GITHUB_TOKEN}} 30 | 31 | - name: 'Build Inventory Image' 32 | run: | 33 | docker build . --tag ghcr.io/firecrawl/firecrawl-redis:latest 34 | docker push ghcr.io/firecrawl/firecrawl-redis:latest -------------------------------------------------------------------------------- /apps/api/src/services/redlock.ts: -------------------------------------------------------------------------------- 1 | import Redlock from "redlock"; 2 | import { config } from "../config"; 3 | import Client from "ioredis"; 4 | 5 | export const redlock = new Redlock( 6 | // You should have one client for each independent redis node 7 | // or cluster. 
// Worker-side message handler: every message posted to this worker is passed
// to `handler.handle`; `onLoad` builds the WASI instance and instantiates the
// N-API module with `childThread: true`.
const handler = new MessageHandler({
  onLoad({ wasmModule, wasmMemory }) {
    // Forward WASI stdout/stderr to the worker console.
    const wasi = new WASI({
      print: (...args) => {
        // eslint-disable-next-line no-console
        console.log(...args)
      },
      printErr: (...args) => {
        // eslint-disable-next-line no-console
        console.error(...args)
      },
    })

    // Merge the napi/emnapi import namespaces into `env` and supply the
    // memory object received in `onLoad`.
    const overwriteImports = (importObject) => {
      importObject.env = {
        ...importObject.env,
        ...importObject.napi,
        ...importObject.emnapi,
        memory: wasmMemory,
      }
    }

    return instantiateNapiModuleSync(wasmModule, {
      childThread: true,
      wasi,
      overwriteImports,
    })
  },
})

globalThis.onmessage = (e) => {
  handler.handle(e)
}
#!/bin/bash
#
# Lists every git-tracked file that is not matched by any path pattern in
# .github/CODEOWNERS. Must be run from the repository root.
# Output: one uncovered path per line on stdout.

set -euo pipefail

# comm(1) requires both inputs to be sorted with the same collation. Force
# byte order so `sort` behaves the same regardless of the caller's locale.
export LC_ALL=C

# Use a private scratch directory (removed on any exit) instead of fixed /tmp
# names, so interrupted or concurrent runs cannot leave or pick up stale data.
tmp_dir=$(mktemp -d)
trap 'rm -rf "$tmp_dir"' EXIT

all_files="$tmp_dir/all_files.txt"
covered_files="$tmp_dir/covered_files.txt"

# Get all files tracked by git
git ls-files | sort -u > "$all_files"

# Get files matched by CODEOWNERS. Start from an empty file so the final sort
# still works when no pattern matches anything.
: > "$covered_files"
# `|| [[ -n "$line" ]]` keeps a last line that has no trailing newline.
while read -r line || [[ -n "$line" ]]; do
  # Skip comments and empty lines
  [[ "$line" =~ ^#.*$ ]] && continue
  [[ -z "$line" ]] && continue

  # Extract the path pattern (first whitespace-separated field)
  pattern=$(awk '{print $1}' <<< "$line")

  # Convert the pattern to a form git understands
  # Remove leading slash if present
  pattern=${pattern#/}

  # List files matching this pattern; `--` stops option parsing, and a
  # pattern git rejects is skipped rather than aborting the whole run.
  git ls-files -- "$pattern" 2>/dev/null >> "$covered_files" || true
done < .github/CODEOWNERS

# Print files that are tracked but not covered by any pattern
sort -u "$covered_files" | comm -23 "$all_files" -
-------------------------------------------------------------------------------- /apps/api/requests/branding.requests.http: -------------------------------------------------------------------------------- 1 | ### Test 1: Firecrawl, SVG logo, bright colors, light theme 2 | # @name brandingTest1 3 | POST http://localhost:3002/v2/scrape 4 | Authorization: Bearer {{$dotenv TEST_API_KEY}} 5 | Content-Type: application/json 6 | 7 | { 8 | "url": "https://firecrawl.dev", 9 | "formats": [ 10 | { "type": "branding" } 11 | ] 12 | } 13 | 14 | ### Test 2: Supabase, dark mode, dark colors, png logo 15 | # @name brandingTest2 16 | POST http://localhost:3002/v2/scrape 17 | Authorization: Bearer {{$dotenv TEST_API_KEY}} 18 | Content-Type: application/json 19 | 20 | { 21 | "url": "https://supabase.com", 22 | "formats": [ 23 | { "type": "branding" } 24 | ] 25 | } 26 | 27 | ### Test 3: Vercel, light mode, light colors, svg logo 28 | # @name brandingTest3 29 | POST http://localhost:3002/v2/scrape 30 | Authorization: Bearer {{$dotenv TEST_API_KEY}} 31 | Content-Type: application/json 32 | 33 | { 34 | "url": "https://vercel.com", 35 | "formats": [ 36 | { "type": "branding" } 37 | ] 38 | } 39 | -------------------------------------------------------------------------------- /.github/workflows/deploy-playwright.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Playwright to GHCR 2 | 3 | env: 4 | DOTNET_VERSION: '6.0.x' 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | paths: 11 | - apps/playwright-service-ts/** 12 | workflow_dispatch: 13 | 14 | jobs: 15 | push-app-image: 16 | runs-on: ubuntu-latest 17 | defaults: 18 | run: 19 | working-directory: './apps/playwright-service-ts' 20 | steps: 21 | - name: 'Checkout GitHub Action' 22 | uses: actions/checkout@main 23 | 24 | - name: 'Login to GitHub Container Registry' 25 | uses: docker/login-action@v3 26 | with: 27 | registry: ghcr.io 28 | username: ${{github.actor}} 29 | password: 
/**
 * Admin endpoint that clears the cached ACUC entries for one team.
 *
 * Reads `team_id` from the request body, loads the team's rows from
 * `api_keys`, calls `clearACUC` for each row's `key`, then calls
 * `clearACUCTeam` for the team itself.
 *
 * Responses:
 * - 400 `{ error }` when `team_id` is missing,
 * - 200 `{ ok: true }` once every clear has completed,
 * - 500 `{ error }` when the key lookup or any clear fails.
 */
export async function acucCacheClearController(req: Request, res: Response) {
  try {
    // `req.body` may be undefined if no body was parsed; report that as a
    // missing team_id (400) rather than an internal error.
    const team_id: string | undefined = req.body?.team_id;

    if (!team_id) {
      return res.status(400).json({ error: "team_id is required" });
    }

    const keys = await supabase_service
      .from("api_keys")
      .select("*")
      .eq("team_id", team_id);

    // The query result carries failures in `error` instead of throwing.
    // Without this check a failed lookup would clear nothing and still
    // answer `{ ok: true }`.
    if (keys.error) {
      throw new Error(
        `Failed to load API keys for team ${team_id}: ${keys.error.message}`,
      );
    }

    await Promise.all((keys.data ?? []).map(x => clearACUC(x.key)));
    await clearACUCTeam(team_id);

    logger.info(`ACUC cache cleared for team ${team_id}`);
    res.json({ ok: true });
  } catch (error) {
    logger.error(`Error clearing ACUC cache via API route: ${error}`);
    res.status(500).json({ error: "Internal server error" });
  }
}
/**
 * Returns a copy of `objArray` in which every array value has its duplicate
 * items removed.
 *
 * Two items count as duplicates when their `JSON.stringify` output is equal,
 * so objects with the same properties in a different order are kept as
 * distinct. The first occurrence wins and relative order is preserved.
 * Values that are not arrays are copied over unchanged.
 *
 * @param objArray - Map from key to a list of items.
 * @returns A new object with the same keys and de-duplicated lists.
 */
export function deduplicateObjectsArray(objArray: { [key: string]: any[] }): {
  [key: string]: any[];
} {
  const result: { [key: string]: any[] } = {};

  for (const key in objArray) {
    const value = objArray[key];

    // If the value is not an array, just copy it as is
    if (!Array.isArray(value)) {
      result[key] = value;
      continue;
    }

    // Serialized forms of the items already kept for this key
    const fingerprints = new Set();
    result[key] = value.filter(entry => {
      const fingerprint = JSON.stringify(entry);
      if (fingerprints.has(fingerprint)) {
        return false;
      }
      fingerprints.add(fingerprint);
      return true;
    });
  }

  return result;
}
/**
 * Responds with the current queue lengths as plain text in Prometheus
 * exposition style, one `name value` sample per queue.
 *
 * Only `firecrawl_index_queue_length` is preceded by HELP/TYPE lines; the
 * other three samples are emitted without metadata.
 */
export async function indexQueuePrometheus(req: Request, res: Response) {
  // The four lookups do not depend on each other, so start them together
  // instead of waiting for each one in turn.
  const [
    queueLength,
    webhookQueueLength,
    indexRFQueueLength,
    omceQueueLength,
  ] = await Promise.all([
    getIndexInsertQueueLength(),
    getWebhookInsertQueueLength(),
    getIndexRFInsertQueueLength(),
    getOMCEQueueLength(),
  ]);

  res.setHeader("Content-Type", "text/plain");
  res.send(`\
# HELP firecrawl_index_queue_length The number of items in the index insert queue
# TYPE firecrawl_index_queue_length gauge
firecrawl_index_queue_length ${queueLength}
firecrawl_webhook_queue_length ${webhookQueueLength}
firecrawl_index_rf_queue_length ${indexRFQueueLength}
firecrawl_omce_queue_length ${omceQueueLength}
`);
}
// Add the identifier to the set and keep the item 21 | seen.add(identifier); 22 | return true; 23 | }); 24 | } else { 25 | // If the value is not an array, just copy it as is 26 | deduplicatedObjArray[key] = objArray[key]; 27 | } 28 | } 29 | 30 | return deduplicatedObjArray; 31 | } 32 | -------------------------------------------------------------------------------- /.github/workflows/publish-js-sdk.yml: -------------------------------------------------------------------------------- 1 | name: Publish JS SDK 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - apps/js-sdk/firecrawl/package.json 9 | 10 | env: 11 | TEST_API_KEY: ${{ secrets.TEST_API_KEY }} 12 | 13 | jobs: 14 | publish: 15 | name: Publish 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v5 19 | - name: Set up Node.js 20 | uses: actions/setup-node@v4 21 | with: 22 | node-version: "20" 23 | - name: Authenticate 24 | run: echo "//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}" > ~/.npmrc 25 | - name: Publish 26 | run: | 27 | npm install 28 | npm run build 29 | npm publish --access public 30 | sed -i 's/"name": "@mendable\/firecrawl-js"/"name": "@mendable\/firecrawl"/g' package.json 31 | npm publish --access public 32 | sed -i 's/"name": "@mendable\/firecrawl"/"name": "firecrawl"/g' package.json 33 | npm publish --access public 34 | working-directory: ./apps/js-sdk/firecrawl 35 | -------------------------------------------------------------------------------- /apps/api/src/types/parse-diff.d.ts: -------------------------------------------------------------------------------- 1 | declare module "parse-diff" { 2 | interface NormalChange { 3 | type: "normal"; 4 | normal: true; 5 | ln1: number; 6 | ln2: number; 7 | content: string; 8 | } 9 | 10 | interface AddChange { 11 | type: "add"; 12 | add: true; 13 | ln: number; 14 | content: string; 15 | } 16 | 17 | interface DeleteChange { 18 | type: "del"; 19 | del: true; 20 | ln: number; 21 | content: string; 22 | } 23 | 24 
| type Change = NormalChange | AddChange | DeleteChange; 25 | 26 | interface Chunk { 27 | content: string; 28 | changes: Change[]; 29 | oldStart: number; 30 | oldLines: number; 31 | newStart: number; 32 | newLines: number; 33 | } 34 | 35 | interface File { 36 | chunks: Chunk[]; 37 | deletions: number; 38 | additions: number; 39 | from: string | null; 40 | to: string | null; 41 | index?: string[]; 42 | newMode?: string; 43 | oldMode?: string; 44 | binary?: boolean; 45 | } 46 | 47 | function parseDiff(diff: string): File[]; 48 | export = parseDiff; 49 | } 50 | -------------------------------------------------------------------------------- /examples/openai_swarm_firecrawl/README.md: -------------------------------------------------------------------------------- 1 | # Swarm Firecrawl Marketing Agent 2 | 3 | A multi-agent system using [OpenAI Swarm](https://github.com/openai/swarm) for AI-powered marketing strategies using [Firecrawl](https://firecrawl.dev) for web scraping. 4 | 5 | ## Agents 6 | 7 | 1. User Interface: Manages user interactions 8 | 2. Website Scraper: Extracts clean LLM-ready content via Firecrawl API 9 | 3. Analyst: Provides marketing insights 10 | 4. Campaign Idea: Generates marketing campaign concepts 11 | 5. Copywriter: Creates compelling marketing copy 12 | 13 | ## Requirements 14 | 15 | - [Firecrawl](https://firecrawl.dev) API key 16 | - [OpenAI](https://platform.openai.com/api-keys) API key 17 | 18 | ## Setup 19 | 20 | 1. Install the required packages: 21 | ``` 22 | pip install -r requirements.txt 23 | ``` 24 | 25 | 2. 
/**
 * Scrapes a single URL by POSTing to `/v2/scrape`.
 *
 * @param http - Client used to send the request.
 * @param url - Target URL; surrounding whitespace is trimmed before sending.
 * @param options - Optional scrape options; validated with
 *   `ensureValidScrapeOptions` and merged into the request payload.
 * @returns The `data` document from the response, or an empty object cast to
 *   `Document` when the response carries no `data`.
 * @throws Error when `url` is empty or whitespace only. Responses that are
 *   not status 200 with `success` set are handed to `throwForBadResponse`,
 *   and Axios errors to `normalizeAxiosError`.
 */
export async function scrape(
  http: HttpClient,
  url: string,
  options?: ScrapeOptions,
): Promise<Document> {
  if (!url || !url.trim()) {
    throw new Error("URL cannot be empty");
  }
  if (options) ensureValidScrapeOptions(options);

  // Options are merged after `url`, matching the request body the API expects.
  const payload: Record<string, unknown> = { url: url.trim() };
  if (options) Object.assign(payload, options);

  try {
    const res = await http.post<{ success: boolean; data?: Document; error?: string }>("/v2/scrape", payload);
    if (res.status !== 200 || !res.data?.success) {
      throwForBadResponse(res, "scrape");
    }
    return (res.data.data || {}) as Document;
  } catch (err: any) {
    if (err?.isAxiosError) return normalizeAxiosError(err, "scrape");
    throw err;
  }
}
13 | export async function isEnterpriseTeamCreatedAfterRateLimitChange( 14 | team_id: string, 15 | ): Promise { 16 | const { data, error } = (await supabase_service 17 | .from("subscriptions") 18 | .select("prices(products(is_enterprise))") 19 | .eq("status", "active") 20 | .eq("team_id", team_id) 21 | .gt( 22 | "created", 23 | RATE_LIMIT_CHANGE_NOTIFICATION_START_DATE.toISOString(), 24 | )) as { 25 | data: SubscriptionResponse[] | null; 26 | error: any; 27 | }; 28 | 29 | if (error || !data) { 30 | // If there's an error or no subscription found, assume non-enterprise 31 | return false; 32 | } 33 | 34 | const isEnterprise = data.find( 35 | sub => sub.prices?.products?.is_enterprise === true, 36 | ); 37 | 38 | return !!isEnterprise; 39 | } 40 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[Feat] " 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Problem Description** 11 | Describe the issue you're experiencing that has prompted this feature request. For example, "I find it difficult when..." 12 | 13 | **Proposed Feature** 14 | Provide a clear and concise description of the feature you would like implemented. 15 | 16 | **Alternatives Considered** 17 | Discuss any alternative solutions or features you've considered. Why were these alternatives not suitable? 18 | 19 | **Implementation Suggestions** 20 | If you have ideas on how the feature could be implemented, share them here. This could include technical details, API changes, or interaction mechanisms. 21 | 22 | **Use Case** 23 | Explain how this feature would be used and what benefits it would bring. Include specific examples to illustrate how this would improve functionality or user experience. 
24 | 25 | **Additional Context** 26 | Add any other context such as comparisons with similar features in other products, or links to prototypes or mockups. 27 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | /node_modules/ 3 | /dist/ 4 | .env 5 | *.csv 6 | dump.rdb 7 | /mongo-data 8 | apps/js-sdk/node_modules/ 9 | 10 | apps/api/.env.local 11 | 12 | apps/test-suite/node_modules/ 13 | 14 | 15 | apps/test-suite/.env 16 | apps/test-suite/logs 17 | apps/test-suite/load-test-results/test-run-report.json 18 | 19 | apps/playwright-service-ts/node_modules/ 20 | apps/playwright-service-ts/package-lock.json 21 | 22 | 23 | /examples/o1_web_crawler/venv 24 | *.pyc 25 | .rdb 26 | 27 | apps/js-sdk/firecrawl/dist 28 | 29 | /examples/o1_web_crawler/firecrawl_env 30 | /examples/crm_lead_enrichment/crm_lead_enrichment_env 31 | /.venv 32 | /examples/claude_web_crawler/firecrawl_env 33 | /examples/haiku_web_crawler/firecrawl_env 34 | /examples/sonnet_web_crawler/firecrawl_env 35 | /examples/internal_link_assitant/firecrawl_env 36 | 37 | /apps/api/logs/* 38 | /apps/api/debug/* 39 | 40 | .vscode 41 | llm-links.txt 42 | mapped-links.txt 43 | gke-key.json 44 | 45 | CLAUDE.local.md 46 | 47 | *.egg-info/ 48 | # local SDK venv 49 | apps/python-sdk/.venv/ 50 | 51 | /apps/api/running-docs/ 52 | 53 | 54 | /apps/go-html-to-md-service/.gomodcache/ 55 | target/ -------------------------------------------------------------------------------- /examples/o3-web-crawler/.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | 
var/ 22 | wheels/ 23 | *.egg-info/ 24 | .installed.cfg 25 | *.egg 26 | MANIFEST 27 | 28 | # PyInstaller 29 | # Usually these files are written by a python script from a template 30 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 31 | *.manifest 32 | *.spec 33 | 34 | # Installer logs 35 | pip-log.txt 36 | pip-delete-this-directory.txt 37 | 38 | # Unit test / coverage reports 39 | htmlcov/ 40 | .tox/ 41 | .nox/ 42 | .coverage 43 | .coverage.* 44 | .cache 45 | nosetests.xml 46 | coverage.xml 47 | *.cover 48 | .hypothesis/ 49 | .pytest_cache/ 50 | 51 | # Environments 52 | .env 53 | .venv 54 | env/ 55 | venv/ 56 | ENV/ 57 | env.bak/ 58 | venv.bak/ 59 | 60 | # VS Code settings 61 | .vscode/ 62 | 63 | # Jupyter Notebook 64 | .ipynb_checkpoints 65 | 66 | # MacOS 67 | .DS_Store 68 | 69 | # Project specific 70 | *.log -------------------------------------------------------------------------------- /apps/js-sdk/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Sideguide Technologies Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /apps/python-sdk/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Sideguide Technologies Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /apps/playwright-service-ts/README.md: -------------------------------------------------------------------------------- 1 | # Playwright Scrape API 2 | 3 | This is a simple web scraping service built with Express and Playwright. 
4 | 5 | ## Features 6 | 7 | - Scrapes HTML content from specified URLs. 8 | - Blocks requests to known ad-serving domains. 9 | - Blocks media files to reduce bandwidth usage. 10 | - Uses random user-agent strings to avoid detection. 11 | - Strategy to ensure the page is fully rendered. 12 | 13 | ## Install 14 | ```bash 15 | npm install 16 | npx playwright install 17 | ``` 18 | 19 | ## RUN 20 | ```bash 21 | npm run build 22 | npm start 23 | ``` 24 | OR 25 | ```bash 26 | npm run dev 27 | ``` 28 | 29 | ## USE 30 | 31 | ```bash 32 | curl -X POST http://localhost:3000/scrape \ 33 | -H "Content-Type: application/json" \ 34 | -d '{ 35 | "url": "https://example.com", 36 | "wait_after_load": 1000, 37 | "timeout": 15000, 38 | "headers": { 39 | "Custom-Header": "value" 40 | }, 41 | "check_selector": "#content" 42 | }' 43 | ``` 44 | 45 | ## USING WITH FIRECRAWL 46 | 47 | Add `PLAYWRIGHT_MICROSERVICE_URL=http://localhost:3003/scrape` to `/apps/api/.env` to configure the API to use this Playwright microservice for scraping operations. 48 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Sideguide Technologies Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
/**
 * Decides whether the request's `x-idempotency-key` header may be used.
 *
 * @param req - Incoming Express request.
 * @returns `true` when no key was sent, or when the key is a valid UUID and
 *   no matching row exists in `idempotency_keys`; `false` when the key is not
 *   a UUID or a matching row already exists.
 */
export async function validateIdempotencyKey(req: Request): Promise<boolean> {
  const idempotencyKey = req.headers["x-idempotency-key"];
  if (!idempotencyKey) {
    // A missing idempotency key is not rejected for now.
    return true;
  }

  // Ensure idempotencyKey is treated as a string
  const key = Array.isArray(idempotencyKey)
    ? idempotencyKey[0]
    : idempotencyKey;
  if (!isUuid(key)) {
    logger.debug("Invalid idempotency key provided in the request headers.");
    return false;
  }

  // Look up the normalized, UUID-validated `key`, not the raw header value,
  // which may be an array.
  const { data, error } = await supabase_rr_service
    .from("idempotency_keys")
    .select("key")
    .eq("key", key);

  if (error) {
    // NOTE(review): a failed lookup is only logged; if `data` is then empty
    // the key is accepted. Confirm that failing open is intended.
    logger.error(`Error validating idempotency key: ${error}`);
  }

  // The key is usable only when no existing row was found.
  return !data || data.length === 0;
}
type LocationOptions = { country?: string };

// The subset of request fields that the permission checks read.
interface APIRequest {
  zeroDataRetention?: boolean;
  location?: LocationOptions;
  scrapeOptions?: {
    location?: LocationOptions;
  };
}

const SUPPORT_EMAIL = "support@firecrawl.com";

/**
 * Verifies that the team's flags allow the features a request asks for.
 *
 * Checks, in order:
 * 1. Zero Data Retention: `request.zeroDataRetention` needs `flags.allowZDR`.
 * 2. Static IPs: a country of `"us-whitelist"` on either `request.location`
 *    or `request.scrapeOptions.location` needs `flags.ipWhitelist`.
 *
 * @param request - The request fields to check.
 * @param flags - The team's feature flags; missing flags grant nothing.
 * @returns `{ error }` describing the first failed check, or `{}` when all pass.
 */
export function checkPermissions(
  request: APIRequest,
  flags?: TeamFlags,
): { error?: string } {
  // zdr perms
  const zdrDenied = request.zeroDataRetention && !flags?.allowZDR;
  if (zdrDenied) {
    return {
      error: `Zero Data Retention (ZDR) is not enabled for your team. Contact ${SUPPORT_EMAIL} to enable this feature.`,
    };
  }

  // ip whitelist perms
  const requestedCountries = [
    request.location?.country,
    request.scrapeOptions?.location?.country,
  ];
  const needsWhitelist = requestedCountries.includes("us-whitelist");
  if (needsWhitelist && !flags?.ipWhitelist) {
    return {
      error: `Static IP addresses are not enabled for your team. Contact ${SUPPORT_EMAIL} to get a dedicated set of IP addresses you can whitelist.`,
    };
  }

  return {};
}
Log output/error message 19 | 20 | **Expected Behavior** 21 | A clear and concise description of what you expected to happen. 22 | 23 | **Screenshots** 24 | If applicable, add screenshots or copies of the command line output to help explain the issue. 25 | 26 | **Environment (please complete the following information):** 27 | - OS: [e.g. macOS, Linux, Windows] 28 | - Deployment Type: [Cloud (firecrawl.dev) / Self-hosted] 29 | - Firecrawl Version: [e.g. 1.2.3] 30 | - Node.js Version: [e.g. 14.x] 31 | 32 | **Logs** 33 | If applicable, include detailed logs to help understand the problem. 34 | 35 | **Additional Context** 36 | Add any other context about the problem here, such as configuration specifics, network conditions, data volumes, etc. 37 | -------------------------------------------------------------------------------- /apps/api/src/types/x402.d.ts: -------------------------------------------------------------------------------- 1 | declare module "x402" { 2 | export * from "x402/dist/cjs/types/index"; 3 | } 4 | 5 | declare module "x402/dist/cjs/types/index" { 6 | // Type definitions 7 | export interface ChainConfig { 8 | [key: string]: any; 9 | } 10 | 11 | export interface ConnectedClient { 12 | [key: string]: any; 13 | } 14 | 15 | export interface SignerWallet { 16 | [key: string]: any; 17 | } 18 | 19 | // Value exports (constants) 20 | export const index_ChainConfig: any; 21 | export const index_ConnectedClient: any; 22 | export const index_SignerWallet: any; 23 | 24 | // Re-export everything else that might be in the module 25 | const _default: any; 26 | export default _default; 27 | } 28 | 29 | // Provide shims for internal type imports used by @coinbase/x402 30 | declare module "x402/types" { 31 | export interface FacilitatorConfig { 32 | [key: string]: any; 33 | } 34 | } 35 | 36 | declare module "x402/verify" { 37 | export type CreateHeaders = any; 38 | } 39 | 40 | declare module "x402-express" { 41 | export function paymentMiddleware(...args: any[]): 
any; 42 | 43 | // Export any other functions or types that might be used 44 | export * from "x402-express"; 45 | } 46 | -------------------------------------------------------------------------------- /apps/rust-sdk/examples/cancel_crawl_example.rs: -------------------------------------------------------------------------------- 1 | use firecrawl::FirecrawlApp; 2 | use std::error::Error; 3 | use std::time::Duration; 4 | 5 | #[tokio::main] 6 | async fn main() -> Result<(), Box> { 7 | // Get API URL from environment 8 | let api_url = std::env::var("FIRECRAWL_API_URL") 9 | .expect("Please set the FIRECRAWL_API_URL environment variable"); 10 | 11 | // Create the FirecrawlApp instance 12 | let firecrawl = FirecrawlApp::new_selfhosted(api_url, None::<&str>)?; 13 | 14 | // Start a crawl job 15 | println!("Starting a crawl job..."); 16 | let crawl_response = firecrawl 17 | .crawl_url_async("https://example.com", None) 18 | .await?; 19 | println!("Crawl job started with ID: {}", crawl_response.id); 20 | 21 | // Wait for a moment to let the crawl job start 22 | println!("Waiting for a moment..."); 23 | tokio::time::sleep(Duration::from_secs(2)).await; 24 | 25 | // Cancel the crawl job 26 | println!("Cancelling the crawl job..."); 27 | let cancel_response = firecrawl.cancel_crawl(&crawl_response.id).await?; 28 | 29 | println!("Cancellation result:"); 30 | println!(" Status: {:?}", cancel_response.status); 31 | 32 | Ok(()) 33 | } 34 | -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/worker-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "firecrawl.fullname" . }}-worker 5 | labels: 6 | app: {{ include "firecrawl.name" . }}-worker 7 | spec: 8 | replicas: {{ .Values.replicaCount }} 9 | selector: 10 | matchLabels: 11 | app: {{ include "firecrawl.name" . 
}}-worker 12 | template: 13 | metadata: 14 | labels: 15 | app: {{ include "firecrawl.name" . }}-worker 16 | spec: 17 | {{- if .Values.image.dockerSecretEnabled }} 18 | imagePullSecrets: 19 | {{- toYaml .Values.imagePullSecrets | nindent 8 }} 20 | {{- end }} 21 | containers: 22 | - name: worker 23 | image: "{{ .Values.image.repository }}:{{ .Values.image.tag }}" 24 | imagePullPolicy: {{ .Values.image.pullPolicy }} 25 | args: [ "pnpm", "run", "workers" ] 26 | env: 27 | - name: FLY_PROCESS_GROUP 28 | value: "worker" 29 | envFrom: 30 | - configMapRef: 31 | name: {{ include "firecrawl.fullname" . }}-config 32 | - secretRef: 33 | name: {{ include "firecrawl.fullname" . }}-secret 34 | -------------------------------------------------------------------------------- /apps/api/src/services/rate-limiter.ts: -------------------------------------------------------------------------------- 1 | import { RateLimiterRedis } from "rate-limiter-flexible"; 2 | import { config } from "../config"; 3 | import { RateLimiterMode } from "../types"; 4 | import Redis from "ioredis"; 5 | import type { AuthCreditUsageChunk } from "../controllers/v1/types"; 6 | 7 | export const redisRateLimitClient = new Redis(config.REDIS_RATE_LIMIT_URL!, { 8 | enableAutoPipelining: true, 9 | }); 10 | 11 | const createRateLimiter = (keyPrefix, points) => 12 | new RateLimiterRedis({ 13 | storeClient: redisRateLimitClient, 14 | keyPrefix, 15 | points, 16 | duration: 60, // Duration in seconds 17 | }); 18 | 19 | const fallbackRateLimits: AuthCreditUsageChunk["rate_limits"] = { 20 | crawl: 15, 21 | scrape: 100, 22 | search: 100, 23 | map: 100, 24 | extract: 100, 25 | preview: 25, 26 | extractStatus: 25000, 27 | crawlStatus: 25000, 28 | extractAgentPreview: 10, 29 | scrapeAgentPreview: 10, 30 | }; 31 | 32 | export function getRateLimiter( 33 | mode: RateLimiterMode, 34 | rate_limits: AuthCreditUsageChunk["rate_limits"] | null, 35 | ): RateLimiterRedis { 36 | return createRateLimiter( 37 | `${mode}`, 38 | 
rate_limits?.[mode] ?? fallbackRateLimits?.[mode] ?? 500, 39 | ); 40 | } 41 | -------------------------------------------------------------------------------- /examples/kubernetes/cluster-install/redis.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: redis 5 | spec: 6 | replicas: 1 7 | selector: 8 | matchLabels: 9 | app: redis 10 | template: 11 | metadata: 12 | labels: 13 | app: redis 14 | spec: 15 | containers: 16 | - name: redis 17 | image: redis:alpine 18 | command: [ "/bin/sh", "-c" ] # Run a shell script as entrypoint 19 | args: 20 | - | 21 | if [ -n "$REDIS_PASSWORD" ]; then 22 | echo "Starting Redis with authentication" 23 | exec redis-server --bind 0.0.0.0 --requirepass "$REDIS_PASSWORD" 24 | else 25 | echo "Starting Redis without authentication" 26 | exec redis-server --bind 0.0.0.0 27 | fi 28 | env: 29 | - name: REDIS_PASSWORD 30 | valueFrom: 31 | secretKeyRef: 32 | name: firecrawl-secret 33 | key: REDIS_PASSWORD 34 | --- 35 | apiVersion: v1 36 | kind: Service 37 | metadata: 38 | name: redis 39 | spec: 40 | selector: 41 | app: redis 42 | ports: 43 | - protocol: TCP 44 | port: 6379 45 | targetPort: 6379 46 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/crawl-ongoing.ts: -------------------------------------------------------------------------------- 1 | import { Response } from "express"; 2 | import { 3 | OngoingCrawlsResponse, 4 | RequestWithAuth, 5 | toNewCrawlerOptions, 6 | } from "./types"; 7 | import { getCrawl } from "../../lib/crawl-redis"; 8 | import { configDotenv } from "dotenv"; 9 | import { crawlGroup } from "../../services/worker/nuq"; 10 | configDotenv(); 11 | 12 | export async function ongoingCrawlsController( 13 | req: RequestWithAuth<{}, undefined, OngoingCrawlsResponse>, 14 | res: Response, 15 | ) { 16 | const ids = (await 
crawlGroup.getOngoingByOwner(req.auth.team_id)).map( 17 | x => x.id, 18 | ); 19 | 20 | const crawls = ( 21 | await Promise.all(ids.map(async id => ({ ...(await getCrawl(id)), id }))) 22 | ).filter(crawl => crawl !== null && !crawl.cancelled && crawl.crawlerOptions); 23 | 24 | res.status(200).json({ 25 | success: true, 26 | crawls: crawls.map(x => ({ 27 | id: x.id, 28 | teamId: x.team_id!, 29 | url: x.originUrl!, 30 | created_at: new Date(x.createdAt || Date.now()).toISOString(), 31 | options: { 32 | ...toNewCrawlerOptions(x.crawlerOptions), 33 | scrapeOptions: x.scrapeOptions, 34 | }, 35 | })), 36 | }); 37 | } 38 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v2/crawl-ongoing.ts: -------------------------------------------------------------------------------- 1 | import { Response } from "express"; 2 | import { 3 | OngoingCrawlsResponse, 4 | RequestWithAuth, 5 | toV2CrawlerOptions, 6 | } from "./types"; 7 | import { getCrawl } from "../../lib/crawl-redis"; 8 | import { configDotenv } from "dotenv"; 9 | import { crawlGroup } from "../../services/worker/nuq"; 10 | configDotenv(); 11 | 12 | export async function ongoingCrawlsController( 13 | req: RequestWithAuth<{}, undefined, OngoingCrawlsResponse>, 14 | res: Response, 15 | ) { 16 | const ids = (await crawlGroup.getOngoingByOwner(req.auth.team_id)).map( 17 | x => x.id, 18 | ); 19 | 20 | const crawls = ( 21 | await Promise.all(ids.map(async id => ({ ...(await getCrawl(id)), id }))) 22 | ).filter(crawl => crawl !== null && !crawl.cancelled && crawl.crawlerOptions); 23 | 24 | res.status(200).json({ 25 | success: true, 26 | crawls: crawls.map(x => ({ 27 | id: x.id, 28 | teamId: x.team_id!, 29 | url: x.originUrl!, 30 | created_at: new Date(x.createdAt || Date.now()).toISOString(), 31 | options: { 32 | ...toV2CrawlerOptions(x.crawlerOptions), 33 | scrapeOptions: x.scrapeOptions, 34 | }, 35 | })), 36 | }); 37 | } 38 | 
-------------------------------------------------------------------------------- /apps/api/src/controllers/v2/credit-usage.ts: -------------------------------------------------------------------------------- 1 | import { Response } from "express"; 2 | import { ErrorResponse, RequestWithAuth } from "./types"; 3 | import { getACUCTeam } from "../auth"; 4 | import { RateLimiterMode } from "../../types"; 5 | 6 | interface CreditUsageResponse { 7 | success: true; 8 | data: { 9 | remainingCredits: number; 10 | planCredits: number; 11 | billingPeriodStart: string | null; 12 | billingPeriodEnd: string | null; 13 | }; 14 | } 15 | 16 | export async function creditUsageController( 17 | req: RequestWithAuth, 18 | res: Response, 19 | ): Promise { 20 | const chunk = 21 | req.acuc ?? 22 | (await getACUCTeam(req.auth.team_id, false, false, RateLimiterMode.Scrape)); 23 | 24 | if (!chunk) { 25 | res.status(404).json({ 26 | success: false, 27 | error: "Could not find credit usage information", 28 | }); 29 | return; 30 | } 31 | 32 | res.json({ 33 | success: true, 34 | data: { 35 | remainingCredits: chunk.remaining_credits, 36 | planCredits: chunk.price_credits, 37 | billingPeriodStart: chunk.sub_current_period_start, 38 | billingPeriodEnd: chunk.sub_current_period_end, 39 | }, 40 | }); 41 | } 42 | -------------------------------------------------------------------------------- /examples/kubernetes/firecrawl-helm/templates/redis-deployment.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: apps/v1 2 | kind: Deployment 3 | metadata: 4 | name: {{ include "firecrawl.fullname" . }}-redis 5 | labels: 6 | app: {{ include "firecrawl.name" . }}-redis 7 | spec: 8 | replicas: {{ .Values.redis.replicaCount }} 9 | selector: 10 | matchLabels: 11 | app: {{ include "firecrawl.name" . }}-redis 12 | template: 13 | metadata: 14 | labels: 15 | app: {{ include "firecrawl.name" . 
}}-redis 16 | spec: 17 | containers: 18 | - name: redis 19 | image: {{ .Values.redis.image }} 20 | command: [ "/bin/sh", "-c" ] 21 | args: 22 | - | 23 | if [ -n "$REDIS_PASSWORD" ]; then 24 | echo "Starting Redis with authentication" 25 | exec redis-server --bind 0.0.0.0 --requirepass "$REDIS_PASSWORD" 26 | else 27 | echo "Starting Redis without authentication" 28 | exec redis-server --bind 0.0.0.0 29 | fi 30 | env: 31 | - name: REDIS_PASSWORD 32 | valueFrom: 33 | secretKeyRef: 34 | name: {{ include "firecrawl.fullname" . }}-secret 35 | key: REDIS_PASSWORD 36 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/credit-usage.ts: -------------------------------------------------------------------------------- 1 | import { Response } from "express"; 2 | import { ErrorResponse, RequestWithAuth } from "./types"; 3 | import { getACUCTeam } from "../auth"; 4 | import { RateLimiterMode } from "../../types"; 5 | 6 | interface CreditUsageResponse { 7 | success: true; 8 | data: { 9 | remaining_credits: number; 10 | plan_credits: number; 11 | billing_period_start: string | null; 12 | billing_period_end: string | null; 13 | }; 14 | } 15 | 16 | export async function creditUsageController( 17 | req: RequestWithAuth, 18 | res: Response, 19 | ): Promise { 20 | const chunk = 21 | req.acuc ?? 
22 | (await getACUCTeam(req.auth.team_id, false, false, RateLimiterMode.Scrape)); 23 | if (!chunk) { 24 | res.status(404).json({ 25 | success: false, 26 | error: "Could not find credit usage information", 27 | }); 28 | return; 29 | } 30 | 31 | res.json({ 32 | success: true, 33 | data: { 34 | remaining_credits: chunk.remaining_credits, 35 | plan_credits: chunk.price_credits, 36 | billing_period_start: chunk.sub_current_period_start, 37 | billing_period_end: chunk.sub_current_period_end, 38 | }, 39 | }); 40 | } 41 | -------------------------------------------------------------------------------- /apps/api/src/routes/v0.ts: -------------------------------------------------------------------------------- 1 | import express from "express"; 2 | import { crawlController } from "../../src/controllers/v0/crawl"; 3 | import { crawlStatusController } from "../../src/controllers/v0/crawl-status"; 4 | import { scrapeController } from "../../src/controllers/v0/scrape"; 5 | import { searchController } from "../../src/controllers/v0/search"; 6 | import { crawlCancelController } from "../../src/controllers/v0/crawl-cancel"; 7 | import { keyAuthController } from "../../src/controllers/v0/keyAuth"; 8 | import { livenessController } from "../controllers/v0/liveness"; 9 | import { readinessController } from "../controllers/v0/readiness"; 10 | 11 | export const v0Router = express.Router(); 12 | 13 | v0Router.post("/v0/scrape", scrapeController); 14 | v0Router.post("/v0/crawl", crawlController); 15 | v0Router.get("/v0/crawl/status/:jobId", crawlStatusController); 16 | v0Router.delete("/v0/crawl/cancel/:jobId", crawlCancelController); 17 | 18 | // Auth route for key based authentication 19 | v0Router.get("/v0/keyAuth", keyAuthController); 20 | 21 | // Search routes 22 | v0Router.post("/v0/search", searchController); 23 | 24 | // Health/Probe routes 25 | v0Router.get("/v0/health/liveness", livenessController); 26 | v0Router.get("/v0/health/readiness", readinessController); 27 | 
-------------------------------------------------------------------------------- /apps/api/src/scraper/scrapeURL/transformers/uploadScreenshot.ts: -------------------------------------------------------------------------------- 1 | // This file is an exception to the "no supabase in scrapeURL" rule, 2 | // and it makes me sad. - mogery 3 | 4 | import { supabase_service } from "../../../services/supabase"; 5 | import { config } from "../../../config"; 6 | import { Meta } from ".."; 7 | import { Document } from "../../../controllers/v1/types"; 8 | 9 | export function uploadScreenshot(meta: Meta, document: Document): Document { 10 | if ( 11 | config.USE_DB_AUTHENTICATION && 12 | document.screenshot !== undefined && 13 | document.screenshot.startsWith("data:") 14 | ) { 15 | meta.logger.debug("Uploading screenshot to Supabase..."); 16 | 17 | const fileName = `screenshot-${crypto.randomUUID()}.png`; 18 | 19 | supabase_service.storage 20 | .from("media") 21 | .upload( 22 | fileName, 23 | Buffer.from(document.screenshot.split(",")[1], "base64"), 24 | { 25 | cacheControl: "3600", 26 | upsert: false, 27 | contentType: document.screenshot.split(":")[1].split(";")[0], 28 | }, 29 | ); 30 | 31 | document.screenshot = `https://service.firecrawl.dev/storage/v1/object/public/media/${encodeURIComponent(fileName)}`; 32 | } 33 | 34 | return document; 35 | } 36 | -------------------------------------------------------------------------------- /.github/archive/publish-rust-sdk.yml: -------------------------------------------------------------------------------- 1 | name: Publish Rust SDK 2 | 3 | on: [] 4 | 5 | env: 6 | CRATES_IO_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} 7 | 8 | jobs: 9 | build-and-publish: 10 | runs-on: ubuntu-latest 11 | 12 | steps: 13 | - name: Checkout repository 14 | uses: actions/checkout@v3 15 | 16 | - name: Set up Rust 17 | uses: actions-rs/toolchain@v1 18 | with: 19 | toolchain: stable 20 | default: true 21 | profile: minimal 22 | 23 | - name: Install dependencies 
24 | run: cargo build --release 25 | 26 | - name: Run version check script 27 | id: version_check_script 28 | run: | 29 | VERSION_INCREMENTED=$(cargo search --limit 1 my_crate_name | grep my_crate_name) 30 | echo "VERSION_INCREMENTED=$VERSION_INCREMENTED" >> $GITHUB_ENV 31 | 32 | - name: Build the package 33 | if: ${{ env.VERSION_INCREMENTED == 'true' }} 34 | run: cargo package 35 | working-directory: ./apps/rust-sdk 36 | 37 | - name: Publish to crates.io 38 | if: ${{ env.VERSION_INCREMENTED == 'true' }} 39 | env: 40 | CARGO_REG_TOKEN: ${{ secrets.CRATES_IO_TOKEN }} 41 | run: cargo publish 42 | working-directory: ./apps/rust-sdk -------------------------------------------------------------------------------- /apps/api/src/controllers/v2/token-usage.ts: -------------------------------------------------------------------------------- 1 | import { Response } from "express"; 2 | import { ErrorResponse, RequestWithAuth } from "./types"; 3 | import { getACUCTeam } from "../auth"; 4 | import { RateLimiterMode } from "../../types"; 5 | 6 | interface TokenUsageResponse { 7 | success: true; 8 | data: { 9 | remainingTokens: number; 10 | planTokens: number; 11 | billingPeriodStart: string | null; 12 | billingPeriodEnd: string | null; 13 | }; 14 | } 15 | 16 | export async function tokenUsageController( 17 | req: RequestWithAuth, 18 | res: Response, 19 | ): Promise { 20 | const chunk = 21 | req.acuc ?? 
22 | (await getACUCTeam( 23 | req.auth.team_id, 24 | false, 25 | false, 26 | RateLimiterMode.Extract, 27 | )); 28 | 29 | if (!chunk) { 30 | res.status(404).json({ 31 | success: false, 32 | error: "Could not find token usage information", 33 | }); 34 | return; 35 | } 36 | 37 | res.json({ 38 | success: true, 39 | data: { 40 | remainingTokens: chunk.remaining_credits, 41 | planTokens: chunk.price_credits, 42 | billingPeriodStart: chunk.sub_current_period_start, 43 | billingPeriodEnd: chunk.sub_current_period_end, 44 | }, 45 | }); 46 | } 47 | -------------------------------------------------------------------------------- /.github/workflows/deploy-image.yml: -------------------------------------------------------------------------------- 1 | name: Deploy Images to GHCR 2 | 3 | env: 4 | DOTNET_VERSION: '6.0.x' 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | paths: 11 | - apps/api/** 12 | workflow_dispatch: 13 | 14 | jobs: 15 | push-app-image: 16 | runs-on: ubuntu-latest 17 | defaults: 18 | run: 19 | working-directory: './apps/api' 20 | steps: 21 | - name: 'Checkout GitHub Action' 22 | uses: actions/checkout@main 23 | 24 | - name: 'Set up Docker Buildx' 25 | uses: docker/setup-buildx-action@v3 26 | 27 | - name: 'Login to GitHub Container Registry' 28 | uses: docker/login-action@v3 29 | with: 30 | registry: ghcr.io 31 | username: ${{github.actor}} 32 | password: ${{secrets.GITHUB_TOKEN}} 33 | 34 | - name: 'Build and Push Image' 35 | uses: docker/build-push-action@v6 36 | with: 37 | context: ./apps/api 38 | push: true 39 | tags: ghcr.io/firecrawl/firecrawl:latest 40 | cache-from: type=registry,ref=ghcr.io/firecrawl/firecrawl:buildcache 41 | cache-to: type=registry,ref=ghcr.io/firecrawl/firecrawl:buildcache,mode=max -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/token-usage.ts: -------------------------------------------------------------------------------- 1 | import { Response } from "express"; 2 | import 
{ ErrorResponse, RequestWithAuth } from "./types"; 3 | import { getACUCTeam } from "../auth"; 4 | import { RateLimiterMode } from "../../types"; 5 | 6 | interface TokenUsageResponse { 7 | success: true; 8 | data: { 9 | remaining_tokens: number; 10 | plan_tokens: number; 11 | billing_period_start: string | null; 12 | billing_period_end: string | null; 13 | }; 14 | } 15 | 16 | export async function tokenUsageController( 17 | req: RequestWithAuth, 18 | res: Response, 19 | ): Promise { 20 | const chunk = 21 | req.acuc ?? 22 | (await getACUCTeam( 23 | req.auth.team_id, 24 | false, 25 | false, 26 | RateLimiterMode.Extract, 27 | )); 28 | if (!chunk) { 29 | res.status(404).json({ 30 | success: false, 31 | error: "Could not find token usage information", 32 | }); 33 | return; 34 | } 35 | 36 | res.json({ 37 | success: true, 38 | data: { 39 | remaining_tokens: chunk.remaining_credits, 40 | plan_tokens: chunk.price_credits, 41 | billing_period_start: chunk.sub_current_period_start, 42 | billing_period_end: chunk.sub_current_period_end, 43 | }, 44 | }); 45 | } 46 | -------------------------------------------------------------------------------- /.github/workflows/eval-prod.yml: -------------------------------------------------------------------------------- 1 | name: Run Eval Benchmark Prod 2 | 3 | env: 4 | EVAL_API_URL: ${{ secrets.EVAL_API_URL }} 5 | EVAL_API_KEY: ${{ secrets.EVAL_API_KEY }} 6 | EVAL_EXPERIMENT_ID: ${{ secrets.EVAL_BENCHMARK_EXPERIMENT_ID }} 7 | 8 | on: 9 | workflow_run: 10 | workflows: ["Deploy Images to GHCR"] 11 | types: 12 | - completed 13 | branches: 14 | - main 15 | workflow_dispatch: 16 | 17 | jobs: 18 | run-eval-benchmark-prod: 19 | runs-on: ubuntu-latest 20 | if: ${{ github.event.workflow_run.conclusion == 'success' || github.event_name == 'workflow_dispatch' }} 21 | steps: 22 | - name: Checkout repository 23 | uses: actions/checkout@v5 24 | 25 | - name: 'Install dependencies' 26 | run: | 27 | python -m pip install --upgrade pip 28 | pip install 
requests 29 | 30 | # make sure the image is deployed before running the eval benchmark 31 | - name: Wait for 2 minutes 32 | run: sleep 120 33 | 34 | - name: 'Run Eval Benchmark Prod' 35 | run: | 36 | python .github/scripts/eval_run.py --label prod.${{ github.sha }} --api-url ${{ env.EVAL_API_URL }} --api-key ${{ env.EVAL_API_KEY }} --experiment-id ${{ env.EVAL_EXPERIMENT_ID }} 37 | -------------------------------------------------------------------------------- /apps/api/native/src/document/mod.rs: -------------------------------------------------------------------------------- 1 | pub mod model; 2 | pub mod providers; 3 | pub mod renderers; 4 | 5 | pub use providers::factory::DocumentType; 6 | 7 | use crate::document::model::Document; 8 | use crate::document::providers::factory::ProviderFactory; 9 | use crate::document::renderers::html::HtmlRenderer; 10 | use napi::bindgen_prelude::*; 11 | use napi_derive::napi; 12 | 13 | #[napi] 14 | pub struct DocumentConverter { 15 | factory: ProviderFactory, 16 | html_renderer: HtmlRenderer, 17 | } 18 | 19 | impl Default for DocumentConverter { 20 | fn default() -> Self { 21 | Self::new() 22 | } 23 | } 24 | 25 | #[napi] 26 | impl DocumentConverter { 27 | #[napi(constructor)] 28 | pub fn new() -> Self { 29 | Self { 30 | factory: ProviderFactory::new(), 31 | html_renderer: HtmlRenderer::new(), 32 | } 33 | } 34 | 35 | #[napi] 36 | pub fn convert_buffer_to_html( 37 | &self, 38 | data: &[u8], 39 | doc_type: DocumentType, 40 | ) -> napi::Result { 41 | let provider = self.factory.get_provider(doc_type); 42 | 43 | let document: Document = provider 44 | .parse_buffer(data) 45 | .map_err(|e| Error::new(Status::GenericFailure, format!("Provider error: {e}")))?; 46 | 47 | let html = self.html_renderer.render(&document); 48 | Ok(html) 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /apps/api/src/scraper/scrapeURL/README.md: 
-------------------------------------------------------------------------------- 1 | # `scrapeURL` 2 | New URL scraper for Firecrawl 3 | 4 | ## Signal flow 5 | ```mermaid 6 | flowchart TD; 7 | scrapeURL-.->buildFallbackList; 8 | buildFallbackList-.->scrapeURLWithEngine; 9 | scrapeURLWithEngine-.->parseMarkdown; 10 | parseMarkdown-.->wasScrapeSuccessful{{Was scrape successful?}}; 11 | wasScrapeSuccessful-."No".->areEnginesLeft{{Are there engines left to try?}}; 12 | areEnginesLeft-."Yes, try next engine".->scrapeURLWithEngine; 13 | areEnginesLeft-."No".->NoEnginesLeftError[/NoEnginesLeftError/] 14 | wasScrapeSuccessful-."Yes".->asd; 15 | ``` 16 | 17 | ## Differences from `WebScraperDataProvider` 18 | - The job of `WebScraperDataProvider.validateInitialUrl` has been delegated to the zod layer above `scrapeUrl`. 19 | - `WebScraperDataProvider.mode` has no equivalent, only `scrape_url` is supported. 20 | - You may no longer specify multiple URLs. 21 | - Built on `v1` definitions, instead of `v0`. 22 | - PDFs are now converted straight to markdown using LlamaParse, instead of converting to just plaintext. 23 | - DOCXs are now converted straight to HTML (and then later to markdown) using mammoth, instead of converting to just plaintext. 24 | - Using new JSON Schema OpenAI API -- schema failures with LLM Extract will be basically non-existent. 
25 | -------------------------------------------------------------------------------- /.github/scripts/eval_run.py: -------------------------------------------------------------------------------- 1 | import requests 2 | import argparse 3 | import sys 4 | import os 5 | 6 | def main(): 7 | parser = argparse.ArgumentParser(description='Run evaluation benchmark') 8 | parser.add_argument('--label', required=True, help='Label for the evaluation run') 9 | parser.add_argument('--api-url', required=True, help='API URL') 10 | parser.add_argument('--api-key', required=True, help='API key') 11 | parser.add_argument('--experiment-id', required=True, help='Experiment ID') 12 | 13 | args = parser.parse_args() 14 | 15 | try: 16 | response = requests.post( 17 | f"{args.api_url}/run", 18 | json={ 19 | "experiment_id": args.experiment_id, 20 | "api_key": args.api_key, 21 | "label": args.label 22 | }, 23 | headers={ 24 | "Content-Type": "application/json" 25 | } 26 | ) 27 | 28 | response.raise_for_status() 29 | 30 | print("Evaluation run started successfully") 31 | print(f"Response: {response.json()}") 32 | 33 | except requests.exceptions.RequestException as e: 34 | print(f"Error running evaluation: {str(e)}", file=sys.stderr) 35 | sys.exit(1) 36 | 37 | if __name__ == "__main__": 38 | main() -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/generate-llmstxt-status.ts: -------------------------------------------------------------------------------- 1 | import { Response } from "express"; 2 | import { RequestWithAuth } from "./types"; 3 | import { 4 | getGeneratedLlmsTxt, 5 | getGeneratedLlmsTxtExpiry, 6 | } from "../../lib/generate-llmstxt/generate-llmstxt-redis"; 7 | 8 | export async function generateLLMsTextStatusController( 9 | req: RequestWithAuth<{ jobId: string }, any, any>, 10 | res: Response, 11 | ) { 12 | const generation = await getGeneratedLlmsTxt(req.params.jobId); 13 | const showFullText = 
generation?.showFullText ?? false; 14 | 15 | if (!generation) { 16 | return res.status(404).json({ 17 | success: false, 18 | error: "llmsTxt generation job not found", 19 | }); 20 | } 21 | 22 | let data: any = null; 23 | 24 | if (showFullText) { 25 | data = { 26 | llmstxt: generation.generatedText, 27 | llmsfulltxt: generation.fullText, 28 | }; 29 | } else { 30 | data = { 31 | llmstxt: generation.generatedText, 32 | }; 33 | } 34 | 35 | return res.status(200).json({ 36 | success: generation.status === "failed" ? false : true, 37 | 38 | data: data, 39 | status: generation.status, 40 | error: generation?.error ?? undefined, 41 | expiresAt: ( 42 | await getGeneratedLlmsTxtExpiry(req.params.jobId) 43 | ).toISOString(), 44 | }); 45 | } 46 | -------------------------------------------------------------------------------- /apps/api/utils/urldump.js: -------------------------------------------------------------------------------- 1 | require("dotenv").config(); 2 | 3 | //const baseUrl = "https://api.firecrawl.dev"; 4 | const baseUrl = "http://localhost:3002"; 5 | const crawlId = process.argv[2]; 6 | 7 | (async () => { 8 | let url = baseUrl + "/v1/crawl/" + crawlId; 9 | let urls = []; 10 | 11 | while (url) { 12 | let res; 13 | 14 | while (true) { 15 | try { 16 | res = (await (await fetch(url, { 17 | headers: { 18 | "Authorization": "Bearer " + process.env.TEST_API_KEY 19 | } 20 | })).json()); 21 | break; 22 | } catch (e) { 23 | console.error(e); 24 | } 25 | } 26 | 27 | console.log(res.data.length); 28 | if (res.data.length === 0) { 29 | break; 30 | } 31 | 32 | urls.push(...res.data.map(x => x.metadata.url ?? 
x.metadata.sourceURL)); 33 | 34 | url = res.next; 35 | if (url !== undefined) { 36 | const o = new URL(url) 37 | o.protocol = new URL(baseUrl).protocol; 38 | url = o.href; 39 | } 40 | } 41 | 42 | await require("fs/promises").writeFile(crawlId + "-urls.txt", urls.map(x => x.split("://").slice(1).join("://")).sort().join("\n")); 43 | })(); -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | # playwright-service 4 | - package-ecosystem: "pip" 5 | directory: "/apps/playwright-service" 6 | schedule: 7 | interval: "weekly" 8 | open-pull-requests-limit: 0 # Disable version updates 9 | commit-message: 10 | prefix: "apps/playwright-service" 11 | include: "scope" 12 | 13 | # python-sdk 14 | - package-ecosystem: "pip" 15 | directory: "/apps/python-sdk" 16 | schedule: 17 | interval: "weekly" 18 | open-pull-requests-limit: 0 # Disable version updates 19 | commit-message: 20 | prefix: "apps/python-sdk" 21 | include: "scope" 22 | 23 | # api 24 | - package-ecosystem: "npm" 25 | directory: "/apps/api" 26 | schedule: 27 | interval: "weekly" 28 | open-pull-requests-limit: 0 # Disable version updates 29 | commit-message: 30 | prefix: "apps/api" 31 | include: "scope" 32 | 33 | # test-suite 34 | - package-ecosystem: "npm" 35 | directory: "/apps/test-suite" 36 | schedule: 37 | interval: "weekly" 38 | open-pull-requests-limit: 0 # Disable version updates 39 | commit-message: 40 | prefix: "apps/test-suite" 41 | include: "scope" 42 | 43 | # GitHub Actions 44 | - package-ecosystem: "github-actions" 45 | directory: "/" 46 | schedule: 47 | interval: "weekly" 48 | -------------------------------------------------------------------------------- /.github/workflows/deploy-go-service.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Go Service to GHCR 2 | 3 | env: 4 | 
/*
  Advanced watcher example using the v2 FirecrawlClient.

  Run with:
    node --env-file=.env -r esbuild-register apps/js-sdk/example_watcher.ts
  or compile with your bundler, ensuring FIRECRAWL_API_KEY is set.
*/

import { FirecrawlClient } from "./firecrawl/src/v2/client";

/**
 * Starts a small crawl of https://example.com, attaches a watcher to it and
 * logs every "document", "snapshot" and "done" event the watcher emits.
 */
async function main() {
  // Falls back to a placeholder key when FIRECRAWL_API_KEY is unset or empty.
  const client = new FirecrawlClient({
    apiKey: process.env.FIRECRAWL_API_KEY || "fc-YOUR_API_KEY",
  });

  // Start a crawl and attach a watcher for real-time updates
  const { id: crawlId } = await client.startCrawl("https://example.com", { limit: 5 });
  console.log("Started crawl:", crawlId);

  // NOTE(review): pollInterval/timeout units are not visible here - confirm against the SDK.
  const watcher = client.watcher(crawlId, {
    kind: "crawl",
    pollInterval: 2,
    timeout: 120,
  });

  watcher.on("document", (doc) => {
    // Prefer the document's own url, then the source URL from its metadata.
    const label = (doc as any).url || (doc as any).metadata?.sourceURL || "";
    console.log("document:", label);
  });

  watcher.on("snapshot", (snap) => {
    console.log(`status: ${snap.status} (${snap.completed}/${snap.total})`);
  });

  watcher.on("done", (finalState) => {
    const docCount = finalState.data?.length ?? 0;
    console.log("done:", finalState.status, "docs:", docCount);
  });

  await watcher.start();
}

main().catch((err) => {
  // Log whatever main() rejected with and exit with a failing status code.
  console.error(err);
  process.exit(1);
});
from firecrawl.v2.types import ScrapeOptions, Location
from firecrawl.v2.methods.aio.batch import _prepare as _prepare_batch


class TestAsyncBatchRequestPreparation:
    """Checks on the payload dict returned by the async batch ``_prepare`` helper."""

    def test_urls_validation_and_conversion(self):
        """The given URLs appear unchanged under the ``urls`` key."""
        expected_urls = ["https://example.com", "http://foo.bar"]

        # Pass a copy so the comparison is against an independent list.
        payload = _prepare_batch(list(expected_urls), options=None)

        assert payload["urls"] == expected_urls

    def test_options_and_batch_fields(self):
        """Snake_case batch keyword arguments surface under camelCase payload keys."""
        hook_url = "https://hook.example"
        target_batch_id = "00000000-0000-0000-0000-000000000000"
        scrape_options = ScrapeOptions(formats=["markdown"], only_main_content=True)

        payload = _prepare_batch(
            ["https://example.com"],
            options=scrape_options,
            webhook=hook_url,
            append_to_id=target_batch_id,
            ignore_invalid_urls=True,
            max_concurrency=3,
            zero_data_retention=True,
            integration="zapier",
        )

        # String and numeric fields are compared by value.
        assert payload["webhook"] == hook_url
        assert payload["appendToId"] == target_batch_id
        assert payload["maxConcurrency"] == 3
        assert payload["integration"] == "zapier"

        # Boolean flags must be the literal True, not merely truthy.
        assert payload["ignoreInvalidURLs"] is True
        assert payload["zeroDataRetention"] is True
-------------------------------------------------------------------------------- /apps/rust-sdk/Cargo.toml: -------------------------------------------------------------------------------- 1 | [package] 2 | name = "firecrawl" 3 | author= "Mendable.ai" 4 | version = "1.2.2" 5 | edition = "2021" 6 | license = "MIT" 7 | homepage = "https://www.firecrawl.dev/" 8 | repository ="https://github.com/firecrawl/firecrawl" 9 | description = "Rust SDK for Firecrawl API." 10 | authors = ["Gergő Móricz ", "sanix-darker ", "kkharji "] 11 | 12 | [lib] 13 | path = "src/lib.rs" 14 | name = "firecrawl" 15 | 16 | # See more keys and their definitions at https://doc.rust-lang.org/cargo/reference/manifest.html 17 | [dependencies] 18 | reqwest = { version = "0.12.22", features = ["json", "blocking"] } 19 | serde = { version = "^1.0", features = ["derive"] } 20 | serde_json = "^1.0" 21 | serde_with = "^3.9" 22 | log = "^0.4" 23 | thiserror = "^1.0" 24 | uuid = { version = "^1.10", features = ["v4"] } 25 | tokio = { version = "^1", features = ["full"] } 26 | futures = "0.3.31" 27 | schemars = "0.8.22" 28 | tracing = ">=0.1.0,<0.2.0" 29 | 30 | [dev-dependencies] 31 | clippy = "^0.0.302" 32 | assert_matches = "^1.5" 33 | dotenvy = "^0.15" 34 | tokio = { version = "1", features = ["full"] } 35 | mockito = "1.7.0" 36 | clap = { version ="4.5.35", features = ["derive"] } 37 | axum = { version = "0.8.3", features = ["tokio", "macros"] } 38 | bat = "0.25.0" 39 | 40 | [build-dependencies] 41 | tokio = { version = "1", features = ["full"] } 42 | -------------------------------------------------------------------------------- /examples/kubernetes/cluster-install/README.md: -------------------------------------------------------------------------------- 1 | # Install Firecrawl on a Kubernetes Cluster (Simple Version) 2 | # Before installing 3 | 1. 
Set [secret.yaml](secret.yaml) and [configmap.yaml](configmap.yaml) and do not check in secrets 4 | - **Note**: If `REDIS_PASSWORD` is configured in the secret, please modify the ConfigMap to reflect the following format for `REDIS_URL` and `REDIS_RATE_LIMIT_URL`: 5 | ```yaml 6 | REDIS_URL: "redis://:password@host:port" 7 | REDIS_RATE_LIMIT_URL: "redis://:password@host:port" 8 | ``` 9 | Replace `password`, `host`, and `port` with the appropriate values. 10 | 11 | ## Install 12 | ```bash 13 | kubectl apply -f configmap.yaml 14 | kubectl apply -f secret.yaml 15 | kubectl apply -f playwright-service.yaml 16 | kubectl apply -f api.yaml 17 | kubectl apply -f worker.yaml 18 | kubectl apply -f nuq-worker.yaml 19 | kubectl apply -f nuq-postgres.yaml 20 | kubectl apply -f redis.yaml 21 | ``` 22 | 23 | 24 | # Port Forwarding for Testing 25 | ```bash 26 | kubectl port-forward svc/api 3002:3002 -n dev 27 | ``` 28 | 29 | # Delete Firecrawl 30 | ```bash 31 | kubectl delete -f configmap.yaml 32 | kubectl delete -f secret.yaml 33 | kubectl delete -f playwright-service.yaml 34 | kubectl delete -f api.yaml 35 | kubectl delete -f worker.yaml 36 | kubectl delete -f nuq-worker.yaml 37 | kubectl delete -f nuq-postgres.yaml 38 | kubectl delete -f redis.yaml 39 | ``` 40 | -------------------------------------------------------------------------------- /examples/kubernetes/cluster-install/playwright-service.yaml: -------------------------------------------------------------------------------- 1 | apiVersion: v1 2 | kind: ConfigMap 3 | metadata: 4 | name: playwright-service-config 5 | data: 6 | PORT: "3000" 7 | --- 8 | apiVersion: apps/v1 9 | kind: Deployment 10 | metadata: 11 | name: playwright-service 12 | spec: 13 | replicas: 1 14 | selector: 15 | matchLabels: 16 | app: playwright-service 17 | template: 18 | metadata: 19 | labels: 20 | app: playwright-service 21 | spec: 22 | imagePullSecrets: 23 | - name: docker-registry-secret 24 | containers: 25 | - name: playwright-service 26 | image: 
ghcr.io/firecrawl/playwright-service:latest 27 | imagePullPolicy: Always 28 | ports: 29 | - containerPort: 3000 30 | envFrom: 31 | - configMapRef: 32 | name: playwright-service-config 33 | livenessProbe: 34 | httpGet: 35 | path: /health 36 | port: 3000 37 | initialDelaySeconds: 30 38 | periodSeconds: 30 39 | timeoutSeconds: 5 40 | successThreshold: 1 41 | failureThreshold: 3 42 | --- 43 | apiVersion: v1 44 | kind: Service 45 | metadata: 46 | name: playwright-service 47 | spec: 48 | selector: 49 | app: playwright-service 50 | ports: 51 | - protocol: TCP 52 | port: 3000 53 | targetPort: 3000 54 | -------------------------------------------------------------------------------- /apps/ui/ingestion-ui/src/App.tsx: -------------------------------------------------------------------------------- 1 | import { useState } from "react"; 2 | import FirecrawlComponent from "./components/ingestion"; 3 | import FirecrawlComponentV1 from "./components/ingestionV1"; 4 | import { RadioGroup, RadioGroupItem } from "@/components/ui/radio-group"; 5 | import { Label } from "@/components/ui/label"; 6 | 7 | function App() { 8 | const [selectedComponent, setSelectedComponent] = useState<"v0" | "v1">("v1"); 9 | 10 | return ( 11 | <> 12 |
13 | setSelectedComponent(value as "v0" | "v1")} 16 | className="flex space-x-6 mt-6" 17 | > 18 |
19 | 20 | 21 |
22 |
23 | 24 | 25 |
26 |
27 |
import os
import pytest
from dotenv import load_dotenv
from firecrawl import AsyncFirecrawl


load_dotenv()


def _require_env(name):
    """Raise ValueError when environment variable ``name`` is unset or empty."""
    if not os.getenv(name):
        raise ValueError(f"{name} is not set")


# Checked at import time, in this order, so a missing setting stops the module
# before any test runs.
_require_env("API_KEY")
_require_env("API_URL")


def _new_client():
    """Build an AsyncFirecrawl client from the API_KEY / API_URL environment variables."""
    return AsyncFirecrawl(
        api_key=os.getenv("API_KEY"),
        api_url=os.getenv("API_URL"),
    )


@pytest.mark.asyncio
async def test_async_extract_minimal():
    """Extract with only URLs and a prompt returns a non-None result."""
    client = _new_client()

    res = await client.extract(
        urls=["https://docs.firecrawl.dev"],
        prompt="Extract title",
    )

    assert res is not None


@pytest.mark.asyncio
async def test_async_extract_with_schema_and_options():
    """Extract with a JSON schema and the optional flags returns a non-None result."""
    client = _new_client()
    schema = {
        "type": "object",
        "properties": {"title": {"type": "string"}},
        "required": ["title"],
    }

    res = await client.extract(
        urls=["https://docs.firecrawl.dev"],
        prompt="Extract title",
        schema=schema,
        system_prompt="You are a helpful extractor",
        allow_external_links=False,
        enable_web_search=False,
        show_sources=False,
        integration="_e2e-test",
        # agent={"model": "FIRE-1", "prompt": "Extract title"},  # Skipping agent test in CI
    )

    assert res is not None