├── .gitignore
├── README.md
├── app
│   ├── (home)
│   │   ├── constants.ts
│   │   ├── data-mock.ts
│   │   ├── layout.tsx
│   │   └── page.tsx
│   ├── [...slug]
│   │   └── route.ts
│   ├── api
│   │   ├── map
│   │   │   └── route.ts
│   │   └── service
│   │       └── route.ts
│   ├── favicon.ico
│   ├── fonts
│   │   ├── GeistMonoVF.woff
│   │   └── GeistVF.woff
│   ├── globals.css
│   └── layout.tsx
├── components.json
├── components
│   ├── theme-provider.tsx
│   └── ui
│       ├── button.tsx
│       ├── credenza.tsx
│       ├── dialog.tsx
│       ├── drawer.tsx
│       ├── input.tsx
│       ├── label.tsx
│       ├── popover.tsx
│       ├── scroll-area.tsx
│       ├── switch.tsx
│       ├── toast.tsx
│       └── toaster.tsx
├── hooks
│   ├── use-media-query.ts
│   └── use-toast.ts
├── lib
│   └── utils.ts
├── next.config.ts
├── package-lock.json
├── package.json
├── pnpm-lock.yaml
├── postcss.config.mjs
├── public
│   ├── file.svg
│   ├── globe.svg
│   ├── next.svg
│   ├── vercel.svg
│   └── window.svg
├── tailwind.config.ts
└── tsconfig.json

/.gitignore:
--------------------------------------------------------------------------------
# See https://help.github.com/articles/ignoring-files/ for more about ignoring files.

# dependencies
/node_modules
/.pnp
.pnp.*
.yarn/*
!.yarn/patches
!.yarn/plugins
!.yarn/releases
!.yarn/versions

# testing
/coverage

# next.js
/.next/
/out/

# production
/build

# misc
.DS_Store
*.pem

# debug
npm-debug.log*
yarn-debug.log*
yarn-error.log*

# env files (can opt-in for committing if needed)
.env*

# vercel
.vercel

# typescript
*.tsbuildinfo
next-env.d.ts

--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
# llms.txt Generator 🚀

Generate consolidated text files from websites for LLM training and inference. Powered by [@firecrawl_dev](https://twitter.com/firecrawl_dev) for web crawling and GPT-4o-mini for text processing.

## Features
- Crawls websites and combines content into a single text file
- Generates both standard (`llms.txt`) and full (`llms-full.txt`) versions
- Web interface and API access available
- No API key required for basic usage

## Usage

### Web Interface
Visit [llmstxt.firecrawl.dev](https://llmstxt.firecrawl.dev) to generate files through the browser.

### API Endpoint
```
GET https://llmstxt.firecrawl.dev/[YOUR_URL_HERE]
```

Note: Processing may take several minutes due to crawling and LLM operations.
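For example, you can call the endpoint from any HTTP client. Below is a minimal TypeScript sketch — `fetchLlmsTxt` is a hypothetical helper, not part of this repo; the `/full` variant and the optional `FIRECRAWL_API_KEY` query parameter follow the URLs shown in `app/(home)/data-mock.ts`, and the plain-text response handling is an assumption:

```ts
// Hypothetical helper (not part of this repo): fetch a generated llms.txt.
// Pass `full: true` to request llms-full.txt via the /full variant, and an
// optional Firecrawl API key to lift the limits of keyless usage (both per
// the notes in app/(home)/data-mock.ts).
async function fetchLlmsTxt(
  site: string,
  opts: { full?: boolean; apiKey?: string } = {},
): Promise<string> {
  const path = opts.full ? `${site}/full` : site;
  const query = opts.apiKey
    ? `?FIRECRAWL_API_KEY=${encodeURIComponent(opts.apiKey)}`
    : "";
  const res = await fetch(`https://llmstxt.firecrawl.dev/${path}${query}`);
  if (!res.ok) throw new Error(`llms.txt generation failed: HTTP ${res.status}`);
  return res.text(); // assumed to be the generated text file
}

// Usage — expect this to take a while: crawling and LLM processing run server-side.
fetchLlmsTxt("firecrawl.dev").then(console.log).catch(console.error);
```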

## Local Development

### Prerequisites
Create a `.env` file with the following variables:
```
FIRECRAWL_API_KEY=
SUPABASE_URL=
SUPABASE_KEY=
OPENAI_API_KEY=
```

### Installation
```bash
npm install
npm run dev
```

--------------------------------------------------------------------------------
/app/(home)/constants.ts:
--------------------------------------------------------------------------------
import { cn } from "@/lib/utils";

export const CN_SMOOTH_BORDER = cn("border-[.75px] border-border");
export const CN_SMOOTH_SHADOW = cn(
  "[box-shadow:0_0_0_1px_#e3e1de66,0_1px_2px_#5f4a2e14,0_4px_6px_#5f4a2e0a,0_40px_40px_-24px_#684b2514] dark:[box-shadow:none]",
);
--------------------------------------------------------------------------------
/app/(home)/data-mock.ts:
--------------------------------------------------------------------------------
export const dataMock = {
  "apiMessage": "*Note: This is llmstxt.txt is not complete, please enter a Firecrawl API key to get the entire llmstxt.txt at llmstxt.firecrawl.dev or you can access llms.txt via API with curl -X GET 'http://llmstxt.firecrawl.dev/firecrawl.dev?FIRECRAWL_API_KEY=YOUR_API_KEY' or llms-full.txt via API with curl -X GET 'http://llmstxt.firecrawl.dev/firecrawl.dev/full?FIRECRAWL_API_KEY=YOUR_API_KEY'\n\n# firecrawl.dev llms.txt\n\n- [llms.txt Generator](https://llmstxt.firecrawl.dev/): Generate text using the llms.txt Generator or API.\n- [Data Extraction Playground](https://www.firecrawl.dev/playground): Web data extraction playground: scrape, crawl, map, and extract data using prompts. Try our /extract beta!\n- [Firecrawl Integrations](https://docs.firecrawl.dev/integrations): Integrate Firecrawl with Langchain, LlamaIndex, Dify, Flowise, Crew AI, Langflow, Camel AI, and RAGaaS for enhanced web data capabilities.\n- [Firecrawl Pricing Plans](https://www.firecrawl.dev/pricing): Flexible web data scraping pricing plans; free, hobby, standard, growth and enterprise options available with various credit and speed options.\n- [Firecrawl Sign In](https://www.firecrawl.dev/signin/password_signin): Sign in to Firecrawl using email/password, magic link, or OAuth (GitHub, Google).\n- [Firecrawl Blog](https://www.firecrawl.dev/blog): Firecrawl blog: tutorials, product updates, customer stories & more on web scraping and AI data extraction.\n- [Firecrawl API Docs](https://docs.firecrawl.dev/introduction): Firecrawl API documentation: crawl and convert websites to markdown, use SDKs, LLM frameworks, and more.\n- [AI-Powered Web Scraping API](https://www.firecrawl.dev/extract): Effortlessly extract structured data from websites using natural language prompts via a single API call.\n- [Firecrawl: Web Data Extraction](https://www.firecrawl.dev/): Extract web data with a prompt; reliable web scraper; clean markdown output; LLM-ready data; trusted by top companies; open-source.\n- [Firecrawl Changelog: API Updates & New Features](https://www.firecrawl.dev/changelog): Release notes, bug fixes, new features, and improvements for Firecrawl's API and SDKs.\n",
  "fullApiMessage": "*Note: This is llms-full.txt is not complete, please enter a Firecrawl API key to get the entire llms-full.txt at llmstxt.firecrawl.dev or you can access llms.txt via API with curl -X GET 'http://llmstxt.firecrawl.dev/firecrawl.dev?FIRECRAWL_API_KEY=YOUR_API_KEY' or llms-full.txt via API with curl -X GET 
'http://llmstxt.firecrawl.dev/firecrawl.dev/full?FIRECRAWL_API_KEY=YOUR_API_KEY'\n\n# firecrawl.dev llms-full.txt\n\n# llms.txt Generator\n\nGenerate\n\n[Use your Firecrawl key 🔥](https://llmstxt.firecrawl.dev/#) \\| [Use the llms.txt Generator API](https://llmstxt.firecrawl.dev/#)Introducing /extract - Get web data with a prompt [Try now](https://www.firecrawl.dev/extract)\n\n# Preview\n\nTake a look at the API response (Preview limited to 5 pages)\n\nSingle URL(/scrape)\n\nCrawl(/crawl)\n\nMap(/map)\n\nExtract(/extract)Beta\n\nScrape\n\nURL\n\nGet CodeRun\n\n### Options\n\nStart exploring with our playground![Firecrawl Docs home page![light logo](https://mintlify.s3.us-west-1.amazonaws.com/firecrawl/logo/light.svg)![dark logo](https://mintlify.s3.us-west-1.amazonaws.com/firecrawl/logo/dark.svg)](https://firecrawl.dev/)\n\nv1\n\nSearch or ask...\n\nCtrl K\n\nSearch...\n\nNavigation\n\nGet Started\n\nIntegrations\n\n[Documentation](https://docs.firecrawl.dev/introduction) [SDKs](https://docs.firecrawl.dev/sdks/overview) [Learn](https://www.firecrawl.dev/blog/category/tutorials) [API Reference](https://docs.firecrawl.dev/api-reference/introduction)\n\n[![Firecrawl Document Loader](https://raw.githubusercontent.com/hellofirecrawl/docs/main/images/integrations/langchain.png)\\\\\n\\\\\n**Langchain** \\\\\n\\\\\nCheck out Firecrawl Document Loader](https://docs.firecrawl.dev/integrations/langchain) [![Firecrawl Reader](https://raw.githubusercontent.com/hellofirecrawl/docs/main/images/integrations/llamaindex.jpeg)\\\\\n\\\\\n**LlamaIndex** \\\\\n\\\\\nCheck out Firecrawl Reader](https://docs.firecrawl.dev/integrations/llamaindex) [![Dify](https://raw.githubusercontent.com/hellofirecrawl/docs/main/images/integrations/dify.jpeg)\\\\\n\\\\\n**Dify** \\\\\n\\\\\nExtract structured data from web pages](https://docs.firecrawl.dev/integrations/dify) [![Flowise](https://raw.githubusercontent.com/hellofirecrawl/docs/main/images/integrations/flowise.png)\\\\\n\\\\\n**Flowise** \\\\\n\\\\\nSync data directly from websites](https://docs.firecrawl.dev/integrations/flowise) [![Crew AI](https://raw.githubusercontent.com/hellofirecrawl/docs/main/images/integrations/crewai.png)\\\\\n\\\\\n**CrewAI** \\\\\n\\\\\nCoordinate AI agents for web scraping tasks](https://docs.firecrawl.dev/integrations/crewai) [![Langflow](https://raw.githubusercontent.com/hellofirecrawl/docs/main/images/integrations/langflow.webp)\\\\\n\\\\\n**Langflow** \\\\\n\\\\\nDesign visual web data pipelines](https://docs.firecrawl.dev/integrations/langflow) [![CamelAI](https://raw.githubusercontent.com/hellofirecrawl/docs/main/images/integrations/camelai.jpg)\\\\\n\\\\\n**Camel AI** \\\\\n\\\\\nDesign visual web data pipelines](https://docs.firecrawl.dev/integrations/camelai) [![RAGaaS](https://raw.githubusercontent.com/hellofirecrawl/docs/main/images/integrations/ragaas.png)\\\\\n\\\\\n**RAGaaS** \\\\\n\\\\\nBuild RAG applications with web data](https://docs.firecrawl.dev/integrations/ragaas)\n\n[Suggest edits](https://github.com/hellofirecrawl/docs/edit/main/integrations.mdx) [Raise issue](https://github.com/hellofirecrawl/docs/issues/new?title=Issue%20on%20docs&body=Path:%20/integrations)\n\n[Rate Limits](https://docs.firecrawl.dev/rate-limits) [Advanced Scraping Guide](https://docs.firecrawl.dev/advanced-scraping-guide)\n\n![Firecrawl Document Loader](https://docs.firecrawl.dev/integrations)\n\n![Firecrawl 
Reader](https://docs.firecrawl.dev/integrations)\n\n![Dify](https://docs.firecrawl.dev/integrations)\n\n![Flowise](https://docs.firecrawl.dev/integrations)\n\n![Crew AI](https://docs.firecrawl.dev/integrations)\n\n![CamelAI](https://docs.firecrawl.dev/integrations)\n\n![RAGaaS](https://docs.firecrawl.dev/integrations)Introducing /extract - Get web data with a prompt [Try now](https://www.firecrawl.dev/extract)\n\n## Flexible Pricing\n\nStart for free, then scale as you grow\n\nStandard [Extract](https://www.firecrawl.dev/extract#pricing)\n\nMonthly\n\nYearly\n\n20% off\\- 2 months free\n\n## Free Plan\n\n500 credits\n\n$0 one-time\n\nNo credit card requiredGet Started\n\n- Scrape 500 pages\n- 10 /scrape per min\n- 1 /crawl per min\n\n## Hobby\n\n3,000 creditsper month\n\n$16/month\n\n$228/yr$190/yr(Billed annually)\n\nSubscribe$190/yr\n\n- Scrape 3,000 pages\\*\n- 20 /scrape per min\n- 3 /crawl per min\n- 1 seat\n\n## StandardMost Popular\n\n100,000 creditsper month\n\n$83/month\n\n$1188/yr$990/yr(Billed annually)\n\nSubscribe$990/yr\n\n- Scrape 100,000 pages\\*\n- 100 /scrape per min\n- 10 /crawl per min\n- 3 seats\n- Standard Support\n\n## Growth\n\n500,000 creditsper month\n\n$333/month\n\n$4788/yr$3990/yr(Billed annually)\n\nSubscribe$3990/yr\n\n- Scrape 500,000 pages\\*\n- 1000 /scrape per min\n- 50 /crawl per min\n- 5 seats\n- Priority Support\n\n## Add-ons\n\n### Auto Recharge Credits\n\nAutomatically recharge your credits when you run low.\n\n$11per 1000 credits\n\nEnable Auto Recharge\n\nSubscribe to a plan to enable auto recharge\n\n### Credit Pack\n\nPurchase a pack of additional monthly credits.\n\n$9/mo for 1000 credits\n\nPurchase Credit Pack\n\nSubscribe to a plan to purchase credit packs\n\n## Enterprise Plan\n\nUnlimited credits. Custom RPMs.\n\nTalk to us\n\n- Bulk discounts\n- Top priority support\n- Custom concurrency limits\n- Feature Acceleration\n- SLAs\n- Custom seats\n\n\\\\* a /scrape refers to the [scrape](https://docs.firecrawl.dev/api-reference/endpoint/scrape) API endpoint. Structured extraction costs vary. 
See [credits table](https://www.firecrawl.dev/pricing#credits).\n\n\\\\* a /crawl refers to the [crawl](https://docs.firecrawl.dev/api-reference/endpoint/crawl) API endpoint.\n\n## API Credits\n\nCredits are consumed for each API request, varying by endpoint and feature.\n\n| Features | Credits |\n| --- | --- |\n| Scrape(/scrape) | 1 / page |\n| with JSON format | 5 / page |\n| Crawl(/crawl) | 1 / page |\n| Map(/map) | 1 / call |\n| Search(/search) | 1 / page |\n| Extract(/extract) | New [Separate Pricing](https://www.firecrawl.dev/extract#pricing) |\n\nOur wall of love\n\n## Don't take our word for it\n\n![Morgan Linton](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-02.4585097e.jpg&w=96&q=75)\n\nMorgan Linton\n\n[@morganlinton](https://x.com/morganlinton/status/1839454165703204955)\n\nIf you're coding with AI, and haven't discovered @firecrawl\\_dev yet, prepare to have your mind blown 🤯\n\n![Chris DeWeese](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-09.a66dcc26.jpg&w=96&q=75)\n\nChris DeWeese\n\n[@ChrisDevApps](https://x.com/ChrisDevApps/status/1853587120406876601)\n\nStarted using @firecrawl\\_dev for a project, I wish I used this sooner.\n\n![Bardia Pourvakil](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-01.025350bc.jpeg&w=96&q=75)\n\nBardia Pourvakil\n\n[@thepericulum](https://twitter.com/thepericulum/status/1781397799487078874)\n\nThe Firecrawl team ships. I wanted types for their node SDK, and less than an hour later, I got them.\n\n![Tom Reppelin](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-04.1f2b4caf.jpg&w=96&q=75)\n\nTom Reppelin\n\n[@TomReppelin](https://x.com/TomReppelin/status/1844382491014201613)\n\nI found gold today. Thank you @firecrawl\\_dev\n\n![latentsauce 🧘🏽](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-07.c2285d35.jpeg&w=96&q=75)\n\nlatentsauce 🧘🏽\n\n[@latentsauce](https://twitter.com/latentsauce/status/1781738253927735331)\n\nFirecrawl simplifies data preparation significantly, exactly what I was hoping for. Thank you Firecrawl ❤️❤️❤️\n\n![Morgan Linton](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-02.4585097e.jpg&w=96&q=75)\n\nMorgan Linton\n\n[@morganlinton](https://x.com/morganlinton/status/1839454165703204955)\n\nIf you're coding with AI, and haven't discovered @firecrawl\\_dev yet, prepare to have your mind blown 🤯\n\n![Chris DeWeese](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-09.a66dcc26.jpg&w=96&q=75)\n\nChris DeWeese\n\n[@ChrisDevApps](https://x.com/ChrisDevApps/status/1853587120406876601)\n\nStarted using @firecrawl\\_dev for a project, I wish I used this sooner.\n\n![Bardia Pourvakil](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-01.025350bc.jpeg&w=96&q=75)\n\nBardia Pourvakil\n\n[@thepericulum](https://twitter.com/thepericulum/status/1781397799487078874)\n\nThe Firecrawl team ships. I wanted types for their node SDK, and less than an hour later, I got them.\n\n![Tom Reppelin](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-04.1f2b4caf.jpg&w=96&q=75)\n\nTom Reppelin\n\n[@TomReppelin](https://x.com/TomReppelin/status/1844382491014201613)\n\nI found gold today. 
Thank you @firecrawl\\_dev\n\n![latentsauce 🧘🏽](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-07.c2285d35.jpeg&w=96&q=75)\n\nlatentsauce 🧘🏽\n\n[@latentsauce](https://twitter.com/latentsauce/status/1781738253927735331)\n\nFirecrawl simplifies data preparation significantly, exactly what I was hoping for. Thank you Firecrawl ❤️❤️❤️\n\n![Michael Ning](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-05.76d7cd3e.png&w=96&q=75)\n\nMichael Ning\n\nFirecrawl is impressive, saving us 2/3 the tokens and allowing gpt3.5turbo use over gpt4. Major savings in time and money.\n\n![Alex Reibman 🖇️](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-06.4ee7cf5a.jpeg&w=96&q=75)\n\nAlex Reibman 🖇️\n\n[@AlexReibman](https://twitter.com/AlexReibman/status/1780299595484131836)\n\nMoved our internal agent's web scraping tool from Apify to Firecrawl because it benchmarked 50x faster with AgentOps.\n\n![Alex Fazio](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-08.af684f18.jpg&w=96&q=75)\n\nAlex Fazio\n\n[@alxfazio](https://x.com/alxfazio/status/1826731977283641615)\n\nSemantic scraping with Firecrawl is 🔥!\n\n![Matt Busigin](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-03.6f60bcb9.jpg&w=96&q=75)\n\nMatt Busigin\n\n[@mbusigin](https://x.com/mbusigin/status/1836065372010656069)\n\nFirecrawl is dope. Congrats guys 👏\n\n![Michael Ning](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-05.76d7cd3e.png&w=96&q=75)\n\nMichael Ning\n\nFirecrawl is impressive, saving us 2/3 the tokens and allowing gpt3.5turbo use over gpt4. Major savings in time and money.\n\n![Alex Reibman 🖇️](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-06.4ee7cf5a.jpeg&w=96&q=75)\n\nAlex Reibman 🖇️\n\n[@AlexReibman](https://twitter.com/AlexReibman/status/1780299595484131836)\n\nMoved our internal agent's web scraping tool from Apify to Firecrawl because it benchmarked 50x faster with AgentOps.\n\n![Alex Fazio](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-08.af684f18.jpg&w=96&q=75)\n\nAlex Fazio\n\n[@alxfazio](https://x.com/alxfazio/status/1826731977283641615)\n\nSemantic scraping with Firecrawl is 🔥!\n\n![Matt Busigin](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-03.6f60bcb9.jpg&w=96&q=75)\n\nMatt Busigin\n\n[@mbusigin](https://x.com/mbusigin/status/1836065372010656069)\n\nFirecrawl is dope. Congrats guys 👏Introducing /extract - Get web data with a prompt [Try now](https://www.firecrawl.dev/extract)\n\n🔥\n\n### Sign In\n\nEmailPassword\n\nSign in\n\n[Forgot your password?](https://www.firecrawl.dev/signin/forgot_password)\n\n[Sign in via magic link](https://www.firecrawl.dev/signin/email_signin)\n\n[Don't have an account? 
Sign up](https://www.firecrawl.dev/signin/signup)\n\nOAuth sign-in\n\nGitHubGoogleIntroducing /extract - Get web data with a prompt [Try now](https://www.firecrawl.dev/extract)\n\n[![Introducing /extract: Get structured web data with just a prompt](https://www.firecrawl.dev/images/blog/firecrawl-extract-endpoint.png)\\\\\n\\\\\nJanuary 20, 2025\\\\\n\\\\\n**Introducing /extract: Get structured web data with just a prompt** \\\\\n\\\\\nOur new /extract endpoint harnesses AI to turn any website into structured data for your applications seamlessly.\\\\\n\\\\\nBy Eric Ciarla](https://www.firecrawl.dev/blog/introducing-extract-open-beta)\n\n## Explore Articles\n\n[All](https://www.firecrawl.dev/blog) [Product Updates](https://www.firecrawl.dev/blog/category/product) [Tutorials](https://www.firecrawl.dev/blog/category/tutorials) [Customer Stories](https://www.firecrawl.dev/blog/category/customer-stories) [Tips & Resources](https://www.firecrawl.dev/blog/category/tips-and-resources)\n\n[![Building a Trend Detection System with AI in TypeScript: A Step-by-Step Guide](https://www.firecrawl.dev/images/blog/trend_finder/trend-finder-typescript.jpg)\\\\\n**Building a Trend Detection System with AI in TypeScript: A Step-by-Step Guide** \\\\\nLearn how to build an automated trend detection system in TypeScript that monitors social media and news sites, analyzes content with AI, and sends real-time Slack alerts using Firecrawl, Together AI, and GitHub Actions.\\\\\n\\\\\nBy Bex TuychievJan 11, 2025](https://www.firecrawl.dev/blog/trend-finder-typescript)\n\n[![How to Build an Automated Competitor Price Monitoring System with Python](https://www.firecrawl.dev/images/blog/competitor_price_scraping/competitor-price-scraping.jpg)\\\\\n**How to Build an Automated Competitor Price Monitoring System with Python** \\\\\nLearn how to build an automated competitor price monitoring system in Python that tracks prices across e-commerce sites, provides real-time comparisons, and maintains price history using Firecrawl, Streamlit, and GitHub Actions.\\\\\n\\\\\nBy Bex TuychievJan 6, 2025](https://www.firecrawl.dev/blog/automated-competitor-price-scraping)\n\n[![How Stack AI Uses Firecrawl to Power AI Agents](https://www.firecrawl.dev/images/blog/customer-story-stackai.jpg)\\\\\n**How Stack AI Uses Firecrawl to Power AI Agents** \\\\\nDiscover how Stack AI leverages Firecrawl to seamlessly feed agentic AI workflows with high-quality web data.\\\\\n\\\\\nBy Jonathan KleimanJan 3, 2025](https://www.firecrawl.dev/blog/how-stack-ai-uses-firecrawl-to-power-ai-agents)\n\n[![BeautifulSoup4 vs. Scrapy - A Comprehensive Comparison for Web Scraping in Python](https://www.firecrawl.dev/images/blog/bs4_scrapy/bs4-vs-scrapy-comparison.jpg)\\\\\n**BeautifulSoup4 vs. Scrapy - A Comprehensive Comparison for Web Scraping in Python** \\\\\nLearn the key differences between BeautifulSoup4 and Scrapy for web scraping in Python. Compare their features, performance, and use cases to choose the right tool for your web scraping needs.\\\\\n\\\\\nBy Bex TuychievDec 24, 2024](https://www.firecrawl.dev/blog/beautifulsoup4-vs-scrapy-comparison)\n\n[![15 Python Web Scraping Projects: From Beginner to Advanced](https://www.firecrawl.dev/images/blog/web_scraping_projects/python-web-scraping-projects.jpg)\\\\\n**15 Python Web Scraping Projects: From Beginner to Advanced** \\\\\nExplore 15 hands-on web scraping projects in Python, from beginner to advanced level. 
Learn essential concepts like data extraction, concurrent processing, and distributed systems while building real-world applications.\\\\\n\\\\\nBy Bex TuychievDec 17, 2024](https://www.firecrawl.dev/blog/python-web-scraping-projects)\n\n[![How to Deploy Python Web Scrapers](https://www.firecrawl.dev/images/blog/deploying-web-scrapers/deploy-web-scrapers.jpg)\\\\\n**How to Deploy Python Web Scrapers** \\\\\nLearn how to deploy Python web scrapers using GitHub Actions, Heroku, PythonAnywhere and more.\\\\\n\\\\\nBy Bex TuychievDec 16, 2024](https://www.firecrawl.dev/blog/deploy-web-scrapers)\n\n[![Why Companies Need a Data Strategy for Generative AI](https://www.firecrawl.dev/images/blog/data-strategy.jpg)\\\\\n**Why Companies Need a Data Strategy for Generative AI** \\\\\nLearn why a well-defined data strategy is essential for building robust, production-ready generative AI systems, and discover practical steps for curation, maintenance, and integration.\\\\\n\\\\\nBy Eric CiarlaDec 15, 2024](https://www.firecrawl.dev/blog/why-companies-need-a-data-strategy-for-generative-ai)\n\n[![Data Enrichment: A Complete Guide to Enhancing Your Data Quality](https://www.firecrawl.dev/images/blog/data_enrichment_guide/complete-data-enrichment-guide.jpg)\\\\\n**Data Enrichment: A Complete Guide to Enhancing Your Data Quality** \\\\\nLearn how to enrich your data quality with a comprehensive guide covering data enrichment tools, best practices, and real-world examples. Discover how to leverage modern solutions like Firecrawl to automate data collection, validation, and integration for better business insights.\\\\\n\\\\\nBy Bex TuychievDec 14, 2024](https://www.firecrawl.dev/blog/complete-guide-to-data-enrichment)\n\n[![A Complete Guide Scraping Authenticated Websites with cURL and Firecrawl](https://www.firecrawl.dev/images/blog/complete-guide-to-curl-authentication-firecrawl-api.jpg)\\\\\n**A Complete Guide Scraping Authenticated Websites with cURL and Firecrawl** \\\\\nLearn how to scrape login-protected websites using cURL and Firecrawl API. Step-by-step guide covering basic auth, tokens, and cookies with real examples.\\\\\n\\\\\nBy Rudrank RiyamDec 13, 2024](https://www.firecrawl.dev/blog/complete-guide-to-curl-authentication-firecrawl-api)\n\n[![Building an Automated Price Tracking Tool](https://www.firecrawl.dev/images/blog/price-tracking/price-tracking.jpg)\\\\\n**Building an Automated Price Tracking Tool** \\\\\nBuild an automated e-commerce price tracker in Python. 
Learn web scraping, price monitoring, and automated alerts using Firecrawl, Streamlit, PostgreSQL.\\\\\n\\\\\nBy Bex TuychievDec 9, 2024](https://www.firecrawl.dev/blog/automated-price-tracking-tutorial-python)\n\n[![Evaluating Web Data Extraction with CrawlBench](https://www.firecrawl.dev/images/blog/crawlbench/crawlbench.jpg)\\\\\n**Evaluating Web Data Extraction with CrawlBench** \\\\\nAn in-depth exploration of CrawlBench, a benchmark for testing LLM-based web data extraction.\\\\\n\\\\\nBy SwyxDec 9, 2024](https://www.firecrawl.dev/blog/crawlbench-llm-extraction)\n\n[![How Cargo Empowers GTM Teams with Firecrawl](https://www.firecrawl.dev/images/blog/customer-story-cargo.jpg)\\\\\n**How Cargo Empowers GTM Teams with Firecrawl** \\\\\nSee how Cargo uses Firecrawl to instantly analyze webpage content and power Go-To-Market workflows for their users.\\\\\n\\\\\nBy Tariq MinhasDec 6, 2024](https://www.firecrawl.dev/blog/how-cargo-empowers-gtm-teams-with-firecrawl)\n\n[![Web Scraping Automation: How to Run Scrapers on a Schedule](https://www.firecrawl.dev/images/blog/scheduling-scrapers-images/automated-web-scraping-free-2025.jpg)\\\\\n**Web Scraping Automation: How to Run Scrapers on a Schedule** \\\\\nLearn how to automate web scraping in Python using free tools like schedule, asyncio, cron jobs and GitHub Actions. This comprehensive guide covers local and cloud-based scheduling methods to run scrapers reliably in 2025.\\\\\n\\\\\nBy Bex TuychievDec 5, 2024](https://www.firecrawl.dev/blog/automated-web-scraping-free-2025)\n\n[![How to Generate Sitemaps Using Firecrawl's /map Endpoint: A Complete Guide](https://www.firecrawl.dev/images/blog/generating-sitemaps/how-to-generate-sitemap-using-firecrawl-map-endpoint.jpg)\\\\\n**How to Generate Sitemaps Using Firecrawl's /map Endpoint: A Complete Guide** \\\\\nLearn how to generate XML and visual sitemaps using Firecrawl's /map endpoint. Step-by-step guide with Python code examples, performance comparisons, and interactive visualization techniques for effective website mapping.\\\\\n\\\\\nBy Bex TuychievNov 29, 2024](https://www.firecrawl.dev/blog/how-to-generate-sitemaps-using-firecrawl-map-endpoint)\n\n[![How to Use Firecrawl's Scrape API: Complete Web Scraping Tutorial](https://www.firecrawl.dev/images/blog/scrape-masterclass/mastering-scrape.jpg)\\\\\n**How to Use Firecrawl's Scrape API: Complete Web Scraping Tutorial** \\\\\nLearn how to scrape websites using Firecrawl's /scrape endpoint. Master JavaScript rendering, structured data extraction, and batch operations with Python code examples.\\\\\n\\\\\nBy Bex TuychievNov 25, 2024](https://www.firecrawl.dev/blog/mastering-firecrawl-scrape-endpoint)\n\n[![How to Create an llms.txt File for Any Website](https://www.firecrawl.dev/images/blog/How-to-Create-an-llms-txt-File-for-Any-Website.jpg)\\\\\n**How to Create an llms.txt File for Any Website** \\\\\nLearn how to generate an llms.txt file for any website using the llms.txt Generator and Firecrawl.\\\\\n\\\\\nBy Eric CiarlaNov 22, 2024](https://www.firecrawl.dev/blog/How-to-Create-an-llms-txt-File-for-Any-Website)\n\n[![Mastering Firecrawl's Crawl Endpoint: A Complete Web Scraping Guide](https://www.firecrawl.dev/images/blog/crawl-masterclass/images/mastering-crawl.jpg)\\\\\n**Mastering Firecrawl's Crawl Endpoint: A Complete Web Scraping Guide** \\\\\nLearn how to use Firecrawl's /crawl endpoint for efficient web scraping. 
Master URL control, performance optimization, and integration with LangChain for AI-powered data extraction.\\\\\n\\\\\nBy Bex TuychievNov 18, 2024](https://www.firecrawl.dev/blog/mastering-the-crawl-endpoint-in-firecrawl)\n\n[![Getting Started with OpenAI's Predicted Outputs for Faster LLM Responses](https://www.firecrawl.dev/images/blog/openai-predicted-outputs.jpg)\\\\\n**Getting Started with OpenAI's Predicted Outputs for Faster LLM Responses** \\\\\nA guide to leveraging Predicted Outputs to speed up LLM tasks with GPT-4o models.\\\\\n\\\\\nBy Eric CiarlaNov 5, 2024](https://www.firecrawl.dev/blog/getting-started-with-predicted-outputs-openai)\n\n[![Launch Week II Recap](https://www.firecrawl.dev/images/blog/launch-week-ii-recap.jpg)\\\\\n**Launch Week II Recap** \\\\\nRecapping all the exciting announcements from Firecrawl's second Launch Week.\\\\\n\\\\\nBy Eric CiarlaNovember 4, 2024](https://www.firecrawl.dev/blog/launch-week-ii-recap)\n\n[![Launch Week II - Day 7: Introducing Faster Markdown Parsing](https://www.firecrawl.dev/images/blog/firecrawl-faster-markdown.jpg)\\\\\n**Launch Week II - Day 7: Introducing Faster Markdown Parsing** \\\\\nOur new HTML to Markdown parser is 4x faster, more reliable, and produces cleaner Markdown, built from the ground up for speed and performance.\\\\\n\\\\\nBy Eric CiarlaNovember 3, 2024](https://www.firecrawl.dev/blog/launch-week-ii-day-7-introducing-faster-markdown-parsing)\n\n[![Launch Week II - Day 6: Introducing Mobile Scraping and Mobile Screenshots](https://www.firecrawl.dev/images/blog/firecrawl-mobile-scraping.jpg)\\\\\n**Launch Week II - Day 6: Introducing Mobile Scraping and Mobile Screenshots** \\\\\nInteract with sites as if from a mobile device using Firecrawl's new mobile device emulation.\\\\\n\\\\\nBy Eric CiarlaNovember 2, 2024](https://www.firecrawl.dev/blog/launch-week-ii-day-6-introducing-mobile-scraping)\n\n[![Launch Week II - Day 5: Introducing New Actions](https://www.firecrawl.dev/images/blog/firecrawl-new-actions.jpg)\\\\\n**Launch Week II - Day 5: Introducing New Actions** \\\\\nCapture page content at any point and wait for specific elements with our new Scrape and Wait for Selector actions.\\\\\n\\\\\nBy Eric CiarlaNovember 1, 2024](https://www.firecrawl.dev/blog/launch-week-ii-day-5-introducing-two-new-actions)\n\n[![Launch Week II - Day 4: Advanced iframe Scraping](https://www.firecrawl.dev/images/blog/firecrawl-iframe-scraping.jpg)\\\\\n**Launch Week II - Day 4: Advanced iframe Scraping** \\\\\nWe are thrilled to announce comprehensive iframe scraping support in Firecrawl, enabling seamless handling of nested iframes, dynamically loaded content, and cross-origin frames.\\\\\n\\\\\nBy Eric CiarlaOctober 31, 2024](https://www.firecrawl.dev/blog/launch-week-ii-day-4-advanced-iframe-scraping)\n\n[![Launch Week II - Day 3: Introducing Credit Packs](https://www.firecrawl.dev/images/blog/firecrawl-credit-packs.jpg)\\\\\n**Launch Week II - Day 3: Introducing Credit Packs** \\\\\nEasily top up your plan with Credit Packs to keep your web scraping projects running smoothly. 
Plus, manage your credits effortlessly with our new Auto Recharge feature.\\\\\n\\\\\nBy Eric CiarlaOctober 30, 2024](https://www.firecrawl.dev/blog/launch-week-ii-day-3-introducing-credit-packs)\n\n[![Launch Week II - Day 2: Introducing Location and Language Settings](https://www.firecrawl.dev/images/blog/firecrawl-location-language.jpg)\\\\\n**Launch Week II - Day 2: Introducing Location and Language Settings** \\\\\nSpecify country and preferred languages to get relevant localized content, enhancing your web scraping results with region-specific data.\\\\\n\\\\\nBy Eric CiarlaOctober 29, 2024](https://www.firecrawl.dev/blog/launch-week-ii-day-2-introducing-location-language-settings)\n\n[![Launch Week II - Day 1: Introducing the Batch Scrape Endpoint](https://www.firecrawl.dev/images/blog/firecrawl-batch-scrape.jpg)\\\\\n**Launch Week II - Day 1: Introducing the Batch Scrape Endpoint** \\\\\nOur new Batch Scrape endpoint lets you scrape multiple URLs simultaneously, making bulk data collection faster and more efficient.\\\\\n\\\\\nBy Eric CiarlaOctober 28, 2024](https://www.firecrawl.dev/blog/launch-week-ii-day-1-introducing-batch-scrape-endpoint)\n\n[![Getting Started with Grok-2: Setup and Web Crawler Example](https://www.firecrawl.dev/images/blog/grok-2-web-crawler.jpg)\\\\\n**Getting Started with Grok-2: Setup and Web Crawler Example** \\\\\nA detailed guide on setting up Grok-2 and building a web crawler using Firecrawl.\\\\\n\\\\\nBy Nicolas CamaraOct 21, 2024](https://www.firecrawl.dev/blog/grok-2-setup-and-web-crawler-example)\n\n[![OpenAI Swarm Tutorial: Create Marketing Campaigns for Any Website](https://www.firecrawl.dev/images/blog/openai-swarm.png)\\\\\n**OpenAI Swarm Tutorial: Create Marketing Campaigns for Any Website** \\\\\nA guide to building a multi-agent system using OpenAI Swarm and Firecrawl for AI-driven marketing strategies\\\\\n\\\\\nBy Nicolas CamaraOct 12, 2024](https://www.firecrawl.dev/blog/openai-swarm-agent-tutorial)\n\n[![Using OpenAI's Realtime API and Firecrawl to Talk with Any Website](https://www.firecrawl.dev/images/blog/How-to-Talk-with-Any-Website-Using-OpenAIs-Realtime-API-and-Firecrawl.jpg)\\\\\n**Using OpenAI's Realtime API and Firecrawl to Talk with Any Website** \\\\\nBuild a real-time conversational agent that interacts with any website using OpenAI's Realtime API and Firecrawl.\\\\\n\\\\\nBy Nicolas CamaraOct 11, 2024](https://www.firecrawl.dev/blog/How-to-Talk-with-Any-Website-Using-OpenAIs-Realtime-API-and-Firecrawl)\n\n[![Scraping Job Boards Using Firecrawl Actions and OpenAI](https://www.firecrawl.dev/images/blog/firecrawl-openai-job-scraping.jpg)\\\\\n**Scraping Job Boards Using Firecrawl Actions and OpenAI** \\\\\nA step-by-step guide to scraping job boards and extracting structured data using Firecrawl and OpenAI.\\\\\n\\\\\nBy Eric CiarlaSept 27, 2024](https://www.firecrawl.dev/blog/scrape-job-boards-firecrawl-openai)\n\n[![Build a Full-Stack AI Web App in 12 Minutes](https://www.firecrawl.dev/images/blog/Build-a-Full-Stack-AI-Web-App-in-12-Minutes.png)\\\\\n**Build a Full-Stack AI Web App in 12 Minutes** \\\\\nBuild a Full-Stack AI Web App in 12 minutes with Cursor, OpenAI o1, V0, Firecrawl & Patched\\\\\n\\\\\nBy Dev DigestSep 18, 2024](https://www.firecrawl.dev/blog/Build-a-Full-Stack-AI-Web-App-in-12-Minutes)\n\n[![How to Use OpenAI's o1 Reasoning Models in Your Applications](https://www.firecrawl.dev/images/blog/how-to-use-openai-o1-reasoning-models-in-applications.jpg)\\\\\n**How to Use OpenAI's o1 Reasoning Models in Your 
Applications** \\\\\nLearn how to harness OpenAI's latest o1 series models for complex reasoning tasks in your apps.\\\\\n\\\\\nBy Eric CiarlaSep 16, 2024](https://www.firecrawl.dev/blog/how-to-use-openai-o1-reasoning-models-in-applications)\n\n[![Handling 300k requests per day: an adventure in scaling](https://www.firecrawl.dev/images/blog/an-adventure-in-scaling.jpg)\\\\\n**Handling 300k requests per day: an adventure in scaling** \\\\\nPutting out fires was taking up all our time, and we had to scale fast. This is how we did it.\\\\\n\\\\\nBy Gergő Móricz (mogery)Sep 13, 2024](https://www.firecrawl.dev/blog/an-adventure-in-scaling)\n\n[![How Athena Intelligence Empowers Enterprise Analysts with Firecrawl](https://www.firecrawl.dev/images/blog/customer-story-athena-intelligence.jpg)\\\\\n**How Athena Intelligence Empowers Enterprise Analysts with Firecrawl** \\\\\nDiscover how Athena Intelligence leverages Firecrawl to fuel its AI-native analytics platform for enterprise analysts.\\\\\n\\\\\nBy Ben ReillySep 10, 2024](https://www.firecrawl.dev/blog/how-athena-intelligence-empowers-analysts-with-firecrawl)\n\n[![Launch Week I Recap](https://www.firecrawl.dev/images/blog/launch-week-1-recap.png)\\\\\n**Launch Week I Recap** \\\\\nA look back at the new features and updates introduced during Firecrawl's inaugural Launch Week.\\\\\n\\\\\nBy Eric CiarlaSeptember 2, 2024](https://www.firecrawl.dev/blog/firecrawl-launch-week-1-recap)\n\n[![Launch Week I / Day 7: Crawl Webhooks (v1)](https://www.firecrawl.dev/images/blog/webhooks.png)\\\\\n**Launch Week I / Day 7: Crawl Webhooks (v1)** \\\\\nNew /crawl webhook support. Send notifications to your apps during a crawl.\\\\\n\\\\\nBy Nicolas CamaraSeptember 1, 2024](https://www.firecrawl.dev/blog/launch-week-i-day-7-webhooks)\n\n[![Launch Week I / Day 6: LLM Extract (v1)](https://www.firecrawl.dev/images/blog/firecrawl-llm-extract.png)\\\\\n**Launch Week I / Day 6: LLM Extract (v1)** \\\\\nExtract structured data from your web pages using the extract format in /scrape.\\\\\n\\\\\nBy Nicolas CamaraAugust 31, 2024](https://www.firecrawl.dev/blog/launch-week-i-day-6-llm-extract)\n\n[![Launch Week I / Day 5: Real-Time Crawling with WebSockets](https://www.firecrawl.dev/images/blog/firecrawl-websockets.png)\\\\\n**Launch Week I / Day 5: Real-Time Crawling with WebSockets** \\\\\nOur new WebSocket-based method for real-time data extraction and monitoring.\\\\\n\\\\\nBy Eric CiarlaAugust 30, 2024](https://www.firecrawl.dev/blog/launch-week-i-day-5-real-time-crawling-websockets)\n\n[![Launch Week I / Day 4: Introducing Firecrawl /v1](https://www.firecrawl.dev/images/blog/firecrawl-v1-release.png)\\\\\n**Launch Week I / Day 4: Introducing Firecrawl /v1** \\\\\nOur biggest release yet - v1, a more reliable and developer-friendly API for seamless web data gathering.\\\\\n\\\\\nBy Eric CiarlaAugust 29, 2024](https://www.firecrawl.dev/blog/launch-week-i-day-4-introducing-firecrawl-v1)\n\n[![Launch Week I / Day 3: Introducing the Map Endpoint](https://www.firecrawl.dev/images/blog/firecrawl-map-endpoint.png)\\\\\n**Launch Week I / Day 3: Introducing the Map Endpoint** \\\\\nOur new Map endpoint enables lightning-fast website mapping for enhanced web scraping projects.\\\\\n\\\\\nBy Eric CiarlaAugust 28, 2024](https://www.firecrawl.dev/blog/launch-week-i-day-3-introducing-map-endpoint)\n\n[![Launch Week I / Day 2: 2x Rate Limits](https://www.firecrawl.dev/images/blog/firecrawl-rate-limits.png)\\\\\n**Launch Week I / Day 2: 2x Rate Limits** \\\\\nFirecrawl doubles 
rate limits across all plans, supercharging your web scraping capabilities.\\\\\n\\\\\nBy Eric CiarlaAugust 27, 2024](https://www.firecrawl.dev/blog/launch-week-i-day-2-doubled-rate-limits)\n\n[![Launch Week I / Day 1: Introducing Teams](https://www.firecrawl.dev/images/blog/firecrawl-teams.png)\\\\\n**Launch Week I / Day 1: Introducing Teams** \\\\\nOur new Teams feature, enabling seamless collaboration on web scraping projects.\\\\\n\\\\\nBy Eric CiarlaAugust 26, 2024](https://www.firecrawl.dev/blog/launch-week-i-day-1-introducing-teams)\n\n[![How to Use Prompt Caching and Cache Control with Anthropic Models](https://www.firecrawl.dev/images/blog/anthropic-prompt-caching.png)\\\\\n**How to Use Prompt Caching and Cache Control with Anthropic Models** \\\\\nLearn how to cache large context prompts with Anthropic Models like Opus, Sonnet, and Haiku for faster and cheaper chats that analyze website data.\\\\\n\\\\\nBy Eric CiarlaAug 14, 2024](https://www.firecrawl.dev/blog/using-prompt-caching-with-anthropic)\n\n[![Building Knowledge Graphs from Web Data using CAMEL-AI and Firecrawl](https://www.firecrawl.dev/images/blog/knowledge-graph.jpg)\\\\\n**Building Knowledge Graphs from Web Data using CAMEL-AI and Firecrawl** \\\\\nA guide on constructing knowledge graphs from web pages using CAMEL-AI and Firecrawl\\\\\n\\\\\nBy Wendong FanAug 13, 2024](https://www.firecrawl.dev/blog/building-knowledge-graphs-from-web-data-camelai-firecrawl)\n\n[![How Gamma Supercharges Onboarding with Firecrawl](https://www.firecrawl.dev/images/blog/customer-story-gamma.jpg)\\\\\n**How Gamma Supercharges Onboarding with Firecrawl** \\\\\nSee how Gamma uses Firecrawl to instantly generate websites and presentations to 20+ million users.\\\\\n\\\\\nBy Jon NoronhaAug 8, 2024](https://www.firecrawl.dev/blog/how-gamma-supercharges-onboarding-with-firecrawl)\n\n[![How to Use OpenAI's Structured Outputs and JSON Strict Mode](https://www.firecrawl.dev/images/blog/openai-structured-output.png)\\\\\n**How to Use OpenAI's Structured Outputs and JSON Strict Mode** \\\\\nA guide for getting structured data from the latest OpenAI models.\\\\\n\\\\\nBy Eric CiarlaAug 7, 2024](https://www.firecrawl.dev/blog/using-structured-output-and-json-strict-mode-openai)\n\n[![Introducing Fire Engine for Firecrawl](https://www.firecrawl.dev/images/blog/fire-engine-launch.png)\\\\\n**Introducing Fire Engine for Firecrawl** \\\\\nThe most scalable, reliable, and fast way to get web data for Firecrawl.\\\\\n\\\\\nBy Eric CiarlaAug 6, 2024](https://www.firecrawl.dev/blog/introducing-fire-engine-for-firecrawl)\n\n[![Firecrawl July 2024 Updates](https://www.firecrawl.dev/images/blog/launch-yc-firecrawl.png)\\\\\n**Firecrawl July 2024 Updates** \\\\\nDiscover the latest features, integrations, and improvements in Firecrawl for July 2024.\\\\\n\\\\\nBy Eric CiarlaJuly 31, 2024](https://www.firecrawl.dev/blog/firecrawl-july-2024-updates)\n\n[![Firecrawl June 2024 Updates](https://www.firecrawl.dev/images/blog/dashboard2.png)\\\\\n**Firecrawl June 2024 Updates** \\\\\nDiscover the latest features, integrations, and improvements in Firecrawl for June 2024.\\\\\n\\\\\nBy Nicolas CamaraJune 30, 2024](https://www.firecrawl.dev/blog/firecrawl-june-2024-updates)\n\n[![Scrape and Analyze Airbnb Data with Firecrawl and E2B](https://www.firecrawl.dev/images/blog/firecrawl-e2b-airbnb.png)\\\\\n**Scrape and Analyze Airbnb Data with Firecrawl and E2B** \\\\\nLearn how to scrape and analyze Airbnb data using Firecrawl and E2B in a few lines of code.\\\\\n\\\\\nBy 
Nicolas CamaraMay 23, 2024](https://www.firecrawl.dev/blog/scrape-analyze-airbnb-data-with-e2b)\n\n[![Build a 'Chat with website' using Groq Llama 3](https://www.firecrawl.dev/images/blog/g4.png)\\\\\n**Build a 'Chat with website' using Groq Llama 3** \\\\\nLearn how to use Firecrawl, Groq Llama 3, and Langchain to build a 'Chat with your website' bot.\\\\\n\\\\\nBy Nicolas CamaraMay 22, 2024](https://www.firecrawl.dev/blog/chat-with-website)\n\n[![Using LLM Extraction for Customer Insights](https://www.firecrawl.dev/images/blog/g3.png)\\\\\n**Using LLM Extraction for Customer Insights** \\\\\nUsing LLM Extraction for Insights and Lead Generation using Make and Firecrawl.\\\\\n\\\\\nBy Caleb PefferMay 21, 2024](https://www.firecrawl.dev/blog/lead-gen-business-insights-make-firecrawl)\n\n[![Extract website data using LLMs](https://www.firecrawl.dev/images/blog/g2.png)\\\\\n**Extract website data using LLMs** \\\\\nLearn how to use Firecrawl and Groq to extract structured data from a web page in a few lines of code.\\\\\n\\\\\nBy Nicolas CamaraMay 20, 2024](https://www.firecrawl.dev/blog/data-extraction-using-llms)\n\n[![Build an agent that checks for website contradictions](https://www.firecrawl.dev/images/blog/g1.png)\\\\\n**Build an agent that checks for website contradictions** \\\\\nUsing Firecrawl and Claude to scrape your website's data and look for contradictions.\\\\\n\\\\\nBy Eric CiarlaMay 19, 2024](https://www.firecrawl.dev/blog/contradiction-agent)\n\n[🔥](https://www.firecrawl.dev/)\n\n## Ready to _Build?_\n\nStart scraping web data for your AI apps today.\n\nNo credit card needed.\n\nGet Started[Firecrawl Docs home page![light logo](https://mintlify.s3.us-west-1.amazonaws.com/firecrawl/logo/light.svg)![dark logo](https://mintlify.s3.us-west-1.amazonaws.com/firecrawl/logo/dark.svg)](https://firecrawl.dev/)\n\nv1\n\nSearch or ask...\n\nCtrl K\n\nSearch...\n\nNavigation\n\nGet Started\n\nQuickstart\n\n[Documentation](https://docs.firecrawl.dev/introduction) [SDKs](https://docs.firecrawl.dev/sdks/overview) [Learn](https://www.firecrawl.dev/blog/category/tutorials) [API Reference](https://docs.firecrawl.dev/api-reference/introduction)\n\n![Hero Light](https://mintlify.s3.us-west-1.amazonaws.com/firecrawl/images/hero.png)\n\n## [​](https://docs.firecrawl.dev/introduction\\#welcome-to-firecrawl) Welcome to Firecrawl\n\n[Firecrawl](https://firecrawl.dev/?ref=github) is an API service that takes a URL, crawls it, and converts it into clean markdown. We crawl all accessible subpages and give you clean markdown for each. No sitemap required.\n\n## [​](https://docs.firecrawl.dev/introduction\\#how-to-use-it) How to use it?\n\nWe provide an easy to use API with our hosted version. You can find the playground and documentation [here](https://firecrawl.dev/playground). 
You can also self host the backend if you’d like.\n\nCheck out the following resources to get started:\n\n- [x] **API**: [Documentation](https://docs.firecrawl.dev/api-reference/introduction)\n- [x] **SDKs**: [Python](https://docs.firecrawl.dev/sdks/python), [Node](https://docs.firecrawl.dev/sdks/node), [Go](https://docs.firecrawl.dev/sdks/go), [Rust](https://docs.firecrawl.dev/sdks/rust)\n- [x] **LLM Frameworks**: [Langchain (python)](https://python.langchain.com/docs/integrations/document_loaders/firecrawl/), [Langchain (js)](https://js.langchain.com/docs/integrations/document_loaders/web_loaders/firecrawl), [Llama Index](https://docs.llamaindex.ai/en/latest/examples/data_connectors/WebPageDemo/#using-firecrawl-reader), [Crew.ai](https://docs.crewai.com/), [Composio](https://composio.dev/tools/firecrawl/all), [PraisonAI](https://docs.praison.ai/firecrawl/), [Superinterface](https://superinterface.ai/docs/assistants/functions/firecrawl), [Vectorize](https://docs.vectorize.io/integrations/source-connectors/firecrawl)\n- [x] **Low-code Frameworks**: [Dify](https://dify.ai/blog/dify-ai-blog-integrated-with-firecrawl), [Langflow](https://docs.langflow.org/), [Flowise AI](https://docs.flowiseai.com/integrations/langchain/document-loaders/firecrawl), [Cargo](https://docs.getcargo.io/integration/firecrawl), [Pipedream](https://pipedream.com/apps/firecrawl/)\n- [x] **Others**: [Zapier](https://zapier.com/apps/firecrawl/integrations), [Pabbly Connect](https://www.pabbly.com/connect/integrations/firecrawl/)\n- [ ] Want an SDK or Integration? Let us know by opening an issue.\n\n**Self-host:** To self-host refer to guide [here](https://docs.firecrawl.dev/contributing/self-host).\n\n### [​](https://docs.firecrawl.dev/introduction\\#api-key) API Key\n\nTo use the API, you need to sign up on [Firecrawl](https://firecrawl.dev/) and get an API key.\n\n### [​](https://docs.firecrawl.dev/introduction\\#features) Features\n\n- [**Scrape**](https://docs.firecrawl.dev/introduction#scraping): scrapes a URL and get its content in LLM-ready format (markdown, structured data via [LLM Extract](https://docs.firecrawl.dev/introduction#extraction), screenshot, html)\n- [**Crawl**](https://docs.firecrawl.dev/introduction#crawling): scrapes all the URLs of a web page and return content in LLM-ready format\n- [**Map**](https://docs.firecrawl.dev/features/map): input a website and get all the website urls - extremely fast\n\n### [​](https://docs.firecrawl.dev/introduction\\#powerful-capabilities) Powerful Capabilities\n\n- **LLM-ready formats**: markdown, structured data, screenshot, HTML, links, metadata\n- **The hard stuff**: proxies, anti-bot mechanisms, dynamic content (js-rendered), output parsing, orchestration\n- **Customizability**: exclude tags, crawl behind auth walls with custom headers, max crawl depth, etc…\n- **Media parsing**: pdfs, docx, images.\n- **Reliability first**: designed to get the data you need - no matter how hard it is.\n- **Actions**: click, scroll, input, wait and more before extracting data\n\nYou can find all of Firecrawl’s capabilities and how to use them in our [documentation](https://docs.firecrawl.dev/)\n\n## [​](https://docs.firecrawl.dev/introduction\\#crawling) Crawling\n\nUsed to crawl a URL and all accessible subpages. 
This submits a crawl job and returns a job ID to check the status of the crawl.\n\n### [​](https://docs.firecrawl.dev/introduction\\#installation) Installation\n\nPython\n\nNode\n\nGo\n\nRust\n\nCopy\n\n```bash\npip install firecrawl-py\n\n```\n\n### [​](https://docs.firecrawl.dev/introduction\\#usage) Usage\n\nPython\n\nNode\n\nGo\n\nRust\n\ncURL\n\nCopy\n\n```python\nfrom firecrawl import FirecrawlApp\n\napp = FirecrawlApp(api_key=\"fc-YOUR_API_KEY\")\n\n# Crawl a website:\ncrawl_status = app.crawl_url(\n 'https://firecrawl.dev',\n params={\n 'limit': 100,\n 'scrapeOptions': {'formats': ['markdown', 'html']}\n },\n poll_interval=30\n)\nprint(crawl_status)\n\n```\n\nIf you’re using cURL or `async crawl` functions on SDKs, this will return an `ID` where you can use to check the status of the crawl.\n\nCopy\n\n```json\n{\n \"success\": true,\n \"id\": \"123-456-789\",\n \"url\": \"https://api.firecrawl.dev/v1/crawl/123-456-789\"\n}\n\n```\n\n### [​](https://docs.firecrawl.dev/introduction\\#check-crawl-job) Check Crawl Job\n\nUsed to check the status of a crawl job and get its result.\n\nPython\n\nNode\n\nGo\n\nRust\n\ncURL\n\nCopy\n\n```python\ncrawl_status = app.check_crawl_status(\"\")\nprint(crawl_status)\n\n```\n\n#### [​](https://docs.firecrawl.dev/introduction\\#response) Response\n\nThe response will be different depending on the status of the crawl. For not completed or large responses exceeding 10MB, a `next` URL parameter is provided. You must request this URL to retrieve the next 10MB of data. If the `next` parameter is absent, it indicates the end of the crawl data.\n\nScraping\n\nCompleted\n\nCopy\n\n```json\n{\n \"status\": \"scraping\",\n \"total\": 36,\n \"completed\": 10,\n \"creditsUsed\": 10,\n \"expiresAt\": \"2024-00-00T00:00:00.000Z\",\n \"next\": \"https://api.firecrawl.dev/v1/crawl/123-456-789?skip=10\",\n \"data\": [\\\n {\\\n \"markdown\": \"[Firecrawl Docs home page![light logo](https://mintlify.s3-us-west-1.amazonaws.com/firecrawl/logo/light.svg)!...\",\\\n \"html\": \"...\",\\\n \"metadata\": {\\\n \"title\": \"Build a 'Chat with website' using Groq Llama 3 | Firecrawl\",\\\n \"language\": \"en\",\\\n \"sourceURL\": \"https://docs.firecrawl.dev/learn/rag-llama3\",\\\n \"description\": \"Learn how to use Firecrawl, Groq Llama 3, and Langchain to build a 'Chat with your website' bot.\",\\\n \"ogLocaleAlternate\": [],\\\n \"statusCode\": 200\\\n }\\\n },\\\n ...\\\n ]\\\n}\\\n\\\n```\\\n\\\n## [​](https://docs.firecrawl.dev/introduction\\#scraping) Scraping\\\n\\\nTo scrape a single URL, use the `scrape_url` method. It takes the URL as a parameter and returns the scraped data as a dictionary.\\\n\\\nPython\\\n\\\nNode\\\n\\\nGo\\\n\\\nRust\\\n\\\ncURL\\\n\\\nCopy\\\n\\\n```python\\\nfrom firecrawl import FirecrawlApp\\\n\\\napp = FirecrawlApp(api_key=\"fc-YOUR_API_KEY\")\\\n\\\n# Scrape a website:\\\nscrape_result = app.scrape_url('firecrawl.dev', params={'formats': ['markdown', 'html']})\\\nprint(scrape_result)\\\n\\\n```\\\n\\\n### [​](https://docs.firecrawl.dev/introduction\\#response-2) Response\\\n\\\nSDKs will return the data object directly. cURL will return the payload exactly as shown below.\\\n\\\nCopy\\\n\\\n```json\\\n{\\\n \"success\": true,\\\n \"data\" : {\\\n \"markdown\": \"Launch Week I is here! [See our Day 2 Release 🚀](https://www.firecrawl.dev/blog/launch-week-i-day-2-doubled-rate-limits)[💥 Get 2 months free...\",\\\n \"html\": \"
Firecrawl
\"\\\n }\\\n ]\\\n },\\\n \"metadata\": {\\\n \"title\": \"Home - Firecrawl\",\\\n \"description\": \"Firecrawl crawls and converts any website into clean markdown.\",\\\n \"language\": \"en\",\\\n \"keywords\": \"Firecrawl,Markdown,Data,Mendable,Langchain\",\\\n \"robots\": \"follow, index\",\\\n \"ogTitle\": \"Firecrawl\",\\\n \"ogDescription\": \"Turn any website into LLM-ready data.\",\\\n \"ogUrl\": \"https://www.firecrawl.dev/\",\\\n \"ogImage\": \"https://www.firecrawl.dev/og.png?123\",\\\n \"ogLocaleAlternate\": [],\\\n \"ogSiteName\": \"Firecrawl\",\\\n \"sourceURL\": \"http://google.com\",\\\n \"statusCode\": 200\\\n }\\\n }\\\n}\\\n\\\n```\\\n\\\n## [​](https://docs.firecrawl.dev/introduction\\#open-source-vs-cloud) Open Source vs Cloud\\\n\\\nFirecrawl is open source available under the [AGPL-3.0 license](https://github.com/mendableai/firecrawl/blob/main/LICENSE).\\\n\\\nTo deliver the best possible product, we offer a hosted version of Firecrawl alongside our open-source offering. The cloud solution allows us to continuously innovate and maintain a high-quality, sustainable service for all users.\\\n\\\nFirecrawl Cloud is available at [firecrawl.dev](https://firecrawl.dev/) and offers a range of features that are not available in the open source version:\\\n\\\n![Firecrawl Cloud vs Open Source](https://mintlify.s3.us-west-1.amazonaws.com/firecrawl/images/open-source-cloud.png)\\\n\\\n## [​](https://docs.firecrawl.dev/introduction\\#contributing) Contributing\\\n\\\nWe love contributions! Please read our [contributing guide](https://github.com/mendableai/firecrawl/blob/main/CONTRIBUTING.md) before submitting a pull request.\\\n\\\n[Suggest edits](https://github.com/hellofirecrawl/docs/edit/main/introduction.mdx) [Raise issue](https://github.com/hellofirecrawl/docs/issues/new?title=Issue%20on%20docs&body=Path:%20/introduction)\\\n\\\n[Launch Week II (New)](https://docs.firecrawl.dev/launch-week)\\\n\\\nOn this page\\\n\\\n- [Welcome to Firecrawl](https://docs.firecrawl.dev/introduction#welcome-to-firecrawl)\\\n- [How to use it?](https://docs.firecrawl.dev/introduction#how-to-use-it)\\\n- [API Key](https://docs.firecrawl.dev/introduction#api-key)\\\n- [Features](https://docs.firecrawl.dev/introduction#features)\\\n- [Powerful Capabilities](https://docs.firecrawl.dev/introduction#powerful-capabilities)\\\n- [Crawling](https://docs.firecrawl.dev/introduction#crawling)\\\n- [Installation](https://docs.firecrawl.dev/introduction#installation)\\\n- [Usage](https://docs.firecrawl.dev/introduction#usage)\\\n- [Check Crawl Job](https://docs.firecrawl.dev/introduction#check-crawl-job)\\\n- [Response](https://docs.firecrawl.dev/introduction#response)\\\n- [Scraping](https://docs.firecrawl.dev/introduction#scraping)\\\n- [Response](https://docs.firecrawl.dev/introduction#response-2)\\\n- [Extraction](https://docs.firecrawl.dev/introduction#extraction)\\\n- [Extracting without schema (New)](https://docs.firecrawl.dev/introduction#extracting-without-schema-new)\\\n- [Extraction (v0)](https://docs.firecrawl.dev/introduction#extraction-v0)\\\n- [Interacting with the page with Actions](https://docs.firecrawl.dev/introduction#interacting-with-the-page-with-actions)\\\n- [Example](https://docs.firecrawl.dev/introduction#example)\\\n- [Output](https://docs.firecrawl.dev/introduction#output)\\\n- [Open Source vs Cloud](https://docs.firecrawl.dev/introduction#open-source-vs-cloud)\\\n- [Contributing](https://docs.firecrawl.dev/introduction#contributing)Introducing **/extract** \\- Now in open 
beta\n\n# Get web data with a prompt\n\nTurn entire websites into structured data with AI\n\nFrom firecrawl.dev, get the pricing.\n\nTry for Free\n\n![Message balloon](https://www.firecrawl.dev/images/extract/balloon.svg)\n\nFrom **firecrawl.dev** find the company name, mission and whether it's open source.\n\n![Message balloon](https://www.firecrawl.dev/images/extract/balloon-2.svg)\n\n{\n\n\"company\\_name\":\"Firecrawl\",\n\n\"company\\_mission\":\"...\",\n\n\"is\\_open\\_source\":true,\n\n}\n\nA milestone in scraping\n\n## Web scraping was hard – now effortless\n\nScraping the internet had everything to do with broken scripts, bad data, wasted time. With Extract, you can get any data in any format effortlessly – in a single API call.\n\n### No more manual scraping\n\nExtract structured data from any website using natural language prompts.\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_index = html.find(\"\") + len(\"<title>\")\n\nend\\_index = html.find(\"\")\n\ntitle = html\\[start\\_index:end\\_index\\]\n\n>>\\> title\n\nPromptBuild a B2B lead list from these company websites.\n\n### Stop rewriting broken scripts\n\nSay goodbye to fragile scrapers that break with every site update. Our AI understands content semantically and adapts automatically.\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\npage = urlopen(url)\n\nhtml = page.read().decode(\"utf-8\")\n\nstart\\_idx = html.find(\"\") + len(\"<title>\")\n\nend\\_idx = html.find(\"\")\n\ntitle = html\\[start\\_idx:end\\_idx\\]\n\n>>> title\n\nawait firecrawl.extract(\\[\\\n\\\n'https://firecrawl.dev/',\\\n\\\n\\], {\n\nprompt: \"Extract mission.\",\n\nschema: z.object({\n\n    mission: z.string()\n\n})\n\n});\n\n### Extract entire websites in a single API call\n\nGet 
the data you need with a simple API call, whether it's one page or thousands.\n\nTry adding a wildcard /\\* to the URL.It will extract information across the site.It will find and extract information across the entire website.\\> app.extract(\\['https://firecrawl.dev/\\*'\\])\n\n### Forget fighting context windows\n\nNo context window limits. Extract thousands of results effortlessly while we handle the complex LLM work.\n\nExtracting\n\nVideo Demo\n\n## Use Extract for everything\n\nFrom lead enrichment to AI onboarding to KYB – and more. Watch a demo of how Extract can help you get more out of your data.\n\nEnrichment Integrations\n\n## Enrich data anywhere you work\n\nIntegrate Extract with your favorite tools and get enriched data where you need it.\n\nDatasets\n\n## Build datasets spread across websites\n\nGather datasets from any website and use them for any enrichment task.\n\n| | Name | Contact | Email |\n| --- | --- | --- | --- |\n| 1 | Sarah Johnson | +1 (555) 123-4567 | sarah.j@example.com |\n| 2 | Michael Chen | +1 (555) 234-5678 | m.chen@example.com |\n| 3 | Emily Williams | +1 (555) 345-6789 | e.williams@example.com |\n| 4 | James Wilson | +1 (555) 456-7890 | j.wilson@example.com |\n\n[Integrate with Zapier](https://zapier.com/apps/firecrawl/integrations)\n\nSimple, transparent pricing\n\n## Pricing that scales with your business\n\nMonthly\n\nYearly\n\nSave 10%\\+ Get All Credits Upfront\n\n### Free\n\n$0\n\nOne-time\n\nTokens / year500,000\n\nRate limit10 per min\n\nSupportCommunity\n\nSign Up\n\n### Starter\n\n$89/mo\n\n$1,188/yr$1,068/yr(Billed annually)\n\nTokens / year18 million\n\nRate limit20 per min\n\nSupportEmail\n\nSubscribe\n\nAll credits granted upfront\n\nMost Popular 🔥\n\n### Explorer\n\n$359/mo\n\n$4,788/yr$4,308/yr(Billed annually)\n\nTokens / year84 million\n\nRate limit100 per min\n\nSupportSlack\n\nSubscribe\n\nAll credits granted upfront\n\nBest Value\n\n### Pro\n\n$719/mo\n\n$9,588/yr$8,628/yr(Billed annually)\n\nTokens / year192 million\n\nRate limit1000 per min\n\nSupportSlack + Priority\n\nSubscribe\n\nAll credits granted upfront\n\n### Enterprise\n\nCustom\n\nBilled annually\n\nTokens / yearNo limits\n\nRate limitCustom\n\nSupportCustom (SLA, dedicated engineer)\n\nTalk to us\n\nTokens / year\n\n500,000\n\n18 million\n\n84 million\n\n192 million\n\nNo limits\n\nRate limit\n\n10 per min\n\n20 per min\n\n100 per min\n\n1000 per min\n\nCustom\n\nSupport\n\nCommunity\n\nEmail\n\nSlack\n\nSlack + Priority\n\nCustom (SLA, dedicated engineer)\n\nAll requests have a base cost of 300 tokens + [output tokens](https://www.firecrawl.dev/pricing?extract-pricing=true#token-calculator)\n\n## Get started for free\n\n500K free tokens – no credit card required!\n\nFrom firecrawl.dev, get the pricing.\n\nTry for Free\n\nFAQ\n\n## Frequently Asked\n\nEverything you need to know about Extract's powerful web scraping capabilities\n\n### How much does Extract cost?\n\n### What is a token and how many do I need?\n\n### How does Extract handle JavaScript-heavy websites?\n\n### What programming languages and frameworks are supported?\n\n### How many pages can I process in a single API call?\n\n### How can I integrate Extract with my existing workflow?\n\n### Does Extract work with password-protected pages?\n\n### Can I schedule regular extractions for monitoring changes?\n\n### What happens if a website's structure changes?\n\n### How fresh is the extracted data?\n\n### Can Extract handle multiple languages and international websites?\n\n### Can I use Extract for competitor 
monitoring?\n\n### How does Extract handle dynamic content like prices or inventory?\n\n### Is Extract suitable for real-time data needs?\n\n/extract returns a JSON in your desired formatIntroducing /extract - Get web data with a prompt [Try now](https://www.firecrawl.dev/extract)\n\n[💥Get 2 months free with yearly plan](https://www.firecrawl.dev/pricing)\n\n# Turn websites into _LLM-ready_ data\n\nPower your AI apps with clean data crawled from any website. It's also open-source.\n\nStart for free (500 credits)Start for free\n\nA product by\n\n[![Mendable Logo](https://www.firecrawl.dev/images/mendable_logo_transparent.png)Mendable](https://mendable.ai/)\n\n![Example Webpage](https://www.firecrawl.dev/multiple-websites.png)\n\n## Crawl, Scrape, Clean\n\nWe crawl all accessible subpages and give you clean markdown for each. No sitemap required.\n\n```\n\n [\\\n {\\\n \"url\": \"https://www.firecrawl.dev/\",\\\n \"markdown\": \"## Welcome to Firecrawl\\\n Firecrawl is a web scraper that allows you to extract the content of a webpage.\"\\\n },\\\n {\\\n \"url\": \"https://www.firecrawl.dev/features\",\\\n \"markdown\": \"## Features\\\n Discover how Firecrawl's cutting-edge features can\\\n transform your data operations.\"\\\n },\\\n {\\\n \"url\": \"https://www.firecrawl.dev/pricing\",\\\n \"markdown\": \"## Pricing Plans\\\n Choose the perfect plan that fits your needs.\"\\\n },\\\n {\\\n \"url\": \"https://www.firecrawl.dev/about\",\\\n \"markdown\": \"## About Us\\\n Learn more about Firecrawl's mission and the\\\n team behind our innovative platform.\"\\\n }\\\n ]\n\n```\n\nNote: The markdown has been edited for display purposes.\n\n## Trusted by Top Companies\n\n[![Customer Logo](https://www.firecrawl.dev/logos/zapier.png)](https://www.zapier.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/gamma.svg)](https://gamma.app/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/nvidia-com.png)](https://www.nvidia.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/phmg.svg)](https://phmg.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/stackai.svg)](https://www.stack-ai.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/teller-io.svg)](https://www.teller.io/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/carrefour-c.svg)](https://www.carrefour.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/vendr.png)](https://www.vendr.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/open-gov-sg.png)](https://www.open.gov.sg/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/zapier.png)](https://www.zapier.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/gamma.svg)](https://gamma.app/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/nvidia-com.png)](https://www.nvidia.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/phmg.svg)](https://phmg.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/stackai.svg)](https://www.stack-ai.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/teller-io.svg)](https://www.teller.io/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/carrefour-c.svg)](https://www.carrefour.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/vendr.png)](https://www.vendr.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/open-gov-sg.png)](https://www.open.gov.sg/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/cyberagent-co-jp.svg)](https://www.cyberagent.co.jp/)\n\n[![Customer 
Logo](https://www.firecrawl.dev/logos/continue_dev.png)](https://continue.dev/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/bain-com.svg)](https://www.bain.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/jasperai.svg)](https://jasper.ai/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/palladiumdigital.jpg)](https://www.palladiumdigital.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/checkr.png)](https://www.checkr.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/jetbrains.png)](https://www.jetbrains.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/you_com.svg)](https://www.you.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/cyberagent-co-jp.svg)](https://www.cyberagent.co.jp/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/continue_dev.png)](https://continue.dev/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/bain-com.svg)](https://www.bain.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/jasperai.svg)](https://jasper.ai/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/palladiumdigital.jpg)](https://www.palladiumdigital.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/checkr.png)](https://www.checkr.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/jetbrains.png)](https://www.jetbrains.com/)\n\n[![Customer Logo](https://www.firecrawl.dev/logos/you_com.svg)](https://www.you.com/)\n\n## Integrate today\n\nEnhance your applications with top-tier web scraping and crawling capabilities.\n\n#### Scrape\n\nExtract markdown or structured data from websites quickly and efficiently.\n\n#### Crawling\n\nNavigate and retrieve data from all accessible subpages, even without a sitemap.\n\n```\n1\n```\n\n```\n2\n```\n\n```\n3\n```\n\n```\n4\n```\n\n```\n5\n```\n\n```\n6\n```\n\n```\n7\n```\n\n```\n8\n```\n\n```\n9\n```\n\n```\n10\n```\n\n```\n11\n```\n\n```\n12\n```\n\n```\n// npm install @mendable/firecrawl-js\n\nimport FirecrawlApp from '@mendable/firecrawl-js';\n\nconst app = new FirecrawlApp({ apiKey: \"fc-YOUR_API_KEY\" });\n\n// Scrape a website:\nconst scrapeResult = await app.scrapeUrl('firecrawl.dev');\n\nif (scrapeResult.success) {\n console.log(scrapeResult.markdown)\n}\n```\n\n#### Use well-known tools\n\nAlready fully integrated with the greatest existing tools and workflows.\n\n[![LlamaIndex](https://www.firecrawl.dev/logos/llamaindex.svg)](https://docs.llamaindex.ai/en/stable/examples/data_connectors/WebPageDemo/#using-firecrawl-reader/)[![Langchain](https://www.firecrawl.dev/integrations/langchain.png)](https://python.langchain.com/v0.2/docs/integrations/document_loaders/firecrawl/)[![Dify](https://www.firecrawl.dev/logos/dify.png)](https://dify.ai/blog/dify-ai-blog-integrated-with-firecrawl/)[![Dify](https://www.firecrawl.dev/integrations/langflow_2.png)](https://www.langflow.org/)[![Flowise](https://www.firecrawl.dev/integrations/flowise.png)](https://flowiseai.com/)[![CrewAI](https://www.firecrawl.dev/integrations/crewai.png)](https://crewai.com/)[![Camel AI](https://www.firecrawl.dev/integrations/camel-ai.png)](https://docs.camel-ai.org/cookbooks/ingest_data_from_websites_with_Firecrawl.html)\n\n#### Start for free, scale easily\n\nKick off your journey for free and scale seamlessly as your project expands.\n\n[Try it out](https://www.firecrawl.dev/signin/signup)\n\n#### Open-source\n\nDeveloped transparently and collaboratively. 
Join our community of contributors.\n\n[Check out our repo](https://github.com/mendableai/firecrawl)\n\n## We handle the hard stuff\n\nRotating proxies, orchestration, rate limits, js-blocked content and more\n\n#### Crawling\n\nFirecrawl crawls all accessible subpages, even without a sitemap.\n\n#### Dynamic content\n\nFirecrawl gathers data even if a website uses javascript to render content.\n\n#### To Markdown\n\nFirecrawl returns clean, well-formatted markdown - ready for use in LLM applications\n\n#### Reliability first\n\nReliability is our core focus. Firecrawl is designed to ensure you get all the data you need.\n\n#### No Caching\n\nFirecrawl doesn't cache content by default. You always get the latest data.\n\n#### Built for AI\n\nBuilt by LLM engineers, for LLM engineers. Giving you clean data the way you want it.\n\n#### Smart Wait\n\nFirecrawl can intelligently wait for content to load, making scraping faster and more reliable.\n\n#### Actions\n\nClick, scroll, write, wait, press and more before extracting content.\n\n#### Media Parsing\n\nFirecrawl can parse and output clean content from web hosted pdfs, docx, images and more.\n\nOur wall of love\n\n## Don't take our word for it\n\n![Morgan Linton](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-02.4585097e.jpg&w=96&q=75)\n\nMorgan Linton\n\n[@morganlinton](https://x.com/morganlinton/status/1839454165703204955)\n\nIf you're coding with AI, and haven't discovered @firecrawl\\_dev yet, prepare to have your mind blown 🤯\n\n![Chris DeWeese](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-09.a66dcc26.jpg&w=96&q=75)\n\nChris DeWeese\n\n[@ChrisDevApps](https://x.com/ChrisDevApps/status/1853587120406876601)\n\nStarted using @firecrawl\\_dev for a project, I wish I used this sooner.\n\n![Bardia Pourvakil](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-01.025350bc.jpeg&w=96&q=75)\n\nBardia Pourvakil\n\n[@thepericulum](https://twitter.com/thepericulum/status/1781397799487078874)\n\nThe Firecrawl team ships. I wanted types for their node SDK, and less than an hour later, I got them.\n\n![Tom Reppelin](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-04.1f2b4caf.jpg&w=96&q=75)\n\nTom Reppelin\n\n[@TomReppelin](https://x.com/TomReppelin/status/1844382491014201613)\n\nI found gold today. Thank you @firecrawl\\_dev\n\n![latentsauce 🧘🏽](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-07.c2285d35.jpeg&w=96&q=75)\n\nlatentsauce 🧘🏽\n\n[@latentsauce](https://twitter.com/latentsauce/status/1781738253927735331)\n\nFirecrawl simplifies data preparation significantly, exactly what I was hoping for. 
Thank you Firecrawl ❤️❤️❤️\n\n![Morgan Linton](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-02.4585097e.jpg&w=96&q=75)\n\nMorgan Linton\n\n[@morganlinton](https://x.com/morganlinton/status/1839454165703204955)\n\nIf you're coding with AI, and haven't discovered @firecrawl\\_dev yet, prepare to have your mind blown 🤯\n\n![Chris DeWeese](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-09.a66dcc26.jpg&w=96&q=75)\n\nChris DeWeese\n\n[@ChrisDevApps](https://x.com/ChrisDevApps/status/1853587120406876601)\n\nStarted using @firecrawl\\_dev for a project, I wish I used this sooner.\n\n![Bardia Pourvakil](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-01.025350bc.jpeg&w=96&q=75)\n\nBardia Pourvakil\n\n[@thepericulum](https://twitter.com/thepericulum/status/1781397799487078874)\n\nThe Firecrawl team ships. I wanted types for their node SDK, and less than an hour later, I got them.\n\n![Tom Reppelin](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-04.1f2b4caf.jpg&w=96&q=75)\n\nTom Reppelin\n\n[@TomReppelin](https://x.com/TomReppelin/status/1844382491014201613)\n\nI found gold today. Thank you @firecrawl\\_dev\n\n![latentsauce 🧘🏽](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-07.c2285d35.jpeg&w=96&q=75)\n\nlatentsauce 🧘🏽\n\n[@latentsauce](https://twitter.com/latentsauce/status/1781738253927735331)\n\nFirecrawl simplifies data preparation significantly, exactly what I was hoping for. Thank you Firecrawl ❤️❤️❤️\n\n![Michael Ning](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-05.76d7cd3e.png&w=96&q=75)\n\nMichael Ning\n\nFirecrawl is impressive, saving us 2/3 the tokens and allowing gpt3.5turbo use over gpt4. Major savings in time and money.\n\n![Alex Reibman 🖇️](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-06.4ee7cf5a.jpeg&w=96&q=75)\n\nAlex Reibman 🖇️\n\n[@AlexReibman](https://twitter.com/AlexReibman/status/1780299595484131836)\n\nMoved our internal agent's web scraping tool from Apify to Firecrawl because it benchmarked 50x faster with AgentOps.\n\n![Alex Fazio](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-08.af684f18.jpg&w=96&q=75)\n\nAlex Fazio\n\n[@alxfazio](https://x.com/alxfazio/status/1826731977283641615)\n\nSemantic scraping with Firecrawl is 🔥!\n\n![Matt Busigin](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-03.6f60bcb9.jpg&w=96&q=75)\n\nMatt Busigin\n\n[@mbusigin](https://x.com/mbusigin/status/1836065372010656069)\n\nFirecrawl is dope. Congrats guys 👏\n\n![Michael Ning](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-05.76d7cd3e.png&w=96&q=75)\n\nMichael Ning\n\nFirecrawl is impressive, saving us 2/3 the tokens and allowing gpt3.5turbo use over gpt4. 
Major savings in time and money.\n\n![Alex Reibman 🖇️](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-06.4ee7cf5a.jpeg&w=96&q=75)\n\nAlex Reibman 🖇️\n\n[@AlexReibman](https://twitter.com/AlexReibman/status/1780299595484131836)\n\nMoved our internal agent's web scraping tool from Apify to Firecrawl because it benchmarked 50x faster with AgentOps.\n\n![Alex Fazio](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-08.af684f18.jpg&w=96&q=75)\n\nAlex Fazio\n\n[@alxfazio](https://x.com/alxfazio/status/1826731977283641615)\n\nSemantic scraping with Firecrawl is 🔥!\n\n![Matt Busigin](https://www.firecrawl.dev/_next/image?url=%2F_next%2Fstatic%2Fmedia%2Ftestimonial-03.6f60bcb9.jpg&w=96&q=75)\n\nMatt Busigin\n\n[@mbusigin](https://x.com/mbusigin/status/1836065372010656069)\n\nFirecrawl is dope. Congrats guys 👏\n\n## Flexible Pricing\n\nStart for free, then scale as you grow\n\nStandard [Extract](https://www.firecrawl.dev/extract#pricing)\n\nMonthly\n\nYearly\n\n20% off\\- 2 months free\n\n## Free Plan\n\n500 credits\n\n$0 one-time\n\nNo credit card requiredGet Started\n\n- Scrape 500 pages\n- 10 /scrape per min\n- 1 /crawl per min\n\n## Hobby\n\n3,000 creditsper month\n\n$16/month\n\n$228/yr$190/yr(Billed annually)\n\nSubscribe$190/yr\n\n- Scrape 3,000 pages\\*\n- 20 /scrape per min\n- 3 /crawl per min\n- 1 seat\n\n## StandardMost Popular\n\n100,000 creditsper month\n\n$83/month\n\n$1188/yr$990/yr(Billed annually)\n\nSubscribe$990/yr\n\n- Scrape 100,000 pages\\*\n- 100 /scrape per min\n- 10 /crawl per min\n- 3 seats\n- Standard Support\n\n## Growth\n\n500,000 creditsper month\n\n$333/month\n\n$4788/yr$3990/yr(Billed annually)\n\nSubscribe$3990/yr\n\n- Scrape 500,000 pages\\*\n- 1000 /scrape per min\n- 50 /crawl per min\n- 5 seats\n- Priority Support\n\n## Add-ons\n\n### Auto Recharge Credits\n\nAutomatically recharge your credits when you run low.\n\n$11per 1000 credits\n\nEnable Auto Recharge\n\nSubscribe to a plan to enable auto recharge\n\n### Credit Pack\n\nPurchase a pack of additional monthly credits.\n\n$9/mo for 1000 credits\n\nPurchase Credit Pack\n\nSubscribe to a plan to purchase credit packs\n\n## Enterprise Plan\n\nUnlimited credits. Custom RPMs.\n\nTalk to us\n\n- Bulk discounts\n- Top priority support\n- Custom concurrency limits\n- Feature Acceleration\n- SLAs\n- Custom seats\n\n\\\\* a /scrape refers to the [scrape](https://docs.firecrawl.dev/api-reference/endpoint/scrape) API endpoint. Structured extraction costs vary. See [credits table](https://www.firecrawl.dev/pricing#credits).\n\n\\\\* a /crawl refers to the [crawl](https://docs.firecrawl.dev/api-reference/endpoint/crawl) API endpoint.\n\n## API Credits\n\nCredits are consumed for each API request, varying by endpoint and feature.\n\n| Features | Credits |\n| --- | --- |\n| Scrape(/scrape) | 1 / page |\n| with JSON format | 5 / page |\n| Crawl(/crawl) | 1 / page |\n| Map(/map) | 1 / call |\n| Search(/search) | 1 / page |\n| Extract(/extract) | New [Separate Pricing](https://www.firecrawl.dev/extract#pricing) |\n\n[🔥](https://www.firecrawl.dev/)\n\n## Ready to _Build?_\n\nStart scraping web data for your AI apps today.\n\nNo credit card needed.\n\nGet Started\n\n## FAQ\n\nFrequently asked questions about Firecrawl\n\n#### General\n\nWhat is Firecrawl?\n\nFirecrawl turns entire websites into clean, LLM-ready markdown or structured data. Scrape, crawl and extract the web with a single API. 
Ideal for AI companies looking to empower their LLM applications with web data.\n\nWhat sites work?\n\nFirecrawl is best suited for business websites, docs and help centers. We currently don't support social media platforms.\n\nWho can benefit from using Firecrawl?\n\nFirecrawl is tailored for LLM engineers, data scientists, AI researchers, and developers looking to harness web data for training machine learning models, market research, content aggregation, and more. It simplifies the data preparation process, allowing professionals to focus on insights and model development.\n\nIs Firecrawl open-source?\n\nYes, it is. You can check out the repository on GitHub. Keep in mind that this repository is currently in its early stages of development. We are in the process of merging custom modules into this mono repository.\n\nWhat is the difference between Firecrawl and other web scrapers?\n\nFirecrawl is designed with reliability and AI-ready data in mind. We focus on delivering data reliably and in a LLM-ready format - so you can spend less tokens and build better AI applications.\n\nWhat is the difference between the open-source version and the hosted version?\n\nFirecrawl's hosted version features Fire-engine which is our proprietary scraper that takes care of proxies, anti-bot mechanisms and more. It is an intelligent scraper designed to get the data you need - reliably. The hosted version also allows for actions (interacting with the page before scraping), a dashboard for analytics, and it is 1 API call away.\n\n#### Scraping & Crawling\n\nHow does Firecrawl handle dynamic content on websites?\n\nUnlike traditional web scrapers, Firecrawl is equipped to handle dynamic content rendered with JavaScript. It ensures comprehensive data collection from all accessible subpages, making it a reliable tool for scraping websites that rely heavily on JS for content delivery.\n\nWhy is it not crawling all the pages?\n\nThere are a few reasons why Firecrawl may not be able to crawl all the pages of a website. Some common reasons include rate limiting, and anti-scraping mechanisms, disallowing the crawler from accessing certain pages. If you're experiencing issues with the crawler, please reach out to our support team at help@firecrawl.com.\n\nCan Firecrawl crawl websites without a sitemap?\n\nYes, Firecrawl can access and crawl all accessible subpages of a website, even in the absence of a sitemap. This feature enables users to gather data from a wide array of web sources with minimal setup.\n\nWhat formats can Firecrawl convert web data into?\n\nFirecrawl specializes in converting web data into clean, well-formatted markdown. This format is particularly suited for LLM applications, offering a structured yet flexible way to represent web content.\n\nHow does Firecrawl ensure the cleanliness of the data?\n\nFirecrawl employs advanced algorithms to clean and structure the scraped data, removing unnecessary elements and formatting the content into readable markdown. This process ensures that the data is ready for use in LLM applications without further preprocessing.\n\nIs Firecrawl suitable for large-scale data scraping projects?\n\nAbsolutely. Firecrawl offers various pricing plans, including a Scale plan that supports scraping of millions of pages. 
With features like caching and scheduled syncs, it's designed to efficiently handle large-scale data scraping and continuous updates, making it ideal for enterprises and large projects.\n\nDoes it respect robots.txt?\n\nYes, Firecrawl crawler respects the rules set in a website's robots.txt file. If you notice any issues with the way Firecrawl interacts with your website, you can adjust the robots.txt file to control the crawler's behavior. Firecrawl user agent name is 'FirecrawlAgent'. If you notice any behavior that is not expected, please let us know at help@firecrawl.com.\n\nWhat measures does Firecrawl take to handle web scraping challenges like rate limits and caching?\n\nFirecrawl is built to navigate common web scraping challenges, including stealth proxies, rate limits, and smart wait. It smartly manages requests and employs techniques to minimize bandwidth usage and avoid triggering anti-scraping mechanisms, ensuring reliable data collection.\n\nDoes Firecrawl handle captcha or authentication?\n\nFirecrawl avoids captcha by using stealth proxies. When it encounters captcha, it attempts to solve it automatically, but this is not always possible. We are working to add support for more captcha solving methods. Firecrawl can handle authentication by providing auth headers to the API.\n\n#### API Related\n\nWhere can I find my API key?\n\nClick on the dashboard button on the top navigation menu when logged in and you will find your API key in the main screen and under API Keys.\n\n#### Billing\n\nIs Firecrawl free?\n\nFirecrawl is free for the first 500 scraped pages (500 free credits). After that, you can upgrade to our Standard or Growth plans for more credits and higher rate limits.\n\nIs there a pay per use plan instead of monthly?\n\nWe currently do not offer a pay per use plan, instead you can upgrade to our Standard or Growth plans for more credits and higher rate limits.\n\nHow many credits do scraping, crawling, and extraction cost?\n\nScraping costs 1 credit per page. Crawling costs 1 credit per page. Check out the credits table in the pricing page for more details.\n\nDo you charge for failed requests (scrape, crawl, extract)?\n\nWe do not charge for any failed requests (scrape, crawl, extract). Please contact support at help@firecrawl.com if you have notice something wrong.\n\nWhat payment methods do you accept?\n\nWe accept payments through Stripe which accepts most major credit cards, debit cards, and PayPal.Introducing /extract - Get web data with a prompt [Try now](https://www.firecrawl.dev/extract)\n\n## ChangelogNew\n\n- Jan 24, 2025\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Extract Improvements - v1.4.1\n\n\n\n\n\nWe’ve significantly enhanced our data extraction capabilities with several key updates:\n\n\n\n- Extract now returns a lot more data\n- Improved infrastructure reliability\n- Migrated from Cheerio to a high-performance Rust-based parser for faster and more memory-efficient parsing\n- Enhanced crawl cancellation functionality for better control over running jobs\n\n- Jan 7, 2025\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## /extract changes\n\n\n\n\n\nWe have updated the `/extract` endpoint to now be asynchronous. When you make a request to `/extract`, it will return an ID that you can use to check the status of your extract job. If you are using our SDKs, there are no changes required to your code, but please make sure to update the SDKs to the latest versions as soon as possible.\n\n\n\nFor those using the API directly, we have made it backwards compatible. 
However, you have 10 days to update your implementation to the new asynchronous model.\n\n\n\nFor more details about the parameters, refer to the docs sent to you.\n\n- Jan 3, 2025\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## v1.2.0\n\n\n\n\n\n\n\n### Introducing /v1/search\n\n\n\n\n\nThe search endpoint combines web search with Firecrawl’s scraping capabilities to return full page content for any query.\n\n\n\nInclude `scrapeOptions` with `formats: [\"markdown\"]` to get complete markdown content for each search result otherwise it defaults to getting SERP results (url, title, description).\n\n\n\nMore info here: [v1/search docs](https://docs.firecrawl.dev/api-reference/endpoint/search)\n\n\n\n\n\n### Fixes and improvements\n\n\n\n\n\n- Fixed LLM not following the schema in the python SDK for `/extract`\n- Fixed schema json not being able to be sent to the `/extract` endpoint through the Node SDK\n- Prompt is now optional for the `/extract` endpoint\n- Our fork of [MinerU](https://github.com/mendableai/mineru-api) is now default for PDF Parsing\n\n- Dec 27, 2024\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## v1.1.0\n\n\n\n\n\n\n\n### Changelog Highlights\n\n\n\n\n\n#### Feature Enhancements\n\n\n\n- **New Features**:\n - Geolocation, mobile scraping, 4x faster parsing, better webhooks,\n - Credit packs, auto-recharges and batch scraping support.\n - Iframe support and query parameter differentiation for URLs.\n - Similar URL deduplication.\n - Enhanced map ranking and sitemap fetching.\n\n#### Performance Improvements\n\n- Faster crawl status filtering and improved map ranking algorithm.\n- Optimized Kubernetes setup and simplified build processes.\n- Sitemap discoverability and performance improved\n\n#### Bug Fixes\n\n- Resolved issues:\n - Badly formatted JSON, scrolling actions, and encoding errors.\n - Crawl limits, relative URLs, and missing error handlers.\n- Fixed self-hosted crawling inconsistencies and schema errors.\n\n#### SDK Updates\n\n- Added dynamic WebSocket imports with fallback support.\n- Optional API keys for self-hosted instances.\n- Improved error handling across SDKs.\n\n#### Documentation Updates\n\n- Improved API docs and examples.\n- Updated self-hosting URLs and added Kubernetes optimizations.\n- Added articles: mastering `/scrape` and `/crawl`.\n\n#### Miscellaneous\n\n- Added new Firecrawl examples\n- Enhanced metadata handling for webhooks and improved sitemap fetching.\n- Updated blocklist and streamlined error messages.\n\n- Oct 28, 2024\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n![Batch Scrape](https://www.firecrawl.dev/images/blog/firecrawl-batch-scrape.jpg)\n\n\n\n\n\n## Introducing Batch Scrape\n\n\n\n\n\nYou can now scrape multiple URLs simultaneously with our new Batch Scrape endpoint.\n\n\n\n- Read more about the Batch Scrape endpoint [here](https://www.firecrawl.dev/blog/launch-week-ii-day-1-introducing-batch-scrape-endpoint).\n- Python SDK (1.4.x) and Node SDK (1.7.x) updated with batch scrape support.\n\n- Oct 10, 2024\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Cancel Crawl in the SDKs, More Examples, Improved Speed\n\n\n\n\n\n- Added crawl cancellation support for the Python SDK (1.3.x) and Node SDK (1.6.x)\n- OpenAI Voice + Firecrawl example added to the repo\n- CRM lead enrichment example added to the repo\n- Improved our Docker images\n- Limit and timeout fixes for the self hosted playwright scraper\n- Improved speed of all scrapes\n\n- Sep 27, 2024\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## Fixes + Improvements (no version bump)\n\n\n\n\n\n- Fixed 500 errors 
that would happen often in some crawled websites and when servers were at capacity\n- Fixed an issue where v1 crawl status wouldn’t properly return pages over 10mb\n- Fixed an issue where `screenshot` would return undefined\n- Push improvements that reduce speed times when a scraper fails\n\n- Sep 24, 2024\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n![Actions](https://www.firecrawl.dev/images/actions.png)\n\n\n\n\n\n## Introducing Actions\n\n\n\n\n\nInteract with pages before extracting data, unlocking more data from every site!\n\n\n\nFirecrawl now allows you to perform various actions on a web page before scraping its content. This is particularly useful for interacting with dynamic content, navigating through pages, or accessing content that requires user interaction.\n\n\n\n- Version 1.5.x of the Node SDK now supports type-safe Actions.\n- Actions are now available in the REST API and Python SDK (no version bumps required!).\n\nHere is a python example of how to use actions to navigate to google.com, search for Firecrawl, click on the first result, and take a screenshot.\n\n```python\nfrom firecrawl import FirecrawlApp\n\napp = FirecrawlApp(api_key=\"fc-YOUR_API_KEY\")\n\n# Scrape a website:\nscrape_result = app.scrape_url('firecrawl.dev',\n params={\n 'formats': ['markdown', 'html'],\n 'actions': [\\\n {\"type\": \"wait\", \"milliseconds\": 2000},\\\n {\"type\": \"click\", \"selector\": \"textarea[title=\\\"Search\\\"]\"},\\\n {\"type\": \"wait\", \"milliseconds\": 2000},\\\n {\"type\": \"write\", \"text\": \"firecrawl\"},\\\n {\"type\": \"wait\", \"milliseconds\": 2000},\\\n {\"type\": \"press\", \"key\": \"ENTER\"},\\\n {\"type\": \"wait\", \"milliseconds\": 3000},\\\n {\"type\": \"click\", \"selector\": \"h3\"},\\\n {\"type\": \"wait\", \"milliseconds\": 3000},\\\n {\"type\": \"screenshot\"}\\\n ]\n }\n)\nprint(scrape_result)\n\n```\n\nFor more examples, check out our [API Reference](https://docs.firecrawl.dev/api-reference/endpoint/scrape).\n\n- Sep 23, 2024\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n![Firecrawl E2E Type Safe LLM Extract](https://www.firecrawl.dev/images/newllmextract.jpeg)\n\n\n\n\n\n## Mid-September Updates\n\n\n\n\n\n\n\n### Typesafe LLM Extract\n\n\n\n\n\n- E2E Type Safety for LLM Extract in Node SDK version 1.5.x.\n- 10x cheaper in the cloud version. From 50 to 5 credits per extract.\n- Improved speed and reliability.\n\n### Rust SDK v1.0.0\n\n- Rust SDK v1 is finally here! Check it out [here](https://crates.io/crates/firecrawl/1.0.0).\n\n### Map Improved Limits\n\n- Map smart results limits increased from 100 to 1000.\n\n### Faster scrape\n\n- Scrape speed improved by 200ms-600ms depending on the website.\n\n### Launching changelog\n\n- For now on, for every new release, we will be creating a changelog entry here.\n\n### Improvements\n\n- Lots of improvements pushed to the infra and API. For all Mid-September changes, refer to the commits [here](https://github.com/mendableai/firecrawl/commits/main/).\n\n- Sep 8, 2024\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n## September 8, 2024\n\n\n\n\n\n\n\n### Patch Notes (No version bump)\n\n\n\n\n\n- Fixed an issue where some of the custom header params were not properly being set in v1 API. You can now pass headers to your requests just fine.\n\n- Aug 29, 2024\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n![Firecrawl V1](https://www.firecrawl.dev/images/blog/f-v1-changelog.png)\n\n\n\n\n\n## Firecrawl V1 is here! 
With that we introduce a more reliable and developer friendly API.\n\n\n\n\n\n\n\n### Here is what’s new:\n\n\n\n\n\n- Output Formats for /scrape: Choose what formats you want your output in.\n- New /map endpoint: Get most of the URLs of a webpage.\n- Developer friendly API for /crawl/id status.\n- 2x Rate Limits for all plans.\n- Go SDK and Rust SDK.\n- Teams support.\n- API Key Management in the dashboard.\n- onlyMainContent is now default to true.\n- /crawl webhooks and websocket support.\n\nLearn more about it [here](https://docs.firecrawl.dev/v1).\n\nStart using v1 right away at [https://firecrawl.dev](https://firecrawl.dev/)" 4 | } -------------------------------------------------------------------------------- /app/(home)/layout.tsx: -------------------------------------------------------------------------------- 1 | export default function Layout({ children }: { children: React.ReactNode }) { 2 | return
<>{children}</>
; 3 | } 4 | -------------------------------------------------------------------------------- /app/(home)/page.tsx: -------------------------------------------------------------------------------- 1 | "use client"; 2 | 3 | import { Button } from "@/components/ui/button"; 4 | import { Label } from "@/components/ui/label"; 5 | import { Switch } from "@/components/ui/switch"; 6 | import { cn } from "@/lib/utils"; 7 | import { CN_SMOOTH_SHADOW } from "./constants"; 8 | import React, { useEffect, useState } from "react"; 9 | import { Input } from "@/components/ui/input"; 10 | import { 11 | Credenza, 12 | CredenzaContent, 13 | CredenzaHeader, 14 | CredenzaTitle, 15 | CredenzaDescription, 16 | CredenzaBody, 17 | CredenzaFooter, 18 | CredenzaClose, 19 | } from "@/components/ui/credenza"; 20 | import { CircleHelp, ExternalLinkIcon, Loader2 } from "lucide-react"; 21 | import { dataMock } from "./data-mock"; 22 | import { toast } from "@/hooks/use-toast"; 23 | import { 24 | Popover, 25 | PopoverTrigger, 26 | PopoverContent, 27 | } from "@radix-ui/react-popover"; 28 | 29 | export default function Page() { 30 | const [isModalOpen, setIsModalOpen] = React.useState(false); 31 | const prevIsModalOpen = usePrevious(isModalOpen); 32 | const [wantsFull, setWantsFull] = React.useState(false); 33 | const [firecrawlKey, setFirecrawlKey] = React.useState(""); 34 | 35 | const [url, setUrl] = React.useState(""); 36 | 37 | const hasKey = firecrawlKey.length > 0; 38 | const isFull = wantsFull && hasKey; 39 | 40 | const [loading, setLoading] = useState(false); 41 | const [mapUrls, setMapUrls] = useState([]); 42 | const [scrapingStatus, setScrapingStatus] = useState(""); 43 | const [apiCallStatus, setApiCallStatus] = useState(""); 44 | 45 | const [finalMessage, setFinalMessage] = useState<{ 46 | fullMessage: string; 47 | message: string; 48 | isFull: boolean; 49 | } | null>( 50 | // Mocked data 51 | // { 52 | // fullMessage: dataMock.fullApiMessage, 53 | // message: dataMock.apiMessage, 54 | // isFull: false, 55 | // } 56 | null 57 | ); 58 | 59 | useEffect(() => { 60 | let interval: NodeJS.Timeout; 61 | if (loading && mapUrls.length > 0) { 62 | let index = 0; 63 | const messages = [ 64 | (url: string) => `Scraping URL: ${url}`, 65 | (url: string) => `Extracting Title for URL: ${url}`, 66 | (url: string) => `Extracting Description for URL: ${url}`, 67 | (url: string) => `Adding URL to llms.txt: ${url}`, 68 | ]; 69 | interval = setInterval(() => { 70 | const currentUrl = mapUrls[index]; 71 | setScrapingStatus(messages[index % messages.length](currentUrl)); 72 | index = (index + 1) % mapUrls.length; 73 | }, 750); 74 | } else { 75 | setScrapingStatus(""); 76 | } 77 | return () => clearInterval(interval); 78 | }, [loading, mapUrls]); 79 | 80 | const callApi = React.useCallback(async () => { 81 | const isFull = wantsFull && hasKey; 82 | const formattedUrl = url.toLowerCase(); 83 | 84 | setLoading(true); 85 | try { 86 | const mapResponse = await fetch("/api/map", { 87 | method: "POST", 88 | headers: { 89 | "Content-Type": "application/json", 90 | }, 91 | body: JSON.stringify({ 92 | url: formattedUrl, 93 | bringYourOwnFirecrawlApiKey: firecrawlKey, 94 | }), 95 | }); 96 | const mapData = await mapResponse.json(); 97 | setMapUrls(mapData.mapUrls); 98 | const llmsResponse = await fetch("/api/service", { 99 | method: "POST", 100 | headers: { 101 | "Content-Type": "application/json", 102 | }, 103 | body: JSON.stringify({ 104 | url: formattedUrl, 105 | urls: mapData.mapUrls, 106 | bringYourOwnFirecrawlApiKey: firecrawlKey, 107 | }), 
108 | }); 109 | const data = await llmsResponse.json(); 110 | setFinalMessage({ 111 | fullMessage: data.llmsFulltxt, 112 | message: data.llmstxt, 113 | isFull, 114 | }); 115 | } catch (error) { 116 | setFinalMessage(null); 117 | toast({ 118 | title: "Error", 119 | description: "Something went wrong, please try again later", 120 | }); 121 | } finally { 122 | setLoading(false); 123 | } 124 | }, [url, wantsFull, hasKey, firecrawlKey]); 125 | 126 | const handleSubmit = React.useCallback( 127 | (e: React.FormEvent) => { 128 | e.preventDefault(); 129 | 130 | let inferredUrl = url; 131 | if (!inferredUrl.startsWith("http") && !inferredUrl.startsWith("https")) { 132 | inferredUrl = `https://${inferredUrl}`; 133 | } 134 | 135 | if (!inferredUrl) { 136 | toast({ 137 | title: "Error", 138 | description: "Please enter a URL", 139 | }); 140 | return; 141 | } 142 | 143 | try { 144 | new URL(inferredUrl); 145 | } catch { 146 | toast({ 147 | title: "Error", 148 | description: "Please enter a valid URL", 149 | }); 150 | return; 151 | } 152 | 153 | callApi(); 154 | }, 155 | [url,callApi] 156 | ); 157 | 158 | const [ 159 | retryWhenModalClosesWithFilledKey, 160 | setRetryWhenModalClosesWithFilledKey, 161 | ] = useState(false); 162 | React.useEffect(() => { 163 | if ( 164 | prevIsModalOpen && 165 | !isModalOpen && 166 | hasKey && 167 | retryWhenModalClosesWithFilledKey 168 | ) { 169 | setRetryWhenModalClosesWithFilledKey(false); 170 | callApi(); 171 | } 172 | }, [prevIsModalOpen, isModalOpen, hasKey, retryWhenModalClosesWithFilledKey]); 173 | const retryWithFullGeneration = React.useCallback(() => { 174 | setIsModalOpen(true); 175 | setWantsFull(true); 176 | 177 | setRetryWhenModalClosesWithFilledKey(true); 178 | }, []); 179 | 180 | const canSubmit = (!loading && !url) || loading; 181 | 182 | return ( 183 | 184 |
185 |

186 | LLMs.txt generator v2 187 |

188 |

189 | Generate consolidated text files from websites for LLM training and 190 | inference – Powered by{" "} 191 | 192 | Firecrawl 🔥 193 | 194 |

195 | 196 |
204 | setUrl(e.target.value)} 209 | /> 210 | 211 | {/* Action Bar */} 212 |
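{/* Action bar beneath the URL input: help popover on the left; full-generation toggle and submit button on the right */}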
213 | {/* Left */} 214 |
215 | 216 | {/* Right */} 217 |
218 |
219 | { 224 | if (willCheck) { 225 | setIsModalOpen(true); 226 | setWantsFull(true); 227 | } else { 228 | setWantsFull(false); 229 | } 230 | }} 231 | /> 232 | 233 |
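{/* Turning this switch on opens the API-key modal: full generation requires a Firecrawl key (isFull = wantsFull && hasKey), so it falls back to standard mode without one */}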
234 | 238 |
239 |
240 |
241 | 242 | 250 | 251 |
252 |
253 |
254 | {(!finalMessage || loading) &&
255 |
256 | {!loading && ( 257 | <> 258 |
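{/* Idle state: brief usage instructions and the public API endpoints, shown until a generation starts */}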

Please provide a URL to generate an llms.txt file.

259 |
260 |

261 | For a better experience, use an API key from{" "} 262 | 267 | Firecrawl 🔥 268 | 269 | . 270 |

271 |
272 |

273 | You can also call llms.txt via{" "} 274 | 275 | 276 | 277 | API 278 | 279 | 280 | 281 |

282 |
283 |

284 | Access llms.txt via API by going to: 285 |

286 | 287 | http://llmstxt.firecrawl.dev/{"{YOUR_URL}"} 288 | 289 |
290 |
291 |

292 | For full results, add your Firecrawl API key: 293 |

294 | 295 | ?FIRECRAWL_API_KEY=YOUR_API_KEY 296 | 297 |
298 |
299 | 300 | 301 |
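{/* A minimal sketch of calling the endpoints described above (URL shapes taken from API_INFO at the bottom of this file; firecrawl.dev used as an example target):
    const llms = await (await fetch("http://llmstxt.firecrawl.dev/firecrawl.dev")).text();
    const full = await (await fetch("http://llmstxt.firecrawl.dev/firecrawl.dev/full?FIRECRAWL_API_KEY=fc-YOUR_API_KEY")).text(); */}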

302 | 303 | )} 304 | {loading && ( 305 | <> 306 |
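{/* Loading state: a spinner until /api/map returns URLs, then the rotating per-URL status messages produced by the effect above */}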

307 | {loading && !scrapingStatus && !apiCallStatus && <Loader2 className="animate-spin" />} 308 | {loading && scrapingStatus && <>{scrapingStatus}</>} 309 | {apiCallStatus && <>{apiCallStatus}</>} 310 |

311 | 312 | )} 313 |
314 |
} 315 | 316 | {!loading && finalMessage && ( 317 |
318 | {finalMessage.isFull 319 | ? finalMessage.fullMessage 320 | : finalMessage.message} 321 | {!hasKey && ( 322 |
323 |
324 | For full results, get a 325 | 331 | {" "} 332 | free Firecrawl key 🔥 333 | 334 |
335 |
336 | )} 337 |
338 | )} 339 |
340 | 341 | {finalMessage && !loading && ( 342 |
343 | 356 | 357 | {!hasKey && ( 358 | 361 | )} 362 |
363 | )} 364 |
365 |
366 |
367 | 368 | { 371 | setIsModalOpen(val); 372 | if (!val) { 373 | if (firecrawlKey.length === 0) { 374 | toast({ 375 | title: "Going normal mode", 376 | description: "Full generation requires an API key.", 377 | }); 378 | } 379 | } 380 | }} 381 | > 382 | 383 |
{ 384 | e.preventDefault(); 385 | setIsModalOpen(false); 386 | }} className="flex flex-col sm:gap-4"> 387 | 388 | Enable full generation 389 | 390 | Please enter your Firecrawl API key to enable the full 391 | generation feature. 392 | 393 | 394 | 395 |
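{/* The key entered here lives in client state only (setFirecrawlKey) and is sent as bringYourOwnFirecrawlApiKey to the /api/map and /api/service routes */}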
396 | setFirecrawlKey(e.target.value)} 402 | /> 403 | 408 | Don't have a key? Create a Firecrawl account{" "} 409 | 410 | 411 |
412 |
413 | 414 | 415 | 416 | 417 | 418 |
419 |
420 |
421 |
422 | ); 423 | } 424 | 425 | const PageContainer = React.forwardRef< 426 | HTMLDivElement, 427 | React.ComponentPropsWithoutRef<"div"> 428 | >(function PageContainer({ className, ...props }, ref) { 429 | return ( 430 |
438 | ); 439 | }); 440 | 441 | const Results = React.forwardRef< 442 | HTMLDivElement, 443 | React.ComponentPropsWithoutRef<"div"> 444 | >(function Results({ className, ...props }, ref) { 445 | return ( 446 |
454 | ); 455 | }); 456 | 457 | const CN_LINK = `text-primary hover:text-primary/80 transition-colors`; 458 | 459 | function usePrevious(value: T) { 460 | const ref = React.useRef(value); 461 | useEffect(() => { 462 | ref.current = value; 463 | }, [value]); 464 | return ref.current; 465 | } 466 | 467 | const API_INFO = `You can access llms.txt via API by simply going to 'http://llmstxt.firecrawl.dev/{YOUR_URL}' or llms-full.txt via API with 'http://llmstxt.firecrawl.dev/{YOUR_URL}/full'. If you have a Firecrawl API key, you can use it by adding '?FIRECRAWL_API_KEY=YOUR_API_KEY' to the end of the URL for full results.`; 468 | -------------------------------------------------------------------------------- /app/[...slug]/route.ts: -------------------------------------------------------------------------------- 1 | import { NextRequest, NextResponse } from 'next/server'; 2 | import FirecrawlApp from '@mendable/firecrawl-js'; 3 | import { config } from 'dotenv'; 4 | config(); 5 | 6 | export const maxDuration = 300; 7 | 8 | export async function GET( 9 | request: NextRequest, 10 | { params }: { params: Promise<{ slug: string[] }> } 11 | ) { 12 | try { 13 | const resolvedParams = await params; 14 | // Join the slug segments 15 | let rawUrl = resolvedParams.slug.join('/'); 16 | console.log(`Raw URL from slug: ${rawUrl}`); 17 | 18 | // Normalize the URL 19 | let targetUrl = rawUrl; 20 | 21 | // Check for malformed protocols with only one slash 22 | if (targetUrl.match(/^https?:\/[^\/]/)) { 23 | // Convert https:/example.com to https://example.com 24 | targetUrl = targetUrl.replace(/^(https?:\/)([^\/].*)/, '$1/$2'); 25 | console.log(`Fixed malformed protocol (missing slash): ${targetUrl}`); 26 | } 27 | // Check for protocol with right number of slashes 28 | else if (targetUrl.match(/^https?:\/\/.+/)) { 29 | // URL already has a valid protocol 30 | console.log(`URL has valid protocol: ${targetUrl}`); 31 | } 32 | // No protocol at all 33 | else { 34 | // Add https:// protocol 35 | targetUrl = `https://${targetUrl}`; 36 | console.log(`Added protocol: ${targetUrl}`); 37 | } 38 | 39 | const { searchParams } = new URL(request.url); 40 | const firecrawlApiKey = searchParams.get('FIRECRAWL_API_KEY') || request.headers.get('FIRECRAWL_API_KEY') || process.env.FIRECRAWL_API_KEY; 41 | 42 | if (!firecrawlApiKey) { 43 | return NextResponse.json( 44 | { error: 'FIRECRAWL_API_KEY is not set' }, 45 | { status: 500 } 46 | ); 47 | } 48 | 49 | // Initialize FirecrawlApp with the API key 50 | const app = new FirecrawlApp({ apiKey: firecrawlApiKey }); 51 | 52 | // Set maxUrls based on whether user provided their own API key 53 | const maxUrls = searchParams.get('FIRECRAWL_API_KEY') || request.headers.get('FIRECRAWL_API_KEY') ? 
100 : 10; 54 | 55 | // Define generation parameters 56 | const generationParams = { 57 | maxUrls, 58 | showFullText: true 59 | }; 60 | 61 | // Check if the last segment is 'full' 62 | const isFullRequest = resolvedParams.slug[resolvedParams.slug.length - 1] === 'full'; 63 | 64 | // Remove 'full' from targetUrl if present 65 | if (isFullRequest) { 66 | targetUrl = targetUrl.replace(/\/full$/, ''); 67 | } 68 | 69 | console.log(`Processing URL: ${targetUrl}`); 70 | 71 | // Generate LLMs.txt directly 72 | const results = await app.generateLLMsText(targetUrl, generationParams); 73 | 74 | if (!results.success) { 75 | throw new Error(`Failed to generate: ${results.error || "Unknown error"}`); 76 | } 77 | 78 | // Format the response based on whether it's a full request 79 | if (isFullRequest) { 80 | 81 | const llmsFulltxt = results.data.llmsfulltxt; 82 | if (!llmsFulltxt) { 83 | console.error('llmsfulltxt is undefined in the response'); 84 | return NextResponse.json( 85 | { error: 'llmsfulltxt is undefined in the response' }, 86 | { status: 500 } 87 | ); 88 | } 89 | 90 | let prettyPrintedFullTxt = JSON.stringify({ llmsfulltxt: llmsFulltxt }, null, 2) 91 | .replace(/\\n/g, '\n') 92 | .replace(/\\t/g, '\t') 93 | .replace(/^\{\s*"llmsfulltxt":\s*"/, '') 94 | .replace(/"\s*\}$/, ''); 95 | 96 | if (!searchParams.get('FIRECRAWL_API_KEY') && !request.headers.get('FIRECRAWL_API_KEY')) { 97 | prettyPrintedFullTxt = `${prettyPrintedFullTxt} \n\n*Note: This is an incomplete llmsfulltxt result. To enable full generation, please provide your Firecrawl API key by either: 98 | 1. Adding the 'FIRECRAWL_API_KEY' header to your request (e.g., 'FIRECRAWL_API_KEY: your-api-key-here'), or 99 | 2. Including it as a query parameter (e.g., '?FIRECRAWL_API_KEY=your-api-key-here')`; 100 | } 101 | 102 | return new Response(prettyPrintedFullTxt, { 103 | headers: { 'Content-Type': 'application/json' }, 104 | }); 105 | } else { 106 | // Add note if using default API key with limited results 107 | let llmstxt = results.data.llmstxt; 108 | if (!searchParams.get('FIRECRAWL_API_KEY') && !request.headers.get('FIRECRAWL_API_KEY')) { 109 | llmstxt = `${llmstxt} \n\n*Note: This is an incomplete llmstxt result. To enable full generation, please provide your Firecrawl API key by either: 110 | 1. Adding the 'FIRECRAWL_API_KEY' header to your request (e.g., 'FIRECRAWL_API_KEY: your-api-key-here'), or 111 | 2. 
Including it as a query parameter (e.g., '?FIRECRAWL_API_KEY=your-api-key-here')`; 112 | } 113 | 114 | const prettyPrintedData = JSON.stringify({ llmstxt: llmstxt }, null, 2) 115 | .replace(/\\n/g, '\n') 116 | .replace(/\\t/g, '\t') 117 | .replace(/^\{\s*"llmstxt":\s*"/, '') 118 | .replace(/"\s*\}$/, ''); 119 | 120 | return new Response(prettyPrintedData, { 121 | headers: { 'Content-Type': 'application/json' }, 122 | }); 123 | } 124 | } catch (error) { 125 | console.error('Error:', error); 126 | return NextResponse.json( 127 | { error: 'An unexpected error occurred' }, 128 | { status: 500 } 129 | ); 130 | } 131 | } 132 | -------------------------------------------------------------------------------- /app/api/map/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import FirecrawlApp, { ScrapeResponse } from '@mendable/firecrawl-js'; 3 | import { config } from 'dotenv'; 4 | config(); 5 | 6 | export const maxDuration = 300; 7 | 8 | export async function POST(request: Request) { 9 | const { url, bringYourOwnFirecrawlApiKey } = await request.json(); 10 | let firecrawlApiKey: string | undefined; 11 | let limit: number = 100; 12 | console.log("url", url); 13 | 14 | if (bringYourOwnFirecrawlApiKey) { 15 | firecrawlApiKey = bringYourOwnFirecrawlApiKey; 16 | console.log("Using provided Firecrawl API key. Limit set to 100"); 17 | 18 | } else { 19 | firecrawlApiKey = process.env.FIRECRAWL_API_KEY; 20 | limit = 10; 21 | console.log("Using default limit of 10"); 22 | } 23 | 24 | if (!firecrawlApiKey) { 25 | throw new Error('FIRECRAWL_API_KEY is not set'); 26 | } 27 | 28 | const app = new FirecrawlApp({ apiKey: firecrawlApiKey }); 29 | 30 | let urlObj; 31 | if (url.startsWith('http://') || url.startsWith('https://')) { 32 | urlObj = new URL(url); 33 | } else if (url.startsWith('http:/') || url.startsWith('https:/')) { 34 | urlObj = new URL(url); 35 | } else { 36 | urlObj = new URL(`http://${url}`); 37 | } 38 | 39 | let stemUrl = `${urlObj.hostname}`; 40 | 41 | // If the URL is a GitHub URL, include the owner and repo name in the stemUrl 42 | if (stemUrl.includes('github.com')) { 43 | const pathSegments = urlObj.pathname.split('/').filter(segment => segment); 44 | if (pathSegments.length >= 2) { 45 | const owner = pathSegments[0]; 46 | const repo = pathSegments[1]; 47 | stemUrl = `${stemUrl}/${owner}/${repo}`; 48 | } 49 | } 50 | 51 | // Map a website 52 | const mapResult = await app.mapUrl(stemUrl, { 53 | limit: limit, 54 | }); 55 | 56 | if (!mapResult.success) { 57 | throw new Error(`Failed to map: ${mapResult.error}`); 58 | } 59 | 60 | let urls = mapResult.success ? 
mapResult.links : []; 61 | 62 | return NextResponse.json({ mapUrls: urls }); 63 | } 64 | -------------------------------------------------------------------------------- /app/api/service/route.ts: -------------------------------------------------------------------------------- 1 | import { NextResponse } from 'next/server'; 2 | import FirecrawlApp from '@mendable/firecrawl-js'; 3 | import { config } from 'dotenv'; 4 | config(); 5 | import { createClient } from '@supabase/supabase-js' 6 | 7 | export const maxDuration = 300; 8 | 9 | export async function POST(request: Request) { 10 | const { url, urls, bringYourOwnFirecrawlApiKey } = await request.json(); 11 | let firecrawlApiKey: string | undefined; 12 | let maxUrls: number = 100; 13 | let no_limit: boolean = false; 14 | 15 | if (bringYourOwnFirecrawlApiKey) { 16 | firecrawlApiKey = bringYourOwnFirecrawlApiKey; 17 | console.log("Using provided Firecrawl API key. Limit set to 100"); 18 | no_limit = true; 19 | } else { 20 | firecrawlApiKey = process.env.FIRECRAWL_API_KEY; 21 | maxUrls = 10; 22 | console.log("Using default limit of 10"); 23 | } 24 | 25 | if (!firecrawlApiKey) { 26 | throw new Error('FIRECRAWL_API_KEY is not set'); 27 | } 28 | 29 | const app = new FirecrawlApp({ apiKey: firecrawlApiKey }); 30 | 31 | const supabaseUrl = process.env.SUPABASE_URL; 32 | const supabaseKey = process.env.SUPABASE_KEY; 33 | const supabase = createClient(supabaseUrl!, supabaseKey!); 34 | 35 | // Define generation parameters 36 | const params = { 37 | maxUrls, 38 | showFullText: true, 39 | urls 40 | }; 41 | 42 | // Generate LLMs.txt with polling 43 | //@ts-ignore 44 | const results = await app.generateLLMsText(url, params); 45 | 46 | if (!results.success) { 47 | throw new Error(`Failed to generate: ${results.error || "Unknown error"}`); 48 | } 49 | 50 | const llmstxt = !bringYourOwnFirecrawlApiKey 51 | ? 
`*Note: This is an incomplete result, please enable full generation by entering a Firecrawl key.\n\n${results.data.llmstxt}` 52 | : results.data.llmstxt; 53 | 54 | const llmsFulltxt = results.data.llmsfulltxt; 55 | 56 | const { data, error } = await supabase 57 | .from('cache') 58 | .insert([ 59 | { url: url, llmstxt: llmstxt, llmsfulltxt: llmsFulltxt, no_limit: no_limit } 60 | ]); 61 | 62 | if (error) { 63 | throw new Error(`Failed to insert into Supabase: ${error.message}`); 64 | } 65 | 66 | return NextResponse.json({ llmstxt: llmstxt, llmsFulltxt: llmsFulltxt }); 67 | } 68 | -------------------------------------------------------------------------------- /app/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendableai/llmstxt-generator/15d7a4230cfea5afe37ece768007b652fb15b1e4/app/favicon.ico -------------------------------------------------------------------------------- /app/fonts/GeistMonoVF.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendableai/llmstxt-generator/15d7a4230cfea5afe37ece768007b652fb15b1e4/app/fonts/GeistMonoVF.woff -------------------------------------------------------------------------------- /app/fonts/GeistVF.woff: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mendableai/llmstxt-generator/15d7a4230cfea5afe37ece768007b652fb15b1e4/app/fonts/GeistVF.woff -------------------------------------------------------------------------------- /app/globals.css: -------------------------------------------------------------------------------- 1 | @tailwind base; 2 | @tailwind components; 3 | @tailwind utilities; 4 | 5 | body { 6 | font-family: Arial, Helvetica, sans-serif; 7 | } 8 | 9 | @layer base { 10 | @media (min-width: 2048px) { 11 | :root { 12 | font-size: 24px; 13 | } 14 | } 15 | 16 | :root { 17 | --background: 0 0% 97.65%; 18 | --foreground: 0 0% 12.55%; 19 | --muted: 0 0% 93.73%; 20 | --muted-foreground: 0 0% 39.22%; 21 | --popover: 0 0% 98.82%; 22 | --popover-foreground: 0 0% 12.55%; 23 | --card: 0 0% 98.82%; 24 | --card-foreground: 0 0% 12.55%; 25 | --border: 0 0% 84.71%; 26 | --input: 0 0% 94.12%; 27 | --primary: 22.93 92.59% 52.35%; 28 | --primary-foreground: 44 0% 100%; 29 | --secondary: 34.05 100% 85.49%; 30 | --secondary-foreground: 16.27 50.43% 22.94%; 31 | --accent: 0 0% 90.98%; 32 | --accent-foreground: 0 0% 12.55%; 33 | --destructive: 10.16 77.87% 53.92%; 34 | --destructive-foreground: 0 0% 100%; 35 | --ring: 24.8 79.79% 63.14%; 36 | --chart-1: 22.93 92.59% 52.35%; 37 | --chart-2: 34.05 100% 85.49%; 38 | --chart-3: 0 0% 90.98%; 39 | --chart-4: 34.05 100% 88.49%; 40 | --chart-5: 22.93 95.59% 52.35%; 41 | --radius: 0.5rem; 42 | } 43 | 44 | .dark { 45 | --background: 0 0% 6.67%; 46 | --foreground: 0 0% 93.33%; 47 | --muted: 0 0% 13.33%; 48 | --muted-foreground: 0 0% 70.59%; 49 | --popover: 0 0% 9.8%; 50 | --popover-foreground: 0 0% 93.33%; 51 | --card: 0 0% 9.8%; 52 | --card-foreground: 0 0% 93.33%; 53 | --border: 44 14% 11%; 54 | --input: 0 0% 28.24%; 55 | --primary: 22.93 92.59% 52.35%; 56 | --primary-foreground: 29.51 0% 100%; 57 | --secondary: 28.5 64.52% 12.16%; 58 | --secondary-foreground: 29.51 100% 88.04%; 59 | --accent: 0 0% 16.47%; 60 | --accent-foreground: 0 0% 93.33%; 61 | --destructive: 10.16 77.87% 53.92%; 62 | --destructive-foreground: 0 0% 100%; 63 | --ring: 23.11 59.8% 40%; 64 | --chart-1: 22.93 92.59% 52.35%; 65 | --chart-2: 
28.5 64.52% 12.16%; 66 | --chart-3: 0 0% 16.47%; 67 | --chart-4: 28.5 64.52% 15.16%; 68 | --chart-5: 22.93 95.59% 52.35%; 69 | } 70 | } 71 | 72 | @layer base { 73 | * { 74 | @apply border-border; 75 | } 76 | body { 77 | @apply bg-background text-foreground; 78 | } 79 | } 80 | 81 | @layer utilities { 82 | .custom-scrollbar { 83 | scrollbar-width: thin; /* For Firefox */ 84 | scrollbar-color: hsl(var(--custom-scrollbar-color, var(--foreground) / .1)) transparent; /* Thumb color | Track color */ 85 | } 86 | } -------------------------------------------------------------------------------- /app/layout.tsx: -------------------------------------------------------------------------------- 1 | import type { Metadata } from "next"; 2 | import localFont from "next/font/local"; 3 | import "./globals.css"; 4 | import { ThemeProvider } from "@/components/theme-provider"; 5 | import { Toaster } from "@/components/ui/toaster"; 6 | import { Analytics } from "@vercel/analytics/react"; 7 | 8 | const geistSans = localFont({ 9 | src: "./fonts/GeistVF.woff", 10 | variable: "--font-geist-sans", 11 | weight: "100 900", 12 | }); 13 | const geistMono = localFont({ 14 | src: "./fonts/GeistMonoVF.woff", 15 | variable: "--font-geist-mono", 16 | weight: "100 900", 17 | }); 18 | 19 | export const metadata: Metadata = { 20 | title: "Generate llms.txt", 21 | description: "Generate llms.txt for any website", 22 | }; 23 | 24 | export default function RootLayout({ 25 | children, 26 | }: Readonly<{ 27 | children: React.ReactNode; 28 | }>) { 29 | return ( 30 | 31 | 34 |
35 | 🎉 The official Firecrawl llms.txt endpoint is now available! 36 | 42 | Learn more → 43 | 44 |
45 |         <ThemeProvider
46 |           attribute="class"
47 |           defaultTheme="system"
48 |           enableSystem
49 |           disableTransitionOnChange
50 |         >
51 |           {children}
52 |           <Toaster />
53 |         </ThemeProvider>
54 |         <Analytics />
55 |       </body>
56 |     </html>
57 |
58 |   );
59 | }
60 |
--------------------------------------------------------------------------------
/components.json:
--------------------------------------------------------------------------------
1 | {
2 |   "$schema": "https://ui.shadcn.com/schema.json",
3 |   "style": "new-york",
4 |   "rsc": true,
5 |   "tsx": true,
6 |   "tailwind": {
7 |     "config": "tailwind.config.ts",
8 |     "css": "app/globals.css",
9 |     "baseColor": "neutral",
10 |     "cssVariables": true,
11 |     "prefix": ""
12 |   },
13 |   "aliases": {
14 |     "components": "@/components",
15 |     "utils": "@/lib/utils",
16 |     "ui": "@/components/ui",
17 |     "lib": "@/lib",
18 |     "hooks": "@/hooks"
19 |   },
20 |   "iconLibrary": "lucide"
21 | }
--------------------------------------------------------------------------------
/components/theme-provider.tsx:
--------------------------------------------------------------------------------
1 | "use client"
2 |
3 | import * as React from "react"
4 | import { ThemeProvider as NextThemesProvider } from "next-themes"
5 |
6 | export function ThemeProvider({
7 |   children,
8 |   ...props
9 | }: React.ComponentProps<typeof NextThemesProvider>) {
10 |   return <NextThemesProvider {...props}>{children}</NextThemesProvider>
11 | }
12 |
--------------------------------------------------------------------------------
/components/ui/button.tsx:
--------------------------------------------------------------------------------
1 | import * as React from "react"
2 | import { Slot } from "@radix-ui/react-slot"
3 | import { cva, type VariantProps } from "class-variance-authority"
4 |
5 | import { cn } from "@/lib/utils"
6 |
7 | const buttonVariants = cva(
8 |   "inline-flex items-center justify-center gap-2 whitespace-nowrap rounded-md text-sm font-medium transition-colors focus-visible:outline-none focus-visible:ring-1 focus-visible:ring-ring disabled:pointer-events-none disabled:opacity-50 [&_svg]:pointer-events-none [&_svg]:size-4 [&_svg]:shrink-0",
9 |   {
10 |     variants: {
11 |       variant: {
12 |         default:
13 |           "bg-primary text-primary-foreground shadow hover:bg-primary/90",
14 |         destructive:
15 |           "bg-destructive text-destructive-foreground shadow-sm hover:bg-destructive/90",
16 |         outline:
17 |           "border border-input bg-background shadow-sm hover:bg-accent hover:text-accent-foreground",
18 |         secondary:
19 |           "bg-secondary text-secondary-foreground shadow-sm hover:bg-secondary/80",
20 |         ghost: "hover:bg-accent hover:text-accent-foreground",
21 |         link: "text-primary underline-offset-4 hover:underline",
22 |       },
23 |       size: {
24 |         default: "h-9 px-4 py-2",
25 |         sm: "h-8 rounded-md px-3 text-xs",
26 |         lg: "h-10 rounded-md px-8",
27 |         icon: "h-9 w-9",
28 |       },
29 |     },
30 |     defaultVariants: {
31 |       variant: "default",
32 |       size: "default",
33 |     },
34 |   }
35 | )
36 |
37 | export interface ButtonProps
38 |   extends React.ButtonHTMLAttributes<HTMLButtonElement>,
39 |     VariantProps<typeof buttonVariants> {
40 |   asChild?: boolean
41 | }
42 |
43 | const Button = React.forwardRef<HTMLButtonElement, ButtonProps>(
44 |   ({ className, variant, size, asChild = false, ...props }, ref) => {
Slot : "button" 46 | return ( 47 | 52 | ) 53 | } 54 | ) 55 | Button.displayName = "Button" 56 | 57 | export { Button, buttonVariants } 58 | -------------------------------------------------------------------------------- /components/ui/credenza.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import * as React from "react" 4 | 5 | import { cn } from "@/lib/utils" 6 | import { useMediaQuery } from "@/hooks/use-media-query" 7 | import { 8 | Dialog, 9 | DialogClose, 10 | DialogContent, 11 | DialogDescription, 12 | DialogFooter, 13 | DialogHeader, 14 | DialogTitle, 15 | DialogTrigger, 16 | } from "@/components/ui/dialog" 17 | import { 18 | Drawer, 19 | DrawerClose, 20 | DrawerContent, 21 | DrawerDescription, 22 | DrawerFooter, 23 | DrawerHeader, 24 | DrawerTitle, 25 | DrawerTrigger, 26 | } from "@/components/ui/drawer" 27 | 28 | interface BaseProps { 29 | children: React.ReactNode 30 | } 31 | 32 | interface RootCredenzaProps extends BaseProps { 33 | open?: boolean 34 | onOpenChange?: (open: boolean) => void 35 | } 36 | 37 | interface CredenzaProps extends BaseProps { 38 | className?: string 39 | asChild?: true 40 | } 41 | 42 | const CredenzaContext = React.createContext<{ isDesktop: boolean }>({ 43 | isDesktop: false, 44 | }); 45 | 46 | const useCredenzaContext = () => { 47 | const context = React.useContext(CredenzaContext); 48 | if (!context) { 49 | throw new Error( 50 | "Credenza components cannot be rendered outside the Credenza Context", 51 | ); 52 | } 53 | return context; 54 | }; 55 | 56 | const Credenza = ({ children, ...props }: RootCredenzaProps) => { 57 | const isDesktop = useMediaQuery("(min-width: 768px)"); 58 | const Credenza = isDesktop ? Dialog : Drawer; 59 | 60 | return ( 61 | 62 | 63 | {children} 64 | 65 | 66 | ); 67 | }; 68 | 69 | 70 | const CredenzaTrigger = ({ className, children, ...props }: CredenzaProps) => { 71 | const { isDesktop } = useCredenzaContext(); 72 | const CredenzaTrigger = isDesktop ? DialogTrigger : DrawerTrigger; 73 | 74 | return ( 75 | 76 | {children} 77 | 78 | ); 79 | }; 80 | 81 | const CredenzaClose = ({ className, children, ...props }: CredenzaProps) => { 82 | const { isDesktop } = useCredenzaContext(); 83 | const CredenzaClose = isDesktop ? DialogClose : DrawerClose; 84 | 85 | return ( 86 | 87 | {children} 88 | 89 | ); 90 | }; 91 | 92 | const CredenzaContent = ({ className, children, ...props }: CredenzaProps) => { 93 | const { isDesktop } = useCredenzaContext(); 94 | const CredenzaContent = isDesktop ? DialogContent : DrawerContent; 95 | 96 | return ( 97 | 98 | {children} 99 | 100 | ); 101 | }; 102 | 103 | const CredenzaDescription = ({ 104 | className, 105 | children, 106 | ...props 107 | }: CredenzaProps) => { 108 | const { isDesktop } = useCredenzaContext(); 109 | const CredenzaDescription = isDesktop ? DialogDescription : DrawerDescription; 110 | 111 | return ( 112 | 113 | {children} 114 | 115 | ); 116 | }; 117 | 118 | const CredenzaHeader = ({ className, children, ...props }: CredenzaProps) => { 119 | const { isDesktop } = useCredenzaContext(); 120 | const CredenzaHeader = isDesktop ? DialogHeader : DrawerHeader; 121 | 122 | return ( 123 | 124 | {children} 125 | 126 | ); 127 | }; 128 | 129 | const CredenzaTitle = ({ className, children, ...props }: CredenzaProps) => { 130 | const { isDesktop } = useCredenzaContext(); 131 | const CredenzaTitle = isDesktop ? 
132 |
133 |   return (
134 |     <CredenzaTitle className={className} {...props}>
135 |       {children}
136 |     </CredenzaTitle>
137 |   );
138 | };
139 |
140 | const CredenzaBody = ({ className, children, ...props }: CredenzaProps) => {
141 |   return (
142 |     <div className={cn("px-4 md:px-0", className)} {...props}>
143 |       {children}
144 |     </div>
145 |   );
146 | };
147 |
148 | const CredenzaFooter = ({ className, children, ...props }: CredenzaProps) => {
149 |   const { isDesktop } = useCredenzaContext();
150 |   const CredenzaFooter = isDesktop ? DialogFooter : DrawerFooter;
151 |
152 |   return (
153 |     <CredenzaFooter className={className} {...props}>
154 |       {children}
155 |     </CredenzaFooter>
156 |   );
157 | };
158 |
159 | export {
160 |   Credenza,
161 |   CredenzaTrigger,
162 |   CredenzaClose,
163 |   CredenzaContent,
164 |   CredenzaDescription,
165 |   CredenzaHeader,
166 |   CredenzaTitle,
167 |   CredenzaBody,
168 |   CredenzaFooter,
169 | }
--------------------------------------------------------------------------------
/components/ui/dialog.tsx:
--------------------------------------------------------------------------------
1 | "use client"
2 |
3 | import * as React from "react"
4 | import * as DialogPrimitive from "@radix-ui/react-dialog"
5 | import { X } from "lucide-react"
6 |
7 | import { cn } from "@/lib/utils"
8 |
9 | const Dialog = DialogPrimitive.Root
10 |
11 | const DialogTrigger = DialogPrimitive.Trigger
12 |
13 | const DialogPortal = DialogPrimitive.Portal
14 |
15 | const DialogClose = DialogPrimitive.Close
16 |
17 | const DialogOverlay = React.forwardRef<
18 |   React.ElementRef<typeof DialogPrimitive.Overlay>,
19 |   React.ComponentPropsWithoutRef<typeof DialogPrimitive.Overlay>
20 | >(({ className, ...props }, ref) => (
21 |   <DialogPrimitive.Overlay
22 |     ref={ref}
23 |     className={cn(
24 |       "fixed inset-0 z-50 bg-black/80 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0",
25 |       className
26 |     )}
27 |     {...props}
28 |   />
29 | ))
30 | DialogOverlay.displayName = DialogPrimitive.Overlay.displayName
31 |
32 | const DialogContent = React.forwardRef<
33 |   React.ElementRef<typeof DialogPrimitive.Content>,
34 |   React.ComponentPropsWithoutRef<typeof DialogPrimitive.Content>
35 | >(({ className, children, ...props }, ref) => (
36 |   <DialogPortal>
37 |     <DialogOverlay />
38 |     <DialogPrimitive.Content
39 |       ref={ref}
40 |       className={cn(
41 |         "fixed left-[50%] top-[50%] z-50 grid w-full max-w-lg translate-x-[-50%] translate-y-[-50%] gap-4 border bg-background p-6 shadow-lg duration-200 data-[state=open]:animate-in data-[state=closed]:animate-out data-[state=closed]:fade-out-0 data-[state=open]:fade-in-0 data-[state=closed]:zoom-out-95 data-[state=open]:zoom-in-95 data-[state=closed]:slide-out-to-left-1/2 data-[state=closed]:slide-out-to-top-[48%] data-[state=open]:slide-in-from-left-1/2 data-[state=open]:slide-in-from-top-[48%] sm:rounded-lg",
42 |         className
43 |       )}
44 |       {...props}
45 |     >
46 |       {children}
47 |       <DialogPrimitive.Close className="absolute right-4 top-4 rounded-sm opacity-70 ring-offset-background transition-opacity hover:opacity-100 focus:outline-none focus:ring-2 focus:ring-ring focus:ring-offset-2 disabled:pointer-events-none data-[state=open]:bg-accent data-[state=open]:text-muted-foreground">
48 |         <X className="h-4 w-4" />
49 |         <span className="sr-only">Close</span>
50 |       </DialogPrimitive.Close>
51 |     </DialogPrimitive.Content>
52 |   </DialogPortal>
53 | ))
54 | DialogContent.displayName = DialogPrimitive.Content.displayName
55 |
56 | const DialogHeader = ({
57 |   className,
58 |   ...props
59 | }: React.HTMLAttributes<HTMLDivElement>) => (
60 |   <div
61 |     className={cn(
62 |       "flex flex-col space-y-1.5 text-center sm:text-left",
63 |       className
64 |     )}
65 |     {...props}
66 |   />
67 | )
68 | DialogHeader.displayName = "DialogHeader"
69 |
70 | const DialogFooter = ({
71 |   className,
72 |   ...props
73 | }: React.HTMLAttributes<HTMLDivElement>) => (
74 |   <div
75 |     className={cn(
76 |       "flex flex-col-reverse sm:flex-row sm:justify-end sm:space-x-2",
77 |       className
78 |     )}
79 |     {...props}
80 |   />
81 | )
82 | DialogFooter.displayName = "DialogFooter"
83 |
84 | const DialogTitle = React.forwardRef<
85 |   React.ElementRef<typeof DialogPrimitive.Title>,
86 |   React.ComponentPropsWithoutRef<typeof DialogPrimitive.Title>
87 | >(({ className, ...props }, ref) => (
88 |   <DialogPrimitive.Title
89 |     ref={ref}
90 |     className={cn(
91 |       "text-lg font-semibold leading-none tracking-tight",
92 |       className
93 |     )}
94 |     {...props}
95 |   />
96 | ))
97 | DialogTitle.displayName = DialogPrimitive.Title.displayName
98 |
99 | const DialogDescription = React.forwardRef<
100 |   React.ElementRef<typeof DialogPrimitive.Description>,
101 |   React.ComponentPropsWithoutRef<typeof DialogPrimitive.Description>
102 | >(({ className, ...props }, ref) => (
103 |   <DialogPrimitive.Description
104 |     ref={ref}
105 |     className={cn("text-sm text-muted-foreground", className)}
106 |     {...props}
107 |   />
108 | ))
109 | DialogDescription.displayName = DialogPrimitive.Description.displayName
110 |
111 | export {
112 |   Dialog,
113 |   DialogPortal,
114 |   DialogOverlay,
115 |   DialogTrigger,
116 |   DialogClose,
117 |   DialogContent,
118 |   DialogHeader,
119 |   DialogFooter,
120 |   DialogTitle,
121 |   DialogDescription,
122 | }
123 |
--------------------------------------------------------------------------------
/components/ui/drawer.tsx:
--------------------------------------------------------------------------------
1 | "use client"
2 |
3 | import * as React from "react"
4 | import { Drawer as DrawerPrimitive } from "vaul"
5 |
6 | import { cn } from "@/lib/utils"
7 |
8 | const Drawer = ({
9 |   shouldScaleBackground = true,
10 |   ...props
11 | }: React.ComponentProps<typeof DrawerPrimitive.Root>) => (
12 |   <DrawerPrimitive.Root
13 |     shouldScaleBackground={shouldScaleBackground}
14 |     {...props}
15 |   />
16 | )
17 | Drawer.displayName = "Drawer"
18 |
19 | const DrawerTrigger = DrawerPrimitive.Trigger
20 |
21 | const DrawerPortal = DrawerPrimitive.Portal
22 |
23 | const DrawerClose = DrawerPrimitive.Close
24 |
25 | const DrawerOverlay = React.forwardRef<
26 |   React.ElementRef<typeof DrawerPrimitive.Overlay>,
27 |   React.ComponentPropsWithoutRef<typeof DrawerPrimitive.Overlay>
28 | >(({ className, ...props }, ref) => (
29 |   <DrawerPrimitive.Overlay
30 |     ref={ref}
31 |     className={cn("fixed inset-0 z-50 bg-black/80", className)}
32 |     {...props}
33 |   />
34 | ))
35 | DrawerOverlay.displayName = DrawerPrimitive.Overlay.displayName
36 |
37 | const DrawerContent = React.forwardRef<
38 |   React.ElementRef<typeof DrawerPrimitive.Content>,
39 |   React.ComponentPropsWithoutRef<typeof DrawerPrimitive.Content>
40 | >(({ className, children, ...props }, ref) => (
41 |   <DrawerPortal>
42 |     <DrawerOverlay />
43 |     <DrawerPrimitive.Content
44 |       ref={ref}
45 |       className={cn(
46 |         "fixed inset-x-0 bottom-0 z-50 mt-24 flex h-auto flex-col rounded-t-[10px] border bg-background",
47 |         className
48 |       )}
49 |       {...props}
50 |     >
51 |       <div className="mx-auto mt-4 h-2 w-[100px] rounded-full bg-muted" />
52 |       {children}
53 |     </DrawerPrimitive.Content>
54 |   </DrawerPortal>
55 | ))
56 | DrawerContent.displayName = "DrawerContent"
57 |
58 | const DrawerHeader = ({
59 |   className,
60 |   ...props
61 | }: React.HTMLAttributes<HTMLDivElement>) => (
62 |   <div
63 |     className={cn("grid gap-1.5 p-4 text-center sm:text-left", className)}
64 |     {...props}
65 |   />
66 | )
67 | DrawerHeader.displayName = "DrawerHeader"
68 |
69 | const DrawerFooter = ({
70 |   className,
71 |   ...props
72 | }: React.HTMLAttributes<HTMLDivElement>) => (
73 |   <div
74 |     className={cn("mt-auto flex flex-col gap-2 p-4", className)}
75 |     {...props}
76 |   />
77 | ) 78 | DrawerFooter.displayName = "DrawerFooter" 79 | 80 | const DrawerTitle = React.forwardRef< 81 | React.ElementRef, 82 | React.ComponentPropsWithoutRef 83 | >(({ className, ...props }, ref) => ( 84 | 92 | )) 93 | DrawerTitle.displayName = DrawerPrimitive.Title.displayName 94 | 95 | const DrawerDescription = React.forwardRef< 96 | React.ElementRef, 97 | React.ComponentPropsWithoutRef 98 | >(({ className, ...props }, ref) => ( 99 | 104 | )) 105 | DrawerDescription.displayName = DrawerPrimitive.Description.displayName 106 | 107 | export { 108 | Drawer, 109 | DrawerPortal, 110 | DrawerOverlay, 111 | DrawerTrigger, 112 | DrawerClose, 113 | DrawerContent, 114 | DrawerHeader, 115 | DrawerFooter, 116 | DrawerTitle, 117 | DrawerDescription, 118 | } 119 | -------------------------------------------------------------------------------- /components/ui/input.tsx: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | import { cn } from "@/lib/utils" 4 | 5 | const Input = React.forwardRef>( 6 | ({ className, type, ...props }, ref) => { 7 | return ( 8 | 17 | ) 18 | } 19 | ) 20 | Input.displayName = "Input" 21 | 22 | export { Input } 23 | -------------------------------------------------------------------------------- /components/ui/label.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import * as React from "react" 4 | import * as LabelPrimitive from "@radix-ui/react-label" 5 | import { cva, type VariantProps } from "class-variance-authority" 6 | 7 | import { cn } from "@/lib/utils" 8 | 9 | const labelVariants = cva( 10 | "text-sm font-medium leading-none peer-disabled:cursor-not-allowed peer-disabled:opacity-70" 11 | ) 12 | 13 | const Label = React.forwardRef< 14 | React.ElementRef, 15 | React.ComponentPropsWithoutRef & 16 | VariantProps 17 | >(({ className, ...props }, ref) => ( 18 | 23 | )) 24 | Label.displayName = LabelPrimitive.Root.displayName 25 | 26 | export { Label } 27 | -------------------------------------------------------------------------------- /components/ui/popover.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import * as React from "react" 4 | import * as PopoverPrimitive from "@radix-ui/react-popover" 5 | 6 | import { cn } from "@/lib/utils" 7 | 8 | const Popover = PopoverPrimitive.Root 9 | 10 | const PopoverTrigger = PopoverPrimitive.Trigger 11 | 12 | const PopoverAnchor = PopoverPrimitive.Anchor 13 | 14 | const PopoverContent = React.forwardRef< 15 | React.ElementRef, 16 | React.ComponentPropsWithoutRef 17 | >(({ className, align = "center", sideOffset = 4, ...props }, ref) => ( 18 | 19 | 29 | 30 | )) 31 | PopoverContent.displayName = PopoverPrimitive.Content.displayName 32 | 33 | export { Popover, PopoverTrigger, PopoverContent, PopoverAnchor } 34 | -------------------------------------------------------------------------------- /components/ui/scroll-area.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import * as React from "react" 4 | import * as ScrollAreaPrimitive from "@radix-ui/react-scroll-area" 5 | 6 | import { cn } from "@/lib/utils" 7 | 8 | const ScrollArea = React.forwardRef< 9 | React.ElementRef, 10 | React.ComponentPropsWithoutRef 11 | >(({ className, children, ...props }, ref) => ( 12 | 17 | 18 | {children} 19 | 20 | 21 | 22 | 23 | )) 24 | ScrollArea.displayName = ScrollAreaPrimitive.Root.displayName 25 
| 26 | const ScrollBar = React.forwardRef< 27 | React.ElementRef, 28 | React.ComponentPropsWithoutRef 29 | >(({ className, orientation = "vertical", ...props }, ref) => ( 30 | 43 | 44 | 45 | )) 46 | ScrollBar.displayName = ScrollAreaPrimitive.ScrollAreaScrollbar.displayName 47 | 48 | export { ScrollArea, ScrollBar } 49 | -------------------------------------------------------------------------------- /components/ui/switch.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import * as React from "react" 4 | import * as SwitchPrimitives from "@radix-ui/react-switch" 5 | 6 | import { cn } from "@/lib/utils" 7 | 8 | const Switch = React.forwardRef< 9 | React.ElementRef, 10 | React.ComponentPropsWithoutRef 11 | >(({ className, ...props }, ref) => ( 12 | 20 | 25 | 26 | )) 27 | Switch.displayName = SwitchPrimitives.Root.displayName 28 | 29 | export { Switch } 30 | -------------------------------------------------------------------------------- /components/ui/toast.tsx: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | import * as React from "react" 4 | import * as ToastPrimitives from "@radix-ui/react-toast" 5 | import { cva, type VariantProps } from "class-variance-authority" 6 | import { X } from "lucide-react" 7 | 8 | import { cn } from "@/lib/utils" 9 | 10 | const ToastProvider = ToastPrimitives.Provider 11 | 12 | const ToastViewport = React.forwardRef< 13 | React.ElementRef, 14 | React.ComponentPropsWithoutRef 15 | >(({ className, ...props }, ref) => ( 16 | 24 | )) 25 | ToastViewport.displayName = ToastPrimitives.Viewport.displayName 26 | 27 | const toastVariants = cva( 28 | "group pointer-events-auto relative flex w-full items-center justify-between space-x-2 overflow-hidden rounded-md border p-4 pr-6 shadow-lg transition-all data-[swipe=cancel]:translate-x-0 data-[swipe=end]:translate-x-[var(--radix-toast-swipe-end-x)] data-[swipe=move]:translate-x-[var(--radix-toast-swipe-move-x)] data-[swipe=move]:transition-none data-[state=open]:animate-in data-[state=closed]:animate-out data-[swipe=end]:animate-out data-[state=closed]:fade-out-80 data-[state=closed]:slide-out-to-right-full data-[state=open]:slide-in-from-top-full data-[state=open]:sm:slide-in-from-bottom-full", 29 | { 30 | variants: { 31 | variant: { 32 | default: "border bg-background text-foreground", 33 | destructive: 34 | "destructive group border-destructive bg-destructive text-destructive-foreground", 35 | }, 36 | }, 37 | defaultVariants: { 38 | variant: "default", 39 | }, 40 | } 41 | ) 42 | 43 | const Toast = React.forwardRef< 44 | React.ElementRef, 45 | React.ComponentPropsWithoutRef & 46 | VariantProps 47 | >(({ className, variant, ...props }, ref) => { 48 | return ( 49 | 54 | ) 55 | }) 56 | Toast.displayName = ToastPrimitives.Root.displayName 57 | 58 | const ToastAction = React.forwardRef< 59 | React.ElementRef, 60 | React.ComponentPropsWithoutRef 61 | >(({ className, ...props }, ref) => ( 62 | 70 | )) 71 | ToastAction.displayName = ToastPrimitives.Action.displayName 72 | 73 | const ToastClose = React.forwardRef< 74 | React.ElementRef, 75 | React.ComponentPropsWithoutRef 76 | >(({ className, ...props }, ref) => ( 77 | 86 | 87 | 88 | )) 89 | ToastClose.displayName = ToastPrimitives.Close.displayName 90 | 91 | const ToastTitle = React.forwardRef< 92 | React.ElementRef, 93 | React.ComponentPropsWithoutRef 94 | >(({ className, ...props }, ref) => ( 95 | 100 | )) 101 | ToastTitle.displayName = 
ToastPrimitives.Title.displayName
102 |
103 | const ToastDescription = React.forwardRef<
104 |   React.ElementRef<typeof ToastPrimitives.Description>,
105 |   React.ComponentPropsWithoutRef<typeof ToastPrimitives.Description>
106 | >(({ className, ...props }, ref) => (
107 |   <ToastPrimitives.Description
108 |     ref={ref}
109 |     className={cn("text-sm opacity-90", className)}
110 |     {...props}
111 |   />
112 | ))
113 | ToastDescription.displayName = ToastPrimitives.Description.displayName
114 |
115 | type ToastProps = React.ComponentPropsWithoutRef<typeof Toast>
116 |
117 | type ToastActionElement = React.ReactElement<typeof ToastAction>
118 |
119 | export {
120 |   type ToastProps,
121 |   type ToastActionElement,
122 |   ToastProvider,
123 |   ToastViewport,
124 |   Toast,
125 |   ToastTitle,
126 |   ToastDescription,
127 |   ToastClose,
128 |   ToastAction,
129 | }
130 |
--------------------------------------------------------------------------------
/components/ui/toaster.tsx:
--------------------------------------------------------------------------------
1 | "use client"
2 |
3 | import { useToast } from "@/hooks/use-toast"
4 | import {
5 |   Toast,
6 |   ToastClose,
7 |   ToastDescription,
8 |   ToastProvider,
9 |   ToastTitle,
10 |   ToastViewport,
11 | } from "@/components/ui/toast"
12 |
13 | export function Toaster() {
14 |   const { toasts } = useToast()
15 |
16 |   return (
17 |     <ToastProvider>
18 |       {toasts.map(function ({ id, title, description, action, ...props }) {
19 |         return (
20 |           <Toast key={id} {...props}>
21 |             <div className="grid gap-1">
22 |               {title && <ToastTitle>{title}</ToastTitle>}
23 |               {description && (
24 |                 <ToastDescription>{description}</ToastDescription>
25 |               )}
26 |             </div>
27 |             {action}
28 |             <ToastClose />
29 |           </Toast>
30 |         )
31 |       })}
32 |       <ToastViewport />
33 |     </ToastProvider>
34 | ) 35 | } 36 | -------------------------------------------------------------------------------- /hooks/use-media-query.ts: -------------------------------------------------------------------------------- 1 | import * as React from "react" 2 | 3 | export function useMediaQuery(query: string) { 4 | const [value, setValue] = React.useState(false) 5 | 6 | React.useEffect(() => { 7 | function onChange(event: MediaQueryListEvent) { 8 | setValue(event.matches) 9 | } 10 | 11 | const result = matchMedia(query) 12 | result.addEventListener("change", onChange) 13 | setValue(result.matches) 14 | 15 | return () => result.removeEventListener("change", onChange) 16 | }, [query]) 17 | 18 | return value 19 | } -------------------------------------------------------------------------------- /hooks/use-toast.ts: -------------------------------------------------------------------------------- 1 | "use client" 2 | 3 | // Inspired by react-hot-toast library 4 | import * as React from "react" 5 | 6 | import type { 7 | ToastActionElement, 8 | ToastProps, 9 | } from "@/components/ui/toast" 10 | 11 | const TOAST_LIMIT = 1 12 | const TOAST_REMOVE_DELAY = 1000000 13 | 14 | type ToasterToast = ToastProps & { 15 | id: string 16 | title?: React.ReactNode 17 | description?: React.ReactNode 18 | action?: ToastActionElement 19 | } 20 | 21 | const actionTypes = { 22 | ADD_TOAST: "ADD_TOAST", 23 | UPDATE_TOAST: "UPDATE_TOAST", 24 | DISMISS_TOAST: "DISMISS_TOAST", 25 | REMOVE_TOAST: "REMOVE_TOAST", 26 | } as const 27 | 28 | let count = 0 29 | 30 | function genId() { 31 | count = (count + 1) % Number.MAX_SAFE_INTEGER 32 | return count.toString() 33 | } 34 | 35 | type ActionType = typeof actionTypes 36 | 37 | type Action = 38 | | { 39 | type: ActionType["ADD_TOAST"] 40 | toast: ToasterToast 41 | } 42 | | { 43 | type: ActionType["UPDATE_TOAST"] 44 | toast: Partial 45 | } 46 | | { 47 | type: ActionType["DISMISS_TOAST"] 48 | toastId?: ToasterToast["id"] 49 | } 50 | | { 51 | type: ActionType["REMOVE_TOAST"] 52 | toastId?: ToasterToast["id"] 53 | } 54 | 55 | interface State { 56 | toasts: ToasterToast[] 57 | } 58 | 59 | const toastTimeouts = new Map>() 60 | 61 | const addToRemoveQueue = (toastId: string) => { 62 | if (toastTimeouts.has(toastId)) { 63 | return 64 | } 65 | 66 | const timeout = setTimeout(() => { 67 | toastTimeouts.delete(toastId) 68 | dispatch({ 69 | type: "REMOVE_TOAST", 70 | toastId: toastId, 71 | }) 72 | }, TOAST_REMOVE_DELAY) 73 | 74 | toastTimeouts.set(toastId, timeout) 75 | } 76 | 77 | export const reducer = (state: State, action: Action): State => { 78 | switch (action.type) { 79 | case "ADD_TOAST": 80 | return { 81 | ...state, 82 | toasts: [action.toast, ...state.toasts].slice(0, TOAST_LIMIT), 83 | } 84 | 85 | case "UPDATE_TOAST": 86 | return { 87 | ...state, 88 | toasts: state.toasts.map((t) => 89 | t.id === action.toast.id ? { ...t, ...action.toast } : t 90 | ), 91 | } 92 | 93 | case "DISMISS_TOAST": { 94 | const { toastId } = action 95 | 96 | // ! Side effects ! - This could be extracted into a dismissToast() action, 97 | // but I'll keep it here for simplicity 98 | if (toastId) { 99 | addToRemoveQueue(toastId) 100 | } else { 101 | state.toasts.forEach((toast) => { 102 | addToRemoveQueue(toast.id) 103 | }) 104 | } 105 | 106 | return { 107 | ...state, 108 | toasts: state.toasts.map((t) => 109 | t.id === toastId || toastId === undefined 110 | ? 
{ 111 | ...t, 112 | open: false, 113 | } 114 | : t 115 | ), 116 | } 117 | } 118 | case "REMOVE_TOAST": 119 | if (action.toastId === undefined) { 120 | return { 121 | ...state, 122 | toasts: [], 123 | } 124 | } 125 | return { 126 | ...state, 127 | toasts: state.toasts.filter((t) => t.id !== action.toastId), 128 | } 129 | } 130 | } 131 | 132 | const listeners: Array<(state: State) => void> = [] 133 | 134 | let memoryState: State = { toasts: [] } 135 | 136 | function dispatch(action: Action) { 137 | memoryState = reducer(memoryState, action) 138 | listeners.forEach((listener) => { 139 | listener(memoryState) 140 | }) 141 | } 142 | 143 | type Toast = Omit 144 | 145 | function toast({ ...props }: Toast) { 146 | const id = genId() 147 | 148 | const update = (props: ToasterToast) => 149 | dispatch({ 150 | type: "UPDATE_TOAST", 151 | toast: { ...props, id }, 152 | }) 153 | const dismiss = () => dispatch({ type: "DISMISS_TOAST", toastId: id }) 154 | 155 | dispatch({ 156 | type: "ADD_TOAST", 157 | toast: { 158 | ...props, 159 | id, 160 | open: true, 161 | onOpenChange: (open) => { 162 | if (!open) dismiss() 163 | }, 164 | }, 165 | }) 166 | 167 | return { 168 | id: id, 169 | dismiss, 170 | update, 171 | } 172 | } 173 | 174 | function useToast() { 175 | const [state, setState] = React.useState(memoryState) 176 | 177 | React.useEffect(() => { 178 | listeners.push(setState) 179 | return () => { 180 | const index = listeners.indexOf(setState) 181 | if (index > -1) { 182 | listeners.splice(index, 1) 183 | } 184 | } 185 | }, [state]) 186 | 187 | return { 188 | ...state, 189 | toast, 190 | dismiss: (toastId?: string) => dispatch({ type: "DISMISS_TOAST", toastId }), 191 | } 192 | } 193 | 194 | export { useToast, toast } 195 | -------------------------------------------------------------------------------- /lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { clsx, type ClassValue } from "clsx" 2 | import { twMerge } from "tailwind-merge" 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)) 6 | } 7 | -------------------------------------------------------------------------------- /next.config.ts: -------------------------------------------------------------------------------- 1 | import type { NextConfig } from "next"; 2 | 3 | const nextConfig: NextConfig = { 4 | /* config options here */ 5 | }; 6 | 7 | export default nextConfig; 8 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "llmstxt-generator", 3 | "version": "0.1.0", 4 | "private": true, 5 | "scripts": { 6 | "dev": "next dev", 7 | "build": "next build", 8 | "start": "next start", 9 | "lint": "next lint" 10 | }, 11 | "dependencies": { 12 | "@mendable/firecrawl-js": "^1.19.0", 13 | "@radix-ui/react-dialog": "^1.1.5", 14 | "@radix-ui/react-label": "^2.1.1", 15 | "@radix-ui/react-popover": "^1.1.5", 16 | "@radix-ui/react-scroll-area": "^1.2.2", 17 | "@radix-ui/react-slot": "^1.1.1", 18 | "@radix-ui/react-switch": "^1.1.2", 19 | "@radix-ui/react-toast": "^1.2.5", 20 | "@supabase/supabase-js": "^2.46.1", 21 | "@vercel/analytics": "^1.5.0", 22 | "class-variance-authority": "^0.7.1", 23 | "clsx": "^2.1.1", 24 | "dotenv": "^16.4.5", 25 | "lucide-react": "^0.474.0", 26 | "next": "15.0.3", 27 | "next-themes": "^0.4.4", 28 | "openai": "^4.73.0", 29 | "react": "19.0.0-rc-66855b96-20241106", 30 | "react-dom": 
"19.0.0-rc-66855b96-20241106", 31 | "tailwind-merge": "^2.6.0", 32 | "tailwindcss-animate": "^1.0.7", 33 | "vaul": "^1.1.2", 34 | "zod": "^3.24.1" 35 | }, 36 | "devDependencies": { 37 | "@types/node": "^20", 38 | "@types/react": "^18", 39 | "@types/react-dom": "^18", 40 | "postcss": "^8", 41 | "tailwindcss": "^3.4.1", 42 | "typescript": "^5" 43 | } 44 | } 45 | -------------------------------------------------------------------------------- /postcss.config.mjs: -------------------------------------------------------------------------------- 1 | /** @type {import('postcss-load-config').Config} */ 2 | const config = { 3 | plugins: { 4 | tailwindcss: {}, 5 | }, 6 | }; 7 | 8 | export default config; 9 | -------------------------------------------------------------------------------- /public/file.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/globe.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/next.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/vercel.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /public/window.svg: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /tailwind.config.ts: -------------------------------------------------------------------------------- 1 | import type { Config } from "tailwindcss"; 2 | 3 | export default { 4 | darkMode: ["class"], 5 | content: [ 6 | "./pages/**/*.{js,ts,jsx,tsx,mdx}", 7 | "./components/**/*.{js,ts,jsx,tsx,mdx}", 8 | "./app/**/*.{js,ts,jsx,tsx,mdx}", 9 | ], 10 | theme: { 11 | extend: { 12 | fontFamily: { 13 | sans: ["var(--font-geist-sans)"], 14 | mono: ["var(--font-geist-mono)"], 15 | }, 16 | colors: { 17 | background: 'hsl(var(--background))', 18 | foreground: 'hsl(var(--foreground))', 19 | card: { 20 | DEFAULT: 'hsl(var(--card))', 21 | foreground: 'hsl(var(--card-foreground))' 22 | }, 23 | popover: { 24 | DEFAULT: 'hsl(var(--popover))', 25 | foreground: 'hsl(var(--popover-foreground))' 26 | }, 27 | primary: { 28 | DEFAULT: 'hsl(var(--primary))', 29 | foreground: 'hsl(var(--primary-foreground))' 30 | }, 31 | secondary: { 32 | DEFAULT: 'hsl(var(--secondary))', 33 | foreground: 'hsl(var(--secondary-foreground))' 34 | }, 35 | muted: { 36 | DEFAULT: 'hsl(var(--muted))', 37 | foreground: 'hsl(var(--muted-foreground))' 38 | }, 39 | accent: { 40 | DEFAULT: 'hsl(var(--accent))', 41 | foreground: 'hsl(var(--accent-foreground))' 42 | }, 43 | destructive: { 44 | DEFAULT: 'hsl(var(--destructive))', 45 | foreground: 'hsl(var(--destructive-foreground))' 46 | }, 47 | border: 'hsl(var(--border))', 48 | input: 'hsl(var(--input))', 49 | ring: 'hsl(var(--ring))', 50 | chart: { 51 | '1': 'hsl(var(--chart-1))', 52 | '2': 'hsl(var(--chart-2))', 53 | '3': 'hsl(var(--chart-3))', 54 | '4': 'hsl(var(--chart-4))', 55 | '5': 'hsl(var(--chart-5))' 56 | } 57 | }, 58 | borderRadius: { 59 | lg: 'var(--radius)', 60 | md: 'calc(var(--radius) - 2px)', 61 | sm: 'calc(var(--radius) - 4px)' 62 | } 63 | } 64 | }, 65 | plugins: [require("tailwindcss-animate")], 
66 | } satisfies Config; 67 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2017", 4 | "lib": ["dom", "dom.iterable", "esnext"], 5 | "allowJs": true, 6 | "skipLibCheck": true, 7 | "strict": true, 8 | "noEmit": true, 9 | "esModuleInterop": true, 10 | "module": "esnext", 11 | "moduleResolution": "bundler", 12 | "resolveJsonModule": true, 13 | "isolatedModules": true, 14 | "jsx": "preserve", 15 | "incremental": true, 16 | "plugins": [ 17 | { 18 | "name": "next" 19 | } 20 | ], 21 | "paths": { 22 | "@/*": ["./*"] 23 | } 24 | }, 25 | "include": ["next-env.d.ts", "**/*.ts", "**/*.tsx", ".next/types/**/*.ts"], 26 | "exclude": ["node_modules"] 27 | } 28 | --------------------------------------------------------------------------------
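For reference, a minimal sketch of calling the route handler shown above from TypeScript. It relies only on the JSON shape the handler returns, `NextResponse.json({ llmstxt, llmsFulltxt })`; the base URL, catch-all path, and `FIRECRAWL_API_KEY` query parameter are assumptions, not confirmed by this source:

```ts
// Hypothetical client for the generator route; base URL and query parameter
// are assumptions (a local `next dev` server on port 3000 is assumed here).
interface LlmsTxtResult {
  llmstxt: string;      // standard llms.txt content
  llmsFulltxt: string;  // full llms-full.txt content
}

async function generateLlmsTxt(site: string, apiKey?: string): Promise<LlmsTxtResult> {
  const base = "http://localhost:3000"; // assumed dev-server origin
  const query = apiKey ? `?FIRECRAWL_API_KEY=${encodeURIComponent(apiKey)}` : "";
  const res = await fetch(`${base}/${site}${query}`);
  if (!res.ok) {
    throw new Error(`Request failed with status ${res.status}`);
  }
  // The handler responds with { llmstxt, llmsFulltxt }.
  return res.json() as Promise<LlmsTxtResult>;
}

// Example usage: generation can take several minutes for large sites.
generateLlmsTxt("firecrawl.dev")
  .then(({ llmstxt }) => console.log(llmstxt.slice(0, 200)))
  .catch(console.error);
```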