├── apps ├── api │ ├── src │ │ ├── controllers │ │ │ ├── types.ts │ │ │ └── v1 │ │ │ │ ├── liveness.ts │ │ │ │ ├── readiness.ts │ │ │ │ ├── generate-tree-status.ts │ │ │ │ ├── generate-tree.ts │ │ │ │ ├── generate-llmstxt-status.ts │ │ │ │ ├── generate-llmstxt.ts │ │ │ │ ├── teams.ts │ │ │ │ ├── user.ts │ │ │ │ └── auth.ts │ │ ├── db │ │ │ ├── schema │ │ │ │ ├── index.ts │ │ │ │ ├── llm-txts.ts │ │ │ │ ├── users.ts │ │ │ │ ├── apiKeys.ts │ │ │ │ └── teams.ts │ │ │ ├── migrations │ │ │ │ ├── meta │ │ │ │ │ └── _journal.json │ │ │ │ └── 0000_typical_kylun.sql │ │ │ ├── index.ts │ │ │ ├── queries.ts │ │ │ └── mutations.ts │ │ ├── core │ │ │ ├── metrics │ │ │ │ ├── workers │ │ │ │ │ ├── types.ts │ │ │ │ │ ├── outputMetricsWorker.ts │ │ │ │ │ └── fileMetricsWorker.ts │ │ │ │ ├── calculateMetrics.ts │ │ │ │ ├── calculateAllFileMetrics.ts │ │ │ │ └── calculateOutputMetrics.ts │ │ │ ├── file │ │ │ │ ├── fileTypes.ts │ │ │ │ ├── filePathSort.ts │ │ │ │ ├── parsePackageJson.ts │ │ │ │ ├── fileCollect.ts │ │ │ │ ├── workers │ │ │ │ │ ├── fileCollectWorker.ts │ │ │ │ │ └── fileProcessWorker.ts │ │ │ │ ├── fileTreeGenerate.ts │ │ │ │ └── fileProcess.ts │ │ │ ├── actions │ │ │ │ ├── index.ts │ │ │ │ └── remoteAction.ts │ │ │ ├── treeSitter │ │ │ │ ├── queries │ │ │ │ │ ├── queryCss.ts │ │ │ │ │ ├── queryVue.ts │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── queryC.ts │ │ │ │ │ ├── querySolidity.ts │ │ │ │ │ ├── queryCpp.ts │ │ │ │ │ ├── queryPython.ts │ │ │ │ │ ├── queryJava.ts │ │ │ │ │ ├── querySwift.ts │ │ │ │ │ ├── queryCSharp.ts │ │ │ │ │ ├── queryPhp.ts │ │ │ │ │ ├── queryRuby.ts │ │ │ │ │ ├── queryGo.ts │ │ │ │ │ ├── queryRust.ts │ │ │ │ │ ├── queryTypescript.ts │ │ │ │ │ ├── queryJavascript.ts │ │ │ │ │ └── README.md │ │ │ │ ├── parseStrategies │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── VueParseStrategy.ts │ │ │ │ │ ├── DefaultParseStrategy.ts │ │ │ │ │ ├── ParseStrategy.ts │ │ │ │ │ └── CssParseStrategy.ts │ │ │ │ ├── ext2Lang.ts │ │ │ │ ├── lang2Query.ts │ │ │ │ └── loadLanguage.ts │ │ 
│ ├── output │ │ │ │ ├── outputStyles │ │ │ │ │ ├── xmlStyle.ts │ │ │ │ │ └── plainStyle.ts │ │ │ │ └── outputGeneratorTypes.ts │ │ │ └── tokenCount │ │ │ │ └── index.ts │ │ ├── middleware │ │ │ ├── index.ts │ │ │ └── jwt-auth.ts │ │ ├── utils │ │ │ └── errorHandle.ts │ │ ├── lib │ │ │ ├── processConcurrency.ts │ │ │ ├── generate-tree.ts │ │ │ ├── logger.ts │ │ │ ├── generate-llms-txt │ │ │ │ └── redis.ts │ │ │ └── concurrency-limit.ts │ │ ├── services │ │ │ ├── api-keys-service.ts │ │ │ ├── jwt-service.ts │ │ │ ├── rate-limiter.ts │ │ │ └── queue-service.ts │ │ ├── types.ts │ │ ├── index.ts │ │ └── routes │ │ │ └── v1.ts │ ├── .dockerignore │ ├── .gitignore │ ├── drizzle.config.ts │ ├── .swcrc │ ├── .env.example │ ├── tsconfig.json │ ├── docker-entrypoint.sh │ ├── Dockerfile │ └── package.json └── web │ ├── public │ ├── favicon.ico │ ├── favicon.png │ ├── posts │ │ └── sf.webp │ ├── favicon-16x16.png │ ├── favicon-32x32.png │ ├── apple-touch-icon.png │ ├── android-chrome-192x192.png │ ├── android-chrome-512x512.png │ └── site.webmanifest │ ├── postcss.config.mjs │ ├── README.md │ ├── src │ ├── api.ts │ ├── utils │ │ ├── k-formatter.ts │ │ ├── classnames.ts │ │ ├── seo.ts │ │ └── loggingMiddleware.tsx │ ├── lib │ │ ├── codecrawl.ts │ │ ├── query-client.tsx │ │ ├── constants.ts │ │ ├── content.ts │ │ ├── default-query-fn.ts │ │ └── mutation-fn.ts │ ├── global-middleware.ts │ ├── routes │ │ ├── redirect.tsx │ │ ├── app │ │ │ ├── _app │ │ │ │ ├── index.tsx │ │ │ │ ├── logs.tsx │ │ │ │ ├── keys.tsx │ │ │ │ └── playground.tsx │ │ │ └── _app.tsx │ │ ├── api │ │ │ └── hello.ts │ │ ├── (marketing) │ │ │ ├── _landing │ │ │ │ ├── index.tsx │ │ │ │ ├── blog.$slug.tsx │ │ │ │ └── playground.tsx │ │ │ └── _landing.tsx │ │ └── (auth) │ │ │ ├── _auth.tsx │ │ │ └── logout.tsx │ ├── client.tsx │ ├── components │ │ ├── svgs │ │ │ ├── index.ts │ │ │ ├── x-logo.tsx │ │ │ ├── circle-check-mark.tsx │ │ │ ├── linkedin-logo.tsx │ │ │ ├── github-logo.tsx │ │ │ ├── logo-black.tsx │ │ │ └── 
logo.tsx │ │ ├── ui │ │ │ ├── button-link.tsx │ │ │ ├── toast.tsx │ │ │ └── accordion.tsx │ │ ├── not-found.tsx │ │ ├── logout-confirm.tsx │ │ ├── marketing │ │ │ ├── hero.tsx │ │ │ └── cta.tsx │ │ ├── catch-boundary.tsx │ │ └── playground │ │ │ └── api-key-selector.tsx │ ├── ssr.tsx │ ├── hooks │ │ ├── use-scroll.ts │ │ ├── use-verify-login.tsx │ │ └── use-save-tokens.tsx │ ├── store │ │ ├── use-api-key-store.tsx │ │ ├── use-token-store.tsx │ │ └── use-playground-requests.tsx │ ├── router.tsx │ ├── contexts │ │ └── auth-context.tsx │ └── styles │ │ └── app.css │ ├── content │ ├── updates │ │ └── 2025-04-19.mdx │ └── posts │ │ ├── bullmq.mdx │ │ ├── how-to-ship.mdx │ │ └── where-to-use-codecrawl.mdx │ ├── .gitignore │ ├── app.config.ts │ ├── tsconfig.json │ ├── content-collections.ts │ └── package.json ├── SELF_HOST.md ├── pnpm-workspace.yaml ├── packages └── sdk │ ├── .env.example │ ├── jest.config.js │ ├── tsup.config.ts │ ├── tsconfig.json │ ├── src │ └── __tests__ │ │ └── index.test.ts │ ├── LICENSE │ ├── package.json │ └── .gitignore ├── start.sh ├── Caddyfile ├── .vscode └── settings.json ├── .github ├── workflows │ ├── ghcr-clean.yaml │ ├── sdk-test.yaml │ ├── sdk-publish.yaml │ ├── publish-image-staging.yaml │ └── publish-image.yaml └── ISSUE_TEMPLATE │ ├── feature_request.md │ ├── bug_report.md │ └── self_host_issue.md ├── package.json ├── CONTRIBUTING.md ├── .gitignore ├── assets └── logo.svg └── compose.local.yaml /apps/api/src/controllers/types.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /SELF_HOST.md: -------------------------------------------------------------------------------- 1 | # Self-hosting Codecrawl -------------------------------------------------------------------------------- /pnpm-workspace.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - packages/* 3 | - 
apps/* -------------------------------------------------------------------------------- /apps/api/.dockerignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | /dist/ 3 | .env 4 | *.csv -------------------------------------------------------------------------------- /packages/sdk/.env.example: -------------------------------------------------------------------------------- 1 | API_URL=http://localhost:4000 2 | TEST_API_KEY=cc-YOUR_TEST_API_KEY -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | docker compose -f compose.yaml -f compose.$1.yaml up ${@:2} -------------------------------------------------------------------------------- /apps/web/public/favicon.ico: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revokslab/codecrawl/HEAD/apps/web/public/favicon.ico -------------------------------------------------------------------------------- /apps/web/public/favicon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revokslab/codecrawl/HEAD/apps/web/public/favicon.png -------------------------------------------------------------------------------- /apps/web/public/posts/sf.webp: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revokslab/codecrawl/HEAD/apps/web/public/posts/sf.webp -------------------------------------------------------------------------------- /apps/web/postcss.config.mjs: -------------------------------------------------------------------------------- 1 | export default { 2 | plugins: { 3 | '@tailwindcss/postcss': {}, 4 | }, 5 | }; 6 | -------------------------------------------------------------------------------- 
/apps/web/public/favicon-16x16.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revokslab/codecrawl/HEAD/apps/web/public/favicon-16x16.png -------------------------------------------------------------------------------- /apps/web/public/favicon-32x32.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revokslab/codecrawl/HEAD/apps/web/public/favicon-32x32.png -------------------------------------------------------------------------------- /apps/web/public/apple-touch-icon.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revokslab/codecrawl/HEAD/apps/web/public/apple-touch-icon.png -------------------------------------------------------------------------------- /apps/web/public/android-chrome-192x192.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revokslab/codecrawl/HEAD/apps/web/public/android-chrome-192x192.png -------------------------------------------------------------------------------- /apps/web/public/android-chrome-512x512.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/revokslab/codecrawl/HEAD/apps/web/public/android-chrome-512x512.png -------------------------------------------------------------------------------- /apps/api/src/db/schema/index.ts: -------------------------------------------------------------------------------- 1 | export * from './llm-txts'; 2 | export * from './users'; 3 | export * from './apiKeys'; 4 | export * from './teams'; 5 | -------------------------------------------------------------------------------- /apps/api/src/core/metrics/workers/types.ts: -------------------------------------------------------------------------------- 1 | export interface FileMetrics { 2 | path: string; 3 
| charCount: number; 4 | tokenCount: number; 5 | } 6 | -------------------------------------------------------------------------------- /apps/api/.gitignore: -------------------------------------------------------------------------------- 1 | /node_modules/ 2 | /dist/ 3 | .env 4 | *.csv 5 | dump.rdb 6 | 7 | .rdb 8 | .sentryclirc 9 | 10 | .env.local 11 | .env 12 | .env.production -------------------------------------------------------------------------------- /Caddyfile: -------------------------------------------------------------------------------- 1 | {$DOMAIN} { 2 | # Reverse proxy for API service 3 | reverse_proxy api:4000 4 | } 5 | 6 | {$DOMAIN}:80 { 7 | # Redirect plain-HTTP requests to HTTPS, keeping the original URI. The target needs an explicit scheme; without one the Location header is treated as a relative path. 8 | redir https://{$DOMAIN}{uri} 9 | } -------------------------------------------------------------------------------- /apps/web/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to Codecrawl Web! 2 | 3 | This site is built with TanStack Start! 4 | 5 | ## Development 6 | 7 | From your terminal: 8 | 9 | ```sh 10 | pnpm install 11 | pnpm dev 12 | ``` -------------------------------------------------------------------------------- /apps/web/src/api.ts: -------------------------------------------------------------------------------- 1 | import { 2 | createStartAPIHandler, 3 | defaultAPIFileRouteHandler, 4 | } from '@tanstack/react-start/api'; 5 | 6 | export default createStartAPIHandler(defaultAPIFileRouteHandler); 7 | -------------------------------------------------------------------------------- /apps/api/src/core/file/fileTypes.ts: -------------------------------------------------------------------------------- 1 | export interface RawFile { 2 | path: string; 3 | content: string; 4 | } 5 | 6 | export interface ProcessedFile { 7 | path: string; 8 | content: string; 9 | } 10 | -------------------------------------------------------------------------------- /apps/web/src/utils/k-formatter.ts: 
-------------------------------------------------------------------------------- 1 | export function kFormatter(num: number) { 2 | return new Intl.NumberFormat('en-US', { 3 | notation: 'compact', 4 | maximumFractionDigits: 1, 5 | }).format(num); 6 | } 7 | -------------------------------------------------------------------------------- /apps/web/src/utils/classnames.ts: -------------------------------------------------------------------------------- 1 | import { type ClassValue, clsx } from 'clsx'; 2 | import { twMerge } from 'tailwind-merge'; 3 | 4 | export function cn(...inputs: ClassValue[]) { 5 | return twMerge(clsx(inputs)); 6 | } 7 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/liveness.ts: -------------------------------------------------------------------------------- 1 | import type { Request, Response } from 'express'; 2 | 3 | export async function livenessController(_req: Request, res: Response) { 4 | return res.status(200).json({ status: 'OK' }); 5 | } 6 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/readiness.ts: -------------------------------------------------------------------------------- 1 | import type { Request, Response } from 'express'; 2 | 3 | export async function readinessController(_req: Request, res: Response) { 4 | return res.status(200).json({ status: 'ok' }); 5 | } 6 | -------------------------------------------------------------------------------- /apps/api/src/core/actions/index.ts: -------------------------------------------------------------------------------- 1 | export * from './llmsTxtAction'; 2 | export * from './runComprehensiveLlmsTxtAction'; 3 | export * from './remoteAction'; 4 | export * from './defaultAction'; 5 | export * from './fileTreeAction'; 6 | -------------------------------------------------------------------------------- /apps/web/src/lib/codecrawl.ts: 
-------------------------------------------------------------------------------- 1 | import Codecrawl from '@codecrawl/sdk'; 2 | 3 | export const codecrawl = new Codecrawl({ 4 | apiKey: import.meta.env.VITE_CODECRAWL_API_KEY, 5 | apiUrl: import.meta.env.VITE_CODECRAWL_API_URL, 6 | }); 7 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryCss.ts: -------------------------------------------------------------------------------- 1 | export const queryCss = ` 2 | (comment) @comment 3 | 4 | (rule_set 5 | (selectors) @name.definition.selector 6 | ) @definition.selector 7 | 8 | (at_rule) @definition.at_rule 9 | `; 10 | -------------------------------------------------------------------------------- /apps/web/src/global-middleware.ts: -------------------------------------------------------------------------------- 1 | import { registerGlobalMiddleware } from '@tanstack/react-start'; 2 | import { logMiddleware } from './utils/loggingMiddleware'; 3 | 4 | registerGlobalMiddleware({ 5 | middleware: [logMiddleware], 6 | }); 7 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryVue.ts: -------------------------------------------------------------------------------- 1 | export const queryVue = ` 2 | (comment) @comment 3 | 4 | (template_element) @template 5 | 6 | (script_element) @script 7 | 8 | (style_element) @style 9 | 10 | (interpolation) @interpolation 11 | `; 12 | -------------------------------------------------------------------------------- /.vscode/settings.json: -------------------------------------------------------------------------------- 1 | { 2 | "files.watcherExclude": { 3 | "**/routeTree.gen.ts": true 4 | }, 5 | "search.exclude": { 6 | "**/routeTree.gen.ts": true 7 | }, 8 | "files.readonlyInclude": { 9 | "**/routeTree.gen.ts": true 10 | } 11 | } 12 | 
-------------------------------------------------------------------------------- /apps/web/src/routes/redirect.tsx: -------------------------------------------------------------------------------- 1 | import { createFileRoute, redirect } from '@tanstack/react-router'; 2 | 3 | export const Route = createFileRoute('/redirect')({ 4 | beforeLoad: async () => { 5 | throw redirect({ 6 | to: '/', 7 | }); 8 | }, 9 | }); 10 | -------------------------------------------------------------------------------- /apps/web/public/site.webmanifest: -------------------------------------------------------------------------------- 1 | {"name":"","short_name":"","icons":[{"src":"/android-chrome-192x192.png","sizes":"192x192","type":"image/png"},{"src":"/android-chrome-512x512.png","sizes":"512x512","type":"image/png"}],"theme_color":"#ffffff","background_color":"#ffffff","display":"standalone"} -------------------------------------------------------------------------------- /apps/web/src/routes/app/_app/index.tsx: -------------------------------------------------------------------------------- 1 | import { createFileRoute, redirect } from '@tanstack/react-router'; 2 | 3 | export const Route = createFileRoute('/app/_app/')({ 4 | beforeLoad: async () => { 5 | throw redirect({ 6 | to: '/app/playground', 7 | }); 8 | }, 9 | }); 10 | -------------------------------------------------------------------------------- /apps/api/src/core/output/outputStyles/xmlStyle.ts: -------------------------------------------------------------------------------- 1 | export const getXmlTemplate = () => { 2 | return /* xml */ ` 3 | 4 | {{#each processedFiles}} 5 | 6 | {{{this.content}}} 7 | 8 | 9 | {{/each}} 10 | 11 | `; 12 | }; 13 | -------------------------------------------------------------------------------- /apps/api/src/db/migrations/meta/_journal.json: -------------------------------------------------------------------------------- 1 | { 2 | "version": "7", 3 | "dialect": "postgresql", 4 | "entries": [ 5 
| { 6 | "idx": 0, 7 | "version": "7", 8 | "when": 1744391761751, 9 | "tag": "0000_typical_kylun", 10 | "breakpoints": true 11 | } 12 | ] 13 | } 14 | -------------------------------------------------------------------------------- /apps/web/src/client.tsx: -------------------------------------------------------------------------------- 1 | /// 2 | import { hydrateRoot } from 'react-dom/client'; 3 | import { StartClient } from '@tanstack/react-start'; 4 | import { createRouter } from './router'; 5 | 6 | const router = createRouter(); 7 | 8 | hydrateRoot(document, ); 9 | -------------------------------------------------------------------------------- /apps/web/src/routes/api/hello.ts: -------------------------------------------------------------------------------- 1 | import { json } from '@tanstack/react-start'; 2 | import { createAPIFileRoute } from '@tanstack/react-start/api'; 3 | 4 | export const APIRoute = createAPIFileRoute('/api/hello')({ 5 | GET: ({ request, params }) => { 6 | return json({ message: 'Hello "/api/hello"!' }); 7 | }, 8 | }); 9 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/parseStrategies/index.ts: -------------------------------------------------------------------------------- 1 | export * from './CssParseStrategy'; 2 | export * from './DefaultParseStrategy'; 3 | export * from './GoParseStrategy'; 4 | export * from './ParseStrategy'; 5 | export * from './PythonParseStrategy'; 6 | export * from './TypescriptStrategy'; 7 | export * from './VueParseStrategy'; 8 | -------------------------------------------------------------------------------- /apps/web/content/updates/2025-04-19.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Codecrawl 1.0" 3 | image: "/posts/sf.webp" 4 | authors: ["Irere Emmanuel"] 5 | date: 2025-04-18 6 | --- 7 | 8 | Codecrawl 1.0 is a new version of Codecrawl that is now available to everyone. 
9 | 10 | ### What's new? 11 | 12 | - New UI 13 | - New features 14 | - New API 15 | 16 | 17 | -------------------------------------------------------------------------------- /apps/api/drizzle.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'drizzle-kit'; 2 | import dotenv from 'dotenv'; 3 | 4 | dotenv.config(); 5 | 6 | export default defineConfig({ 7 | schema: './src/db/schema', 8 | out: './src/db/migrations', 9 | dialect: 'postgresql', 10 | dbCredentials: { 11 | url: process.env.DATABASE_URL as string, 12 | }, 13 | }); 14 | -------------------------------------------------------------------------------- /apps/api/.swcrc: -------------------------------------------------------------------------------- 1 | { 2 | "jsc": { 3 | "parser": { 4 | "syntax": "typescript", 5 | "tsx": false, 6 | "decorators": true, 7 | "dynamicImport": true 8 | }, 9 | "target": "es2020", 10 | "baseUrl": ".", 11 | "paths": { 12 | "~/*": ["src/*"] 13 | } 14 | }, 15 | "module": { 16 | "type": "commonjs" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /apps/web/src/components/svgs/index.ts: -------------------------------------------------------------------------------- 1 | export { default as SvgCircleCheckMark } from './circle-check-mark'; 2 | export { default as SvgLogo } from './logo'; 3 | export { default as SvgLogoBlack } from './logo-black'; 4 | export { default as SvgXLogo } from './x-logo'; 5 | export { default as SvgGithubLogo } from './github-logo'; 6 | export { default as SvgLinkedInLogo } from './linkedin-logo'; 7 | -------------------------------------------------------------------------------- /apps/api/.env.example: -------------------------------------------------------------------------------- 1 | # ===== Required ENVS ====== 2 | IS_PRODUCTION=true 3 | PORT=4000 4 | HOST=0.0.0.0 5 | NODE_ENV=production 6 | REDIS_URL=redis://localhost:6379 7 | 
DATABASE_URL=postgresql://postgres:postgres@localhost:5432/codecrawl 8 | REDIS_RATE_LIMIT_URL=redis://localhost:6379 9 | ACCESS_TOKEN_SECRET=your_secret 10 | REFRESH_TOKEN_SECRET=your_secret 11 | 12 | # ===== Optional ENVS ====== -------------------------------------------------------------------------------- /apps/web/.gitignore: -------------------------------------------------------------------------------- 1 | node_modules 2 | package-lock.json 3 | yarn.lock 4 | 5 | .DS_Store 6 | .cache 7 | .env 8 | .vercel 9 | .output 10 | .vinxi 11 | 12 | /build/ 13 | /api/ 14 | /server/build 15 | /public/build 16 | .vinxi 17 | # Sentry Config File 18 | .env.sentry-build-plugin 19 | /test-results/ 20 | /playwright-report/ 21 | /blob-report/ 22 | /playwright/.cache/ 23 | 24 | .content-collections 25 | -------------------------------------------------------------------------------- /packages/sdk/jest.config.js: -------------------------------------------------------------------------------- 1 | /** @type {import('ts-jest').JestConfigWithTsJest} **/ 2 | export default { 3 | testEnvironment: 'node', 4 | moduleNameMapper: { 5 | '^(\\.{1,2}/.*)\\.js$': '$1', 6 | }, 7 | extensionsToTreatAsEsm: ['.ts'], 8 | transform: { 9 | '^.+\\.(mt|t|cj|j)s$': [ 10 | 'ts-jest', 11 | { 12 | useESM: true, 13 | }, 14 | ], 15 | }, 16 | }; 17 | -------------------------------------------------------------------------------- /apps/web/content/posts/bullmq.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Message Queue with BullMQ" 3 | summary: "A comprehensive introduction to our product management blog series" 4 | image: "/posts/sf.webp" 5 | category: "engineering" 6 | author: "Irere Emmanuel" 7 | date: 2025-04-18 8 | --- 9 | 10 | ## How to use BullMQ 11 | 12 | BullMQ is a powerful tool that allows you to create a queue system for your application. 
13 | 14 | -------------------------------------------------------------------------------- /apps/api/src/db/index.ts: -------------------------------------------------------------------------------- 1 | import { drizzle } from 'drizzle-orm/postgres-js'; 2 | import postgres from 'postgres'; 3 | 4 | import * as schema from './schema'; 5 | 6 | if (!process.env.DATABASE_URL) { 7 | throw new Error('DATABASE_URL environment variable must be provided.'); 8 | } 9 | 10 | const queryClient = postgres(process.env.DATABASE_URL); 11 | export const db = drizzle({ client: queryClient, schema }); 12 | -------------------------------------------------------------------------------- /apps/web/content/posts/how-to-ship.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Promote your next startup." 3 | summary: "A comprehensive introduction to our product management blog series" 4 | image: "/posts/sf.webp" 5 | category: "marketing" 6 | author: "Irere Emmanuel" 7 | date: 2025-04-18 8 | --- 9 | 10 | ## How to market your next startup. 11 | 12 | Shipping is a process that involves creating a product, marketing it, and then selling it. 
13 | -------------------------------------------------------------------------------- /apps/web/src/ssr.tsx: -------------------------------------------------------------------------------- 1 | /// 2 | import { 3 | createStartHandler, 4 | defaultStreamHandler, 5 | } from '@tanstack/react-start/server'; 6 | import { getRouterManifest } from '@tanstack/react-start/router-manifest'; 7 | 8 | import { createRouter } from './router'; 9 | 10 | export default createStartHandler({ 11 | createRouter, 12 | getRouterManifest, 13 | })(defaultStreamHandler); 14 | -------------------------------------------------------------------------------- /apps/web/src/lib/query-client.tsx: -------------------------------------------------------------------------------- 1 | import { isServer, QueryClient } from '@tanstack/react-query'; 2 | import { defaultQueryFn } from './default-query-fn'; 3 | 4 | export const queryClient = new QueryClient({ 5 | defaultOptions: { 6 | mutations: { 7 | onError: (error) => { 8 | console.log(error); 9 | }, 10 | }, 11 | queries: { 12 | queryFn: defaultQueryFn, 13 | enabled: !isServer, 14 | }, 15 | }, 16 | }); 17 | -------------------------------------------------------------------------------- /apps/api/src/middleware/index.ts: -------------------------------------------------------------------------------- 1 | import type { NextFunction, Request, Response } from 'express'; 2 | 3 | export function wrap( 4 | controller: (req: Req, res: Response) => Promise, 5 | ): (req: Req, res: Response, next: NextFunction) => any { 6 | return (req, res, next) => { 7 | controller(req, res).catch((err) => next(err)); 8 | }; 9 | } 10 | 11 | export * from './api-key-auth'; 12 | export * from './jwt-auth'; 13 | -------------------------------------------------------------------------------- /apps/api/src/core/output/outputGeneratorTypes.ts: -------------------------------------------------------------------------------- 1 | import type { ConfigMerged } from 
'~/config/configSchema'; 2 | import type { ProcessedFile } from '../file/fileTypes'; 3 | 4 | export interface OutputGeneratorContext { 5 | generationDate: string; 6 | treeString: string; 7 | processedFiles: ProcessedFile[]; 8 | config: ConfigMerged; 9 | } 10 | 11 | export interface RenderContext { 12 | readonly processedFiles: ReadonlyArray; 13 | readonly markdownCodeBlockDelimiter: string; 14 | } 15 | -------------------------------------------------------------------------------- /apps/api/src/db/schema/llm-txts.ts: -------------------------------------------------------------------------------- 1 | import { integer, pgTable, text, timestamp, uuid } from 'drizzle-orm/pg-core'; 2 | 3 | export const llmsTxts = pgTable('llms_txts', { 4 | id: uuid('id').primaryKey().defaultRandom(), 5 | repoUrl: text('repo_url').notNull(), 6 | llmstxt: text('llmstxt').notNull(), 7 | maxUrls: integer('max_urls').default(1), 8 | llmstxtFull: text('llmstxt_full'), 9 | createdAt: timestamp('created_at').defaultNow(), 10 | updatedAt: timestamp('updated_at').defaultNow(), 11 | }); 12 | -------------------------------------------------------------------------------- /apps/api/src/core/output/outputStyles/plainStyle.ts: -------------------------------------------------------------------------------- 1 | const PLAIN_SEPARATOR = '='.repeat(16); 2 | const PLAIN_LONG_SEPARATOR = '='.repeat(64); 3 | 4 | export const getPlainTemplate = () => { 5 | return ` 6 | ${PLAIN_LONG_SEPARATOR} 7 | Files 8 | ${PLAIN_LONG_SEPARATOR} 9 | 10 | {{#each processedFiles}} 11 | ${PLAIN_SEPARATOR} 12 | File: {{{this.path}}} 13 | ${PLAIN_SEPARATOR} 14 | {{{this.content}}} 15 | {{/each}} 16 | 17 | ${PLAIN_LONG_SEPARATOR} 18 | End of Codebase 19 | ${PLAIN_LONG_SEPARATOR} 20 | `; 21 | }; 22 | -------------------------------------------------------------------------------- /.github/workflows/ghcr-clean.yaml: -------------------------------------------------------------------------------- 1 | name: Cleanup Untagged Images 
2 | 3 | on: 4 | workflow_dispatch: 5 | 6 | jobs: 7 | delete-untagged-images: 8 | name: Delete Untagged Images 9 | runs-on: ubuntu-latest 10 | steps: 11 | - uses: bots-house/ghcr-delete-image-action@v1.1.0 12 | with: 13 | owner: idee8 14 | name: codecrawl 15 | # NOTE(review): this passes the workflow's built-in GITHUB_TOKEN, not a Personal Access Token — confirm it has permission to delete packages 16 | token: ${{secrets.GITHUB_TOKEN}} 17 | # Keep latest N untagged images 18 | untagged-keep-latest: 5 -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/index.ts: -------------------------------------------------------------------------------- 1 | export * from './queryC'; 2 | export * from './queryCSharp'; 3 | export * from './queryCpp'; 4 | export * from './queryCss'; 5 | export * from './queryGo'; 6 | export * from './queryJava'; 7 | export * from './queryJavascript'; 8 | export * from './queryPhp'; 9 | export * from './queryPython'; 10 | export * from './queryRuby'; 11 | export * from './queryRust'; 12 | export * from './querySolidity'; 13 | export * from './querySwift'; 14 | export * from './queryTypescript'; 15 | export * from './queryVue'; 16 | -------------------------------------------------------------------------------- /apps/web/app.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from '@tanstack/react-start/config'; 2 | import tsConfigPaths from 'vite-tsconfig-paths'; 3 | import contentCollections from '@content-collections/vinxi'; 4 | import tailwindcss from '@tailwindcss/vite'; 5 | 6 | export default defineConfig({ 7 | tsr: { 8 | appDirectory: 'src', 9 | }, 10 | vite: { 11 | plugins: [ 12 | contentCollections(), 13 | tailwindcss(), 14 | tsConfigPaths({ 15 | projects: ['./tsconfig.json'], 16 | }), 17 | ], 18 | }, 19 | }); 20 | -------------------------------------------------------------------------------- /apps/web/src/hooks/use-scroll.ts: -------------------------------------------------------------------------------- 1
| import { useCallback, useEffect, useState } from 'react'; 2 | 3 | export function useScroll(threshold: number) { 4 | const [scrolled, setScrolled] = useState(false); 5 | 6 | const onScroll = useCallback(() => { 7 | setScrolled(window.scrollY > threshold); 8 | }, [threshold]); 9 | 10 | useEffect(() => { 11 | window.addEventListener('scroll', onScroll); 12 | onScroll(); 13 | return () => window.removeEventListener('scroll', onScroll); 14 | }, [onScroll]); 15 | 16 | return scrolled; 17 | } 18 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryC.ts: -------------------------------------------------------------------------------- 1 | export const queryC = ` 2 | (comment) @comment 3 | 4 | (struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class 5 | 6 | (declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class 7 | 8 | (function_declarator declarator: (identifier) @name.definition.function) @definition.function 9 | 10 | (type_definition declarator: (type_identifier) @name.definition.type) @definition.type 11 | 12 | (enum_specifier name: (type_identifier) @name.definition.type) @definition.type 13 | `; 14 | -------------------------------------------------------------------------------- /apps/web/src/lib/constants.ts: -------------------------------------------------------------------------------- 1 | export const BLOG_CATEGORIES = [ 2 | { 3 | name: 'Marketing', 4 | slug: 'marketing', 5 | gradient: 'from-blue-50 to-blue-100', 6 | }, 7 | { 8 | name: 'Product', 9 | slug: 'product', 10 | gradient: 'from-green-50 to-green-100', 11 | }, 12 | { 13 | name: 'Engineering', 14 | slug: 'engineering', 15 | gradient: 'from-purple-50 to-purple-100', 16 | }, 17 | ]; 18 | 19 | export const API_BASE_URL = 'http://localhost:4000/v1'; 20 | export const isServer = typeof window === 'undefined'; 21 | export const loginNextPathKey = 
'@code/next'; 22 | -------------------------------------------------------------------------------- /packages/sdk/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | export default defineConfig({ 4 | entry: ['src/index.ts'], 5 | outDir: 'dist', 6 | format: ['esm', 'cjs'], 7 | sourcemap: true, 8 | clean: true, 9 | dts: true, 10 | platform: 'node', 11 | target: 'node22', 12 | tsconfig: 'tsconfig.json', 13 | noExternal: ['typescript-event-target'], 14 | esbuildOptions(options) { 15 | options.define = { 16 | ...options.define, 17 | 'process.env.NODE_ENV': JSON.stringify( 18 | process.env.NODE_ENV || 'production', 19 | ), 20 | }; 21 | }, 22 | }); 23 | -------------------------------------------------------------------------------- /apps/web/content/posts/where-to-use-codecrawl.mdx: -------------------------------------------------------------------------------- 1 | --- 2 | title: "Where to use Codecrawl" 3 | summary: "A comprehensive introduction to our product management blog series" 4 | image: "/posts/sf.webp" 5 | category: "product" 6 | author: "Irere Emmanuel" 7 | date: 2025-04-18 8 | --- 9 | 10 | Codecrawl is a tool that allows you to crawl websites and extract data. It is a powerful tool that can be used to extract data from websites. 11 | 12 | ## How to use Codecrawl 13 | 14 | Codecrawl is a tool that allows you to crawl websites and extract data. It is a powerful tool that can be used to extract data from websites. 
15 | -------------------------------------------------------------------------------- /apps/web/src/routes/app/_app/logs.tsx: -------------------------------------------------------------------------------- 1 | import { Flex, Text } from '@radix-ui/themes'; 2 | import { createFileRoute } from '@tanstack/react-router'; 3 | import { seo } from '~/utils/seo'; 4 | 5 | export const Route = createFileRoute('/app/_app/logs')({ 6 | component: RouteComponent, 7 | head(ctx) { 8 | return { 9 | meta: [...seo({ title: 'Logs | Codecrawl' })], 10 | }; 11 | }, 12 | }); 13 | 14 | function RouteComponent() { 15 | return ( 16 | 17 | 18 | Logs 19 | 20 | 21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "codecrawl", 3 | "version": "1.0.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "dev": "pnpm --filter './apps/*' run dev", 8 | "workers": "pnpm --filter './apps/api' run workers", 9 | "format": "biome format --write ." 
10 | }, 11 | "keywords": [], 12 | "author": "", 13 | "license": "ISC", 14 | "packageManager": "pnpm@10.2.0+sha512.0d27364e0139c6aadeed65ada153135e0ca96c8da42123bd50047f961339dc7a758fc2e944b428f52be570d1bd3372455c1c65fa2e7aa0bfbf931190f9552001", 15 | "devDependencies": { 16 | "@biomejs/biome": "^1.9.4" 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /apps/web/src/hooks/use-verify-login.tsx: -------------------------------------------------------------------------------- 1 | import { useTokenStore } from '~/store/use-token-store'; 2 | import { useRouter, useLocation } from '@tanstack/react-router'; 3 | import { useEffect } from 'react'; 4 | /** Returns whether the token store holds both an access token and a refresh token; when it does not, navigates to '/signin' with the current pathname (prefixed with '/') passed as the `next` search param. */ 5 | export const useVerifyLoggedIn = () => { 6 | const router = useRouter(); 7 | const asPath = useLocation().pathname; 8 | const hasTokens = useTokenStore((s) => !!(s.accessToken && s.refreshToken)); 9 | 10 | useEffect(() => { 11 | if (!hasTokens) { 12 | router.navigate({ 13 | to: '/signin', 14 | search: { next: `/${asPath}` }, /* NOTE(review): asPath is useLocation().pathname; if that already starts with '/', `next` gets a doubled leading slash — confirm */ 15 | }); 16 | } 17 | }, [hasTokens, asPath, router]); 18 | 19 | return hasTokens; 20 | }; 21 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/ext2Lang.ts: -------------------------------------------------------------------------------- 1 | /** Maps a file extension (no leading dot) to a language name. 2 | * @see https://unpkg.com/browse/tree-sitter-wasms@latest/out/ 3 | */ 4 | export const ext2Lang = { 5 | vue: 'vue', 6 | cjs: 'javascript', 7 | mjs: 'javascript', 8 | mjsx: 'javascript', 9 | js: 'javascript', 10 | jsx: 'javascript', 11 | ctx: 'typescript', /* NOTE(review): 'ctx' is not a standard TypeScript extension and '.cts' has no entry here; presumably a typo for 'cts' — confirm */ 12 | mts: 'typescript', 13 | mtsx: 'typescript', 14 | ts: 'typescript', 15 | tsx: 'typescript', 16 | h: 'c', 17 | c: 'c', 18 | hpp: 'cpp', 19 | cpp: 'cpp', 20 | py: 'python', 21 | rs: 'rust', 22 | java: 'java', 23 | go: 'go', 24 | cs: 'c_sharp', 25 | rb: 'ruby', 26 | php: 'php', 27 | swift: 'swift', 28 | css: 'css', 29 | sol: 'solidity', 30 | }; 31 | 
-------------------------------------------------------------------------------- /apps/web/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "include": ["**/*.ts", "**/*.tsx"], 3 | "compilerOptions": { 4 | "strict": true, 5 | "esModuleInterop": true, 6 | "jsx": "react-jsx", 7 | "module": "ESNext", 8 | "moduleResolution": "Bundler", 9 | "lib": ["DOM", "DOM.Iterable", "ES2022"], 10 | "isolatedModules": true, 11 | "resolveJsonModule": true, 12 | "skipLibCheck": true, 13 | "target": "ES2022", 14 | "allowJs": true, 15 | "forceConsistentCasingInFileNames": true, 16 | "baseUrl": ".", 17 | "paths": { 18 | "~/*": ["./src/*"], 19 | "content-collections": ["./.content-collections/generated"] 20 | }, 21 | "noEmit": true 22 | } 23 | } 24 | -------------------------------------------------------------------------------- /apps/web/src/lib/content.ts: -------------------------------------------------------------------------------- 1 | import { notFound } from '@tanstack/react-router'; 2 | import { allPosts, allUpdates } from 'content-collections'; 3 | 4 | export function findPostBySlug(slug: string) { 5 | const post = allPosts.find((post) => post._meta.path === slug); 6 | if (!post) { 7 | throw notFound(); 8 | } 9 | return post; 10 | } 11 | 12 | export function findUpdateBySlug(slug: string) { 13 | const update = allUpdates.find((update) => update._meta.path === slug); 14 | if (!update) { 15 | throw notFound(); 16 | } 17 | return update; 18 | } 19 | 20 | export function findPostsByCategory(category: string) { 21 | return allPosts.filter((post) => post.category === category); 22 | } 23 | -------------------------------------------------------------------------------- /apps/api/src/db/queries.ts: -------------------------------------------------------------------------------- 1 | import { and, desc, eq, gte } from 'drizzle-orm'; 2 | import { db } from '.'; 3 | import { llmsTxts } from './schema'; 4 | 5 | export async function 
getLlmsTxtByRepoUrl(repoUrl: string): Promise { 6 | return await db 7 | .select() 8 | .from(llmsTxts) 9 | .where(eq(llmsTxts.repoUrl, repoUrl)) 10 | .limit(1); 11 | } 12 | 13 | export async function getOrderedLlmsTxtByRepoUrl( 14 | repoUrl: string, 15 | maxUrls: number, 16 | ): Promise { 17 | return await db 18 | .select() 19 | .from(llmsTxts) 20 | .where(and(gte(llmsTxts.maxUrls, maxUrls), eq(llmsTxts.repoUrl, repoUrl))) 21 | .orderBy(desc(llmsTxts.createdAt)) 22 | .limit(1); 23 | } 24 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # Contributors guide: 2 | 3 | Welcome to [Codecrawl](https://crawl.irere.dev)! Here are some instructions on how to set up the project locally, so you can run it on your own (and contribute). 4 | 5 | If you're contributing, note that the process is similar to other open source repos (i.e., fork codecrawl, make changes, run tests, open a PR). If you have any questions or would like help getting on board, reach out to irere@idee8.agency or submit an issue! 6 | 7 | ## Running the project locally 8 | 9 | First, start by installing dependencies: 10 | 11 | 1. node.js [instructions](https://nodejs.org/en/learn/getting-started/how-to-install-nodejs) 12 | 2. 
pnpm [instructions](https://pnpm.io/installation) 13 | -------------------------------------------------------------------------------- /apps/api/src/utils/errorHandle.ts: -------------------------------------------------------------------------------- 1 | import { z } from 'zod'; 2 | /** Error type thrown by rethrowValidationErrorIfZodError below. */ 3 | export class RepomixConfigValidationError extends Error { 4 | constructor(message: string) { 5 | super(message); 6 | this.name = 'CrawlConfigValidationError'; /* NOTE(review): differs from the class identifier RepomixConfigValidationError — confirm which name is intended */ 7 | } 8 | } 9 | /** If `error` is a z.ZodError, throws a RepomixConfigValidationError whose message is `message` followed by one "[path] message" entry per Zod issue; for any other value it returns without throwing. */ 10 | export const rethrowValidationErrorIfZodError = ( 11 | error: unknown, 12 | message: string, 13 | ): void => { 14 | if (error instanceof z.ZodError) { 15 | const zodErrorText = error.errors 16 | .map((err) => `[${err.path.join('.')}] ${err.message}`) 17 | .join('\n '); 18 | throw new RepomixConfigValidationError( 19 | `${message}\n\n ${zodErrorText}\n\n Please check the config file and try again.`, 20 | ); 21 | } 22 | }; 23 | -------------------------------------------------------------------------------- /apps/api/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "es2020", 4 | "module": "CommonJS", 5 | "allowJs": true, 6 | "removeComments": true, 7 | "resolveJsonModule": true, 8 | "typeRoots": ["./node_modules/@types"], 9 | "sourceMap": true, 10 | "outDir": "dist", 11 | "strict": true, 12 | "lib": ["es2020"], 13 | "baseUrl": ".", 14 | "forceConsistentCasingInFileNames": true, 15 | "esModuleInterop": true, 16 | "experimentalDecorators": true, 17 | "emitDecoratorMetadata": true, 18 | "moduleResolution": "Node", 19 | "skipLibCheck": true, 20 | "paths": { 21 | "~/*": ["src/*"] 22 | } 23 | }, 24 | "include": ["src/**/*"], 25 | "exclude": ["node_modules"] 26 | } 27 | -------------------------------------------------------------------------------- /packages/sdk/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | // See 
https://www.totaltypescript.com/tsconfig-cheat-sheet 4 | /* Base Options: */ 5 | "esModuleInterop": true, 6 | "skipLibCheck": true, 7 | "target": "es2022", 8 | "allowJs": true, 9 | "resolveJsonModule": true, 10 | "moduleDetection": "force", 11 | "isolatedModules": true, 12 | "verbatimModuleSyntax": true, 13 | 14 | /* Strictness */ 15 | "strict": true, 16 | "noUncheckedIndexedAccess": true, 17 | "noImplicitOverride": true, 18 | 19 | /* If NOT transpiling with TypeScript: */ 20 | "module": "NodeNext", 21 | "noEmit": true 22 | }, 23 | "include": ["src/**/*"], 24 | "exclude": ["node_modules", "dist", "**/__tests__/*"] 25 | } 26 | -------------------------------------------------------------------------------- /.github/workflows/sdk-test.yaml: -------------------------------------------------------------------------------- 1 | name: SDK Test Suite 2 | 3 | on: 4 | pull_request: 5 | branches: 6 | - main 7 | - beta 8 | paths: 9 | - packages/sdk/** 10 | 11 | env: 12 | TEST_API_KEY: ${{ secrets.TEST_API_KEY }} 13 | 14 | jobs: 15 | test: 16 | name: Run tests 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v3 20 | - name: Set up Node.js 21 | uses: actions/setup-node@v3 22 | with: 23 | node-version: "20" 24 | cache: "npm" 25 | 26 | - name: Install dependencies 27 | run: npm install 28 | working-directory: ./packages/sdk 29 | - name: Run tests 30 | run: npm run test 31 | working-directory: ./packages/sdk -------------------------------------------------------------------------------- /apps/web/src/routes/(marketing)/_landing/index.tsx: -------------------------------------------------------------------------------- 1 | import { createFileRoute } from '@tanstack/react-router'; 2 | import { CTA } from '~/components/marketing/cta'; 3 | import { FAQ } from '~/components/marketing/faq'; 4 | import { Hero } from '~/components/marketing/hero'; 5 | import { Pricing } from '~/components/marketing/pricing'; 6 | 7 | export const Route = 
createFileRoute('/(marketing)/_landing/')({ 8 | component: Home, 9 | }); 10 | 11 | function Home() { 12 | return ( 13 |
14 | 15 | 16 |
17 | 18 |
19 | 20 |
21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /.github/workflows/sdk-publish.yaml: -------------------------------------------------------------------------------- 1 | name: Publish SDK 2 | 3 | on: 4 | push: 5 | branches: 6 | - main 7 | paths: 8 | - packages/sdk/package.json 9 | 10 | env: 11 | TEST_API_KEY: ${{ secrets.TEST_API_KEY }} 12 | 13 | jobs: 14 | publish: 15 | name: Publish 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v3 19 | - name: Set up Node.js 20 | uses: actions/setup-node@v3 21 | with: 22 | node-version: "20" 23 | - name: Authenticate 24 | run: echo "//registry.npmjs.org/:_authToken=${{ secrets.NPM_TOKEN }}" > ~/.npmrc 25 | - name: Publish 26 | run: | 27 | npm i 28 | npm run build 29 | npm publish --access public 30 | working-directory: ./packages/sdk -------------------------------------------------------------------------------- /apps/web/src/components/svgs/x-logo.tsx: -------------------------------------------------------------------------------- 1 | import type { SVGProps } from 'react'; 2 | import { cn } from '~/utils/classnames'; 3 | 4 | export default function XLogo(props: SVGProps) { 5 | return ( 6 | 17 | 21 | 22 | ); 23 | } 24 | -------------------------------------------------------------------------------- /apps/web/src/components/ui/button-link.tsx: -------------------------------------------------------------------------------- 1 | interface ButtonLinkProps 2 | extends React.AnchorHTMLAttributes { 3 | children: React.ReactNode; 4 | className?: string; 5 | } 6 | 7 | export function ButtonLink({ 8 | children, 9 | className, 10 | href, 11 | ...props 12 | }: ButtonLinkProps) { 13 | return ( 14 | 19 | {children} 20 | 21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /apps/api/docker-entrypoint.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash -e 2 | 3 | if [ 
"$UID" -eq 0 ]; then 4 | set +e # disable failing on errror 5 | ulimit -n 65535 6 | echo "NEW ULIMIT: $(ulimit -n)" 7 | set -e # enable failing on error 8 | else 9 | echo ENTRYPOINT DID NOT RUN AS ROOT 10 | fi 11 | 12 | case "${PROCESS_TYPE}" in 13 | "app") 14 | echo "RUNNING app" 15 | node --max-old-space-size=8192 dist/src/index.js 16 | ;; 17 | "worker") 18 | echo "RUNNING worker" 19 | node --max-old-space-size=8192 dist/src/services/queue-worker.js 20 | ;; 21 | "index-worker") 22 | echo "RUNNING index worker" 23 | node --max-old-space-size=8192 dist/src/services/indexing/index-worker.js 24 | ;; 25 | *) 26 | echo "RUNNING default app" 27 | node --max-old-space-size=8192 dist/src/index.js 28 | ;; 29 | esac -------------------------------------------------------------------------------- /apps/web/src/store/use-api-key-store.tsx: -------------------------------------------------------------------------------- 1 | import { create } from "zustand"; 2 | import { persist } from "zustand/middleware"; 3 | 4 | interface ApiKeyState { 5 | selectedApiKey: string | null; 6 | } 7 | 8 | interface ApiKeyActions { 9 | setSelectedApiKey: (key: string | null) => void; 10 | reset: () => void; 11 | } 12 | 13 | type ApiKeyStore = ApiKeyState & ApiKeyActions; 14 | 15 | const initialApiKeyState: ApiKeyState = { 16 | selectedApiKey: null, 17 | }; 18 | 19 | export const useApiKeyStore = create()( 20 | persist( 21 | (set) => ({ 22 | ...initialApiKeyState, 23 | 24 | setSelectedApiKey: (key: string | null) => set({ selectedApiKey: key }), 25 | 26 | reset: () => set(initialApiKeyState), 27 | }), 28 | { 29 | name: "api-key-storage", 30 | } 31 | ) 32 | ); 33 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/lang2Query.ts: -------------------------------------------------------------------------------- 1 | import { 2 | queryC, 3 | queryCpp, 4 | queryCSharp, 5 | queryCss, 6 | queryGo, 7 | queryJava, 8 | queryJavascript, 9 | queryPhp, 
10 | queryPython, 11 | queryRuby, 12 | queryRust, 13 | querySolidity, 14 | querySwift, 15 | queryTypescript, 16 | queryVue, 17 | } from './queries'; 18 | 19 | export const lang2Query = { 20 | javascript: queryJavascript, 21 | typescript: queryTypescript, 22 | c: queryC, 23 | cpp: queryCpp, 24 | python: queryPython, 25 | rust: queryRust, 26 | c_sharp: queryCSharp, 27 | java: queryJava, 28 | php: queryPhp, 29 | swift: querySwift, 30 | solidity: querySolidity, 31 | css: queryCss, 32 | vue: queryVue, 33 | ruby: queryRuby, 34 | go: queryGo, 35 | }; 36 | 37 | export type SupportedLang = keyof typeof lang2Query; 38 | -------------------------------------------------------------------------------- /apps/web/src/components/svgs/circle-check-mark.tsx: -------------------------------------------------------------------------------- 1 | import type { SVGProps } from 'react'; 2 | 3 | export default function SvgCircleCheckMark(props: SVGProps) { 4 | return ( 5 | 12 | 17 | 25 | 26 | ); 27 | } 28 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/querySolidity.ts: -------------------------------------------------------------------------------- 1 | export const querySolidity = ` 2 | ;; Comments 3 | (comment) @comment 4 | 5 | ;; Contract declarations 6 | (contract_declaration 7 | name: (identifier) @name.definition.class) @definition.class 8 | 9 | ;; Interface declarations 10 | (interface_declaration 11 | name: (identifier) @name.definition.interface) @definition.interface 12 | 13 | ;; Function declarations 14 | (function_definition 15 | name: (identifier) @name.definition.function) @definition.function 16 | 17 | ;; Import statements 18 | (import_directive) @definition.import 19 | 20 | ; Event definitions 21 | (event_definition 22 | name: (identifier) @name.definition.event) @definition.event 23 | 24 | ; Modifier definitions 25 | (modifier_definition 26 | name: (identifier) @name.definition.modifier) 
@definition.modifier 27 | `; 28 | -------------------------------------------------------------------------------- /apps/web/src/router.tsx: -------------------------------------------------------------------------------- 1 | import { createRouter as createTanStackRouter } from '@tanstack/react-router'; 2 | import { routeTree } from './routeTree.gen'; 3 | import { DefaultCatchBoundary } from './components/catch-boundary'; 4 | import { NotFound } from './components/not-found'; 5 | import { queryClient } from './lib/query-client'; 6 | 7 | export function createRouter() { 8 | const router = createTanStackRouter({ 9 | routeTree, 10 | defaultPreload: 'intent', 11 | defaultErrorComponent: DefaultCatchBoundary, 12 | defaultNotFoundComponent: () => , 13 | scrollRestoration: true, 14 | context: { 15 | queryClient, 16 | }, 17 | }); 18 | 19 | return router; 20 | } 21 | 22 | declare module '@tanstack/react-router' { 23 | interface Register { 24 | router: ReturnType; 25 | } 26 | } 27 | -------------------------------------------------------------------------------- /apps/api/src/db/mutations.ts: -------------------------------------------------------------------------------- 1 | import { eq } from 'drizzle-orm'; 2 | import { db } from '.'; 3 | import { llmsTxts } from './schema'; 4 | 5 | export interface LlmsTxt { 6 | repoUrl: string; 7 | llmstxt: string; 8 | llmstxtFull: string; 9 | maxUrls: number; 10 | } 11 | 12 | export async function updateLlmsTxtByRepoUrl({ 13 | repoUrl, 14 | llmstxt, 15 | llmstxtFull, 16 | maxUrls, 17 | }: LlmsTxt) { 18 | return await db 19 | .update(llmsTxts) 20 | .set({ 21 | llmstxt, 22 | llmstxtFull, 23 | maxUrls, 24 | }) 25 | .where(eq(llmsTxts.repoUrl, repoUrl)); 26 | } 27 | 28 | export async function createLlmsTxt({ 29 | llmstxt, 30 | llmstxtFull, 31 | maxUrls, 32 | repoUrl, 33 | }: LlmsTxt) { 34 | return await db 35 | .insert(llmsTxts) 36 | .values({ llmstxt, repoUrl, llmstxtFull, maxUrls }) 37 | .returning(); 38 | } 39 | 
-------------------------------------------------------------------------------- /apps/api/src/core/file/filePathSort.ts: -------------------------------------------------------------------------------- 1 | import path from 'node:path'; 2 | /** Sorts paths segment by segment (split on path.sep). At the first differing segment, a path that continues deeper sorts before one that ends there; otherwise the segments are compared with localeCompare. NOTE: this calls Array.prototype.sort on `filePaths`, so the caller's array is reordered in place and the same array is returned. */ 3 | // Sort paths for general use (not affected by git change count) 4 | export const sortPaths = (filePaths: string[]): string[] => { 5 | return filePaths.sort((a, b) => { 6 | const partsA = a.split(path.sep); 7 | const partsB = b.split(path.sep); 8 | 9 | for (let i = 0; i < Math.min(partsA.length, partsB.length); i++) { 10 | if (partsA[i] !== partsB[i]) { 11 | const isLastA = i === partsA.length - 1; 12 | const isLastB = i === partsB.length - 1; 13 | 14 | if (!isLastA && isLastB) return -1; // a continues deeper (directory at this level), b ends here: a first 15 | if (isLastA && !isLastB) return 1; // a ends here (file), b continues deeper: b first 16 | 17 | return partsA[i].localeCompare(partsB[i]); 18 | } 19 | } 20 | 21 | // All shared segments are equal (one path is a prefix of the other): fewer segments first 22 | return partsA.length - partsB.length; 23 | }); 24 | }; 25 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # See https://help.github.com/articles/ignoring-files/ for more about ignoring files. 
2 | 3 | # dependencies 4 | /node_modules 5 | node_modules 6 | /.pnp 7 | .pnp.js 8 | 9 | # testing 10 | /coverage 11 | 12 | # next.js 13 | /.next/ 14 | /out/ 15 | 16 | # production 17 | /build 18 | 19 | # misc 20 | .DS_Store 21 | *.pem 22 | 23 | # debug 24 | npm-debug.log* 25 | yarn-debug.log* 26 | yarn-error.log* 27 | 28 | # local env files 29 | .env*.local 30 | .env 31 | .env.test 32 | 33 | # vercel 34 | .vercel 35 | 36 | # typescript 37 | *.tsbuildinfo 38 | next-env.d.ts 39 | 40 | .turbo 41 | dist 42 | .next 43 | 44 | # tinybird 45 | .tinyb 46 | .venv 47 | 48 | .react-email 49 | 50 | .sentryclirc 51 | .pnpm-store/* 52 | 53 | # Serwist 54 | **/public/serwist** 55 | **/public/sw** 56 | **/public/worker** 57 | **/public/fallback** 58 | **/public/precache** 59 | 60 | # Sanity 61 | .sanity 62 | 63 | TODO.md -------------------------------------------------------------------------------- /apps/web/src/routes/(auth)/_auth.tsx: -------------------------------------------------------------------------------- 1 | import { Flex } from '@radix-ui/themes'; 2 | import { createFileRoute, Outlet, useRouter } from '@tanstack/react-router'; 3 | import { useEffect } from 'react'; 4 | import { useTokenStore } from '~/store/use-token-store'; 5 | 6 | export const Route = createFileRoute('/(auth)/_auth')({ 7 | component: RouteComponent, 8 | }); 9 | 10 | function RouteComponent() { 11 | const router = useRouter(); 12 | const hasTokens = useTokenStore((s) => !!(s.accessToken && s.refreshToken)); 13 | 14 | useEffect(() => { 15 | if (hasTokens) { 16 | router.navigate({ to: '/app/playground' }); 17 | } 18 | }, [hasTokens, router]); 19 | 20 | return ( 21 | 27 | 28 | 29 | ); 30 | } 31 | -------------------------------------------------------------------------------- /apps/web/src/components/not-found.tsx: -------------------------------------------------------------------------------- 1 | import { Link } from '@tanstack/react-router'; 2 | 3 | export function NotFound({ children }: { 
children?: any }) { 4 | return ( 5 |
6 |
7 | {children ||

The page you are looking for does not exist.

} 8 |
9 |

10 | 17 | 21 | Start Over 22 | 23 |

24 |
25 | ); 26 | } 27 | -------------------------------------------------------------------------------- /apps/api/src/db/schema/users.ts: -------------------------------------------------------------------------------- 1 | import type { InferInsertModel, InferSelectModel } from 'drizzle-orm'; 2 | import { 3 | pgTable, 4 | varchar, 5 | timestamp, 6 | integer, 7 | uuid, 8 | index, 9 | text, 10 | } from 'drizzle-orm/pg-core'; 11 | 12 | export const users = pgTable( 13 | 'users', 14 | { 15 | id: uuid('id').primaryKey().defaultRandom(), 16 | email: varchar('email', { length: 255 }).notNull().unique(), 17 | hashedPassword: text('hashed_password'), 18 | googleId: varchar('google_id', { length: 255 }), 19 | githubId: varchar('github_id', { length: 255 }), 20 | tokenVersion: integer('token_version').default(0), 21 | createdAt: timestamp('created_at', { withTimezone: true }).defaultNow(), 22 | }, 23 | (table) => [index('users_email_idx').on(table.email)], 24 | ); 25 | 26 | export type User = InferSelectModel; 27 | export type NewUser = InferInsertModel; 28 | -------------------------------------------------------------------------------- /apps/web/src/lib/default-query-fn.ts: -------------------------------------------------------------------------------- 1 | import { API_BASE_URL } from '~/lib/constants'; 2 | import { useTokenStore } from '~/store/use-token-store'; 3 | 4 | export const defaultQueryFn = async ({ queryKey }: { queryKey: any }) => { 5 | const { accessToken, refreshToken } = useTokenStore.getState(); 6 | 7 | const resp = await fetch(`${API_BASE_URL}/${queryKey}`, { 8 | headers: { 9 | 'X-Access-Token': accessToken, 10 | 'X-Refresh-Token': refreshToken, 11 | }, 12 | }); 13 | 14 | if (resp.status !== 200) { 15 | throw new Error(await resp.text()); 16 | } 17 | 18 | const _accessToken = resp.headers.get('access-token'); 19 | const _refreshToken = resp.headers.get('refresh-token'); 20 | 21 | if (_accessToken && _refreshToken) { 22 | useTokenStore.getState().setTokens({ 
23 | accessToken: _accessToken, 24 | refreshToken: _refreshToken, 25 | }); 26 | } 27 | 28 | return await resp.json(); 29 | }; 30 | -------------------------------------------------------------------------------- /apps/web/src/components/svgs/linkedin-logo.tsx: -------------------------------------------------------------------------------- 1 | import type { SVGProps } from 'react'; 2 | import { cn } from '~/utils/classnames'; 3 | 4 | export default function SvgLinkedInLogo(props: SVGProps) { 5 | return ( 6 | 16 | 20 | 21 | ); 22 | } 23 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryCpp.ts: -------------------------------------------------------------------------------- 1 | export const queryCpp = ` 2 | (comment) @comment 3 | 4 | (struct_specifier name: (type_identifier) @name.definition.class body:(_)) @definition.class 5 | 6 | (declaration type: (union_specifier name: (type_identifier) @name.definition.class)) @definition.class 7 | 8 | (function_declarator declarator: (identifier) @name.definition.function) @definition.function 9 | 10 | (function_declarator declarator: (field_identifier) @name.definition.function) @definition.function 11 | 12 | (function_declarator declarator: (qualified_identifier scope: (namespace_identifier) @scope name: (identifier) @name.definition.method)) @definition.method 13 | 14 | (type_definition declarator: (type_identifier) @name.definition.type) @definition.type 15 | 16 | (enum_specifier name: (type_identifier) @name.definition.type) @definition.type 17 | 18 | (class_specifier name: (type_identifier) @name.definition.class) @definition.class 19 | `; 20 | -------------------------------------------------------------------------------- /packages/sdk/src/__tests__/index.test.ts: -------------------------------------------------------------------------------- 1 | import dotenv from 'dotenv'; 2 | import { describe, expect, test } from '@jest/globals'; 3 | 
describe('CodecrawlApp', () => {
  test.concurrent(
    'should throw error for no API key only for cloud service',
    async () => {
      // The constructor must be invoked from a *synchronous* callback: an
      // async callback converts a thrown error into a rejected promise, which
      // `toThrow` never sees (and makes `not.toThrow` pass vacuously).
      const construct = () =>
        new CodecrawlApp({
          apiKey: null,
          apiUrl: API_URL,
        });

      if (API_URL.includes('api.irere.dev')) {
        // Cloud service: an API key is mandatory.
        expect(construct).toThrow('No API key provided');
      } else {
        // Any other API URL: construction without a key is allowed.
        expect(construct).not.toThrow();
      }
    },
  );
});
--tag ghcr.io/idee8/codecrawl-staging:latest 34 | docker push ghcr.io/idee8/codecrawl-staging:latest -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryPython.ts: -------------------------------------------------------------------------------- 1 | export const queryPython = ` 2 | (comment) @comment 3 | 4 | (expression_statement 5 | (string) @comment) @docstring 6 | 7 | ; Import statements 8 | (import_statement 9 | name: (dotted_name) @name.reference.module) @definition.import 10 | 11 | (import_from_statement 12 | module_name: (dotted_name) @name.reference.module) @definition.import 13 | 14 | (import_from_statement 15 | name: (dotted_name) @name.reference.module) @definition.import 16 | 17 | (class_definition 18 | name: (identifier) @name.definition.class) @definition.class 19 | 20 | (function_definition 21 | name: (identifier) @name.definition.function) @definition.function 22 | 23 | (call 24 | function: [ 25 | (identifier) @name.reference.call 26 | (attribute 27 | attribute: (identifier) @name.reference.call) 28 | ]) @reference.call 29 | 30 | (assignment 31 | left: (identifier) @name.definition.type_alias) @definition.type_alias 32 | `; 33 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/loadLanguage.ts: -------------------------------------------------------------------------------- 1 | import fs from 'node:fs/promises'; 2 | import { Language } from 'web-tree-sitter'; 3 | 4 | export async function loadLanguage(langName: string): Promise { 5 | if (!langName) { 6 | throw new Error('Invalid language name'); 7 | } 8 | 9 | try { 10 | const wasmPath = await getWasmPath(langName); 11 | return await Language.load(wasmPath); 12 | } catch (error: unknown) { 13 | const message = error instanceof Error ? 
/**
 * Resolves the `version` field of the package.json read by
 * `parsePackageJson`.
 *
 * @returns the version string, or `'unknown'` when the field is missing or
 *   empty, or when reading/parsing the file fails. Both fallbacks now return
 *   the same sentinel; the missing-version path used to return the misspelled
 *   `'unkown'`.
 */
export const getVersion = async (): Promise<string> => {
  try {
    const packageJson = await parsePackageJson();

    if (!packageJson.version) {
      logger.warn('No version found in package.json');
      return 'unknown';
    }

    return packageJson.version;
  } catch (error) {
    // Best effort: a missing or malformed package.json is logged, not thrown.
    logger.error('Error reading package.json', error);
    return 'unknown';
  }
};
/**
 * Builds the list of head meta descriptors for a page.
 *
 * The list always contains the title entry plus description, keywords,
 * Twitter and Open Graph entries. The three image-related entries are
 * appended only when `image` is a non-empty string.
 *
 * @param options.title - page title, reused for the Twitter and OG titles.
 * @param options.description - optional description, reused for Twitter and OG.
 * @param options.image - optional image URL; enables the image/card entries.
 * @param options.keywords - optional keywords string.
 * @returns the meta descriptors in a fixed order.
 */
export const seo = (options: {
  title: string;
  description?: string;
  image?: string;
  keywords?: string;
}) => {
  const { title, description, keywords, image } = options;

  // Image entries are all-or-nothing and always come last.
  const imageTags = image
    ? [
        { name: 'twitter:image', content: image },
        { name: 'twitter:card', content: 'summary_large_image' },
        { name: 'og:image', content: image },
      ]
    : [];

  return [
    { title },
    { name: 'description', content: description },
    { name: 'keywords', content: keywords },
    { name: 'twitter:title', content: title },
    { name: 'twitter:description', content: description },
    { name: 'twitter:creator', content: '@tannerlinsley' },
    { name: 'twitter:site', content: '@tannerlinsley' },
    { name: 'og:type', content: 'website' },
    { name: 'og:title', content: title },
    { name: 'og:description', content: description },
    ...imageTags,
  ];
};
/**
 * Thin wrapper around a tiktoken encoder that counts tokens and never throws
 * from `countTokens`.
 */
export class TokenCounter {
  private encoding: Tiktoken;

  /**
   * @param encodingName - name of the tiktoken encoding to load.
   */
  constructor(encodingName: TiktokenEncoding) {
    this.encoding = get_encoding(encodingName);
  }

  /**
   * Counts the tokens in `content`.
   *
   * @param content - text to encode.
   * @param filePath - optional path, included in the warning on failure.
   * @returns the number of tokens, or 0 when encoding fails (a warning is
   *   logged instead of the error being propagated).
   */
  public countTokens(content: string, filePath?: string): number {
    try {
      const tokens = this.encoding.encode(content);
      return tokens.length;
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      const warning = filePath
        ? `Failed to count tokens. path: ${filePath}, error: ${reason}`
        : `Failed to count tokens. error: ${reason}`;

      logger.warn(warning);
      return 0;
    }
  }

  /** Releases the underlying encoder by calling its `free()` method. */
  public free(): void {
    this.encoding.free();
  }
}
13 |

Are you sure you want to logout?

14 |
15 | 18 | 31 |
32 |
33 | ); 34 | } 35 | -------------------------------------------------------------------------------- /apps/web/src/routes/app/_app/keys.tsx: -------------------------------------------------------------------------------- 1 | import { Flex, Text } from '@radix-ui/themes'; 2 | import { createFileRoute } from '@tanstack/react-router'; 3 | 4 | import { seo } from '~/utils/seo'; 5 | import { KeysTable } from '~/components/keys-table'; 6 | import { CreateKeyModal } from '~/components/create-key-modal'; 7 | 8 | export const Route = createFileRoute('/app/_app/keys')({ 9 | component: RouteComponent, 10 | head(_ctx) { 11 | return { 12 | meta: [...seo({ title: 'API Keys | Codecrawl' })], 13 | }; 14 | }, 15 | }); 16 | 17 | function RouteComponent() { 18 | return ( 19 | 20 | 21 | 22 | 23 | API Keys 24 | 25 | 26 | Create an API key to use Codecrawl in your applications. 27 | 28 | 29 | 30 | 31 | 32 | 33 | ); 34 | } 35 | -------------------------------------------------------------------------------- /.github/workflows/publish-image.yaml: -------------------------------------------------------------------------------- 1 | name: Deploy Images to GHCR 2 | 3 | env: 4 | DOTNET_VERSION: '6.0.x' 5 | 6 | on: 7 | push: 8 | branches: 9 | - main 10 | paths: 11 | - apps/api/** 12 | workflow_dispatch: 13 | 14 | jobs: 15 | push-app-image: 16 | permissions: 17 | contents: read 18 | packages: write 19 | attestations: write 20 | id-token: write 21 | runs-on: ubuntu-latest 22 | defaults: 23 | run: 24 | working-directory: './apps/api' 25 | steps: 26 | - name: 'Checkout GitHub Action' 27 | uses: actions/checkout@main 28 | 29 | - name: 'Login to GitHub Container Registry' 30 | uses: docker/login-action@v1 31 | with: 32 | registry: ghcr.io 33 | username: ${{github.actor}} 34 | password: ${{secrets.GITHUB_TOKEN}} 35 | 36 | - name: 'Build Inventory Image' 37 | run: | 38 | docker build . 
--tag ghcr.io/idee8/codecrawl:latest 39 | docker push ghcr.io/idee8/codecrawl:latest -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/feature_request.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Feature request 3 | about: Suggest an idea for this project 4 | title: "[Feat] " 5 | labels: '' 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Problem Description** 11 | Describe the issue you're experiencing that has prompted this feature request. For example, "I find it difficult when..." 12 | 13 | **Proposed Feature** 14 | Provide a clear and concise description of the feature you would like implemented. 15 | 16 | **Alternatives Considered** 17 | Discuss any alternative solutions or features you've considered. Why were these alternatives not suitable? 18 | 19 | **Implementation Suggestions** 20 | If you have ideas on how the feature could be implemented, share them here. This could include technical details, API changes, or interaction mechanisms. 21 | 22 | **Use Case** 23 | Explain how this feature would be used and what benefits it would bring. Include specific examples to illustrate how this would improve functionality or user experience. 24 | 25 | **Additional Context** 26 | Add any other context such as comparisons with similar features in other products, or links to prototypes or mockups. 
-------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/querySwift.ts: -------------------------------------------------------------------------------- 1 | export const querySwift = ` 2 | (comment) @comment 3 | 4 | (class_declaration 5 | name: (type_identifier) @name) @definition.class 6 | 7 | (protocol_declaration 8 | name: (type_identifier) @name) @definition.interface 9 | 10 | (class_declaration 11 | (class_body 12 | [ 13 | (function_declaration 14 | name: (simple_identifier) @name 15 | ) 16 | (subscript_declaration 17 | (parameter (simple_identifier) @name) 18 | ) 19 | (init_declaration "init" @name) 20 | (deinit_declaration "deinit" @name) 21 | ] 22 | ) 23 | ) @definition.method 24 | 25 | (class_declaration 26 | (class_body 27 | [ 28 | (property_declaration 29 | (pattern (simple_identifier) @name) 30 | ) 31 | ] 32 | ) 33 | ) @definition.property 34 | 35 | (property_declaration 36 | (pattern (simple_identifier) @name) 37 | ) @definition.property 38 | 39 | (function_declaration 40 | name: (simple_identifier) @name) @definition.function 41 | `; 42 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug_report.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Bug report 3 | about: Create a report to help us improve 4 | title: "[Bug] " 5 | labels: bug 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the Bug** 11 | Provide a clear and concise description of what the bug is. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the issue: 15 | 1. Configure the environment or settings with '...' 16 | 2. Run the command '...' 17 | 3. Observe the error or unexpected output at '...' 18 | 4. Log output/error message 19 | 20 | **Expected Behavior** 21 | A clear and concise description of what you expected to happen. 
22 | 23 | **Screenshots** 24 | If applicable, add screenshots or copies of the command line output to help explain the issue. 25 | 26 | **Environment (please complete the following information):** 27 | - OS: [e.g. macOS, Linux, Windows] 28 | - Codecrawl Version: [e.g. 1.2.3] 29 | - Node.js Version: [e.g. 14.x] 30 | 31 | **Logs** 32 | If applicable, include detailed logs to help understand the problem. 33 | 34 | **Additional Context** 35 | Add any other context about the problem here, such as configuration specifics, network conditions, data volumes, etc. -------------------------------------------------------------------------------- /packages/sdk/LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Sideguide Technologies Inc. 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
/**
 * Client-side middleware that logs timing information once the response has
 * come back. It is layered on `preLogMiddleware` and reads `clientTime`,
 * `serverTime` and `durationToServer` from the resulting context.
 */
export const logMiddleware = createMiddleware()
  .middleware([preLogMiddleware])
  .client(async (ctx) => {
    const res = await ctx.next();

    const now = new Date();
    console.log('Client Req/Res:', {
      // Elapsed time is end minus start; the operands used to be reversed,
      // which logged a negative duration.
      duration: now.getTime() - res.context.clientTime.getTime(),
      durationToServer: res.context.durationToServer,
      durationFromServer: now.getTime() - res.context.serverTime.getTime(),
    });

    return res;
  });
/**
 * Parse strategy for Vue files: returns the raw source lines spanned by a
 * capture, emitting each `name:startRow` pair at most once.
 */
export class VueParseStrategy implements ParseStrategy {
  /**
   * @param capture - the captured node and its capture name.
   * @param lines - the source split into lines, indexed by row.
   * @param processedChunks - ids of chunks already emitted; mutated to record
   *   the chunk returned by this call.
   * @param context - unused by this strategy; kept to satisfy `ParseStrategy`.
   * @returns the captured lines joined with `\n`, or `null` when the start row
   *   has no (or empty) text, the row range is empty, or the chunk was already
   *   emitted.
   */
  parseCapture(
    capture: { node: Node; name: string },
    lines: string[],
    processedChunks: Set<string>,
    context: ParseContext,
  ): string | null {
    const { node, name } = capture;
    const startRow = node.startPosition.row;
    const endRow = node.endPosition.row;

    // Out-of-range rows and empty start lines both yield nothing.
    if (!lines[startRow]) {
      return null;
    }

    // Extract every line covered by the capture (end row inclusive).
    const selectedLines = lines.slice(startRow, endRow + 1);
    if (selectedLines.length < 1) {
      return null;
    }

    // A capture name plus its start row identifies a chunk uniquely.
    const chunkId = `${name}:${startRow}`;
    if (processedChunks.has(chunkId)) {
      return null;
    }

    processedChunks.add(chunkId);
    return selectedLines.join('\n');
  }
}
27 | userId, 28 | generationId, 29 | }; 30 | 31 | await saveTreeGenerationData({ 32 | id: generationId, 33 | userId, 34 | createdAt: Date.now(), 35 | status: 'processing', 36 | url: req.body.url, 37 | fileTree: '', 38 | }); 39 | 40 | await getGenerateTreeQueue().add(generationId, jobData, { 41 | jobId: generationId, 42 | }); 43 | 44 | return res.status(200).json({ 45 | success: true, 46 | id: generationId, 47 | }); 48 | } 49 | -------------------------------------------------------------------------------- /apps/api/src/db/schema/apiKeys.ts: -------------------------------------------------------------------------------- 1 | import { type InferSelectModel, relations } from 'drizzle-orm'; 2 | import { 3 | pgTable, 4 | varchar, 5 | timestamp, 6 | boolean, 7 | index, 8 | uuid, 9 | } from 'drizzle-orm/pg-core'; 10 | 11 | import { users } from './users'; 12 | import { teams } from './teams'; 13 | 14 | export const apiKeys = pgTable( 15 | 'api_keys', 16 | { 17 | id: uuid('id').primaryKey().defaultRandom(), 18 | name: varchar('name', { length: 255 }).notNull(), 19 | key: varchar('key', { length: 255 }).notNull().unique(), 20 | userId: uuid('user_id') 21 | .references(() => users.id) 22 | .notNull(), 23 | teamId: uuid('team_id') 24 | .references(() => teams.id) 25 | .notNull(), 26 | isActive: boolean('is_active').default(true).notNull(), 27 | createdAt: timestamp('created_at', { withTimezone: true }) 28 | .defaultNow() 29 | .notNull(), 30 | }, 31 | (table) => [index('api_keys_user_id_idx').on(table.userId)], 32 | ); 33 | 34 | export const apiKeysRelations = relations(apiKeys, ({ one }) => ({ 35 | user: one(users, { 36 | fields: [apiKeys.userId], 37 | references: [users.id], 38 | }), 39 | })); 40 | 41 | export type ApiKey = InferSelectModel; 42 | -------------------------------------------------------------------------------- /apps/web/content-collections.ts: -------------------------------------------------------------------------------- 1 | import { defineCollection, 
defineConfig } from '@content-collections/core'; 2 | import { compileMDX } from '@content-collections/mdx'; 3 | 4 | const posts = defineCollection({ 5 | name: 'posts', 6 | directory: 'content/posts', 7 | include: '**/*.mdx', 8 | schema: (z) => ({ 9 | title: z.string(), 10 | summary: z.string(), 11 | date: z.string().regex(/^\d{4}-\d{2}-\d{2}$/), 12 | author: z.string(), 13 | image: z.string(), 14 | category: z.string(), 15 | }), 16 | transform: async (post, ctx) => { 17 | const content = await compileMDX(ctx, post); 18 | return { 19 | ...post, 20 | content, 21 | }; 22 | }, 23 | }); 24 | 25 | const updates = defineCollection({ 26 | name: 'updates', 27 | directory: 'content/updates', 28 | include: '**/*.mdx', 29 | schema: (z) => ({ 30 | title: z.string(), 31 | date: z.string().regex(/^\d{4}-\d{2}-\d{2}$/), 32 | authors: z.array(z.string()), 33 | image: z.string(), 34 | }), 35 | transform: async (post, ctx) => { 36 | const content = await compileMDX(ctx, post); 37 | return { 38 | ...post, 39 | content, 40 | }; 41 | }, 42 | }); 43 | 44 | export default defineConfig({ 45 | collections: [posts, updates], 46 | }); 47 | -------------------------------------------------------------------------------- /apps/api/src/db/schema/teams.ts: -------------------------------------------------------------------------------- 1 | import { pgTable, uuid, varchar, timestamp, index } from 'drizzle-orm/pg-core'; 2 | import type { InferSelectModel } from 'drizzle-orm'; 3 | 4 | import { users } from './users'; 5 | 6 | export const teams = pgTable('teams', { 7 | id: uuid('id').primaryKey().defaultRandom(), 8 | name: varchar('name', { length: 255 }).notNull(), 9 | createdAt: timestamp('created_at', { withTimezone: true }) 10 | .defaultNow() 11 | .notNull(), 12 | updatedAt: timestamp('updated_at', { withTimezone: true }) 13 | .defaultNow() 14 | .notNull(), 15 | }); 16 | 17 | export const teamMembers = pgTable( 18 | 'team_members', 19 | { 20 | id: uuid('id').primaryKey().defaultRandom(), 21 | 
/**
 * Returns how many tasks this process can usefully run in parallel.
 *
 * Uses `os.availableParallelism()` when the runtime provides it and falls back
 * to the number of entries reported by `os.cpus()` otherwise.
 *
 * @returns a positive integer, never less than 1.
 */
export const getProcessConcurrency = (): number => {
  if (typeof os.availableParallelism === 'function') {
    return os.availableParallelism();
  }

  // `os.cpus` is a function and must be called: `os.cpus.length` is the
  // function's arity (0). The list can also be empty, so clamp to one.
  return Math.max(1, os.cpus().length);
};
/**
 * Reports the state of an llms.txt generation job.
 *
 * Responds 404 when no generation is stored for `req.params.jobId`. Otherwise
 * responds 200 with the job status, the generated text (plus the full text
 * when the job was created with `showFullText`), any recorded error, and the
 * expiry timestamp as an ISO string.
 */
export async function generateLLMsTextStatusController(
  req: Request,
  res: Response,
) {
  const { jobId } = req.params;
  const generation = await getGeneratedLLmsTxt(jobId);

  if (!generation) {
    return res.status(404).json({
      success: false,
      error: 'llmsTxt generation job not found',
    });
  }

  // The full text is only exposed when the job asked for it.
  const includeFullText = generation.showFullText ?? false;
  const payload = includeFullText
    ? {
        llmstxt: generation.generatedText,
        llmsfulltxt: generation.fullText,
      }
    : {
        llmstxt: generation.generatedText,
      };

  return res.status(200).json({
    success: generation.status !== 'failed',
    data: payload,
    status: generation.status,
    error: generation.error ?? undefined,
    expiresAt: (await getGeneratedLlmsTxtExpiry(jobId)).toISOString(),
  });
}
// Worker-level singleton for TokenCounter, together with the encoding it was
// created for so that a task requesting another encoding is not served a
// counter built for the wrong one.
let tokenCounter: TokenCounter | null = null;
let tokenCounterEncoding: TiktokenEncoding | null = null;

/**
 * Returns the worker's shared `TokenCounter` for `encoding`.
 *
 * The counter is created on first use and reused while the requested encoding
 * stays the same. When a different encoding is requested, the previous counter
 * is freed and replaced.
 *
 * @param encoding - tiktoken encoding the returned counter must use.
 */
const getTokenCounter = (encoding: TiktokenEncoding): TokenCounter => {
  if (tokenCounter && tokenCounterEncoding !== encoding) {
    // Release the old encoder before dropping the reference to it.
    tokenCounter.free();
    tokenCounter = null;
  }

  if (!tokenCounter) {
    tokenCounter = new TokenCounter(encoding);
    tokenCounterEncoding = encoding;
  }

  return tokenCounter;
};
const counter = getTokenCounter(encoding); 29 | const tokenCount = counter.countTokens(content, path); 30 | 31 | const processEndAt = process.hrtime.bigint(); 32 | logger.info( 33 | `Counted output tokens. Count: ${tokenCount}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`, 34 | ); 35 | 36 | return tokenCount; 37 | }; 38 | 39 | // Cleanup when worker is terminated 40 | process.on('exit', () => { 41 | if (tokenCounter) { 42 | tokenCounter.free(); 43 | tokenCounter = null; 44 | } 45 | }); 46 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryPhp.ts: -------------------------------------------------------------------------------- 1 | export const queryPhp = ` 2 | ; For codecrawl 3 | (comment) @comment 4 | (namespace_use_clause) @definition.import 5 | (enum_declaration name: (name) @name) @definition.enum 6 | 7 | ; tree-sitter-php 8 | (namespace_definition 9 | name: (namespace_name) @name) @definition.module 10 | 11 | (interface_declaration 12 | name: (name) @name) @definition.interface 13 | 14 | (trait_declaration 15 | name: (name) @name) @definition.interface 16 | 17 | (class_declaration 18 | name: (name) @name) @definition.class 19 | 20 | (class_interface_clause [(name) (qualified_name)] @name) @reference.implementation 21 | 22 | (property_declaration 23 | (property_element (variable_name (name) @name))) @definition.field 24 | 25 | (function_definition 26 | name: (name) @name) @definition.function 27 | 28 | (method_declaration 29 | name: (name) @name) @definition.function 30 | 31 | (object_creation_expression 32 | [ 33 | (qualified_name (name) @name) 34 | (variable_name (name) @name) 35 | ]) @reference.class 36 | 37 | (function_call_expression 38 | function: [ 39 | (qualified_name (name) @name) 40 | (variable_name (name)) @name 41 | ]) @reference.call 42 | 43 | (scoped_call_expression 44 | name: (name) @name) @reference.call 45 | 46 | (member_call_expression 47 | name: 
(name) @name) @reference.call 48 | `; 49 | -------------------------------------------------------------------------------- /apps/api/src/core/actions/remoteAction.ts: --------------------------------------------------------------------------------
import { isGitInstalled } from '../file/gitCommand';
import {
  runDefaultAction,
  type DefaultActionRunnerResult,
} from './defaultAction';
import {
  parseRemoteValue,
  createTempDirectory,
  cloneRepository,
  cleanupTempDirectory,
} from '../utils/remoteUtils';
import type { CrawlOptions } from '~/types';

/**
 * Clones a remote repository into a temporary directory, runs the default
 * action, and always removes the temporary directory afterwards.
 *
 * @param repoUrlInput - Remote repository value; parsed with `parseRemoteValue`.
 * @param options - Crawl options. `options.remoteBranch`, when truthy, wins
 *   over the branch parsed from `repoUrlInput`.
 * @param deps - Injectable collaborators; defaults to the real implementations
 *   so tests can substitute fakes.
 * @returns The result produced by `runDefaultAction`.
 * @throws Error when git is not installed or not on the PATH. Errors from the
 *   clone or the default action propagate after cleanup has run.
 */
export const runRemoteAction = async (
  repoUrlInput: string,
  options: CrawlOptions,
  deps = {
    isGitInstalled,
    parseRemoteValue,
    createTempDirectory,
    cloneRepository,
    cleanupTempDirectory,
    runDefaultAction,
  },
): Promise<DefaultActionRunnerResult> => {
  if (!(await deps.isGitInstalled())) {
    throw new Error('Git is not installed or not in the system PATH.');
  }

  const parsedFields = deps.parseRemoteValue(repoUrlInput);
  const tempDirPath = await deps.createTempDirectory();

  try {
    await deps.cloneRepository(
      parsedFields.repoUrl,
      tempDirPath,
      options.remoteBranch || parsedFields.remoteBranch,
    );

    // NOTE(review): the default action receives the original remote value, not
    // `tempDirPath`, so the fresh clone is never handed to it here. Confirm
    // against `runDefaultAction`'s signature whether the cloned directory
    // should be passed instead.
    // `return await` keeps the action inside the try so cleanup runs only
    // after it has settled.
    return await deps.runDefaultAction(repoUrlInput, options);
  } finally {
    await deps.cleanupTempDirectory(tempDirPath);
  }
};
-------------------------------------------------------------------------------- /apps/web/src/components/marketing/hero.tsx: -------------------------------------------------------------------------------- 1 | import { ArrowTurnDownLeftIcon } from '@heroicons/react/24/outline'; 2 | 3 | export function Hero() { 4 | return ( 5 |
6 |
7 |
8 |

9 | Turn codebases into LLM-ready data 10 |

11 |

12 | Power your AI apps with clean data collected from any codebases. 13 | It's also open source. 14 |

15 |
16 |
17 | 22 | 23 | 24 |
25 |
26 | ); 27 | } 28 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/parseStrategies/DefaultParseStrategy.ts: --------------------------------------------------------------------------------
import type { Node } from 'web-tree-sitter';
import type { ParseContext, ParseStrategy } from './ParseStrategy';

/**
 * Fallback capture handler used for languages without a dedicated strategy.
 * It keeps name, comment and import/require captures and drops duplicates.
 */
export class DefaultParseStrategy implements ParseStrategy {
  /**
   * Turns one tree-sitter capture into the source lines it spans.
   *
   * @param capture - The captured node together with its capture name.
   * @param lines - The file content split into lines, indexed by row.
   * @param processedChunks - Trimmed chunks already emitted; mutated to record
   *   the chunk returned by this call.
   * @param context - Parse context; unused by this strategy but required by
   *   the `ParseStrategy` interface.
   * @returns The spanned lines joined with `\n`, or `null` when the capture is
   *   not selected, its first line is empty/missing, or the chunk was already
   *   emitted.
   */
  parseCapture(
    capture: { node: Node; name: string },
    lines: string[],
    processedChunks: Set<string>,
    context: ParseContext,
  ): string | null {
    const { node, name } = capture;
    const startRow = node.startPosition.row;
    const endRow = node.endPosition.row;

    // A missing or empty first line means there is nothing useful to emit.
    if (!lines[startRow]) {
      return null;
    }

    const isNameCapture = name.includes('name');
    const isCommentCapture = name.includes('comment');
    const isImportCapture = name.includes('import') || name.includes('require');
    const shouldSelect = isNameCapture || isCommentCapture || isImportCapture;

    if (!shouldSelect) {
      return null;
    }

    const selectedLines = lines.slice(startRow, endRow + 1);
    if (selectedLines.length < 1) {
      return null;
    }

    const chunk = selectedLines.join('\n');
    // De-duplicate on the trimmed text so indentation differences do not
    // produce repeated chunks; the untrimmed chunk is what gets returned.
    const normalizedChunk = chunk.trim();

    if (processedChunks.has(normalizedChunk)) {
      return null;
    }

    processedChunks.add(normalizedChunk);
    return chunk;
  }
}
-------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/parseStrategies/ParseStrategy.ts: -------------------------------------------------------------------------------- 1 | import type { Query, Tree, Node } from 'web-tree-sitter'; 2 | 3 | import type { SupportedLang } from '../lang2Query'; 4 | import { TypeScriptParseStrategy } from './TypescriptStrategy'; 5 | import { GoParseStrategy } from './GoParseStrategy'; 6 | import { 
DefaultParseStrategy } from './DefaultParseStrategy'; 7 | import { PythonParseStrategy } from './PythonParseStrategy'; 8 | import { CssParseStrategy } from './CssParseStrategy'; 9 | import { VueParseStrategy } from './VueParseStrategy'; 10 | 11 | export interface ParseContext { 12 | fileContent: string; 13 | lines: string[]; 14 | tree: Tree | null; 15 | query: Query; 16 | } 17 | 18 | export interface ParseStrategy { 19 | parseCapture( 20 | capture: { node: Node; name: string }, 21 | lines: string[], 22 | processedChunks: Set, 23 | context: ParseContext, 24 | ): string | null; 25 | } 26 | 27 | export function createParseStrategy(lang: SupportedLang): ParseStrategy { 28 | switch (lang) { 29 | case 'typescript': 30 | return new TypeScriptParseStrategy(); 31 | case 'go': 32 | return new GoParseStrategy(); 33 | case 'python': 34 | return new PythonParseStrategy(); 35 | case 'css': 36 | return new CssParseStrategy(); 37 | case 'vue': 38 | return new VueParseStrategy(); 39 | default: 40 | return new DefaultParseStrategy(); 41 | } 42 | } 43 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/self_host_issue.md: -------------------------------------------------------------------------------- 1 | --- 2 | name: Self-host issue 3 | about: Report an issue with self-hosting Codecrawl 4 | title: "[Self-Host] " 5 | labels: self-host 6 | assignees: '' 7 | 8 | --- 9 | 10 | **Describe the Issue** 11 | Provide a clear and concise description of the self-hosting issue you're experiencing. 12 | 13 | **To Reproduce** 14 | Steps to reproduce the issue: 15 | 1. Configure the environment or settings with '...' 16 | 2. Run the command '...' 17 | 3. Observe the error or unexpected output at '...' 18 | 4. Log output/error message 19 | 20 | **Expected Behavior** 21 | A clear and concise description of what you expected to happen when self-hosting. 
22 | 23 | **Screenshots** 24 | If applicable, add screenshots or copies of the command line output to help explain the self-hosting issue. 25 | 26 | **Environment (please complete the following information):** 27 | - OS: [e.g. macOS, Linux, Windows] 28 | - Codecrawl Version: [e.g. 1.2.3] 29 | - Node.js Version: [e.g. 14.x] 30 | - Docker Version (if applicable): [e.g. 20.10.14] 31 | - Database Type and Version: [e.g. PostgreSQL 13.4] 32 | 33 | **Logs** 34 | If applicable, include detailed logs to help understand the self-hosting problem. 35 | 36 | **Configuration** 37 | Provide relevant parts of your configuration files (with sensitive information redacted). 38 | 39 | **Additional Context** 40 | Add any other context about the self-hosting issue here, such as specific infrastructure details, network setup, or any modifications made to the original Codecrawl setup. -------------------------------------------------------------------------------- /apps/api/src/services/api-keys-service.ts: -------------------------------------------------------------------------------- 1 | import crypto from 'node:crypto'; 2 | import { eq, desc } from 'drizzle-orm'; 3 | 4 | import { db } from '~/db'; 5 | import { type ApiKey, apiKeys } from '~/db/schema'; 6 | 7 | const API_KEY_BYTE_LENGTH = 32; 8 | 9 | export const createApiKey = (): string => { 10 | const plainKey = `cc_${crypto 11 | .randomBytes(API_KEY_BYTE_LENGTH) 12 | .toString('hex')}`; 13 | 14 | return plainKey; 15 | }; 16 | 17 | export const validateApiKey = async ( 18 | providedKey: string, 19 | ): Promise | null> => { 20 | if (!providedKey) { 21 | return null; 22 | } 23 | 24 | try { 25 | const [potentialKey] = await db 26 | .select() 27 | .from(apiKeys) 28 | .where(eq(apiKeys.key, providedKey)) 29 | .limit(1); 30 | 31 | if (!potentialKey || !potentialKey.isActive) { 32 | return null; 33 | } 34 | 35 | const { key: _, ...keyDetails } = potentialKey; 36 | return keyDetails; 37 | } catch (error) { 38 | console.error('Error 
validating API key:', error); 39 | return null; 40 | } 41 | }; 42 | 43 | export const listApiKeysForUser = async ( 44 | userId: string, 45 | ): Promise[]> => { 46 | try { 47 | const keys = await db 48 | .select() 49 | .from(apiKeys) 50 | .where(eq(apiKeys.userId, userId)) 51 | .orderBy(desc(apiKeys.createdAt)); 52 | 53 | return keys; 54 | } catch (error) { 55 | console.error('Error listing API keys:', error); 56 | return []; 57 | } 58 | }; 59 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/generate-llmstxt.ts: -------------------------------------------------------------------------------- 1 | import { randomUUID } from 'node:crypto'; 2 | import type { Request, Response } from 'express'; 3 | import { saveGeneratedLlmsTxt } from '~/lib/generate-llms-txt/redis'; 4 | import { getGenerateLlmsTxtQueue } from '~/services/queue-service'; 5 | 6 | export type GenerateLLMsTextResponse = { 7 | success: boolean; 8 | id: string; 9 | }; 10 | 11 | /** 12 | * Initiates a text generation job based on the provided Repo URL. 13 | * @param req - The request object containing authentication and generation parameters. 14 | * @param res - The response object to send the generation job ID. 15 | * @returns A promise that resolves when the generation job is queued. 
16 | */ 17 | export async function generateLLMsTextController(req: Request, res: Response) { 18 | const generationId = randomUUID(); 19 | const jobData = { 20 | request: req.body, 21 | teamId: '84594', 22 | plan: 'standard', 23 | subId: '43434', 24 | generationId, 25 | }; 26 | 27 | await saveGeneratedLlmsTxt(generationId, { 28 | id: generationId, 29 | teamId: '84594', 30 | plan: 'standard', 31 | createdAt: Date.now(), 32 | status: 'processing', 33 | url: req.body.url, 34 | showFullText: req.body.showFullText, 35 | generatedText: '', 36 | fullText: '', 37 | }); 38 | 39 | await getGenerateLlmsTxtQueue().add(generationId, jobData, { 40 | jobId: generationId, 41 | }); 42 | 43 | return res.status(200).json({ 44 | success: true, 45 | id: generationId, 46 | }); 47 | } 48 | -------------------------------------------------------------------------------- /apps/web/src/routes/app/_app.tsx: -------------------------------------------------------------------------------- 1 | import { createFileRoute, Outlet } from '@tanstack/react-router'; 2 | import { Flex, Separator } from '@radix-ui/themes'; 3 | import { Sidebar } from '~/components/sidebar'; 4 | import { seo } from '~/utils/seo'; 5 | import { useVerifyLoggedIn } from '~/hooks/use-verify-login'; 6 | import { AuthContextProvider } from '~/contexts/auth-context'; 7 | 8 | export const Route = createFileRoute('/app/_app')({ 9 | component: RouteComponent, 10 | head(ctx) { 11 | return { 12 | meta: [...seo({ title: 'Codecrawl' })], 13 | }; 14 | }, 15 | }); 16 | 17 | function RouteComponent() { 18 | useVerifyLoggedIn(); 19 | 20 | return ( 21 | 22 | 23 | 24 | 25 | 30 | 37 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | ); 50 | } 51 | -------------------------------------------------------------------------------- /apps/api/src/controllers/v1/teams.ts: -------------------------------------------------------------------------------- 1 | import type { Request, Response } from 'express'; 2 | import { and, eq, sql } from 'drizzle-orm'; 3 | 4 | 
import { type ApiKey, apiKeys, teamMembers, teams } from '~/db/schema'; 5 | import { db } from '~/db'; 6 | 7 | export async function teamKeysController(req: Request, res: Response) { 8 | const userId = req.userId; 9 | const teamId = req.params.teamId; 10 | 11 | if (!userId || !teamId) { 12 | return res.status(401).json({ keys: [] }); 13 | } 14 | 15 | const [team] = await db 16 | .select() 17 | .from(teamMembers) 18 | .where(and(eq(teamMembers.userId, userId), eq(teamMembers.teamId, teamId))); 19 | 20 | if (!team) { 21 | return res.status(401).json({ keys: [] }); 22 | } 23 | 24 | const keys = await db 25 | .select() 26 | .from(apiKeys) 27 | .where(eq(apiKeys.teamId, team.teamId)); 28 | 29 | return res.json({ keys }); 30 | } 31 | 32 | export async function teamsController(req: Request, res: Response) { 33 | const userId = req.userId; 34 | 35 | if (!userId) { 36 | return res.status(401).json({ teams: [] }); 37 | } 38 | 39 | const userTeams = await db 40 | .select({ 41 | id: teams.id, 42 | name: teams.name, 43 | apiKeys: sql`json_agg(${apiKeys})`.as('api_keys'), 44 | }) 45 | .from(teamMembers) 46 | .innerJoin(teams, eq(teamMembers.teamId, teams.id)) 47 | .innerJoin(apiKeys, eq(teams.id, apiKeys.teamId)) 48 | .where(eq(teamMembers.userId, userId)) 49 | .groupBy(teams.id, teams.name); 50 | 51 | return res.json(userTeams); 52 | } 53 | -------------------------------------------------------------------------------- /assets/logo.svg: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | 6 | 7 | 8 | 9 | 10 | 11 | 12 | 13 | 14 | 15 | -------------------------------------------------------------------------------- /apps/web/src/contexts/auth-context.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import React, { createContext, useContext } from 'react'; 4 | import { 5 | type QueryClient, 6 | useQuery, 7 | useQueryClient, 8 | } from '@tanstack/react-query'; 9 | 10 | 
export interface User { 11 | id: string; 12 | email: string; 13 | name: string; 14 | avatar: string; 15 | } 16 | 17 | export type AuthContextType = { 18 | user: User | null | undefined; 19 | updateUser: (u: User) => void; 20 | }; 21 | 22 | export type AuthContextProviderProps = { 23 | children?: React.ReactNode; 24 | }; 25 | 26 | const AuthContext = createContext({ 27 | user: null, 28 | updateUser() {}, 29 | }); 30 | 31 | export const useAuthContext = () => useContext(AuthContext); 32 | export default AuthContext; 33 | 34 | export const AuthContextProvider: React.FC = ({ 35 | children, 36 | }) => { 37 | const queryClient = useQueryClient(); 38 | const { data } = useQuery<{ user: User | null }>({ 39 | queryKey: ['users/me'], 40 | retry: false, 41 | refetchOnWindowFocus: false, 42 | staleTime: 5 * 60 * 1000, // Keep prefetched data fresh for 5 mins 43 | }); 44 | 45 | const updateUser = () => { 46 | queryClient.invalidateQueries({ queryKey: ['users/me'] }); 47 | }; 48 | 49 | return ( 50 | 51 | {children} 52 | 53 | ); 54 | }; 55 | 56 | export const prefetchUserMe = async (queryClient: QueryClient) => { 57 | await queryClient.prefetchQuery({ 58 | queryKey: ['users/me'], 59 | }); 60 | }; 61 | -------------------------------------------------------------------------------- /apps/web/src/hooks/use-save-tokens.tsx: -------------------------------------------------------------------------------- 1 | import { useRouter } from '@tanstack/react-router'; 2 | import { useEffect } from 'react'; 3 | 4 | import { useTokenStore } from '~/store/use-token-store'; 5 | import { loginNextPathKey } from '~/lib/constants'; 6 | import { Route } from '~/routes/(auth)/_auth.signin'; 7 | 8 | export const useSaveTokens = () => { 9 | const params = Route.useSearch(); 10 | const router = useRouter(); 11 | 12 | const errorParam = params.error; 13 | const accessTokenParam = params.accessToken; 14 | const refreshTokenParam = params.refreshToken; 15 | 16 | useEffect(() => { 17 | if (typeof errorParam 
=== 'string' && errorParam) { 18 | console.error(errorParam); 19 | } 20 | 21 | if ( 22 | typeof accessTokenParam === 'string' && 23 | typeof refreshTokenParam === 'string' && 24 | accessTokenParam && 25 | refreshTokenParam 26 | ) { 27 | useTokenStore.getState().setTokens({ 28 | accessToken: accessTokenParam, 29 | refreshToken: refreshTokenParam, 30 | }); 31 | 32 | let nextPath = '/app/playground'; 33 | 34 | try { 35 | const loginNextPath = localStorage.getItem(loginNextPathKey); 36 | 37 | if (loginNextPath?.startsWith('/')) { 38 | nextPath = loginNextPath; 39 | localStorage.setItem(loginNextPathKey, ''); 40 | } 41 | 42 | // redirect the user to the next page 100ms will be unnoticable 43 | setTimeout(() => router.navigate({ to: nextPath }), 100); 44 | } catch {} 45 | } 46 | }, [errorParam, accessTokenParam, refreshTokenParam, router]); 47 | }; 48 | -------------------------------------------------------------------------------- /apps/web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@codecrawl/web", 3 | "private": true, 4 | "sideEffects": false, 5 | "type": "module", 6 | "scripts": { 7 | "dev": "vinxi dev", 8 | "build": "vinxi build", 9 | "start": "vinxi start" 10 | }, 11 | "dependencies": { 12 | "@codecrawl/sdk": "workspace:*", 13 | "@heroicons/react": "^2.2.0", 14 | "@hookform/resolvers": "^5.0.1", 15 | "@radix-ui/react-accordion": "^1.2.7", 16 | "@radix-ui/themes": "^3.2.1", 17 | "@tanstack/react-query": "^5.74.4", 18 | "@tanstack/react-router": "^1.116.0", 19 | "@tanstack/react-router-devtools": "^1.116.0", 20 | "@tanstack/react-start": "^1.116.1", 21 | "@tanstack/zod-adapter": "^1.117.0", 22 | "clsx": "^2.1.1", 23 | "date-fns": "^4.1.0", 24 | "react": "^19.1.0", 25 | "react-dom": "^19.1.0", 26 | "react-hook-form": "^7.56.1", 27 | "redaxios": "^0.5.1", 28 | "sonner": "^2.0.3", 29 | "tailwind-merge": "^2.6.0", 30 | "usehooks-ts": "^3.1.1", 31 | "vinxi": "0.5.3", 32 | "zod": "^3.24.2", 33 | 
"zustand": "^5.0.3" 34 | }, 35 | "devDependencies": { 36 | "@content-collections/core": "^0.8.2", 37 | "@content-collections/mdx": "^0.2.2", 38 | "@content-collections/vinxi": "^0.1.0", 39 | "@tailwindcss/postcss": "^4.1.4", 40 | "@tailwindcss/typography": "^0.5.16", 41 | "@tailwindcss/vite": "^4.1.4", 42 | "@types/node": "^22.14.1", 43 | "@types/react": "^19.1.2", 44 | "@types/react-dom": "^19.1.2", 45 | "postcss": "^8.5.3", 46 | "tailwindcss": "^4.1.4", 47 | "typescript": "^5.8.3", 48 | "vite-tsconfig-paths": "^5.1.4" 49 | } 50 | } 51 | -------------------------------------------------------------------------------- /apps/api/src/types.ts: -------------------------------------------------------------------------------- 1 | import type { OutputStyle } from './config/configSchema'; 2 | 3 | export type CrawlProgressCallback = (message: string) => void; 4 | 5 | export type PlanType = 'standard' | 'scale' | 'hobby' | 'growth' | 'free'; 6 | 7 | declare global { 8 | namespace Express { 9 | interface Request { 10 | userId?: string; 11 | } 12 | } 13 | } 14 | 15 | export interface CrawlOptions { 16 | // Codecrawl Cloud Options 17 | teamId?: string; 18 | plan?: string; 19 | 20 | // Output Options 21 | output?: string; 22 | style?: OutputStyle; 23 | parsableStyle?: boolean; 24 | compress?: boolean; 25 | outputShowLineNumbers?: boolean; 26 | copy?: boolean; 27 | fileSummary?: boolean; 28 | directoryStructure?: boolean; 29 | removeComments?: boolean; 30 | removeEmptyLines?: boolean; 31 | headerText?: string; 32 | instructionFilePath?: string; 33 | includeEmptyDirectories?: boolean; 34 | gitSortByChanges?: boolean; 35 | 36 | // Filter Options 37 | include?: string; 38 | ignore?: string; 39 | gitignore?: boolean; 40 | defaultPatterns?: boolean; 41 | 42 | // Remote Repository Options 43 | remote?: string; 44 | remoteBranch?: string; 45 | 46 | // Configuration Options 47 | config?: string; 48 | init?: boolean; 49 | global?: boolean; 50 | 51 | // Security Options 52 | 
securityCheck?: boolean; 53 | 54 | // Token Count Options 55 | tokenCountEncoding?: string; 56 | 57 | // Other Options 58 | topFilesLen?: number; 59 | verbose?: boolean; 60 | quiet?: boolean; 61 | } 62 | 63 | export enum RateLimiterMode { 64 | Crawl = 'crawl', 65 | CrawlStatus = 'crawlStatus', 66 | Search = 'search', 67 | } 68 | -------------------------------------------------------------------------------- /apps/web/src/components/catch-boundary.tsx: -------------------------------------------------------------------------------- 1 | import { 2 | ErrorComponent, 3 | Link, 4 | rootRouteId, 5 | useMatch, 6 | useRouter, 7 | } from '@tanstack/react-router'; 8 | import type { ErrorComponentProps } from '@tanstack/react-router'; 9 | 10 | export function DefaultCatchBoundary({ error }: ErrorComponentProps) { 11 | const router = useRouter(); 12 | const isRoot = useMatch({ 13 | strict: false, 14 | select: (state) => state.id === rootRouteId, 15 | }); 16 | 17 | console.error('DefaultCatchBoundary Error:', error); 18 | 19 | return ( 20 |
21 | 22 |
23 | 32 | {isRoot ? ( 33 | 37 | Home 38 | 39 | ) : ( 40 | { 44 | e.preventDefault(); 45 | window.history.back(); 46 | }} 47 | > 48 | Go Back 49 | 50 | )} 51 |
52 |
53 | ); 54 | } 55 | -------------------------------------------------------------------------------- /apps/web/src/styles/app.css: -------------------------------------------------------------------------------- 1 | @import url('https://cdnjs.cloudflare.com/ajax/libs/inter-ui/3.18.0/inter.css'); 2 | @import 'tailwindcss'; 3 | @import '@radix-ui/themes/styles.css' layer(base); 4 | 5 | @plugin "@tailwindcss/typography"; 6 | 7 | :root { 8 | --radix-accordion-content-height: 0; 9 | } 10 | 11 | @utility custom-container { 12 | @apply md:max-w-6xl sm:max-w-4xl lg:max-w-5xl px-8 sm:px-14 lg:px-16 mx-auto w-full; 13 | } 14 | 15 | 16 | @theme { 17 | /* Fonts */ 18 | --font-display: "Inter", sans-serif; 19 | --font-sans: "Inter", sans-serif; 20 | --font-serif: ui-serif, Georgia, Cambria, "Times New Roman", Times, serif; 21 | --font-mono: ui-monospace, SFMono-Regular, Menlo, Monaco, Consolas, "Liberation Mono", "Courier New", monospace; 22 | 23 | /* Animations */ 24 | --animate-wiggle: wiggle 1s ease-in-out infinite; 25 | --animate-accordion-down: accordion-down 0.2s ease-out; 26 | --animate-accordion-up: accordion-up 0.2s ease-out; 27 | 28 | @keyframes wiggle { 29 | 0%, 30 | 100% { 31 | transform: rotate(-3deg); 32 | } 33 | 50% { 34 | transform: rotate(3deg); 35 | } 36 | } 37 | 38 | @keyframes accordion-down { 39 | from { 40 | height: 0; 41 | } 42 | to { 43 | height: var(--radix-accordion-content-height); 44 | } 45 | } 46 | 47 | @keyframes accordion-up { 48 | from { 49 | height: var(--radix-accordion-content-height); 50 | } 51 | to { 52 | height: 0; 53 | } 54 | } 55 | } 56 | 57 | 58 | .radix-themes { 59 | --default-font-family: var(--font-sans); 60 | --heading-font-family: var(--font-display); 61 | --code-font-family: var(--font-mono); 62 | --strong-font-family: var(--font-sans); 63 | } 64 | -------------------------------------------------------------------------------- /apps/web/src/routes/(marketing)/_landing.tsx: 
-------------------------------------------------------------------------------- 1 | import { Flex } from '@radix-ui/themes'; 2 | import { useSuspenseQuery } from '@tanstack/react-query'; 3 | import { createFileRoute, Outlet } from '@tanstack/react-router'; 4 | import { createServerFn } from '@tanstack/react-start'; 5 | import { Footer } from '~/components/marketing/footer'; 6 | import { Header } from '~/components/marketing/header'; 7 | import type { User } from '~/contexts/auth-context'; 8 | 9 | export const getGithubStars = createServerFn({ 10 | method: 'GET', 11 | }).handler(async () => { 12 | const response = await fetch('https://api.github.com/repos/Idee8/codecrawl'); 13 | const data = await response.json(); 14 | return data.stargazers_count; 15 | }); 16 | 17 | export const Route = createFileRoute('/(marketing)/_landing')({ 18 | component: RouteComponent, 19 | loader: async ({ context }) => { 20 | await context.queryClient.prefetchQuery({ 21 | queryKey: ['users/me'], 22 | }); 23 | const stars = await getGithubStars(); 24 | return { stars }; 25 | }, 26 | }); 27 | 28 | function RouteComponent() { 29 | const state = Route.useLoaderData(); 30 | const { data } = useSuspenseQuery<{ user: User | null }>({ 31 | queryKey: ['users/me'], 32 | }); 33 | 34 | return ( 35 | 41 |
42 |
43 |
44 | 45 |
46 |
47 |
48 | 49 | ); 50 | } 51 | -------------------------------------------------------------------------------- /apps/api/src/lib/generate-tree.ts: --------------------------------------------------------------------------------
import { redisConnection } from '~/services/queue-service';

/** Record stored in Redis under `tree:<id>` for one tree-generation job. */
export interface TreeGenerationData {
  id: string;
  userId: string;
  createdAt: number;
  status: 'processing' | 'completed' | 'failed';
  url: string;
  fileTree: string;
  error?: string;
}

/** Serializes `data` as JSON and stores it under `tree:<data.id>`. */
export async function saveTreeGenerationData(data: TreeGenerationData) {
  await redisConnection.set(`tree:${data.id}`, JSON.stringify(data));
}

/**
 * Loads the record stored under `tree:<id>`.
 *
 * @returns The parsed JSON value, or `null` when the key holds nothing. The
 *   parsed value is not validated against `TreeGenerationData`.
 */
export async function getTreeGenerationData(id: string) {
  const data = await redisConnection.get(`tree:${id}`);
  return data ? JSON.parse(data) : null;
}

/**
 * Shallow-merges `data` into the stored record and writes it back.
 * Does nothing when no record exists for `id`.
 */
export async function updateTreeGenerationData(
  id: string,
  data: Partial<TreeGenerationData>,
) {
  const current = await getTreeGenerationData(id);
  if (!current) return;

  const updatedGeneration = {
    ...current,
    ...data,
  };

  await redisConnection.set(`tree:${id}`, JSON.stringify(updatedGeneration));
}

/**
 * Computes the expiry time of `tree:<id>` as "now + PTTL", truncated to whole
 * seconds.
 *
 * NOTE(review): Redis PTTL reports negative sentinels for keys without a TTL
 * or missing keys. Those are added as-is here, and the writers in this file
 * never set a TTL, so the result is presumably just "now" — confirm what
 * callers expect.
 */
export async function getTreeGenerationDataExpiry(id: string) {
  const d = new Date();
  const ttl = await redisConnection.pttl(`tree:${id}`);
  d.setMilliseconds(d.getMilliseconds() + ttl);
  d.setMilliseconds(0);
  return d;
}

/**
 * Updates the status of a stored record, optionally together with the
 * generated file tree and/or an error message. Arguments left `undefined`
 * do not overwrite the stored fields.
 */
export async function updateTreeGenerationDataStatus(
  id: string,
  status: 'processing' | 'completed' | 'failed',
  fileTree?: string,
  error?: string,
) {
  const updates: Partial<TreeGenerationData> = { status };
  if (fileTree !== undefined) updates.fileTree = fileTree;
  if (error !== undefined) updates.error = error;
  await updateTreeGenerationData(id, updates);
}
-------------------------------------------------------------------------------- /apps/web/src/components/ui/toast.tsx: 
-------------------------------------------------------------------------------- 1 | import { toast as sonnerToast } from 'sonner'; 2 | 3 | interface ToastProps { 4 | id: string | number; 5 | title: string; 6 | description: string; 7 | button: { 8 | label: string; 9 | onClick: () => void; 10 | }; 11 | } 12 | 13 | export function toast(toast: Omit) { 14 | return sonnerToast.custom((id) => ( 15 | sonnerToast.dismiss(id), 22 | }} 23 | /> 24 | )); 25 | } 26 | 27 | export function Toast(props: ToastProps) { 28 | const { title, description, button, id } = props; 29 | 30 | return ( 31 |
32 |
33 |
34 |

{title}

35 |

{description}

36 |
37 |
38 |
39 | 49 |
50 |
51 | ); 52 | } 53 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryRuby.ts: -------------------------------------------------------------------------------- 1 | export const queryRuby = ` 2 | (comment) @comment 3 | 4 | ; Import statements 5 | (call 6 | (identifier) @name.reference.module) @definition.import 7 | 8 | ; Method definitions 9 | 10 | ( 11 | (comment)* @doc 12 | . 13 | [ 14 | (method 15 | name: (_) @name.definition.method) @definition.method 16 | (singleton_method 17 | name: (_) @name.definition.method) @definition.method 18 | ] 19 | (#strip! @doc "^#\\s*") 20 | (#select-adjacent! @doc @definition.method) 21 | ) 22 | 23 | (alias 24 | name: (_) @name.definition.method) @definition.method 25 | 26 | (setter 27 | (identifier) @ignore) 28 | 29 | ; Class definitions 30 | 31 | ( 32 | (comment)* @doc 33 | . 34 | [ 35 | (class 36 | name: [ 37 | (constant) @name.definition.class 38 | (scope_resolution 39 | name: (_) @name.definition.class) 40 | ]) @definition.class 41 | (singleton_class 42 | value: [ 43 | (constant) @name.definition.class 44 | (scope_resolution 45 | name: (_) @name.definition.class) 46 | ]) @definition.class 47 | ] 48 | (#strip! @doc "^#\\s*") 49 | (#select-adjacent! @doc @definition.class) 50 | ) 51 | 52 | ; Module definitions 53 | 54 | ( 55 | (module 56 | name: [ 57 | (constant) @name.definition.module 58 | (scope_resolution 59 | name: (_) @name.definition.module) 60 | ]) @definition.module 61 | ) 62 | 63 | ; Calls 64 | 65 | (call method: (identifier) @name.reference.call) @reference.call 66 | 67 | ( 68 | [(identifier) (constant)] @name.reference.call @reference.call 69 | (#is-not? local) 70 | (#not-match? 
@name.reference.call "^(lambda|load|require|require_relative|__FILE__|__LINE__)$") 71 | ) 72 | `; 73 | -------------------------------------------------------------------------------- /apps/api/src/core/metrics/workers/fileMetricsWorker.ts: -------------------------------------------------------------------------------- 1 | import type { TiktokenEncoding } from 'tiktoken'; 2 | 3 | import { logger } from '~/lib/logger'; 4 | import type { ProcessedFile } from '../../file/fileTypes'; 5 | import { TokenCounter } from '../../tokenCount'; 6 | import type { FileMetrics } from './types'; 7 | 8 | export interface FileMetricsTask { 9 | file: ProcessedFile; 10 | index: number; 11 | totalFiles: number; 12 | encoding: TiktokenEncoding; 13 | } 14 | 15 | // Worker-level singleton for TokenCounter 16 | let tokenCounter: TokenCounter | null = null; 17 | 18 | const getTokenCounter = (encodingName: TiktokenEncoding) => { 19 | if (tokenCounter === null) { 20 | tokenCounter = new TokenCounter(encodingName); 21 | } 22 | return tokenCounter; 23 | }; 24 | 25 | export default async ({ encoding, file }: FileMetricsTask) => { 26 | const processStartAt = process.hrtime.bigint(); 27 | const metrics = await calculateIndividualFileMetrics(file, encoding); 28 | const processEndAt = process.hrtime.bigint(); 29 | logger.info( 30 | `Calculated metrics for ${file.path}. 
Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`, 31 | ); 32 | 33 | return metrics; 34 | }; 35 | 36 | export const calculateIndividualFileMetrics = async ( 37 | file: ProcessedFile, 38 | encoding: TiktokenEncoding, 39 | ): Promise => { 40 | const charCount = file.content.length; 41 | const tokenCounter = getTokenCounter(encoding); 42 | const tokenCount = tokenCounter.countTokens(file.content, file.path); 43 | 44 | return { path: file.path, charCount, tokenCount }; 45 | }; 46 | 47 | // Cleanup when worker is terminated 48 | process.on('exit', () => { 49 | if (tokenCounter) { 50 | if (tokenCounter) { 51 | tokenCounter.free(); 52 | tokenCounter = null; 53 | } 54 | } 55 | }); 56 | -------------------------------------------------------------------------------- /packages/sdk/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@codecrawl/sdk", 3 | "version": "1.1.0", 4 | "description": "Codecrawl SDK for Codecrawl API", 5 | "main": "dist/index.js", 6 | "type": "module", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | "./package.json": "./package.json", 10 | ".": { 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.js", 13 | "default": "./dist/index.cjs" 14 | } 15 | }, 16 | "scripts": { 17 | "build": "tsup", 18 | "watch": "tsup --watch", 19 | "build-and-publish": "npm run build && npm publish --access public", 20 | "publish-beta": "npm run build && npm publish --access public --tag beta", 21 | "test": "NODE_OPTIONS=--experimental-vm-modules jest --verbose src/__tests__/index.test.ts" 22 | }, 23 | "repository": { 24 | "type": "git", 25 | "url": "git+https://github.com/Idee8/codecrawl.git" 26 | }, 27 | "author": "Irere Emmanuel ", 28 | "license": "MIT", 29 | "bugs": { 30 | "url": "https://github.com/Idee8/codecrawl/issues" 31 | }, 32 | "homepage": "https://github.com/Idee8/codecrawl#readme", 33 | "dependencies": { 34 | "axios": "^1.8.4", 35 | "dotenv": "^16.4.7", 36 | "uuid": 
"^11.1.0", 37 | "zod": "^3.24.2" 38 | }, 39 | "devDependencies": { 40 | "@jest/globals": "^29.7.0", 41 | "@types/axios": "^0.14.4", 42 | "@types/jest": "^29.5.14", 43 | "@types/mocha": "^10.0.10", 44 | "jest": "^29.7.0", 45 | "ts-jest": "^29.3.1", 46 | "tsup": "^8.4.0", 47 | "typescript": "^5.8.2" 48 | }, 49 | "keywords": [ 50 | "codecrawl", 51 | "codecrawl-sdk", 52 | "codebase-indexer", 53 | "codebase-crawler", 54 | "api", 55 | "sdk", 56 | "repositories", 57 | "llms" 58 | ], 59 | "engines": { 60 | "node": ">=22.0.0" 61 | } 62 | } 63 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/parseStrategies/CssParseStrategy.ts: -------------------------------------------------------------------------------- 1 | import type { Node } from 'web-tree-sitter'; 2 | import type { ParseContext, ParseStrategy } from './ParseStrategy'; 3 | 4 | export class CssParseStrategy implements ParseStrategy { 5 | parseCapture( 6 | capture: { node: Node; name: string }, 7 | lines: string[], 8 | processedChunks: Set, 9 | context: ParseContext, 10 | ): string | null { 11 | const { name, node } = capture; 12 | const startRow = node.startPosition.row; 13 | const endRow = node.endPosition.row; 14 | 15 | if (!lines[startRow]) { 16 | return null; 17 | } 18 | 19 | // Process CSS-specific capture names 20 | const isCommentCapture = name.includes('comment'); 21 | const isSelectorCapture = 22 | name.includes('selector') || name.includes('definition.selector'); 23 | const isAtRuleCapture = 24 | name.includes('at_rule') || name.includes('definition.at_rule'); 25 | 26 | const shouldSelect = 27 | isCommentCapture || isSelectorCapture || isAtRuleCapture; 28 | 29 | if (!shouldSelect) { 30 | return null; 31 | } 32 | 33 | // Extract all lines for comments, only the first line for others 34 | let selectedLines: string[]; 35 | if (isCommentCapture) { 36 | selectedLines = lines.slice(startRow, endRow + 1); 37 | } else { 38 | // For selectors and 
at-rules, extract only the first line 39 | selectedLines = [lines[startRow]]; 40 | } 41 | 42 | if (selectedLines.length < 1) { 43 | return null; 44 | } 45 | 46 | const chunk = selectedLines.join('\n'); 47 | const normalizedChunk = chunk.trim(); 48 | 49 | if (processedChunks.has(normalizedChunk)) { 50 | return null; 51 | } 52 | 53 | processedChunks.add(normalizedChunk); 54 | return chunk; 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /apps/api/src/core/metrics/calculateMetrics.ts: -------------------------------------------------------------------------------- 1 | import type { ConfigMerged } from '~/config/configSchema'; 2 | import type { CrawlProgressCallback } from '~/types'; 3 | import type { ProcessedFile } from '../file/fileTypes'; 4 | import { calculateAllFileMetrics } from './calculateAllFileMetrics'; 5 | import { calculateOutputMetrics } from './calculateOutputMetrics'; 6 | 7 | export interface CalculateMetricsResult { 8 | totalFiles: number; 9 | totalCharacters: number; 10 | totalTokens: number; 11 | fileCharCounts: Record; 12 | fileTokenCounts: Record; 13 | } 14 | 15 | export const calculateMetrics = async ( 16 | processedFiles: ProcessedFile[], 17 | output: string, 18 | progressCallback: CrawlProgressCallback, 19 | config: ConfigMerged, 20 | deps = { 21 | calculateAllFileMetrics, 22 | calculateOutputMetrics, 23 | }, 24 | ): Promise => { 25 | progressCallback('Calculating metrics...'); 26 | 27 | const [fileMetrics, totalTokens] = await Promise.all([ 28 | deps.calculateAllFileMetrics( 29 | processedFiles, 30 | config.tokenCount.encoding, 31 | progressCallback, 32 | ), 33 | deps.calculateOutputMetrics( 34 | output, 35 | config.tokenCount.encoding, 36 | config.output.filePath, 37 | ), 38 | ]); 39 | 40 | const totalFiles = processedFiles.length; 41 | const totalCharacters = output.length; 42 | 43 | const fileCharCounts: Record = {}; 44 | const fileTokenCounts: Record = {}; 45 | for (const file of 
fileMetrics) { 46 | fileCharCounts[file.path] = file.charCount; 47 | fileTokenCounts[file.path] = file.tokenCount; 48 | } 49 | 50 | return { 51 | totalFiles, 52 | totalCharacters, 53 | totalTokens, 54 | fileCharCounts, 55 | fileTokenCounts, 56 | }; 57 | }; 58 | -------------------------------------------------------------------------------- /apps/api/src/services/jwt-service.ts: -------------------------------------------------------------------------------- 1 | import jwt from 'jsonwebtoken'; 2 | import type { User } from '~/db/schema'; 3 | 4 | export interface AccessTokenPayload { 5 | userId: string; 6 | } 7 | 8 | export interface RefreshTokenPayload { 9 | userId: string; 10 | tokenVersion: number | null; 11 | } 12 | 13 | export const generateAccessToken = (user: User): string => { 14 | const payload: AccessTokenPayload = { 15 | userId: user.id, 16 | }; 17 | 18 | return jwt.sign(payload, process.env.ACCESS_TOKEN_SECRET as string, { 19 | expiresIn: '15min', 20 | }); 21 | }; 22 | 23 | export const generateRefreshToken = ( 24 | user: User, 25 | currentTokenVersion: number | null, 26 | ): string => { 27 | const payload: RefreshTokenPayload = { 28 | userId: user.id, 29 | tokenVersion: currentTokenVersion, 30 | }; 31 | 32 | return jwt.sign(payload, process.env.REFRESH_TOKEN_SECRET as string, { 33 | expiresIn: '7d', 34 | }); 35 | }; 36 | 37 | export const verifyAccessToken = (token: string): AccessTokenPayload => { 38 | const decoded = jwt.verify( 39 | token, 40 | process.env.ACCESS_TOKEN_SECRET as string, 41 | ) as AccessTokenPayload; 42 | 43 | if (typeof decoded.userId !== 'string') { 44 | throw new jwt.JsonWebTokenError('Invalid access token payload structure'); 45 | } 46 | 47 | return decoded; 48 | }; 49 | 50 | export const verifyRefreshToken = (token: string): RefreshTokenPayload => { 51 | const decoded = jwt.verify( 52 | token, 53 | process.env.REFRESH_TOKEN_SECRET as string, 54 | ) as RefreshTokenPayload; 55 | 56 | if (typeof decoded.userId !== 'string') { 57 | 
throw new jwt.JsonWebTokenError('Invalid refresh token payload structure'); 58 | } 59 | return decoded; 60 | }; 61 | 62 | export const createTokens = (user: User) => { 63 | const accessToken = generateAccessToken(user); 64 | const refreshToken = generateRefreshToken(user, user.tokenVersion); 65 | return { accessToken, refreshToken }; 66 | }; 67 | -------------------------------------------------------------------------------- /apps/web/src/components/marketing/cta.tsx: -------------------------------------------------------------------------------- 1 | import { ArrowRightCircleIcon, HeartIcon } from '@heroicons/react/24/solid'; 2 | import { SvgLogoBlack } from '../svgs'; 3 | import { Button } from '@radix-ui/themes'; 4 | import { useNavigate } from '@tanstack/react-router'; 5 | 6 | export function CTA() { 7 | const navigate = useNavigate(); 8 | 9 | return ( 10 |
11 |
12 |
13 |
14 | 15 | 16 |
17 |

18 | Create Something! 19 |

20 |

21 | Create great products by using Codecrawl API to power your LLM 22 | applications. Join many founders using it to code-related things... 23 |

24 |
25 | 34 |
35 |
36 |
37 |
38 | ); 39 | } 40 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryGo.ts: -------------------------------------------------------------------------------- 1 | export const queryGo = ` 2 | ; For codecrawl 3 | (comment) @comment 4 | (package_clause) @definition.package 5 | (import_declaration) @definition.import 6 | (import_spec) @definition.import 7 | (var_declaration) @definition.variable 8 | (const_declaration) @definition.constant 9 | 10 | ; tree-sitter-go 11 | ( 12 | (comment)* @doc 13 | . 14 | (function_declaration 15 | name: (identifier) @name) @definition.function 16 | (#strip! @doc "^//\\\\s*") 17 | (#set-adjacent! @doc @definition.function) 18 | ) 19 | 20 | ( 21 | (comment)* @doc 22 | . 23 | (method_declaration 24 | name: (field_identifier) @name) @definition.method 25 | (#strip! @doc "^//\\\\s*") 26 | (#set-adjacent! @doc @definition.method) 27 | ) 28 | 29 | (call_expression 30 | function: [ 31 | (identifier) @name 32 | (parenthesized_expression (identifier) @name) 33 | (selector_expression field: (field_identifier) @name) 34 | (parenthesized_expression (selector_expression field: (field_identifier) @name)) 35 | ]) @reference.call 36 | 37 | (type_spec 38 | name: (type_identifier) @name) @definition.type 39 | 40 | (type_identifier) @name @reference.type 41 | 42 | (package_clause "package" (package_identifier) @name) 43 | 44 | (type_declaration (type_spec name: (type_identifier) @name type: (interface_type))) 45 | 46 | (type_declaration (type_spec name: (type_identifier) @name type: (struct_type))) 47 | 48 | ; Import statements 49 | (import_declaration 50 | (import_spec_list 51 | (import_spec 52 | path: (interpreted_string_literal) @name.reference.module))) @definition.import 53 | 54 | (import_declaration 55 | (import_spec 56 | path: (interpreted_string_literal) @name.reference.module)) @definition.import 57 | 58 | (package_clause 59 | (package_identifier) @name.reference.module) 
@definition.package 60 | 61 | (var_declaration (var_spec name: (identifier) @name)) 62 | 63 | (const_declaration (const_spec name: (identifier) @name)) 64 | `; 65 | -------------------------------------------------------------------------------- /apps/api/src/core/file/fileCollect.ts: -------------------------------------------------------------------------------- 1 | import { logger } from '~/lib/logger'; 2 | import { initPiscina } from '~/lib/processConcurrency'; 3 | import type { RawFile } from './fileTypes'; 4 | import type { FileCollectTask } from './workers/fileCollectWorker'; 5 | import type { CrawlProgressCallback } from '~/types'; 6 | 7 | const initTaskRunner = (numOfTasks: number) => { 8 | const pool = initPiscina( 9 | numOfTasks, 10 | require.resolve('./workers/fileCollectWorker'), 11 | ); 12 | return (task: FileCollectTask) => pool.run(task); 13 | }; 14 | 15 | export const collectFiles = async ( 16 | filePaths: string[], 17 | rootDir: string, 18 | progressCallback: CrawlProgressCallback = () => {}, 19 | deps = { initTaskRunner }, 20 | ): Promise => { 21 | const runTask = deps.initTaskRunner(filePaths.length); 22 | const tasks = filePaths.map( 23 | (filePath) => ({ filePath, rootDir }) satisfies FileCollectTask, 24 | ); 25 | 26 | try { 27 | const startTime = process.hrtime.bigint(); 28 | logger.info( 29 | `Starting file collection for ${filePaths.length} files using worker pool`, 30 | ); 31 | 32 | let completedTasks = 0; 33 | const totalTasks = tasks.length; 34 | 35 | const results = await Promise.all( 36 | tasks.map((task) => 37 | runTask(task).then((result) => { 38 | completedTasks++; 39 | progressCallback(`Collect file... (${completedTasks}/${totalTasks})`); 40 | logger.info( 41 | `Collect files... 
(${completedTasks}/${totalTasks}) ${task.filePath}`, 42 | ); 43 | return result; 44 | }), 45 | ), 46 | ); 47 | 48 | const endTime = process.hrtime.bigint(); 49 | const duration = Number(endTime - startTime) / 1e6; 50 | 51 | logger.info(`File collection completed in ${duration.toFixed(2)}ms`); 52 | return results.filter((file): file is RawFile => file !== null); 53 | } catch (error) { 54 | logger.error('Error during file collection'); 55 | throw error; 56 | } 57 | }; 58 | -------------------------------------------------------------------------------- /apps/web/src/lib/mutation-fn.ts: -------------------------------------------------------------------------------- 1 | import redaxios from 'redaxios'; 2 | import { useTokenStore } from '~/store/use-token-store'; 3 | import { API_BASE_URL } from './constants'; 4 | 5 | /** 6 | * Interface for the arguments passed to the generic mutation function. 7 | */ 8 | interface MutationFnArgs { 9 | /** The URL endpoint for the mutation. */ 10 | endpoint: string; 11 | /** The data payload to send with the request (optional). */ 12 | data?: any; 13 | /** The HTTP method to use (defaults to 'POST'). */ 14 | method?: 'POST' | 'PUT' | 'PATCH' | 'DELETE'; 15 | } 16 | 17 | /** 18 | * A generic mutation function for use with React Query, utilizing redaxios. 19 | * It sends a request (defaulting to POST) to the specified URL with optional data. 20 | * 21 | * @param {MutationFnArgs} args - An object containing the URL, optional data, and optional method. 22 | * @returns {Promise} A promise that resolves with the response data. 23 | * @throws {Error} Throws an error if the request fails. 
24 | */ 25 | export const mutationFnHelper = async ({ 26 | endpoint, 27 | data, 28 | method = 'POST', 29 | }: MutationFnArgs): Promise => { 30 | const { accessToken, refreshToken } = useTokenStore.getState(); 31 | try { 32 | const response = await redaxios({ 33 | url: `${API_BASE_URL}/${endpoint}`, 34 | method, 35 | data: JSON.stringify(data), 36 | headers: { 37 | 'Content-Type': 'application/json', 38 | 'X-Access-Token': accessToken, 39 | 'X-Refresh-Token': refreshToken, 40 | }, 41 | }); 42 | 43 | const _accessToken = response.headers.get('access-token'); 44 | const _refreshToken = response.headers.get('refresh-token'); 45 | 46 | if (_accessToken && _refreshToken) { 47 | useTokenStore.getState().setTokens({ 48 | accessToken: _accessToken, 49 | refreshToken: _refreshToken, 50 | }); 51 | } 52 | 53 | return response.data; 54 | } catch (error) { 55 | console.error(`Mutation failed: ${method} ${endpoint}`, error); 56 | throw error; 57 | } 58 | }; 59 | -------------------------------------------------------------------------------- /apps/api/src/middleware/jwt-auth.ts: -------------------------------------------------------------------------------- 1 | import type { Request, Response, NextFunction, RequestHandler } from "express"; 2 | import { eq } from "drizzle-orm"; 3 | import { verify } from "jsonwebtoken"; 4 | 5 | import { 6 | createTokens, 7 | type AccessTokenPayload, 8 | type RefreshTokenPayload, 9 | } from "~/services/jwt-service"; 10 | import { db } from "~/db"; 11 | import { users } from "~/db/schema"; 12 | 13 | export const authMiddleware: (st?: boolean) => RequestHandler = 14 | (shouldThrow = true) => 15 | async (req: Request, res: Response, next: NextFunction): Promise => { 16 | const accessToken = req.headers["x-access-token"] as string; 17 | const refreshToken = req.headers["x-refresh-token"] as string; 18 | 19 | if (!accessToken || !refreshToken) { 20 | if (shouldThrow) { 21 | return res.status(401).json({ error: "Not authorized: Missing token" }); 22 | } 
23 | return next(); 24 | } 25 | 26 | try { 27 | const payload = verify( 28 | accessToken, 29 | process.env.ACCESS_TOKEN_SECRET as string 30 | ) as AccessTokenPayload; 31 | 32 | (req as any).userId = payload.userId; 33 | 34 | return next(); 35 | } catch {} 36 | 37 | let data: RefreshTokenPayload; 38 | try { 39 | data = verify( 40 | refreshToken, 41 | process.env.REFRESH_TOKEN_SECRET as string 42 | ) as RefreshTokenPayload; 43 | } catch { 44 | if (shouldThrow) { 45 | return res.status(401).json({ error: "Not authorized" }); 46 | } 47 | return next(); 48 | } 49 | 50 | const user = await db.query.users.findFirst({ 51 | where: eq(users.id, data.userId), 52 | }); 53 | 54 | if (!user || user.tokenVersion !== data.tokenVersion) { 55 | return res.status(401).json({ error: "Not authorized" }); 56 | } 57 | 58 | const tokens = createTokens(user); 59 | res.setHeader("access-token", tokens.accessToken); 60 | res.setHeader("refresh-token", tokens.refreshToken); 61 | 62 | (req as any).userId = user.id; 63 | 64 | return next(); 65 | }; 66 | -------------------------------------------------------------------------------- /apps/api/src/index.ts: -------------------------------------------------------------------------------- 1 | import 'dotenv/config'; 2 | import cors from 'cors'; 3 | import express from 'express'; 4 | import expressWs from 'express-ws'; 5 | import CacheableLookup from 'cacheable-lookup'; 6 | import http from 'node:http'; 7 | import https from 'node:https'; 8 | import os from 'node:os'; 9 | 10 | import { v1Router } from '~/routes/v1'; 11 | import { logger } from '~/lib/logger'; 12 | 13 | const numCPUs = process.env.NODE_ENV === 'production' ? 
os.cpus().length : 2; 14 | 15 | logger.info(`Number of CPUs: ${numCPUs} available`); 16 | 17 | const cacheable = new CacheableLookup(); 18 | 19 | cacheable.install(http.globalAgent); 20 | cacheable.install(https.globalAgent); 21 | 22 | const ws = expressWs(express()); 23 | const app = ws.app; 24 | 25 | declare global { 26 | var isProduction: boolean; 27 | } 28 | 29 | global.isProduction = process.env.IS_PRODUCTION === 'true'; 30 | 31 | app.use( 32 | cors({ 33 | credentials: true, 34 | }), 35 | ); 36 | app.use(express.json({ limit: '10mb' })); 37 | app.use(express.urlencoded({ extended: true })); 38 | 39 | app.get('/', (_req, res) => { 40 | res.send('CRAWLERS: Hello World'); 41 | }); 42 | 43 | // register router 44 | app.use('/v1', v1Router); 45 | 46 | const DEFAULT_PORT = process.env.PORT ?? 4000; 47 | const HOST = process.env.HOST ?? 'localhost'; 48 | 49 | function startServer(port = DEFAULT_PORT) { 50 | const server = app.listen(Number(port), HOST, () => { 51 | logger.info(`Worker ${process.pid} listening on port ${port}`); 52 | }); 53 | 54 | const exitHandler = () => { 55 | logger.info('SIGTERM signal received closing: HTTP server'); 56 | server.close(() => { 57 | logger.info('Server closed.'); 58 | process.exit(0); 59 | }); 60 | }; 61 | 62 | process.on('SIGTERM', exitHandler); 63 | process.on('SIGINT', exitHandler); 64 | return server; 65 | } 66 | 67 | if (require.main === module) { 68 | startServer(); 69 | } 70 | 71 | app.get('/is-production', (_req, res) => { 72 | res.send({ isProduction: global.isProduction }); 73 | }); 74 | 75 | logger.info(`Worker ${process.pid} started`); 76 | -------------------------------------------------------------------------------- /compose.local.yaml: -------------------------------------------------------------------------------- 1 | # compose.local.yaml (Overrides for Local Development) 2 | # Use with: docker compose -f compose.yaml -f compose.local.yaml up 3 | 4 | services: 5 | # backend 6 | api: 7 | build: 8 | context: 
./apps/api 9 | target: base 10 | environment: 11 | NODE_ENV: development 12 | LOGGING_LEVEL: debug 13 | DATABASE_URL: postgresql://postgres:postgres@postgres:5432/codecrawl_dev 14 | REDIS_URL: redis://redis:6379 15 | REDIS_PASSWORD: "" 16 | ports: 17 | - "${API_PORT_EXTERNAL:-4000}:${API_PORT:-3002}" 18 | volumes: 19 | - ./apps/api:/app 20 | - /app/node_modules 21 | depends_on: 22 | - postgres 23 | - redis 24 | 25 | worker: 26 | build: 27 | context: ./apps/api 28 | target: base 29 | environment: 30 | NODE_ENV: development 31 | LOGGING_LEVEL: debug 32 | DATABASE_URL: postgresql://postgres:postgres@postgres:5432/codecrawl_dev 33 | REDIS_URL: redis://redis:6379 34 | REDIS_PASSWORD: "" 35 | volumes: 36 | - ./apps/api:/app 37 | - /app/node_modules 38 | depends_on: 39 | - postgres 40 | - redis 41 | - api 42 | 43 | postgres: 44 | image: postgres:15-alpine 45 | networks: 46 | - internal-api-net 47 | environment: 48 | POSTGRES_USER: postgres 49 | POSTGRES_PASSWORD: postgres 50 | POSTGRES_DB: codecrawl_dev 51 | ports: 52 | - "5432:5432" 53 | volumes: 54 | - postgres_data_local:/var/lib/postgresql/data 55 | healthcheck: 56 | test: ["CMD-SHELL", "pg_isready -U postgres -d codecrawl_dev"] 57 | interval: 10s 58 | timeout: 5s 59 | retries: 5 60 | 61 | redis: 62 | image: redis:7-alpine 63 | networks: 64 | - internal-api-net 65 | ports: 66 | - "6379:6379" 67 | volumes: 68 | - redis_data_local:/data 69 | healthcheck: 70 | test: ["CMD", "redis-cli", "ping"] 71 | interval: 10s 72 | timeout: 5s 73 | retries: 5 74 | 75 | volumes: 76 | postgres_data_local: 77 | redis_data_local: 78 | -------------------------------------------------------------------------------- /apps/api/src/core/file/workers/fileCollectWorker.ts: -------------------------------------------------------------------------------- 1 | import * as fs from 'node:fs/promises'; 2 | import path from 'node:path'; 3 | import iconv from 'iconv-lite'; 4 | import jschartdet from 'jschardet'; 5 | import { isBinary } from 
'istextorbinary'; 6 | 7 | import { logger } from '~/lib/logger'; 8 | 9 | // Maximum file size to process (50MB) 10 | // This prevents out-of-memory errors when processing very large files 11 | export const MAX_FILE_SIZE = 50 * 1024 * 1024; 12 | 13 | export interface FileCollectTask { 14 | filePath: string; 15 | rootDir: string; 16 | } 17 | 18 | export default async ({ filePath, rootDir }: FileCollectTask) => { 19 | const fullPath = path.resolve(rootDir, filePath); 20 | const content = await readRawFile(fullPath); 21 | 22 | if (content) { 23 | return { 24 | path: filePath, 25 | content, 26 | }; 27 | } 28 | 29 | return null; 30 | }; 31 | 32 | const readRawFile = async (filePath: string): Promise => { 33 | try { 34 | const stats = await fs.stat(filePath); 35 | if (stats.size > MAX_FILE_SIZE) { 36 | const sizeMB = (stats.size / 1024 / 1024).toFixed(1); 37 | logger.warn(''); 38 | logger.warn('⚠️ Large File Warning:'); 39 | logger.warn('------------------'); 40 | logger.warn( 41 | `File exceeds size limit: ${sizeMB}MB > ${ 42 | MAX_FILE_SIZE / 1024 / 1024 43 | }MB (${filePath})`, 44 | ); 45 | return null; 46 | } 47 | 48 | if (isBinary(filePath)) { 49 | logger.debug(`Skipping binary file: ${filePath}`); 50 | return null; 51 | } 52 | 53 | logger.info(`Reading file: ${filePath}`); 54 | 55 | const buffer = await fs.readFile(filePath); 56 | 57 | if (isBinary(null, buffer)) { 58 | logger.debug(`Skipping binary file (content check): ${filePath}`); 59 | return null; 60 | } 61 | 62 | const encoding = jschartdet.detect(buffer).encoding || 'utf-8'; 63 | const content = iconv.decode(buffer, encoding); 64 | return content; 65 | } catch (error) { 66 | logger.warn(`Failed to read file: ${filePath}`, error); 67 | return null; 68 | } 69 | }; 70 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryRust.ts: -------------------------------------------------------------------------------- 1 | export const queryRust = ` 
2 | (line_comment) @comment 3 | (block_comment) @comment 4 | 5 | ; Import statements 6 | (use_declaration 7 | (scoped_identifier) @name.reference.module) @definition.import 8 | 9 | (use_declaration 10 | (identifier) @name.reference.module) @definition.import 11 | 12 | (extern_crate_declaration 13 | (identifier) @name.reference.module) @definition.import 14 | 15 | ; ADT definitions 16 | 17 | (struct_item 18 | name: (type_identifier) @name.definition.class) @definition.class 19 | 20 | (enum_item 21 | name: (type_identifier) @name.definition.class) @definition.class 22 | 23 | (union_item 24 | name: (type_identifier) @name.definition.class) @definition.class 25 | 26 | ; type aliases 27 | 28 | (type_item 29 | name: (type_identifier) @name.definition.class) @definition.class 30 | 31 | ; method definitions 32 | 33 | (declaration_list 34 | (function_item 35 | name: (identifier) @name.definition.method)) @definition.method 36 | 37 | ; function definitions 38 | 39 | (function_item 40 | name: (identifier) @name.definition.function) @definition.function 41 | 42 | ; trait definitions 43 | (trait_item 44 | name: (type_identifier) @name.definition.interface) @definition.interface 45 | 46 | ; module definitions 47 | (mod_item 48 | name: (identifier) @name.definition.module) @definition.module 49 | 50 | ; macro definitions 51 | 52 | (macro_definition 53 | name: (identifier) @name.definition.macro) @definition.macro 54 | 55 | ; references 56 | 57 | (call_expression 58 | function: (identifier) @name.reference.call) @reference.call 59 | 60 | (call_expression 61 | function: (field_expression 62 | field: (field_identifier) @name.reference.call)) @reference.call 63 | 64 | (macro_invocation 65 | macro: (identifier) @name.reference.call) @reference.call 66 | 67 | ; implementations 68 | 69 | (impl_item 70 | trait: (type_identifier) @name.reference.implementation) @reference.implementation 71 | 72 | (impl_item 73 | type: (type_identifier) @name.reference.implementation 74 | !trait) 
@reference.implementation 75 | `; 76 | -------------------------------------------------------------------------------- /apps/web/src/routes/app/_app/playground.tsx: -------------------------------------------------------------------------------- 1 | import { Box, Flex, Tabs, Text } from "@radix-ui/themes"; 2 | import { createFileRoute } from "@tanstack/react-router"; 3 | import { seo } from "~/utils/seo"; 4 | import { FileTreeTab } from "~/components/playground/file-tree-tab"; 5 | import { LLMsTxtTab } from "~/components/playground/llmstxt-tab"; 6 | import { SettingsTab } from "~/components/playground/settings-tab"; 7 | import { ApiKeySelector } from "~/components/playground/api-key-selector"; 8 | 9 | export const Route = createFileRoute("/app/_app/playground")({ 10 | component: RouteComponent, 11 | head(ctx) { 12 | return { 13 | meta: [...seo({ title: "Playground | Codecrawl" })], 14 | }; 15 | }, 16 | }); 17 | 18 | function RouteComponent() { 19 | return ( 20 | 21 | 22 | 23 | 24 | Playground 25 | 26 | 27 | Try out Codecrawl with your team API keys 28 | 29 | 30 | 31 | 32 | 33 | 34 | 35 | 36 | 37 | 38 | 39 | File Tree 40 | LLMs.txt 41 | Settings 42 | 43 | 44 | 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | ); 61 | } 62 | -------------------------------------------------------------------------------- /apps/api/src/lib/logger.ts: -------------------------------------------------------------------------------- 1 | import * as winston from 'winston'; 2 | import { configDotenv } from 'dotenv'; 3 | 4 | configDotenv(); 5 | 6 | const logFormat = winston.format.printf( 7 | (info) => 8 | `${info.timestamp} ${info.level} [${(info.metadata as any).module ?? ''}:${ 9 | (info.metadata as any).method ?? '' 10 | }]: ${info.message} ${ 11 | info.level.includes('error') || info.level.includes('warn') 12 | ? 
JSON.stringify(info.metadata, (_, value) => { 13 | if (value instanceof Error) { 14 | return { 15 | ...value, 16 | name: value.name, 17 | message: value.message, 18 | stack: value.stack, 19 | cause: value.cause, 20 | }; 21 | } else { 22 | return value; 23 | } 24 | }) 25 | : '' 26 | }`, 27 | ); 28 | 29 | export const logger = winston.createLogger({ 30 | level: process.env.LOGGING_LEVEL?.toLowerCase() ?? 'debug', 31 | format: winston.format.json({ 32 | replacer(key, value) { 33 | if (value instanceof Error) { 34 | return { 35 | ...value, 36 | name: value.name, 37 | message: value.message, 38 | stack: value.stack, 39 | cause: value.cause, 40 | }; 41 | } else { 42 | return value; 43 | } 44 | }, 45 | }), 46 | transports: [ 47 | ...(process.env.CODECRAWL_LOG_TO_FILE 48 | ? [ 49 | new winston.transports.File({ 50 | filename: `codecrawl-${process.argv[1].includes('worker') ? 'worker' : 'app'}-${crypto.randomUUID()}.log`, 51 | }), 52 | ] 53 | : []), 54 | new winston.transports.Console({ 55 | format: winston.format.combine( 56 | winston.format.timestamp({ format: 'YYYY-MM-DD HH:mm:ss' }), 57 | winston.format.metadata({ 58 | fillExcept: ['message', 'level', 'timestamp'], 59 | }), 60 | ...((process.env.NODE_ENV === 'production' && 61 | process.env.SENTRY_ENVIRONMENT === 'dev') || 62 | process.env.ENV !== 'production' 63 | ? 
[winston.format.colorize(), logFormat] 64 | : []), 65 | ), 66 | }), 67 | ], 68 | }); 69 | -------------------------------------------------------------------------------- /apps/web/src/components/playground/api-key-selector.tsx: -------------------------------------------------------------------------------- 1 | import { Flex, Text, Select } from "@radix-ui/themes"; 2 | import { useState, useEffect } from "react"; 3 | import { useApiKeyStore } from "~/store/use-api-key-store"; 4 | import { useTeams } from "~/contexts/teams-context"; 5 | 6 | export function ApiKeySelector() { 7 | const { activeTeam, teams } = useTeams(); 8 | const { selectedApiKey, setSelectedApiKey } = useApiKeyStore(); 9 | const [isLoading, setIsLoading] = useState(false); 10 | 11 | // Get the available API keys from the active team 12 | const apiKeys = activeTeam?.apiKeys || []; 13 | 14 | // If no key is selected but we have keys available, auto-select the first one 15 | useEffect(() => { 16 | if (!selectedApiKey && apiKeys.length > 0) { 17 | setSelectedApiKey(apiKeys[0].key); 18 | } 19 | }, [apiKeys, selectedApiKey, setSelectedApiKey]); 20 | 21 | const handleSelectChange = (value: string) => { 22 | const selectedKey = apiKeys.find((k) => k.id === value)?.key || null; 23 | setSelectedApiKey(selectedKey); 24 | }; 25 | 26 | // No team keys available - show warning 27 | if (apiKeys.length === 0) { 28 | return ( 29 | 30 | 31 | No API keys available. Please create an API key in your team settings. 
32 | 33 | 34 | ); 35 | } 36 | 37 | return ( 38 | 39 | 40 | 41 | Select an API key from your team 42 | 43 | k.key === selectedApiKey)?.id || apiKeys[0].id 46 | } 47 | onValueChange={handleSelectChange} 48 | disabled={isLoading} 49 | > 50 | 51 | 52 | {apiKeys.map((key) => ( 53 | 54 | {key.name} 55 | 56 | ))} 57 | 58 | 59 | 60 | 61 | ); 62 | } 63 | -------------------------------------------------------------------------------- /apps/web/src/components/ui/accordion.tsx: -------------------------------------------------------------------------------- 1 | 'use client'; 2 | 3 | import * as React from 'react'; 4 | import * as AccordionPrimitive from '@radix-ui/react-accordion'; 5 | import { ChevronDownIcon } from '@heroicons/react/24/outline'; 6 | 7 | import { cn } from '~/utils/classnames'; 8 | 9 | const Accordion = AccordionPrimitive.Root; 10 | 11 | const AccordionItem = React.forwardRef< 12 | React.ElementRef, 13 | React.ComponentPropsWithoutRef 14 | >(({ className, ...props }, ref) => ( 15 | 20 | )); 21 | AccordionItem.displayName = 'AccordionItem'; 22 | 23 | const AccordionTrigger = React.forwardRef< 24 | React.ElementRef, 25 | React.ComponentPropsWithoutRef 26 | >(({ className, children, ...props }, ref) => ( 27 | 28 | svg]:rotate-180', 32 | className, 33 | )} 34 | {...props} 35 | > 36 | {children} 37 | 38 | 39 | 40 | )); 41 | AccordionTrigger.displayName = AccordionPrimitive.Trigger.displayName; 42 | 43 | const AccordionContent = React.forwardRef< 44 | React.ElementRef, 45 | React.ComponentPropsWithoutRef 46 | >(({ className, children, ...props }, ref) => ( 47 | 52 |
{children}
53 |
54 | )); 55 | AccordionContent.displayName = AccordionPrimitive.Content.displayName; 56 | 57 | export { Accordion, AccordionItem, AccordionTrigger, AccordionContent }; 58 | -------------------------------------------------------------------------------- /apps/api/src/core/file/fileTreeGenerate.ts: -------------------------------------------------------------------------------- 1 | import nodepath from 'node:path'; 2 | 3 | interface TreeNode { 4 | name: string; 5 | children: TreeNode[]; 6 | isDirectory: boolean; 7 | } 8 | 9 | const createTreeNode = (name: string, isDirectory: boolean): TreeNode => ({ 10 | name, 11 | children: [], 12 | isDirectory, 13 | }); 14 | 15 | export const generateFileTree = ( 16 | files: string[], 17 | emptyDirPaths: string[] = [], 18 | ): TreeNode => { 19 | const root: TreeNode = createTreeNode('root', true); 20 | 21 | for (const file of files) { 22 | addPathToTree(root, file, false); 23 | } 24 | 25 | // Add empty directories 26 | for (const dir of emptyDirPaths) { 27 | addPathToTree(root, dir, true); 28 | } 29 | 30 | return root; 31 | }; 32 | 33 | const addPathToTree = ( 34 | root: TreeNode, 35 | path: string, 36 | isDirectory: boolean, 37 | ): void => { 38 | const parts = path.split(nodepath.sep); 39 | let currentNode = root; 40 | 41 | for (let i = 0; i < parts.length; i++) { 42 | const part = parts[i]; 43 | const isLastPart = i === parts.length - 1; 44 | let child = currentNode.children.find((c) => c.name === part); 45 | 46 | if (!child) { 47 | child = createTreeNode(part, !isLastPart || isDirectory); 48 | currentNode.children.push(child); 49 | } 50 | 51 | currentNode = child; 52 | } 53 | }; 54 | 55 | const sortTreeNodes = (node: TreeNode) => { 56 | node.children.sort((a, b) => { 57 | if (a.isDirectory === b.isDirectory) { 58 | return a.name.localeCompare(b.name); 59 | } 60 | return a.isDirectory ? 
-1 : 1; 61 | }); 62 | 63 | for (const child of node.children) { 64 | sortTreeNodes(child); 65 | } 66 | }; 67 | 68 | export const treeToString = (node: TreeNode, prefix = ''): string => { 69 | sortTreeNodes(node); 70 | let result = ''; 71 | 72 | for (const child of node.children) { 73 | result += `${prefix}${child.name}${child.isDirectory ? '/' : ''}\n`; 74 | if (child.isDirectory) { 75 | result += treeToString(child, `${prefix} `); 76 | } 77 | } 78 | 79 | return result; 80 | }; 81 | 82 | export const generateTreeString = ( 83 | files: string[], 84 | emptyDirPaths: string[] = [], 85 | ): string => { 86 | const tree = generateFileTree(files, emptyDirPaths); 87 | return treeToString(tree).trim(); 88 | }; 89 | -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/queryTypescript.ts: -------------------------------------------------------------------------------- 1 | export const queryTypescript = ` 2 | (import_statement 3 | (import_clause (identifier) @name.reference.module)) @definition.import 4 | 5 | (import_statement 6 | (import_clause 7 | (named_imports 8 | (import_specifier 9 | name: (identifier) @name.reference.module))) @definition.import) 10 | 11 | (comment) @comment 12 | 13 | (function_signature 14 | name: (identifier) @name.definition.function) @definition.function 15 | 16 | (method_signature 17 | name: (property_identifier) @name.definition.method) @definition.method 18 | 19 | (abstract_method_signature 20 | name: (property_identifier) @name.definition.method) @definition.method 21 | 22 | (abstract_class_declaration 23 | name: (type_identifier) @name.definition.class) @definition.class 24 | 25 | (module 26 | name: (identifier) @name.definition.module) @definition.module 27 | 28 | (interface_declaration 29 | name: (type_identifier) @name.definition.interface) @definition.interface 30 | 31 | (type_annotation 32 | (type_identifier) @name.reference.type) @reference.type 33 | 34 | (new_expression 35 
| constructor: (identifier) @name.reference.class) @reference.class 36 | 37 | (function_declaration 38 | name: (identifier) @name.definition.function) @definition.function 39 | 40 | (method_definition 41 | name: (property_identifier) @name.definition.method) @definition.method 42 | 43 | (class_declaration 44 | name: (type_identifier) @name.definition.class) @definition.class 45 | 46 | (interface_declaration 47 | name: (type_identifier) @name.definition.class) @definition.class 48 | 49 | (type_alias_declaration 50 | name: (type_identifier) @name.definition.type) @definition.type 51 | 52 | (enum_declaration 53 | name: (identifier) @name.definition.enum) @definition.enum 54 | 55 | (lexical_declaration 56 | (variable_declarator 57 | name: (identifier) @name.definition.function 58 | value: (arrow_function) 59 | ) 60 | ) @definition.function 61 | 62 | (variable_declaration 63 | (variable_declarator 64 | name: (identifier) @name.definition.function 65 | value: (arrow_function) 66 | ) 67 | ) @definition.function 68 | 69 | (assignment_expression 70 | left: [(identifier) @name.definition.function] 71 | right: (arrow_function) 72 | ) @definition.function 73 | `; 74 | -------------------------------------------------------------------------------- /apps/api/src/core/metrics/calculateAllFileMetrics.ts: -------------------------------------------------------------------------------- 1 | import type { TiktokenEncoding } from 'tiktoken'; 2 | 3 | import { logger } from '~/lib/logger'; 4 | import { initPiscina } from '~/lib/processConcurrency'; 5 | import type { FileMetricsTask } from './workers/fileMetricsWorker'; 6 | import type { ProcessedFile } from '../file/fileTypes'; 7 | import type { CrawlProgressCallback } from '~/types'; 8 | import type { FileMetrics } from './workers/types'; 9 | 10 | const initTaskRunner = (numOfTasks: number) => { 11 | const pool = initPiscina( 12 | numOfTasks, 13 | require.resolve('./workers/fileMetricsWorker'), 14 | ); 15 | return (task: 
FileMetricsTask) => pool.run(task); 16 | }; 17 | 18 | export const calculateAllFileMetrics = async ( 19 | processedFiles: ProcessedFile[], 20 | tokenCounterEncoding: TiktokenEncoding, 21 | progressCallback: CrawlProgressCallback, 22 | deps = { 23 | initTaskRunner, 24 | }, 25 | ): Promise => { 26 | const runTask = deps.initTaskRunner(processedFiles.length); 27 | const tasks = processedFiles.map( 28 | (file, index) => 29 | ({ 30 | file, 31 | index, 32 | totalFiles: processedFiles.length, 33 | encoding: tokenCounterEncoding, 34 | }) satisfies FileMetricsTask, 35 | ); 36 | 37 | try { 38 | const startTime = process.hrtime.bigint(); 39 | logger.info( 40 | `Starting metrics calculation for ${processedFiles.length} files using worker pool`, 41 | ); 42 | 43 | let completedTasks = 0; 44 | const results = await Promise.all( 45 | tasks.map((task) => 46 | runTask(task).then((result) => { 47 | completedTasks++; 48 | progressCallback( 49 | `Calculating metrics... (${completedTasks}/${task.totalFiles})`, 50 | ); 51 | logger.info( 52 | `Calculating metrics... 
(${completedTasks}/${task.totalFiles}) ${task.file.path}`, 53 | ); 54 | return result; 55 | }), 56 | ), 57 | ); 58 | 59 | const endTime = process.hrtime.bigint(); 60 | const duration = Number(endTime - startTime) / 1e6; 61 | logger.info(`Metrics calculation completed in ${duration.toFixed(2)}ms`); 62 | 63 | return results; 64 | } catch (error) { 65 | logger.error('Error during metrics calculation:', error); 66 | throw error; 67 | } 68 | }; 69 | -------------------------------------------------------------------------------- /apps/api/src/lib/generate-llms-txt/redis.ts: -------------------------------------------------------------------------------- 1 | import { redisConnection } from '../../services/queue-service'; 2 | import { logger as _logger } from '../logger'; 3 | 4 | export interface GenerationData { 5 | id: string; 6 | teamId: string; 7 | plan: string; 8 | createdAt: number; 9 | status: 'processing' | 'completed' | 'failed'; 10 | url: string; 11 | showFullText: boolean; 12 | generatedText: string; 13 | fullText: string; 14 | error?: string; 15 | } 16 | 17 | // TTL of 24 hours 18 | const GENERATION_TTL = 24 * 60 * 60; 19 | 20 | export async function saveGeneratedLlmsTxt( 21 | id: string, 22 | data: GenerationData, 23 | ): Promise { 24 | _logger.debug(`Saving llmstxt generation ${id} to Redis`); 25 | await redisConnection.set(`generation:${id}`, JSON.stringify(data)); 26 | await redisConnection.expire(`generation:${id}`, GENERATION_TTL); 27 | } 28 | 29 | export async function getGeneratedLLmsTxt( 30 | id: string, 31 | ): Promise { 32 | const x = await redisConnection.get(`generation:${id}`); 33 | return x ? 
JSON.parse(x) : null; 34 | } 35 | 36 | export async function updateGeneratedLlmsTxt( 37 | id: string, 38 | data: Partial, 39 | ): Promise { 40 | const current = await getGeneratedLLmsTxt(id); 41 | if (!current) return; 42 | 43 | const updatedGeneration = { 44 | ...current, 45 | ...data, 46 | }; 47 | 48 | await redisConnection.set( 49 | `generation:${id}`, 50 | JSON.stringify(updatedGeneration), 51 | ); 52 | await redisConnection.expire(`generation:${id}`, GENERATION_TTL); 53 | } 54 | 55 | export async function getGeneratedLlmsTxtExpiry(id: string): Promise { 56 | const d = new Date(); 57 | const ttl = await redisConnection.pttl(`generation:${id}`); 58 | d.setMilliseconds(d.getMilliseconds() + ttl); 59 | d.setMilliseconds(0); 60 | return d; 61 | } 62 | 63 | // Convenience method for status updates 64 | export async function updateGeneratedLlmsTxtStatus( 65 | id: string, 66 | status: 'processing' | 'completed' | 'failed', 67 | generatedText?: string, 68 | fullText?: string, 69 | error?: string, 70 | ): Promise { 71 | const updates: Partial = { status }; 72 | if (generatedText !== undefined) updates.generatedText = generatedText; 73 | if (fullText !== undefined) updates.fullText = fullText; 74 | if (error !== undefined) updates.error = error; 75 | 76 | await updateGeneratedLlmsTxt(id, updates); 77 | } 78 | -------------------------------------------------------------------------------- /apps/api/src/core/file/fileProcess.ts: -------------------------------------------------------------------------------- 1 | import type { ConfigMerged } from '~/config/configSchema'; 2 | import type { CrawlProgressCallback } from '~/types'; 3 | import { type FileManipulator, getFileManipulator } from './fileManipulate'; 4 | import type { ProcessedFile, RawFile } from './fileTypes'; 5 | import type { FileProcessTask } from './workers/fileProcessWorker'; 6 | import { logger } from '~/lib/logger'; 7 | import { initPiscina } from '~/lib/processConcurrency'; 8 | 9 | type GetFileManipulator 
= (filePath: string) => FileManipulator | null; 10 | 11 | const initTaskRunner = (numOfTasks: number) => { 12 | const pool = initPiscina( 13 | numOfTasks, 14 | require.resolve('./workers/fileProcessWorker'), 15 | ); 16 | return (task: FileProcessTask) => pool.run(task); 17 | }; 18 | 19 | export const processFiles = async ( 20 | rawFiles: RawFile[], 21 | config: ConfigMerged, 22 | progressCallback: CrawlProgressCallback, 23 | deps: { 24 | initTaskRunner: typeof initTaskRunner; 25 | getFileManipulator: GetFileManipulator; 26 | } = { 27 | initTaskRunner, 28 | getFileManipulator, 29 | }, 30 | ): Promise => { 31 | const runTask = deps.initTaskRunner(rawFiles.length); 32 | const tasks = rawFiles.map( 33 | (rawFile, index) => 34 | ({ 35 | rawFile, 36 | config, 37 | }) satisfies FileProcessTask, 38 | ); 39 | 40 | try { 41 | const startTime = process.hrtime.bigint(); 42 | logger.info( 43 | `Starting file processing for ${rawFiles.length} files using worker pool`, 44 | ); 45 | 46 | let completedTasks = 0; 47 | const totalTasks = tasks.length; 48 | 49 | const results = await Promise.all( 50 | tasks.map((task) => 51 | runTask(task).then((result) => { 52 | completedTasks++; 53 | progressCallback( 54 | `Processing file... (${completedTasks}/${totalTasks})`, 55 | ); 56 | logger.info( 57 | `Processing file... 
(${completedTasks}/${totalTasks}) ${task.rawFile.path}`, 58 | ); 59 | return result; 60 | }), 61 | ), 62 | ); 63 | 64 | const endTime = process.hrtime.bigint(); 65 | const duration = Number(endTime - startTime) / 1e6; 66 | logger.info(`File processing completed in ${duration.toFixed(2)}ms`); 67 | 68 | return results; 69 | } catch (error) { 70 | logger.error('Error during file processing:', error); 71 | throw error; 72 | } 73 | }; 74 | -------------------------------------------------------------------------------- /apps/api/src/routes/v1.ts: -------------------------------------------------------------------------------- 1 | import express from 'express'; 2 | import expressWs from 'express-ws'; 3 | 4 | import { apiKeyAuthMiddleware, authMiddleware, wrap } from '~/middleware'; 5 | import { RateLimiterMode } from '~/types'; 6 | import { generateLLMsTextController } from '~/controllers/v1/generate-llmstxt'; 7 | import { generateLLMsTextStatusController } from '~/controllers/v1/generate-llmstxt-status'; 8 | import { 9 | userMeController, 10 | userApiKeysController, 11 | userCreateApiKeyController, 12 | userDeleteApiKeyController, 13 | } from '~/controllers/v1/user'; 14 | import { livenessController } from '~/controllers/v1/liveness'; 15 | import { readinessController } from '~/controllers/v1/readiness'; 16 | import { login, register } from '~/controllers/v1/auth'; 17 | import { teamKeysController, teamsController } from '~/controllers/v1/teams'; 18 | import { generateTreeStatusController } from '~/controllers/v1/generate-tree-status'; 19 | import { generateTreeController } from '~/controllers/v1/generate-tree'; 20 | 21 | expressWs(express()); 22 | 23 | export const v1Router = express.Router(); 24 | 25 | v1Router.post('/auth/login', wrap(login)); 26 | v1Router.post('/auth/register', wrap(register)); 27 | 28 | v1Router.get('/health/liveness', wrap(livenessController)); 29 | v1Router.get('/health/readiness', wrap(readinessController)); 30 | 31 | v1Router.get('/users/me', 
authMiddleware(false), wrap(userMeController)); 32 | v1Router.get('/users/keys', authMiddleware(), wrap(userApiKeysController)); 33 | v1Router.post( 34 | '/users/keys', 35 | authMiddleware(), 36 | wrap(userCreateApiKeyController), 37 | ); 38 | v1Router.delete( 39 | '/users/keys/:keyId', 40 | authMiddleware(), 41 | wrap(userDeleteApiKeyController), 42 | ); 43 | 44 | v1Router.get('/teams/:teamId/keys', authMiddleware(), wrap(teamKeysController)); 45 | v1Router.get('/teams', authMiddleware(), wrap(teamsController)); 46 | v1Router.post( 47 | '/llmstxt', 48 | apiKeyAuthMiddleware(RateLimiterMode.Crawl), 49 | wrap(generateLLMsTextController), 50 | ); 51 | v1Router.get( 52 | '/llmstxt/:jobId', 53 | apiKeyAuthMiddleware(RateLimiterMode.CrawlStatus), 54 | wrap(generateLLMsTextStatusController as any), 55 | ); 56 | 57 | v1Router.post( 58 | '/tree', 59 | apiKeyAuthMiddleware(RateLimiterMode.Crawl), 60 | wrap(generateTreeController), 61 | ); 62 | v1Router.get( 63 | '/tree/:jobId', 64 | apiKeyAuthMiddleware(RateLimiterMode.CrawlStatus), 65 | wrap(generateTreeStatusController as any), 66 | ); 67 | -------------------------------------------------------------------------------- /apps/api/src/core/metrics/calculateOutputMetrics.ts: -------------------------------------------------------------------------------- 1 | import type { TiktokenEncoding } from 'tiktoken'; 2 | 3 | import { logger } from '~/lib/logger'; 4 | import { initPiscina } from '~/lib/processConcurrency'; 5 | import type { OutputMetricsTask } from './workers/outputMetricsWorker'; 6 | 7 | const CHUNK_SIZE = 1000; 8 | const MIN_CONTENT_LENGTH_FOR_PARALLEL = 1_000_000; // 1000KB 9 | 10 | const initTaskRunner = (numOfTasks: number) => { 11 | const pool = initPiscina( 12 | numOfTasks, 13 | require.resolve('./workers/outputMetricsWorker'), 14 | ); 15 | return (task: OutputMetricsTask) => pool.run(task); 16 | }; 17 | 18 | export const calculateOutputMetrics = async ( 19 | content: string, 20 | encoding: TiktokenEncoding, 21 
| path?: string, 22 | deps = { 23 | initTaskRunner, 24 | }, 25 | ): Promise => { 26 | const shouldRunInParallel = content.length > MIN_CONTENT_LENGTH_FOR_PARALLEL; 27 | const numOfTasks = shouldRunInParallel ? CHUNK_SIZE : 1; 28 | const runTask = deps.initTaskRunner(numOfTasks); 29 | 30 | try { 31 | logger.info(`Starting output token count for ${path || 'output'}`); 32 | const startTime = process.hrtime.bigint(); 33 | 34 | let result: number; 35 | 36 | if (shouldRunInParallel) { 37 | // Split content into chunks for parallel processing 38 | const chunkSize = Math.ceil(content.length / CHUNK_SIZE); 39 | const chunks: string[] = []; 40 | 41 | for (let i = 0; i < content.length; i += chunkSize) { 42 | chunks.push(content.slice(i, i + chunkSize)); 43 | } 44 | 45 | // Process chunks in parallel 46 | const chunkResults = await Promise.all( 47 | chunks.map((chunk, index) => 48 | runTask({ 49 | content: chunk, 50 | encoding, 51 | path: path ? `${path}-chunk-${index}` : undefined, 52 | }), 53 | ), 54 | ); 55 | 56 | // Sum up the results 57 | result = chunkResults.reduce((sum, count) => sum + count, 0); 58 | } else { 59 | // Process small content directly 60 | result = await runTask({ content, encoding, path }); 61 | } 62 | 63 | const endTime = process.hrtime.bigint(); 64 | const duration = Number(endTime - startTime) / 1e6; 65 | logger.info(`Output token count completed in ${duration.toFixed(2)}ms`); 66 | 67 | return result; 68 | } catch (error) { 69 | logger.error('Error during token count:', error); 70 | throw error; 71 | } 72 | }; 73 | -------------------------------------------------------------------------------- /apps/web/src/routes/(marketing)/_landing/blog.$slug.tsx: -------------------------------------------------------------------------------- 1 | import { MDXContent } from '@content-collections/mdx/react'; 2 | import { createFileRoute } from '@tanstack/react-router'; 3 | import { useMemo } from 'react'; 4 | import { format } from 'date-fns'; 5 | import { 
findPostBySlug } from '~/lib/content'; 6 | 7 | export const Route = createFileRoute('/(marketing)/_landing/blog/$slug')({ 8 | component: RouteComponent, 9 | loader: async ({ params: { slug } }) => findPostBySlug(slug), 10 | }); 11 | 12 | function RouteComponent() { 13 | const post = Route.useLoaderData(); 14 | const dt = useMemo(() => new Date(post.date), []); 15 | 16 | return ( 17 |
18 |
19 |
20 |

21 | {post.title} 22 |

23 |
24 |
25 | Published on 26 | 32 |
33 |
34 | Written by 35 | {post.author} 36 |
37 |
38 |
39 |
40 | {post.title} 47 |
48 |
49 |
50 |
51 | 52 |
53 |
54 |
55 | ); 56 | } 57 | -------------------------------------------------------------------------------- /apps/api/src/core/file/workers/fileProcessWorker.ts: -------------------------------------------------------------------------------- 1 | import type { ConfigMerged } from '~/config/configSchema'; 2 | import { logger } from '~/lib/logger'; 3 | import { parseFile } from '../../treeSitter/parseFile'; 4 | import { getFileManipulator } from '../fileManipulate'; 5 | import type { RawFile } from '../fileTypes'; 6 | 7 | export interface FileProcessTask { 8 | rawFile: RawFile; 9 | config: ConfigMerged; 10 | } 11 | 12 | export default async ({ config, rawFile }: FileProcessTask) => { 13 | const processedContent = await processContent(rawFile, config); 14 | 15 | return { 16 | path: rawFile.path, 17 | content: processedContent, 18 | }; 19 | }; 20 | 21 | export const processContent = async ( 22 | rawFile: RawFile, 23 | config: ConfigMerged, 24 | ) => { 25 | const processStartAt = process.hrtime.bigint(); 26 | let processedContent = rawFile.content; 27 | const manipulator = getFileManipulator(rawFile.path); 28 | 29 | logger.info(`Processing file: ${rawFile.path}`); 30 | 31 | if (manipulator && config.output.removeComments) { 32 | processedContent = manipulator.removeComments(processedContent); 33 | } 34 | 35 | if (config.output.removeEmptyLines && manipulator) { 36 | processedContent = manipulator.removeEmptyLines(processedContent); 37 | } 38 | 39 | processedContent = processedContent.trim(); 40 | 41 | if (config.output.compress) { 42 | try { 43 | const parsedContent = await parseFile(processedContent, rawFile.path); 44 | if (parsedContent === undefined) { 45 | logger.error(`Failed to parse file: ${rawFile.path}`); 46 | } 47 | processedContent = parsedContent ?? processedContent; 48 | } catch (error: unknown) { 49 | const message = error instanceof Error ? 
error.message : String(error); 50 | logger.error( 51 | `Error parsing ${rawFile.path} in compressed mode: ${message}`, 52 | ); 53 | //re-throw error 54 | throw error; 55 | } 56 | } else if (config.output.showLineNumbers) { 57 | const lines = processedContent.split('\n'); 58 | const padding = lines.length.toString().length; 59 | const numberedLines = lines.map( 60 | (line, i) => `${(i + 1).toString().padStart(padding)}: ${line}`, 61 | ); 62 | processedContent = numberedLines.join('\n'); 63 | } 64 | 65 | const processEndAt = process.hrtime.bigint(); 66 | logger.info( 67 | `Processed file: ${rawFile.path}. Took: ${(Number(processEndAt - processStartAt) / 1e6).toFixed(2)}ms`, 68 | ); 69 | 70 | return processedContent; 71 | }; 72 | -------------------------------------------------------------------------------- /apps/web/src/routes/(marketing)/_landing/playground.tsx: -------------------------------------------------------------------------------- 1 | import { Flex, Text, Box } from "@radix-ui/themes"; 2 | import { Tabs } from "@radix-ui/themes"; 3 | import { createFileRoute } from "@tanstack/react-router"; 4 | import { FileTreeTab } from "~/components/playground/file-tree-tab"; 5 | import { SettingsTab } from "~/components/playground/settings-tab"; 6 | import { usePlaygroundSettingsStore } from "~/store/use-playground-settings"; 7 | import { LLMsTxtTab } from "~/components/playground/llmstxt-tab"; 8 | 9 | export const Route = createFileRoute("/(marketing)/_landing/playground")({ 10 | component: RouteComponent, 11 | }); 12 | 13 | function RouteComponent() { 14 | usePlaygroundSettingsStore(); 15 | 16 | return ( 17 | 18 | 19 | 20 | 21 | Playground 22 | 23 | 29 | Try out Codecrawl in this visual playground 30 | 31 | 32 | 33 | 34 |
35 | 36 | 37 | 38 | LLMs.txt 39 | 40 | 41 | File Tree 42 | 43 | 44 | Settings 45 | 46 | 47 | 48 | 49 | 50 | 51 | 52 | 53 | 54 | 55 | 56 | 57 | 58 | 59 | 60 | 61 | 62 |
63 |
64 |
65 | ); 66 | } 67 | -------------------------------------------------------------------------------- /apps/web/src/components/svgs/logo-black.tsx: -------------------------------------------------------------------------------- 1 | export default function LogoBlack(props: React.SVGProps) { 2 | return ( 3 | 11 | 18 | 25 | 32 | 39 | 46 | 53 | 60 | 67 | 74 | 81 | 88 | 89 | ); 90 | } 91 | -------------------------------------------------------------------------------- /apps/api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:22.2.0-slim AS base 2 | 3 | ENV PNPM_HOME="/pnpm" 4 | ENV PATH="$PNPM_HOME:$PATH" 5 | # unsafe-perm: skip UID/GID switching so package scripts can run as root 6 | ENV npm_config_unsafe_perm true 7 | 8 | # Pin and activate pnpm 9.1.0 through corepack 9 | RUN corepack prepare pnpm@9.1.0 --activate 10 | RUN corepack enable 11 | 12 | # Create app directory 13 | WORKDIR /app 14 | 15 | # Add the nodejs system user and group 16 | RUN groupadd --system --gid 1001 nodejs && \ 17 | useradd --system --uid 1001 --gid nodejs nodejs 18 | 19 | # Copy application code first 20 | COPY . 
/app 21 | 22 | # Copy entrypoint script and set permissions early 23 | COPY docker-entrypoint.sh /app/docker-entrypoint.sh 24 | RUN sed -i 's/\r$//' /app/docker-entrypoint.sh && \ 25 | chown nodejs:nodejs /app/docker-entrypoint.sh && \ 26 | chmod +x /app/docker-entrypoint.sh 27 | 28 | # Change ownership for subsequent RUN commands 29 | RUN chown -R nodejs:nodejs /app 30 | 31 | # Switch to non-root user for subsequent commands 32 | USER nodejs 33 | 34 | FROM base AS prod-deps 35 | WORKDIR /app 36 | USER root 37 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --filter api --prod 38 | # Ensure node_modules is owned by nodejs user 39 | RUN chown -R nodejs:nodejs /app/node_modules 40 | USER nodejs 41 | 42 | FROM base AS build 43 | WORKDIR /app 44 | USER root 45 | RUN --mount=type=cache,id=pnpm,target=/pnpm/store pnpm install --filter api && pnpm run build 46 | # Ensure dist is owned by nodejs user 47 | RUN chown -R nodejs:nodejs /app/dist 48 | USER nodejs 49 | 50 | # Final runtime image; NODE_ENV is set for the production runtime 51 | FROM node:22.2.0-slim 52 | ENV NODE_ENV production 53 | ENV PNPM_HOME="/pnpm" 54 | ENV PATH="$PNPM_HOME:$PATH" 55 | ENV npm_config_unsafe_perm true 56 | 57 | WORKDIR /app 58 | 59 | # Add node user and group again in the final stage 60 | RUN groupadd --system --gid 1001 nodejs && \ 61 | useradd --system --uid 1001 --gid nodejs nodejs 62 | 63 | # Copy built artifacts and dependencies 64 | COPY --from=build /app/dist /app/dist 65 | COPY --from=prod-deps /app/node_modules /app/node_modules 66 | COPY --from=base /app/docker-entrypoint.sh /app/docker-entrypoint.sh 67 | 68 | # Ensure correct ownership of application files 69 | RUN chown -R nodejs:nodejs /app 70 | 71 | # Switch to non-root user 72 | USER nodejs 73 | 74 | # Expose the port the API listens on (value often set via compose) 75 | # Dockerfile EXPOSE is informational; the actual binding happens in compose 76 | EXPOSE 3002 77 | 78 | # Run the entrypoint script 79 | ENTRYPOINT 
/** Redis key of the sorted set that tracks a team's active jobs (member = job id, score = expiry timestamp). */
const constructKey = (teamId: string) => `concurrency-limiter:${teamId}`;

/** Redis key of the sorted set that holds a team's waiting jobs (member = serialized job, score = priority). */
const constructQueueKey = (teamId: string) =>
  `concurrency-limit-queue:${teamId}`;

/**
 * Drops every active-job entry whose expiry score is at or before `now`.
 *
 * @param teamId - Team whose active-job set is pruned.
 * @param now - Reference timestamp; defaults to `Date.now()`.
 */
export async function cleanOldConcurrencyLimitEntries(
  teamId: string,
  now: number = Date.now(),
) {
  await redisConnection.zremrangebyscore(
    constructKey(teamId),
    Number.NEGATIVE_INFINITY,
    now,
  );
}

/**
 * Returns the ids of the team's active jobs whose expiry score is at or after `now`.
 *
 * @param teamId - Team whose active-job set is read.
 * @param now - Reference timestamp; defaults to `Date.now()`.
 * @returns Job ids still considered active.
 */
export async function getConcurrencyLimitActiveJobs(
  teamId: string,
  now: number = Date.now(),
): Promise<string[]> {
  return await redisConnection.zrangebyscore(
    constructKey(teamId),
    now,
    Number.POSITIVE_INFINITY,
  );
}

/**
 * Registers a job as active for the team; the entry is scored `now + timeout`.
 *
 * @param teamId - Team the job belongs to.
 * @param id - Job id stored as the sorted-set member.
 * @param timeout - Added to `now` to form the expiry score (presumably milliseconds, like `Date.now()` — confirm with callers).
 * @param now - Reference timestamp; defaults to `Date.now()`.
 */
export async function pushConcurrencyLimitActiveJob(
  teamId: string,
  id: string,
  timeout: number,
  now: number = Date.now(),
) {
  await redisConnection.zadd(constructKey(teamId), now + timeout, id);
}

/**
 * Removes a job from the team's active-job set.
 *
 * @param teamId - Team the job belongs to.
 * @param id - Job id to remove.
 */
export async function removeConcurrencyLimitActiveJob(
  teamId: string,
  id: string,
) {
  await redisConnection.zrem(constructKey(teamId), id);
}

/** A job parked in the per-team waiting queue until a concurrency slot frees up. */
export type ConcurrencyLimitedJob = {
  id: string;
  data: any;
  opts: JobsOptions;
  priority?: number;
};

/**
 * Shape of a non-empty ZMPOP reply: the key that was popped from, followed by
 * a list of `[member, score]` pairs (one pair per popped element).
 */
type ZMPopResult = [key: string, entries: [member: string, score: string][]];

/**
 * Pops the waiting job with the lowest priority score for the team.
 *
 * @param teamId - Team whose waiting queue is popped.
 * @returns The deserialized job, or `null` when the queue is empty.
 */
export async function takeConcurrencyLimitedJob(
  teamId: string,
): Promise<ConcurrencyLimitedJob | null> {
  const reply = (await redisConnection.zmpop(
    1,
    constructQueueKey(teamId),
    'MIN',
  )) as ZMPopResult | null | undefined;

  // reply[1] is the list of popped pairs, reply[1][0] the first pair and
  // reply[1][0][0] its member. Parsing the pair itself (the previous
  // behavior) stringifies it to "<json>,<score>" and makes JSON.parse throw.
  const member = reply?.[1]?.[0]?.[0];
  if (member === undefined) {
    return null;
  }

  return JSON.parse(member) as ConcurrencyLimitedJob;
}

/**
 * Adds a job to the team's waiting queue, scored by `job.priority` (1 when unset).
 *
 * @param teamId - Team the job belongs to.
 * @param job - Job to enqueue; it is stored as its JSON serialization.
 */
export async function pushConcurrencyLimitedJob(
  teamId: string,
  job: ConcurrencyLimitedJob,
) {
  await redisConnection.zadd(
    constructQueueKey(teamId),
    job.priority ?? 1,
    JSON.stringify(job),
  );
}

/**
 * Collects the ids of every job currently waiting in the team's queue.
 *
 * @param teamId - Team whose waiting queue is read.
 * @returns A set of the `id` fields of the queued jobs.
 */
export async function getConcurrencyLimitedJobs(teamId: string) {
  const serializedJobs = await redisConnection.zrange(
    constructQueueKey(teamId),
    0,
    -1,
  );
  return new Set(serializedJobs.map((x) => JSON.parse(x).id));
}

/**
 * Counts the jobs waiting in the team's queue.
 *
 * @param teamId - Team whose waiting queue is counted.
 * @returns Number of queued jobs.
 */
export async function getConcurrencyQueueJobsCount(
  teamId: string,
): Promise<number> {
  return await redisConnection.zcard(constructQueueKey(teamId));
}
/**
 * Lists the API keys that belong to the authenticated user.
 *
 * Responds 200 with `{ keys: [] }` when the request carries no `userId`,
 * otherwise 200 with every `api_keys` row whose `userId` matches.
 */
export async function userApiKeysController(req: Request, res: Response) {
  const userId = req.userId;

  if (!userId) {
    return res.status(200).json({ keys: [] });
  }

  const keys = await db
    .select()
    .from(apiKeys)
    .where(eq(apiKeys.userId, userId));
  return res.status(200).json({ keys });
}

// `name` is a varchar(255) column and `team_id` a uuid column; validating the
// shape here turns what would be a database error (500) into a clean 400.
const createApiKeySchema = z.object({
  name: z.string().max(255),
  teamId: z.string().uuid(),
});

/**
 * Creates a new API key for the authenticated user.
 *
 * Responds 401 without a `userId`, 400 when the body does not match
 * `createApiKeySchema`, 500 when the insert fails, and 200 with the created
 * row otherwise.
 *
 * NOTE(review): the caller-supplied `teamId` is not checked against the
 * user's team memberships here — confirm whether that is enforced elsewhere.
 */
export async function userCreateApiKeyController(
  req: Request<
    Request['params'],
    unknown,
    z.infer<typeof createApiKeySchema>
  >,
  res: Response,
) {
  const userId = req.userId;

  if (!userId) {
    return res.status(401).json({ error: 'Unauthorized' });
  }

  // Validate before touching the database so client mistakes are not
  // reported as server failures.
  const parsed = createApiKeySchema.safeParse(req.body);
  if (!parsed.success) {
    return res.status(400).json({ error: 'Invalid request' });
  }
  const { name, teamId } = parsed.data;

  try {
    const [key] = await db
      .insert(apiKeys)
      .values({
        key: createApiKey(),
        userId: userId as string,
        teamId,
        name,
      })
      .returning();
    return res.status(200).json({ key });
  } catch (error) {
    console.error(error);
    return res.status(500).json({ error: 'Failed to create API key' });
  }
}

// `api_keys.id` is a uuid column; reject anything else up front.
const deleteApiKeySchema = z.object({
  keyId: z.string().uuid(),
});

/**
 * Deletes one of the authenticated user's API keys.
 *
 * Responds 401 without a `userId`, 400 when the body does not match
 * `deleteApiKeySchema`, 404 when the key does not exist or belongs to another
 * user, 500 when the database call fails, and 200 on success.
 */
export async function userDeleteApiKeyController(
  req: Request<
    Request['params'],
    unknown,
    z.infer<typeof deleteApiKeySchema>
  >,
  res: Response,
) {
  const userId = req.userId;

  if (!userId) {
    return res.status(401).json({ error: 'Unauthorized' });
  }

  const parsed = deleteApiKeySchema.safeParse(req.body);
  if (!parsed.success) {
    return res.status(400).json({ error: 'Invalid request' });
  }
  const { keyId } = parsed.data;

  try {
    // Ownership check: without it any authenticated user could delete any
    // key just by knowing (or guessing) its id.
    const [existing] = await db
      .select()
      .from(apiKeys)
      .where(eq(apiKeys.id, keyId));

    if (!existing || existing.userId !== userId) {
      // Same answer for "missing" and "not yours" so key ids are not leaked.
      return res.status(404).json({ error: 'API key not found' });
    }

    await db.delete(apiKeys).where(eq(apiKeys.id, keyId));
    return res.status(200).json({ message: 'API key deleted', keyId });
  } catch (error) {
    console.error(error);
    return res.status(500).json({ error: 'Failed to delete API key' });
  }
}
/**
 * Tree-sitter query source for JavaScript.
 *
 * The query captures:
 * - every `comment` node as `@comment`;
 * - method definitions other than `constructor` as `@definition.method`;
 * - `class` / `class_declaration` nodes as `@definition.class`;
 * - function declarations, generator functions, and variable declarators,
 *   assignments and object pairs whose value is a function, as
 *   `@definition.function`;
 * - calls (excluding a bare `require(...)`) as `@reference.call`;
 * - `new` expressions as `@reference.class`.
 *
 * Definition patterns also capture the immediately preceding comments as
 * `@doc`. The `#strip!` and `#select-adjacent!` entries are directives whose
 * effect depends on the code that runs this query — TODO confirm they are
 * honored by the consumer.
 *
 * NOTE(review): several patterns list `(function_declaration)` as a possible
 * `value:` / `right:` node; a declaration is not an expression, so those
 * alternatives presumably never match — confirm against the grammar and the
 * upstream tags.scm.
 */
export const queryJavascript = `
(comment) @comment

(
  (comment)* @doc
  .
  (method_definition
    name: (property_identifier) @name.definition.method) @definition.method
  (#not-eq? @name.definition.method "constructor")
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.method)
)

(
  (comment)* @doc
  .
  [
    (class
      name: (_) @name.definition.class)
    (class_declaration
      name: (_) @name.definition.class)
  ] @definition.class
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.class)
)

(
  (comment)* @doc
  .
  [
    (function_declaration
      name: (identifier) @name.definition.function)
    (generator_function
      name: (identifier) @name.definition.function)
    (generator_function_declaration
      name: (identifier) @name.definition.function)
  ] @definition.function
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.function)
)

(
  (comment)* @doc
  .
  (lexical_declaration
    (variable_declarator
      name: (identifier) @name.definition.function
      value: [(arrow_function) (function_declaration)]) @definition.function)
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.function)
)

(
  (comment)* @doc
  .
  (variable_declaration
    (variable_declarator
      name: (identifier) @name.definition.function
      value: [(arrow_function) (function_declaration)]) @definition.function)
  (#strip! @doc "^[\\s\\*/]+|^[\\s\\*/]$")
  (#select-adjacent! @doc @definition.function)
)

(assignment_expression
  left: [
    (identifier) @name.definition.function
    (member_expression
      property: (property_identifier) @name.definition.function)
  ]
  right: [(arrow_function) (function_declaration)]
) @definition.function

(pair
  key: (property_identifier) @name.definition.function
  value: [(arrow_function) (function_declaration)]) @definition.function

(
  (call_expression
    function: (identifier) @name.reference.call) @reference.call
  (#not-match? @name.reference.call "^(require)$")
)

(call_expression
  function: (member_expression
    property: (property_identifier) @name.reference.call)
  arguments: (_) @reference.call)

(new_expression
  constructor: (_) @name.reference.class) @reference.class
`;
/**
 * Normalizes a plan name into the key used by the rate-limit tables.
 *
 * @param plan - Plan name, possibly hyphenated; may be omitted.
 * @returns The plan name with every hyphen removed, or `'default'` when no
 *   (or an empty) plan is given.
 */
function makePlanKey(plan?: string) {
  // `replace` with a string pattern only removes the first match; a global
  // regex is needed to strip every hyphen from multi-part plan names.
  return plan ? plan.replace(/-/g, '') : 'default';
}

/**
 * Resolves how many points a plan gets for the given rate-limiter mode.
 *
 * @param mode - Rate-limiter mode used to select a table in `RATE_LIMITS`.
 * @param _token - Unused; kept for signature compatibility.
 * @param plan - Plan name; normalized with `makePlanKey`.
 * @param _teamId - Unused; kept for signature compatibility.
 * @returns The plan-specific limit, the mode's `default` when the plan has no
 *   entry, or `RATE_LIMITS.account.default` when the mode itself is unknown.
 */
export function getRateLimiterPoints(
  mode: RateLimiterMode,
  _token?: string,
  plan?: string,
  _teamId?: string,
): number {
  const rateLimitConfig = RATE_LIMITS[mode];

  if (!rateLimitConfig) return RATE_LIMITS.account.default;

  // Only accept real numeric entries: a plain property lookup would also
  // return inherited members (e.g. plan "constructor") as the "limit".
  const planLimit: unknown = (rateLimitConfig as Record<string, unknown>)[
    makePlanKey(plan)
  ];

  return typeof planLimit === 'number' ? planLimit : rateLimitConfig.default;
}

/**
 * Builds a Redis-backed rate limiter for a mode/plan combination.
 *
 * @param mode - Rate-limiter mode; also part of the Redis key prefix.
 * @param token - Forwarded to `getRateLimiterPoints`.
 * @param plan - Plan name; its normalized form is part of the key prefix.
 * @param teamId - Forwarded to `getRateLimiterPoints`.
 * @returns A limiter keyed `<mode>-<planKey>` with the resolved point budget.
 */
export function getRateLimiter(
  mode: RateLimiterMode,
  token?: string,
  plan?: string,
  teamId?: string,
): RateLimiterRedis {
  return createRateLimiter(
    `${mode}-${makePlanKey(plan)}`,
    getRateLimiterPoints(mode, token, plan, teamId),
  );
}

/**
 * Returns the maximum number of concurrent jobs allowed for a plan.
 *
 * @param plan - Plan to look up in `CONCURRENCY_LIMIT`.
 * @param _teamId - Unused; kept for signature compatibility.
 * @returns The configured limit, or 10 when the plan has no entry.
 */
export function getConcurrencyLimitMax(
  plan: PlanType,
  _teamId?: string,
): number {
  return CONCURRENCY_LIMIT[plan] ?? 10;
}
out 89 | 90 | # Nuxt.js build / generate output 91 | .nuxt 92 | dist 93 | 94 | # Gatsby files 95 | .cache/ 96 | # Comment in the public line in if your project uses Gatsby and not Next.js 97 | # https://nextjs.org/blog/next-9-1#public-directory-support 98 | # public 99 | 100 | # vuepress build output 101 | .vuepress/dist 102 | 103 | # vuepress v2.x temp and cache directory 104 | .temp 105 | .cache 106 | 107 | # Docusaurus cache and generated files 108 | .docusaurus 109 | 110 | # Serverless directories 111 | .serverless/ 112 | 113 | # FuseBox cache 114 | .fusebox/ 115 | 116 | # DynamoDB Local files 117 | .dynamodb/ 118 | 119 | # TernJS port file 120 | .tern-port 121 | 122 | # Stores VSCode versions used for testing VSCode extensions 123 | .vscode-test 124 | 125 | # yarn v2 126 | .yarn/cache 127 | .yarn/unplugged 128 | .yarn/build-state.yml 129 | .yarn/install-state.gz 130 | .pnp.* 131 | 132 | build -------------------------------------------------------------------------------- /apps/api/src/core/treeSitter/queries/README.md: -------------------------------------------------------------------------------- 1 | 2 | # Credits 3 | Codecrawl uses modified versions of tree-sitter queries from Aider and Cline: 4 | * [https://github.com/Aider-AI/aider](https://github.com/Aider-AI/aider) — licensed under the Apache License 2.0. 5 | * [https://github.com/cline/cline](https://github.com/cline/cline) — licensed under the Apache License 2.0. 6 | 7 | Aider uses modified versions of the tags.scm files from these open source 8 | tree-sitter language implementations: 9 | 10 | * [https://github.com/tree-sitter/tree-sitter-c](https://github.com/tree-sitter/tree-sitter-c) — licensed under the MIT License. 11 | * [https://github.com/tree-sitter/tree-sitter-c-sharp](https://github.com/tree-sitter/tree-sitter-c-sharp) — licensed under the MIT License. 12 | * [https://github.com/tree-sitter/tree-sitter-cpp](https://github.com/tree-sitter/tree-sitter-cpp) — licensed under the MIT License. 
13 | * [https://github.com/Wilfred/tree-sitter-elisp](https://github.com/Wilfred/tree-sitter-elisp) — licensed under the MIT License. 14 | * [https://github.com/elixir-lang/tree-sitter-elixir](https://github.com/elixir-lang/tree-sitter-elixir) — licensed under the Apache License, Version 2.0. 15 | * [https://github.com/elm-tooling/tree-sitter-elm](https://github.com/elm-tooling/tree-sitter-elm) — licensed under the MIT License. 16 | * [https://github.com/tree-sitter/tree-sitter-go](https://github.com/tree-sitter/tree-sitter-go) — licensed under the MIT License. 17 | * [https://github.com/tree-sitter/tree-sitter-java](https://github.com/tree-sitter/tree-sitter-java) — licensed under the MIT License. 18 | * [https://github.com/tree-sitter/tree-sitter-javascript](https://github.com/tree-sitter/tree-sitter-javascript) — licensed under the MIT License. 19 | * [https://github.com/tree-sitter/tree-sitter-ocaml](https://github.com/tree-sitter/tree-sitter-ocaml) — licensed under the MIT License. 20 | * [https://github.com/tree-sitter/tree-sitter-php](https://github.com/tree-sitter/tree-sitter-php) — licensed under the MIT License. 21 | * [https://github.com/tree-sitter/tree-sitter-python](https://github.com/tree-sitter/tree-sitter-python) — licensed under the MIT License. 22 | * [https://github.com/tree-sitter/tree-sitter-ql](https://github.com/tree-sitter/tree-sitter-ql) — licensed under the MIT License. 23 | * [https://github.com/r-lib/tree-sitter-r](https://github.com/r-lib/tree-sitter-r) — licensed under the MIT License. 24 | * [https://github.com/tree-sitter/tree-sitter-ruby](https://github.com/tree-sitter/tree-sitter-ruby) — licensed under the MIT License. 25 | * [https://github.com/tree-sitter/tree-sitter-rust](https://github.com/tree-sitter/tree-sitter-rust) — licensed under the MIT License. 26 | * [https://github.com/tree-sitter/tree-sitter-typescript](https://github.com/tree-sitter/tree-sitter-typescript) — licensed under the MIT License. 
/** Signature shared by the lazy queue accessors below. */
export type QueueFunction = () => Queue;

// Lazily created singletons; each one is built on first access by its getter.
let crawlQueue: Queue;
let generateLlmsTxtQueue: Queue;
let billingQueue: Queue;
let treeQueue: Queue;

// Fail fast at import time: nothing in this module works without Redis.
if (!process.env.REDIS_URL) {
  throw new Error('REDIS_URL environment variable is missing...');
}

/** Redis connection shared by every queue created in this module. */
export const redisConnection = new IORedis(process.env.REDIS_URL, {
  maxRetriesPerRequest: null,
});

export const indexStoreQueueName = '{indexQueue}';
export const treeQueueName = '{treeQueue}';
export const generateLlmsTxtQueueName = '{generateLlmsTextQueue}';
export const crawlQueueName = '{crawlQueue}';
export const billingQueueName = '{billingQueue}';

/**
 * Creates a queue on the shared connection whose completed and failed jobs
 * are both removed once they are older than `retentionSeconds`.
 */
const buildQueue = (name: string, retentionSeconds: number): Queue =>
  new Queue(name, {
    connection: redisConnection,
    defaultJobOptions: {
      removeOnComplete: {
        age: retentionSeconds,
      },
      removeOnFail: {
        age: retentionSeconds,
      },
    },
  });

/** Returns the crawl queue, creating it on first use (jobs kept for 3 hours). */
export function getCrawlQueue() {
  if (crawlQueue) {
    return crawlQueue;
  }
  crawlQueue = buildQueue(crawlQueueName, 10800);
  logger.info('Codebase crawling queue created');
  return crawlQueue;
}

/** Returns the llms.txt generation queue, creating it on first use (jobs kept for 25 hours). */
export function getGenerateLlmsTxtQueue() {
  if (generateLlmsTxtQueue) {
    return generateLlmsTxtQueue;
  }
  generateLlmsTxtQueue = buildQueue(generateLlmsTxtQueueName, 90000);
  logger.info('LLMs TXT generation queue created');
  return generateLlmsTxtQueue;
}

/** Returns the billing queue, creating it on first use (jobs kept for 25 hours). */
export function getBillingQueue() {
  if (billingQueue) {
    return billingQueue;
  }
  billingQueue = buildQueue(billingQueueName, 90000);
  logger.info('Billing queue created');
  return billingQueue;
}

/** Returns the tree generation queue, creating it on first use (jobs kept for 25 hours). */
export function getGenerateTreeQueue() {
  if (treeQueue) {
    return treeQueue;
  }
  treeQueue = buildQueue(treeQueueName, 90000);
  logger.info('Tree generation queue created');
  return treeQueue;
}
"^2.8.5", 35 | "date-fns": "^4.1.0", 36 | "dotenv": "^16.4.7", 37 | "dotenv-cli": "^8.0.0", 38 | "drizzle-orm": "^0.41.0", 39 | "express": "^5.1.0", 40 | "express-ws": "^5.0.2", 41 | "fast-xml-parser": "^5.2.0", 42 | "git-url-parse": "^16.0.1", 43 | "glob": "^11.0.1", 44 | "globby": "^14.1.0", 45 | "gpt3-tokenizer": "^1.1.5", 46 | "handlebars": "^4.7.8", 47 | "iconv-lite": "^0.6.3", 48 | "ioredis": "^5.6.0", 49 | "istextorbinary": "^9.5.0", 50 | "jschardet": "^3.1.4", 51 | "jsonwebtoken": "^9.0.2", 52 | "koffi": "^2.10.1", 53 | "minimatch": "^10.0.1", 54 | "piscina": "^4.9.2", 55 | "postgres": "^3.4.5", 56 | "rate-limiter-flexible": "^7.0.0", 57 | "strip-comments": "^2.0.1", 58 | "systeminformation": "^5.25.11", 59 | "tiktoken": "^1.0.20", 60 | "tree-sitter-wasms": "^0.1.12", 61 | "uuid": "^11.1.0", 62 | "web-tree-sitter": "^0.25.3", 63 | "winston": "^3.17.0", 64 | "winston-transport": "^4.9.0", 65 | "ws": "^8.18.1", 66 | "zod": "^3.24.2" 67 | }, 68 | "devDependencies": { 69 | "@jest/globals": "^29.7.0", 70 | "@swc/cli": "^0.7.3", 71 | "@swc/core": "^1.11.22", 72 | "@types/cors": "^2.8.17", 73 | "@types/express": "^5.0.1", 74 | "@types/express-ws": "^3.0.5", 75 | "@types/git-url-parse": "^9.0.3", 76 | "@types/node": "^22.14.0", 77 | "@types/strip-comments": "^2.0.4", 78 | "@types/supertest": "^6.0.3", 79 | "concurrently": "^9.1.2", 80 | "drizzle-kit": "^0.30.6", 81 | "nodemon": "^3.1.9", 82 | "supertest": "^7.1.0", 83 | "ts-jest": "^29.3.1", 84 | "ts-node": "^10.9.2", 85 | "typescript": "^5.8.2" 86 | } 87 | } 88 | -------------------------------------------------------------------------------- /apps/api/src/db/migrations/0000_typical_kylun.sql: -------------------------------------------------------------------------------- 1 | CREATE TABLE "api_keys" ( 2 | "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL, 3 | "name" varchar(255) NOT NULL, 4 | "key" varchar(255) NOT NULL, 5 | "user_id" uuid NOT NULL, 6 | "team_id" uuid NOT NULL, 7 | "is_active" boolean 
DEFAULT true NOT NULL, 8 | "created_at" timestamp with time zone DEFAULT now() NOT NULL, 9 | CONSTRAINT "api_keys_key_unique" UNIQUE("key") 10 | ); 11 | --> statement-breakpoint 12 | CREATE TABLE "llms_txts" ( 13 | "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL, 14 | "repo_url" text NOT NULL, 15 | "llmstxt" text NOT NULL, 16 | "max_urls" integer DEFAULT 1, 17 | "llmstxt_full" text, 18 | "created_at" timestamp DEFAULT now(), 19 | "updated_at" timestamp DEFAULT now() 20 | ); 21 | --> statement-breakpoint 22 | CREATE TABLE "users" ( 23 | "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL, 24 | "email" varchar(255) NOT NULL, 25 | "hashed_password" text, 26 | "google_id" varchar(255), 27 | "github_id" varchar(255), 28 | "token_version" integer DEFAULT 0, 29 | "created_at" timestamp with time zone DEFAULT now(), 30 | CONSTRAINT "users_email_unique" UNIQUE("email") 31 | ); 32 | --> statement-breakpoint 33 | CREATE TABLE "team_members" ( 34 | "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL, 35 | "team_id" uuid NOT NULL, 36 | "user_id" uuid NOT NULL, 37 | "created_at" timestamp with time zone DEFAULT now() NOT NULL 38 | ); 39 | --> statement-breakpoint 40 | CREATE TABLE "teams" ( 41 | "id" uuid PRIMARY KEY DEFAULT gen_random_uuid() NOT NULL, 42 | "name" varchar(255) NOT NULL, 43 | "created_at" timestamp with time zone DEFAULT now() NOT NULL, 44 | "updated_at" timestamp with time zone DEFAULT now() NOT NULL 45 | ); 46 | --> statement-breakpoint 47 | ALTER TABLE "api_keys" ADD CONSTRAINT "api_keys_user_id_users_id_fk" FOREIGN KEY ("user_id") REFERENCES "public"."users"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint 48 | ALTER TABLE "api_keys" ADD CONSTRAINT "api_keys_team_id_teams_id_fk" FOREIGN KEY ("team_id") REFERENCES "public"."teams"("id") ON DELETE no action ON UPDATE no action;--> statement-breakpoint 49 | ALTER TABLE "team_members" ADD CONSTRAINT "team_members_team_id_teams_id_fk" FOREIGN KEY ("team_id") REFERENCES 
/** Credentials payload accepted by both `login` and `register`. */
const loginSchema = z.object({
  email: z.string().email(),
  password: z.string(),
});

/**
 * Authenticates a user with email and password.
 *
 * Responds 400 for a malformed body, an unknown email or a wrong password,
 * 500 when an unexpected error occurs, and 200 with `{ success, tokens }`
 * on success.
 */
export const login = async (req: Request, res: Response) => {
  // Validate first so only genuine client mistakes are reported as 400.
  const parsed = loginSchema.safeParse(req.body);
  if (!parsed.success) {
    return res.status(400).json({ error: 'Invalid request' });
  }
  const { email, password } = parsed.data;

  try {
    const user = await db.query.users.findFirst({
      where: eq(users.email, email),
    });

    if (!user) {
      return res.status(400).json({ error: 'User not found' });
    }

    // `hashed_password` is nullable (e.g. accounts that only have a
    // google_id / github_id); such accounts cannot log in with a password,
    // and argon2.verify must not be handed a null hash.
    if (!user.hashedPassword) {
      return res.status(400).json({ error: 'Invalid password' });
    }

    const isPasswordValid = await argon2.verify(user.hashedPassword, password);

    if (!isPasswordValid) {
      return res.status(400).json({ error: 'Invalid password' });
    }

    const tokens = createTokens(user);

    return res.status(200).json({
      success: true,
      tokens,
    });
  } catch (error) {
    // Database or hashing failures are server-side problems, not bad requests.
    console.error(error);
    return res.status(500).json({ error: 'Internal Server Error' });
  }
};

/**
 * Registers a new user.
 *
 * In a single transaction it creates a "Personal Team", the user, the team
 * membership and a "Default" API key. Responds 400 for a malformed body or
 * an already registered email, 500 when creation fails, and 201 with
 * `{ success, tokens }` on success.
 */
export const register = async (req: Request, res: Response) => {
  // Without validation a missing email/password reaches the query builder
  // and argon2.hash as `undefined`.
  const parsed = loginSchema.safeParse(req.body);
  if (!parsed.success) {
    return res.status(400).json({ error: 'Invalid request' });
  }
  const { email, password } = parsed.data;

  try {
    const existingUser = await db.query.users.findFirst({
      where: eq(users.email, email),
    });

    if (existingUser) {
      return res.status(400).json({ error: 'User already exists' });
    }

    const hashedPassword = await argon2.hash(password);

    const user = await db.transaction(async (tx) => {
      const apiKey = createApiKey();

      const [team] = await tx
        .insert(teams)
        .values({
          name: 'Personal Team',
        })
        .returning();

      const [u] = await tx
        .insert(users)
        .values({
          email,
          hashedPassword,
        })
        .returning();

      // Throwing rolls the transaction back instead of inserting membership
      // and key rows that reference an undefined id.
      if (!team || !u) {
        throw new Error('Failed to create user or team');
      }

      await tx.insert(teamMembers).values({
        userId: u.id,
        teamId: team.id,
      });

      await tx.insert(apiKeys).values({
        key: apiKey,
        userId: u.id,
        teamId: team.id,
        name: 'Default',
      });

      return u;
    });

    const tokens = createTokens(user);

    return res.status(201).json({
      success: true,
      tokens,
    });
  } catch (error) {
    console.error(error);
    // The status must be set BEFORE the body is sent: `res.json(...)`
    // followed by `.status(500)` delivers the error payload with HTTP 200.
    return res.status(500).json({ error: 'Internal Server Error' });
  }
};