├── .nvmrc ├── version.ts ├── src ├── app │ ├── index.ts │ ├── scoreQA.ts │ ├── getScore.ts │ └── compareLib.ts ├── config │ ├── header.ts │ └── options.ts ├── services │ ├── context7.ts │ ├── llmUtils.ts │ ├── prompts │ │ ├── handler.ts │ │ └── templates.ts │ ├── llmEval.ts │ └── questionEval.ts ├── reports │ ├── human.ts │ └── machine.ts └── lib │ ├── textEval.ts │ ├── utils.ts │ ├── types.ts │ └── textMetrics.ts ├── .gitignore ├── tests ├── compareTester.ts ├── individualTester.ts ├── QATester.ts ├── textMetricsTester.ts └── testContext.txt ├── tsconfig.json ├── package.json ├── .github └── workflows │ └── release.yml └── README.md /.nvmrc: -------------------------------------------------------------------------------- 1 | 24 2 | -------------------------------------------------------------------------------- /version.ts: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /src/app/index.ts: -------------------------------------------------------------------------------- 1 | export { getScore } from "./getScore.js"; 2 | export { compareLibraries } from "./compareLib.js"; 3 | export { scoreQA } from "./scoreQA.js"; 4 | export * from "../lib/types.js"; 5 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Environment variables 2 | .env 3 | .env.local 4 | .env.*.local 5 | 6 | # Python 7 | .venv 8 | .venv/ 9 | __pycache__/ 10 | .DS_Store 11 | 12 | # Dependencies 13 | node_modules/ 14 | npm-debug.log* 15 | yarn-debug.log* 16 | yarn-error.log* 17 | yarn.lock 18 | 19 | # TypeScript 20 | *.tsbuildinfo 21 | dist/ 22 | build/ 23 | out/ 24 | 25 | # Cache 26 | .cache/ 27 | .npm/ 28 | 29 | compare-results/ 30 | individual-results/ 31 | results/ 32 | benchmark-questions/ 33 | gemini-key.json -------------------------------------------------------------------------------- /src/config/header.ts: -------------------------------------------------------------------------------- 1 | import { HeaderConfig } from "../lib/types.js"; 2 | 3 | /** 4 | * Builds the header config for the Context7 API 5 | * @param context7ApiToken - The Context7 API token 6 | * @returns The header config 7 | */ 8 | export const buildContext7Header = (context7ApiToken: string): HeaderConfig | {} => { 9 | let headerConfig = {}; 10 | headerConfig = { 11 | headers: { 12 | "Authorization": "Bearer " + context7ApiToken 13 | } 14 | } 15 | return headerConfig; 16 | } 17 | -------------------------------------------------------------------------------- /src/config/options.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Default configuration options for the score evaluation 3 | */ 4 | export const defaultConfigOptions = { 5 | report: { 6 | console: true, 7 | humanReadable: false, 8 | returnScore: false, 9 | }, 10 | weights: { 11 | question: 0.8, 12 | llm: 0.05, 13 | formatting: 0.05, 14 | metadata: 0.05, 15 | initialization: 0.05, 16 | }, 17 | llm: { 18 | temperature: 0, 19 | topP: 0.1, 20 | topK: 1, 21 | candidateCount: 1, 22 | seed: 42, 23 | } 24 | } 25 | 26 | -------------------------------------------------------------------------------- /tests/compareTester.ts: -------------------------------------------------------------------------------- 1 | import { fileURLToPath } from "url"; 2 | import { compareLibraries } from "../src/app/compareLib.ts"; 
3 | 4 | async function main() { 5 | 6 | console.log("🧪 Running compare tester...") 7 | const libraries = [ 8 | "tailwindlabs/tailwindcss.com", 9 | "websites/tailwindcss-com_vercel_app", 10 | ]; 11 | 12 | // Questions to test functionality 13 | const questions = `1. How can I install rust? 14 | 2. How can I install Tailwind CSS v4.1 using npm?`; 15 | 16 | await compareLibraries(libraries[0], libraries[1], questions, { 17 | report: { 18 | console: true 19 | } 20 | }); 21 | } 22 | 23 | if (process.argv[1] === fileURLToPath(import.meta.url)) { 24 | void main(); 25 | } -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "ES2020", 4 | "module": "NodeNext", 5 | "moduleResolution": "NodeNext", 6 | "esModuleInterop": true, 7 | "strict": true, 8 | "outDir": "dist", 9 | "rootDir": "src", 10 | "declaration": true, 11 | "sourceMap": true, 12 | "types": [ 13 | "node" 14 | ] 15 | }, 16 | "include": [ 17 | "src/**/*", 18 | ], 19 | "exclude": [ 20 | "node_modules", 21 | "dist", 22 | "repos", 23 | "output", 24 | "projects", 25 | "archive", 26 | "temp", 27 | "tests" 28 | ], 29 | "ts-node": { 30 | "esm": true, 31 | "moduleTypes": { 32 | "**/*": "esm" 33 | } 34 | } 35 | } -------------------------------------------------------------------------------- /tests/individualTester.ts: -------------------------------------------------------------------------------- 1 | import { getScore } from "../src/app/getScore.ts"; 2 | import { fileURLToPath } from "url"; 3 | 4 | 5 | async function main() { 6 | 7 | const library = "/websites/python_langchain"; 8 | 9 | // Questions to test functionality. First question is answerable, the second is not. 10 | const questions = `1. What is a selector, and how do I use it? 11 | 2. 
What is the input token limit for Gemini 2.5 Pro?`; 12 | 13 | try { 14 | console.log(`Working on ${library}`) 15 | await getScore( 16 | library, 17 | questions, 18 | { 19 | report: { 20 | console: true 21 | } 22 | }); 23 | 24 | } catch (error) { 25 | console.error(`${library} error: ${error}`); 26 | } 27 | } 28 | 29 | if (process.argv[1] === fileURLToPath(import.meta.url)) { 30 | void main(); 31 | } 32 | 33 | -------------------------------------------------------------------------------- /tests/QATester.ts: -------------------------------------------------------------------------------- 1 | import { scoreQA } from "../src/app/scoreQA.ts"; 2 | import { fileURLToPath } from "url"; 3 | import { config } from 'dotenv'; 4 | import axios from "axios"; 5 | 6 | config(); 7 | 8 | export async function getSnippets(library: string): Promise { 9 | const context7Url = `https://context7.com/api/v1/${library}?tokens=10000` 10 | const response = await axios.get(context7Url, { headers: { 11 | "Authorization": "Bearer " + process.env.CONTEXT7_API_TOKEN 12 | }}); 13 | const snippet_title = "=".repeat(24) + "\nCODE SNIPPETS\n" + "=".repeat(24); 14 | const snippets = response.data.replace(snippet_title, ""); 15 | return snippets; 16 | } 17 | 18 | async function main() { 19 | const context = await getSnippets("langchain-ai/langgraph"); 20 | 21 | const { 22 | questionAverageScore, 23 | questionExplanation 24 | } = await scoreQA( 25 | "How can I install LangChain Core?", 26 | context 27 | ); 28 | console.log(`Score: ${questionAverageScore}`) 29 | console.log(`Explanation: ${questionExplanation}`) 30 | } 31 | 32 | if (process.argv[1] === fileURLToPath(import.meta.url)) { 33 | void main(); 34 | } 35 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@upstash/c7score", 3 | "version": "2.0.0", 4 | "description": "Evaluates the quality of code snippets.", 5 | "type": "module", 6 | "main": "dist/app/index.js", 7 | "types": "dist/app/index.d.ts", 8 | "scripts": { 9 | "start": "node dist/app/index.js", 10 | "build": "tsc", 11 | "prepare": "npm run build", 12 | "test": "npm run test-individual && npm run test-compare && npm run test-text-metrics", 13 | "test-individual": "tsx tests/individualTester.ts", 14 | "test-compare": "tsx tests/compareTester.ts", 15 | "test-text-metrics": "tsx tests/textMetricsTester.ts", 16 | "test-qascore": "tsx tests/QATester.ts" 17 | }, 18 | "repository": { 19 | "type": "git", 20 | "url": "git+https://github.com/upstash/c7score" 21 | }, 22 | "keywords": [], 23 | "author": "Shannon Rumsey", 24 | "license": "ISC", 25 | "bugs": { 26 | "url": "https://github.com/upstash/c7score/issues" 27 | }, 28 | "homepage": "https://github.com/upstash/c7score", 29 | "dependencies": { 30 | "@eslint/js": "^9.35.0", 31 | "@google/genai": "^1.9.0", 32 | "axios": "^1.7.2", 33 | "commander": "^12.1.0", 34 | "dotenv": "^16.4.5", 35 | "eslint": "^9.35.0", 36 | "exponential-backoff": "^3.1.2", 37 | "fast-fuzzy": "^1.12.0" 38 | }, 39 | "devDependencies": { 40 | "@types/node": "^24.0.14", 41 | "ts-node": "^10.9.2", 42 | "tsx": "^4.20.5", 43 | "typescript": "^5.8.3" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run tests using node and then publish a package to GitHub Packages when a 
release is created 2 | # For more information see: https://docs.github.com/en/actions/publishing-packages/publishing-nodejs-packages 3 | 4 | name: Release 5 | 6 | on: 7 | release: 8 | types: [published] 9 | 10 | jobs: 11 | build: 12 | runs-on: ubuntu-latest 13 | steps: 14 | - uses: actions/checkout@v5 15 | - uses: actions/setup-node@v5 16 | with: 17 | node-version: 24 18 | 19 | - name: Install dependencies 20 | run: npm ci 21 | 22 | - name: Build 23 | run: npm run build 24 | 25 | publish-npm: 26 | needs: build 27 | runs-on: ubuntu-latest 28 | steps: 29 | - uses: actions/checkout@v5 30 | - uses: actions/setup-node@v5 31 | with: 32 | node-version: 24 33 | registry-url: https://registry.npmjs.org/ 34 | 35 | - name: Set env 36 | run: echo "VERSION=${GITHUB_REF#refs/tags/v}" >> $GITHUB_ENV 37 | 38 | - name: Set package version 39 | id: set-version 40 | run: | 41 | echo $(jq --arg v "${{ env.VERSION }}" '(.version) = $v' package.json) > package.json 42 | echo "export const VERSION='${{ env.VERSION }}'" > ./version.ts 43 | 44 | - name: Set env 45 | run: echo "VERSION=${GITHUB_REF#refs/*/}" >> $GITHUB_ENV 46 | 47 | - name: Install dependencies 48 | run: npm ci 49 | 50 | - name: Publish 51 | run: npm publish --access public 52 | env: 53 | NODE_AUTH_TOKEN: ${{secrets.NPM_TOKEN}} 54 | -------------------------------------------------------------------------------- /src/services/context7.ts: -------------------------------------------------------------------------------- 1 | import axios, { AxiosError } from 'axios'; 2 | 3 | /** 4 | * Checks if the library has any redirects 5 | * @param library - The library to check 6 | * @returns The redirected library if it exists, otherwise the original library 7 | */ 8 | export async function checkRedirects(library: string, headerConfig: object): Promise { 9 | try { 10 | const context7Libraries = `https://context7.com/api/v1/${library}?tokens=10000`; 11 | await axios.get(context7Libraries, headerConfig); 12 | return library; 13 | } catch (error) { 14 | if (error instanceof AxiosError && error.response?.status === 404) { 15 | const redirectKeyword = `Library ${library} has been redirected to this library: ` 16 | const errorMessage = error.response.data; 17 | if (errorMessage.includes(redirectKeyword)) { 18 | const newLibrary = errorMessage.split(redirectKeyword)[1].split(".").slice(0, -1).join(".").trim(); 19 | return newLibrary; 20 | } 21 | } 22 | throw error 23 | } 24 | } 25 | 26 | /** 27 | * Scrapes snippets from the Context7 API 28 | * @param library - The library to scrape snippets from 29 | * @param headerConfig - The header config to use for the Context7 API 30 | * @returns The scraped snippets 31 | */ 32 | export async function scrapeContext7Snippets(library: string, headerConfig: object): Promise { 33 | const context7Url = `https://context7.com/api/v1/${library}?tokens=10000` 34 | const response = await axios.get(context7Url, headerConfig); 35 | const snippet_title = "=".repeat(24) + "\nCODE SNIPPETS\n" + "=".repeat(24); 36 | const snippets = String(response.data).replace(snippet_title, ""); 37 | return snippets; 38 | } 39 | -------------------------------------------------------------------------------- /src/reports/human.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs/promises"; 2 | import { defaultConfigOptions } from '../config/options.js'; 3 | import { FullResults, ReportOptions } from '../lib/types.js'; 4 | 5 | /** 6 | * Writes the full results to a human-readable text file 7 | * @param 
library - The name of the library 8 | * @param fullResults - The full results to write 9 | * @param reportOptions - The options for the report, specifically the folder path and console output 10 | * @param compare - Whether the report is for a comparison or individual library 11 | */ 12 | export const humanReadableReport = async ( 13 | library: string, 14 | fullResults: FullResults, 15 | reportOptions: ReportOptions = defaultConfigOptions.report, 16 | compare: boolean = false): Promise => { 17 | const toSave = [ 18 | "== Average Score ==", 19 | fullResults.averageScore, 20 | "== Questions Score ==", 21 | fullResults.questionAverageScore, 22 | "== Questions Explanation ==", 23 | fullResults.questionExplanation, 24 | "== LLM Score ==", 25 | fullResults.llmAverageScore, 26 | "== LLM Explanation ==", 27 | fullResults.llmExplanation, 28 | "== Formatting Score ==", 29 | fullResults.formattingAvgScore, 30 | "== Project Metadata Score ==", 31 | fullResults.metadataAvgScore, 32 | "== Initialization Score ==", 33 | fullResults.initializationAvgScore, 34 | ] 35 | if (reportOptions.humanReadable) { 36 | const directory = reportOptions.folderPath; 37 | await fs.writeFile(`${directory}/result${compare ? "-compare" : ""}-${library.replace(/[/._]/g, "-").toLowerCase()}.txt`, toSave.join("\n\n")); 38 | } 39 | if (reportOptions.console) { 40 | console.log(toSave.join("\n\n")); 41 | } 42 | } -------------------------------------------------------------------------------- /src/services/llmUtils.ts: -------------------------------------------------------------------------------- 1 | import { GoogleGenAI } from '@google/genai'; 2 | import { backOff } from 'exponential-backoff'; 3 | 4 | /** 5 | * Runs the LLM on a prompt. This is for evaluating question and LLM metrics. 6 | * @param prompt - The prompt to run the LLM on 7 | * @param config - The config to use, which specifies formatting, tool calling, and model configuration. 
8 | * @param client - The client to use for the LLM evaluation 9 | * @returns The response from the LLM 10 | */ 11 | export async function runLLM(prompt: string, config: Record<string, unknown>, client: GoogleGenAI): Promise<string> { 12 | const countTokensResponse = await client.models.countTokens({ 13 | model: "gemini-2.5-pro", 14 | contents: prompt, 15 | }); 16 | if (countTokensResponse.totalTokens !== undefined && countTokensResponse.totalTokens > 1048576) { 17 | console.error("Prompt is too long: ", countTokensResponse.totalTokens, " condensing prompt to 1048576 tokens"); 18 | // 1 Gemini token = roughly 4 characters, using 3 to not go over limit 19 | prompt = prompt.slice(0, 1048576 * 3); 20 | } 21 | const generate = async (): Promise<string> => { 22 | const response = await client.models.generateContent({ 23 | model: "gemini-2.5-pro", 24 | contents: [prompt], 25 | config: { 26 | ...config 27 | } 28 | }); 29 | if (response.text === undefined) { 30 | throw new Error("Response is undefined"); 31 | } 32 | return response.text; 33 | } 34 | try { 35 | const retryResponse = await backOff(() => generate(), { 36 | numOfAttempts: 5, 37 | delayFirstAttempt: true, 38 | }); 39 | return retryResponse; 40 | } catch (error) { 41 | throw new Error("Error in LLM call (context or llm evaluation): " + error); 42 | } 43 | } -------------------------------------------------------------------------------- /src/reports/machine.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs/promises"; 2 | import { Scores, ScoresObject, ReportOptions } from "../lib/types.js"; 3 | import { defaultConfigOptions } from '../config/options.js'; 4 | 5 | /** 6 | * Converts the scores and average score into an object 7 | * @param productName - The name of the product 8 | * @param scores - The scores to convert 9 | * @param averageScore - The average score to convert 10 | * @returns The converted scores and average score 11 | */ 12 | export const convertScoresToObject = ( 13 | productName: string, 14 | scores: Scores, 15 | averageScore: number, 16 | ): ScoresObject => { 17 | return { 18 | [productName]: { 19 | scores: scores, 20 | averageScore: averageScore, 21 | } 22 | } 23 | } 24 | 25 | /** 26 | * Writes the convertScoresToObject results to a machine-readable JSON file 27 | * @param input - The input to write to the file 28 | * @param reportOptions - The options for the report, specifically the folder path 29 | * @param compare - Whether the report is for a comparison or individual library 30 | */ 31 | export const machineReadableReport = async ( 32 | input: ScoresObject, 33 | reportOptions: ReportOptions = defaultConfigOptions.report, 34 | compare: boolean = false): Promise<void> => { 35 | // Default is to not have a folder path 36 | if (reportOptions.folderPath) { 37 | const filePath = `${reportOptions.folderPath}/result${compare ?
"-compare" : ""}.json`; 38 | let obj: ScoresObject = {}; 39 | try { 40 | const resultFile = await fs.readFile(filePath, "utf-8"); 41 | obj = JSON.parse(resultFile); 42 | } catch (err) { 43 | if (err instanceof Error && err.message.includes("ENOENT")) { 44 | obj = {} 45 | } else { 46 | throw err; 47 | } 48 | } 49 | // Assumes the data we add only has one project 50 | const projectName = Object.keys(input)[0]; 51 | 52 | // Adds in or updates the project data 53 | obj[projectName] = input[projectName]; 54 | await fs.writeFile(filePath, JSON.stringify(obj, null, 2)); 55 | } 56 | } 57 | -------------------------------------------------------------------------------- /src/app/scoreQA.ts: -------------------------------------------------------------------------------- 1 | import { GoogleGenAI } from '@google/genai'; 2 | import { QAEvalOptions, QuestionEvaluationOutput } from '../lib/types.js'; 3 | import { QuestionEvaluator } from '../services/questionEval.js'; 4 | import { config } from 'dotenv'; 5 | 6 | /** 7 | * Evaluates how well snippets answer a given question 8 | * @param question - The question to evaluate 9 | * @param context - The string of snippets to evaluate 10 | * @param configOptions - The options for the evaluation 11 | */ 12 | export async function scoreQA( 13 | question: string, 14 | context: string, 15 | configOptions?: QAEvalOptions 16 | ): Promise { 17 | // Load environment variables 18 | config(); 19 | 20 | // Initialize clients 21 | let client: GoogleGenAI; 22 | if (process.env.VERTEX_AI) { 23 | if (!process.env.GOOGLE_CLOUD_PROJECT) { 24 | throw new Error("GOOGLE_CLOUD_PROJECT environment variable is required for Vertex AI authentication!"); 25 | } 26 | const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT; 27 | const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || "global"; 28 | 29 | if (!process.env.GOOGLE_APPLICATION_CREDENTIALS) { 30 | throw new Error("GOOGLE_APPLICATION_CREDENTIALS not set!"); 31 | } 32 | 33 | client = new GoogleGenAI({ 34 | vertexai: true, 35 | project: GOOGLE_CLOUD_PROJECT, 36 | location: GOOGLE_CLOUD_LOCATION, 37 | 38 | }); 39 | } else { 40 | if (!process.env.GEMINI_API_TOKEN) { 41 | throw new Error("If using Vertex AI, set VERTEX_AI to true, otherwise GEMINI_API_TOKEN environment variable is needed"); 42 | } 43 | client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_TOKEN }); 44 | } 45 | 46 | const questionEvaluator = new QuestionEvaluator(client, configOptions?.llm, configOptions?.prompts); 47 | 48 | // Questions evaluation 49 | const questionResponse = await questionEvaluator.evaluateQuestion(question, context); 50 | 51 | if (configOptions?.report?.console) { 52 | console.log(`Score: ${questionResponse.questionAverageScore}`); 53 | console.log(`Explanation: ${questionResponse.questionExplanation}`); 54 | } 55 | 56 | return {questionAverageScore: questionResponse.questionAverageScore, questionExplanation: questionResponse.questionExplanation}; 57 | } 58 | -------------------------------------------------------------------------------- /src/lib/textEval.ts: -------------------------------------------------------------------------------- 1 | import * as metrics from '../lib/textMetrics.js'; 2 | 3 | export class TextEvaluator { 4 | private snippets: string; 5 | 6 | constructor(snippets: string) { 7 | this.snippets = snippets; 8 | } 9 | 10 | /** 11 | * Splits the entire snippet file into individual snippets 12 | * @returns The individual snippets 13 | */ 14 | splitSnippets(): string[] { 15 | return this.snippets.split("\n" + 
"-".repeat(40) + "\n"); 16 | } 17 | 18 | /** 19 | * Evaluates the formatting of snippets 20 | * @returns The average score for the library 21 | */ 22 | formatting(): number { 23 | try { 24 | const snippetsList = this.splitSnippets(); 25 | let improperFormatting = 0; 26 | 27 | for (const snippet of snippetsList) { 28 | 29 | const missingInfo = metrics.snippetIncomplete( snippet); 30 | const shortCode = metrics.codeSnippetLength(snippet); 31 | const descriptionForLang = metrics.languageDesc(snippet); 32 | const containsList = metrics.containsList(snippet); 33 | 34 | if ([missingInfo, shortCode, descriptionForLang, containsList].some(test => test)) { 35 | improperFormatting++; 36 | } 37 | } 38 | return ((snippetsList.length - improperFormatting) / snippetsList.length) * 100; 39 | 40 | } catch (error) { 41 | throw new Error("Error in formatting: " + error); 42 | } 43 | } 44 | 45 | /** 46 | * Evaluates the frequency of project metadata in the snippets 47 | * @returns The average score for the library 48 | */ 49 | metadata(): number { 50 | try { 51 | const snippetsList = this.splitSnippets(); 52 | let projectMetadata = 0; 53 | 54 | for (const snippet of snippetsList) { 55 | 56 | const citations = metrics.citations(snippet); 57 | const licenseInfo = metrics.licenseInfo(snippet); 58 | const directoryStructure = metrics.directoryStructure(snippet); 59 | if ([citations, licenseInfo, directoryStructure].some(test => test)) { 60 | projectMetadata++; 61 | } 62 | } 63 | return ((snippetsList.length - projectMetadata) / snippetsList.length) * 100; 64 | } catch (error) { 65 | throw new Error("Error in project metadata: " + error); 66 | } 67 | } 68 | 69 | /** 70 | * Evaluates the frequency of initialization information in the snippets 71 | * @returns The average score for the library 72 | */ 73 | initialization(): number { 74 | try { 75 | const snippetsList = this.splitSnippets(); 76 | let initializationCheck = 0; 77 | 78 | for (const snippet of snippetsList) { 79 | 80 | const imports = metrics.imports(snippet); 81 | const installs = metrics.installs(snippet); 82 | if ([imports, installs].some(test => test)) { 83 | initializationCheck++; 84 | } 85 | } 86 | return ((snippetsList.length - initializationCheck) / snippetsList.length) * 100; 87 | } catch (error) { 88 | throw new Error("Error in initialization: " + error); 89 | } 90 | } 91 | } -------------------------------------------------------------------------------- /src/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { TextEvaluator } from './textEval.js'; 2 | import { Metrics, Weights } from './types.js'; 3 | import { fuzzy } from 'fast-fuzzy'; 4 | import { defaultConfigOptions } from '../config/options.js'; 5 | 6 | /** 7 | * Identifies the product of a library 8 | * @param library - The library to identify the product of 9 | * @returns The name of the product for the library 10 | */ 11 | export function identifyProduct(library: string): string { 12 | const libraryExtensionsRemoved = library.replace(/(\.com|\.org|docs)/g, "") 13 | const libraryNormalized = libraryExtensionsRemoved.replace(/(\.|\/\_)/g, "-").toLowerCase(); 14 | const prodSplit = libraryNormalized.split("/"); 15 | const finalProduct = prodSplit[prodSplit.length - 1].trim(); 16 | return finalProduct; 17 | } 18 | 19 | /** 20 | * Checks if the products are the same 21 | * @param prods - The products to check 22 | * @returns If the products are the same, returns the first product, otherwise throws an error 23 | */ 24 | export function 
checkSameProduct(prods: string[]): string { 25 | const prod1 = prods[0]; 26 | const prod2 = prods[1]; 27 | const matchScore = fuzzy(prod1, prod2); 28 | if (matchScore < 0.8) { 29 | throw new Error(`${prods[0]} and ${prods[1]} are not the same product`); 30 | } 31 | return prod1 32 | } 33 | 34 | /** 35 | * Runs all three text analysis metrics on the snippets 36 | * @param snippets - The snippets to run text analysis on 37 | * @returns The average scores for each metric 38 | */ 39 | export function runTextAnalysis(snippets: string): { 40 | formatting: number, 41 | metadata: number, 42 | initialization: number 43 | } { 44 | const textEvaluator = new TextEvaluator(snippets); 45 | const formatting = textEvaluator.formatting(); 46 | const metadata = textEvaluator.metadata(); 47 | const initialization = textEvaluator.initialization(); 48 | return { formatting, metadata, initialization }; 49 | } 50 | 51 | /** 52 | * Calculates the final average score based on context, text analysis, and LLM metrics 53 | * @param scores - The scores used to calculate the weighted average 54 | * @param weights - The weights to use for the weighted average 55 | * @returns the average score 56 | */ 57 | export function calculateAverageScore(scores: Metrics, weights: Weights = defaultConfigOptions.weights): number { 58 | const scoresKeys = Object.keys(scores); 59 | const weightsKeys = Object.keys(weights); 60 | 61 | // Check that the weights sum to 1 62 | const EPS = 0.000001; 63 | const weightsSum = Object.values(weights).reduce((a, b) => a + b, 0); 64 | if (Math.abs(weightsSum - 1) > EPS) { 65 | throw new Error("Weights must sum to 1"); 66 | } 67 | 68 | // Check that the weights and scores have the same keys 69 | if (weightsKeys.length !== scoresKeys.length || !scoresKeys.every(key => weightsKeys.includes(key))) { 70 | throw new Error("Weights and scores have different number of keys or keys are not the same"); 71 | } 72 | 73 | // Calculate the average score (weighted) 74 | const averageScore = scoresKeys.reduce((total, key) => { 75 | const score = scores[key as keyof Metrics]; 76 | const weight = weights[key as keyof Weights]; 77 | return total + score * weight; 78 | }, 0); 79 | return averageScore; 80 | } 81 | -------------------------------------------------------------------------------- /tests/textMetricsTester.ts: -------------------------------------------------------------------------------- 1 | import { program } from 'commander'; 2 | import * as textMetrics from '../src/lib/textMetrics.ts'; 3 | import { scrapeContext7Snippets } from '../src/services/context7.ts'; 4 | import { buildContext7Header } from '../src/config/header.ts'; 5 | import { config } from 'dotenv'; 6 | 7 | // Note: test URL information may change when snippets are refreshed on website 8 | 9 | config(); 10 | 11 | if (!process.env.CONTEXT7_API_TOKEN) { 12 | throw new Error("CONTEXT7_API_TOKEN environment variable is required for Context7 API authentication!"); 13 | } 14 | const headerConfig = buildContext7Header(process.env.CONTEXT7_API_TOKEN); 15 | 16 | async function textMetricsTester(): Promise { 17 | for (const [metricName, libraries] of Object.entries(testCases)) { 18 | for (const [library, answer] of Object.entries(libraries)) { 19 | const scrapedSnippets = await scrapeContext7Snippets(library, headerConfig); 20 | const snippets = scrapedSnippets.split('-'.repeat(40)); 21 | if (snippets.some(snippet => (textMetrics as any)[metricName](snippet)) === answer) { 22 | console.log(`✅ ${metricName} is correct for ${library}`); 23 | } else { 24 
| console.log(`❌ ${metricName} is incorrect for ${library}`); 25 | } 26 | } 27 | } 28 | } 29 | 30 | const testCases: { [key: string]: Record } = { 31 | snippetIncomplete: { 32 | 'steamre/steamkit': true, 33 | '/1password/onepassword-sdk-js': false, 34 | }, 35 | codeSnippetLength: { 36 | 'steamre/steamkit': true, 37 | '/eclipse-4diac/4diac-forte': true, 38 | '/context7/coderabbitai_github_io-bitbucket': true, 39 | '/context7/tailwindcss': true, 40 | '/humanlayer/12-factor-agents': true, 41 | }, 42 | multipleCode: { 43 | '/websites/tailwindcss-com_vercel_app': false, 44 | '/1password/onepassword-sdk-js': true, 45 | '/nvidia-omniverse/ext-7z': true, 46 | }, 47 | languageDesc: { 48 | '/eclipse-4diac/4diac-forte': true, 49 | '/technomancy-dev/00': true, 50 | '/pnxenopoulos/awpy': true, 51 | '/aflplusplus/aflplusplus': false, 52 | }, 53 | containsList: { 54 | '/directus/directus': true, 55 | '/context7/ctrl-plex_vercel_app': true, 56 | '/mhsanaei/3x-ui': true, 57 | }, 58 | citations: { 59 | '/cleardusk/3ddfa_v2': true, 60 | '/context7/zh_d2l_ai': false, 61 | }, 62 | licenseInfo: { 63 | '/ralfbiedert/cheats.rs': true, 64 | '/stanfordnlp/corenlp': true, 65 | '/n8n-io/n8n-docs': false, 66 | }, 67 | directoryStructure: { 68 | '/shadcn-ui/ui': false, 69 | '/context7/cuelang': true, 70 | '/jpressprojects/jpress': false, 71 | '/czelabueno/jai-workflow': true, 72 | }, 73 | imports: { 74 | '/shuvijs/shuvi': false, 75 | '/adn-devtech/3dsmax-python-howtos': true, 76 | '/sortablejs/sortable': true, 77 | '/jawah/niquests': true, 78 | }, 79 | installs: { 80 | '/fbsamples/360-video-player-for-android': true, 81 | '/wangluozhe/requests': true, 82 | '/jawah/niquests': true, 83 | '/theailanguage/a2a_samples': true, 84 | }, 85 | }; 86 | 87 | program 88 | .action(() => { 89 | textMetricsTester(); 90 | }); 91 | 92 | program.parse(process.argv); -------------------------------------------------------------------------------- /src/services/prompts/handler.ts: -------------------------------------------------------------------------------- 1 | import * as prompts from "./templates.js"; 2 | 3 | /** 4 | * Determines if the prompt will be the default or one provided by the user. 5 | * All prompts except for searchPrompt can be modified by the user. 
6 | */ 7 | 8 | export const searchTopicsPromptHandler = (product: string, questions: string, newPrompt?: string) => { 9 | let prompt = newPrompt || prompts.searchTopicsPrompt; 10 | if (!prompt.includes("{{product}}") || !prompt.includes("{{questions}}")) { 11 | throw new Error("Prompt does not contain {{product}} or {{questions}}"); 12 | } 13 | const finalPrompt = prompt.replace("{{product}}", product).replace("{{questions}}", questions); 14 | return finalPrompt; 15 | } 16 | 17 | export const questionEvaluationPromptHandler = (contexts: string[][], questions: string, newPrompt?: string) => { 18 | let prompt = newPrompt || prompts.questionEvaluationPrompt; 19 | if (!prompt.includes("{{questions}}") || !prompt.includes("{{contexts}}")) { 20 | throw new Error("Prompt does not contain {{questions}} or {{contexts}}"); 21 | } 22 | const finalPrompt = prompt.replace("{{questions}}", questions).replace("{{contexts}}", contexts.toString()); 23 | return finalPrompt; 24 | } 25 | 26 | // questionEvaluationPromptHandler adapted for scoreQA 27 | export const scoreQAQuestionPromptHandler = (context: string, question: string, newPrompt?: string) => { 28 | let prompt = newPrompt || prompts.scoreQAQuestionPrompt; 29 | if (!prompt.includes("{{questions}}") || !prompt.includes("{{context}}")) { 30 | throw new Error("Prompt does not contain {{questions}} or {{context}}"); 31 | } 32 | const finalPrompt = prompt.replace("{{questions}}", question).replace("{{context}}", context); 33 | return finalPrompt; 34 | } 35 | 36 | export const questionEvaluationPromptCompareHandler = (contexts: string[][][], questions: string, newPrompt?: string) => { 37 | let prompt = newPrompt || prompts.questionEvaluationPromptCompare; 38 | if (!prompt.includes("{{questions}}") || !prompt.includes("{{contexts[0]}}") || !prompt.includes("{{contexts[1]}}")) { 39 | throw new Error("Prompt does not contain {{questions}} or {{contexts[0]}} or {{contexts[1]}}"); 40 | } 41 | const finalPrompt = prompt.replace("{{questions}}", questions).replace("{{contexts[0]}}", contexts[0].toString()).replace("{{contexts[1]}}", contexts[1].toString()); 42 | return finalPrompt; 43 | } 44 | 45 | export const llmEvaluationPromptHandler = (snippets: string, snippetDelimiter: string, newPrompt?: string) => { 46 | let prompt = newPrompt || prompts.llmEvaluationPrompt; 47 | if (!prompt.includes("{{snippets}}") || !prompt.includes("{{snippetDelimiter}}")) { 48 | throw new Error("Prompt does not contain {{snippets}} or {{snippetDelimiter}}"); 49 | } 50 | const finalPrompt = prompt.replace("{{snippets}}", snippets).replace("{{snippetDelimiter}}", snippetDelimiter); 51 | return finalPrompt; 52 | } 53 | 54 | export const llmEvaluationPromptCompareHandler = (snippets: string[], snippetDelimiter: string, newPrompt?: string) => { 55 | let prompt = newPrompt || prompts.llmEvaluationPromptCompare; 56 | if (!prompt.includes("{{snippets[0]}}") || !prompt.includes("{{snippets[1]}}") || !prompt.includes("{{snippetDelimiter}}")) { 57 | throw new Error("Prompt does not contain {{snippets[0]}} or {{snippets[1]}} or {{snippetDelimiter}}"); 58 | } 59 | const finalPrompt = prompt.replace("{{snippets[0]}}", snippets[0]).replace("{{snippets[1]}}", snippets[1]).replace("{{snippetDelimiter}}", snippetDelimiter); 60 | return finalPrompt; 61 | } 62 | -------------------------------------------------------------------------------- /src/services/llmEval.ts: -------------------------------------------------------------------------------- 1 | import { GoogleGenAI, Type } from '@google/genai'; 2 
| import { LLMScores, LLMScoresCompare, LLMOptions, Prompts } from '../lib/types.js'; 3 | import { runLLM } from './llmUtils.js'; 4 | import { llmEvaluationPromptHandler, llmEvaluationPromptCompareHandler } from './prompts/handler.js'; 5 | import { defaultConfigOptions } from '../config/options.js'; 6 | 7 | export class LLMEvaluator { 8 | private client: GoogleGenAI; 9 | private llmConfig: LLMOptions; 10 | private prompts?: Prompts; 11 | 12 | constructor( 13 | client: GoogleGenAI, 14 | llmConfig: LLMOptions = defaultConfigOptions.llm, 15 | prompts?: Prompts) { 16 | this.client = client; 17 | this.llmConfig = llmConfig; 18 | this.prompts = prompts; 19 | } 20 | 21 | /** 22 | * Evaluates the quality of the snippets based on 3 criteria: unique information, clarity, and correct syntax 23 | * @returns The average score and explanation for the snippet collection 24 | */ 25 | async llmEvaluate(snippets: string): Promise { 26 | const snippetDelimiter = "\n" + "-".repeat(40) + "\n"; 27 | const prompt = llmEvaluationPromptHandler(snippets, snippetDelimiter, this.prompts?.llmEvaluation); 28 | 29 | const config: object = { 30 | responseMimeType: 'application/json', 31 | responseSchema: { 32 | type: 'object', 33 | properties: { 34 | llmAverageScore: { type: Type.NUMBER }, 35 | llmExplanation: { type: Type.STRING }, 36 | }, 37 | required: ["llmAverageScore", "llmExplanation"], 38 | }, 39 | ...this.llmConfig 40 | } 41 | const response = await runLLM(prompt, config, this.client); 42 | const jsonResponse = JSON.parse(response); 43 | if (jsonResponse.llmAverageScore == undefined || jsonResponse.llmExplanation == undefined) { 44 | throw new Error("LLM scores are undefined"); 45 | } else { 46 | const llmAverageScore = jsonResponse.llmAverageScore; 47 | const llmExplanation = jsonResponse.llmExplanation; 48 | return { llmAverageScore, llmExplanation }; 49 | } 50 | } 51 | 52 | /** 53 | * Compares the quality of two different snippet sources using 3 criteria: unique information, clarity, and correct syntax 54 | * @returns The average scores and explanations for the snippet collections 55 | */ 56 | async llmEvaluateCompare(snippets: string[]): Promise { 57 | const snippetDelimiter = "\n" + "-".repeat(40) + "\n"; 58 | const prompt = llmEvaluationPromptCompareHandler(snippets, snippetDelimiter, this.prompts?.llmEvaluation); 59 | const config: object = { 60 | responseMimeType: 'application/json', 61 | responseSchema: { 62 | type: 'object', 63 | properties: { 64 | llmAverageScores: { type: Type.ARRAY, minItems: 2, maxItems: 2, items: { type: Type.NUMBER } }, 65 | llmExplanations: { type: Type.ARRAY, minItems: 2, maxItems: 2, items: { type: Type.STRING } }, 66 | }, 67 | required: ["llmAverageScores", "llmExplanations"], 68 | }, 69 | ...this.llmConfig 70 | } 71 | const response = await runLLM(prompt, config, this.client); 72 | const jsonResponse = JSON.parse(response); 73 | if (jsonResponse.llmAverageScores == undefined || jsonResponse.llmExplanations == undefined) { 74 | throw new Error("LLM scores are undefined"); 75 | } else { 76 | const llmAverageScores = jsonResponse.llmAverageScores; 77 | const llmExplanations = jsonResponse.llmExplanations; 78 | return { llmAverageScores, llmExplanations }; 79 | } 80 | } 81 | } 82 | 83 | -------------------------------------------------------------------------------- /src/lib/types.ts: -------------------------------------------------------------------------------- 1 | export interface EvalOptions { 2 | report?: { 3 | console?: boolean; // Whether to print to console 4 | 
folderPath?: string; // Where to save the human readable or machine readable report 5 | humanReadable?: boolean; // Whether to save the human readable report to a file 6 | returnScore?: boolean; // Whether to return the average score for the library 7 | }; 8 | // Weights for the different metrics, must sum to 1 9 | weights?: { 10 | question: number; 11 | llm: number; 12 | formatting: number; 13 | metadata: number; 14 | initialization: number; 15 | }; 16 | // Gemini API configuration options 17 | llm?: { 18 | temperature?: number; 19 | topP?: number; 20 | topK?: number; 21 | candidateCount?: number; 22 | seed?: number; 23 | }; 24 | // Prompts for LLM-based evaluation metrics 25 | prompts?: { 26 | searchTopics?: string; 27 | questionEvaluation?: string; 28 | llmEvaluation?: string; 29 | } 30 | } 31 | 32 | export interface QAEvalOptions { 33 | report?: { 34 | console?: boolean; 35 | }; 36 | llm?: { 37 | temperature?: number; 38 | topP?: number; 39 | topK?: number; 40 | candidateCount?: number; 41 | seed?: number; 42 | }; 43 | // Prompts for LLM-based evaluation metrics 44 | prompts?: { 45 | questionEvaluation?: string; 46 | } 47 | } 48 | 49 | export interface HeaderConfig { 50 | headers: { 51 | "Authorization": string; 52 | } 53 | } 54 | 55 | export interface ReportOptions { 56 | console?: boolean; 57 | folderPath?: string; 58 | humanReadable?: boolean; 59 | returnScore?: boolean; 60 | } 61 | 62 | export interface Weights { 63 | question: number; 64 | llm: number; 65 | formatting: number; 66 | metadata: number; 67 | initialization: number; 68 | } 69 | 70 | export interface LLMOptions { 71 | temperature?: number; 72 | topP?: number; 73 | topK?: number; 74 | candidateCount?: number; 75 | seed?: number; 76 | } 77 | 78 | export interface Prompts { 79 | searchTopics?: string; 80 | questionEvaluation?: string; 81 | llmEvaluation?: string; 82 | } 83 | 84 | export interface QuestionEvaluationOutput { 85 | questionAverageScore: number; 86 | questionExplanation: string; 87 | } 88 | 89 | export interface QuestionEvaluationPairOutput { 90 | questionAverageScores: number[]; 91 | questionExplanations: string[]; 92 | } 93 | 94 | export type Category = "TITLE" | "DESCRIPTION" | "SOURCE" | "LANGUAGE" | "CODE"; 95 | 96 | export interface LLMScoresCompare { 97 | llmAverageScores: number[]; 98 | llmExplanations: string[]; 99 | } 100 | export interface LLMScores { 101 | llmAverageScore: number; 102 | llmExplanation: string; 103 | } 104 | 105 | export interface Metrics { 106 | question: number; 107 | llm: number; 108 | formatting: number; 109 | metadata: number; 110 | initialization: number; 111 | } 112 | 113 | export interface ProjectData { 114 | scores: { 115 | question: number; 116 | llm: number; 117 | formatting: number; 118 | metadata: number; 119 | initialization: number; 120 | }; 121 | averageScore: number; 122 | } 123 | 124 | export interface Scores { 125 | question: number; 126 | llm: number; 127 | formatting: number; 128 | metadata: number; 129 | initialization: number; 130 | } 131 | 132 | export interface FullResults { 133 | averageScore: number, 134 | questionAverageScore: number, 135 | questionExplanation: string, 136 | llmAverageScore: number, 137 | llmExplanation: string, 138 | formattingAvgScore: number, 139 | metadataAvgScore: number, 140 | initializationAvgScore: number, 141 | } 142 | 143 | export interface ScoresObject { 144 | [productName: string]: { 145 | scores: { 146 | question: number; 147 | llm: number; 148 | formatting: number; 149 | metadata: number; 150 | initialization: number; 151 | 
}; 152 | averageScore: number; 153 | } 154 | } -------------------------------------------------------------------------------- /src/app/getScore.ts: -------------------------------------------------------------------------------- 1 | import { GoogleGenAI } from '@google/genai'; 2 | import { buildContext7Header } from '../config/header.js'; 3 | import { EvalOptions } from '../lib/types.js'; 4 | import { runTextAnalysis, calculateAverageScore } from '../lib/utils.js'; 5 | import { QuestionEvaluator } from '../services/questionEval.js'; 6 | import { LLMEvaluator } from '../services/llmEval.js' 7 | import { checkRedirects, scrapeContext7Snippets } from '../services/context7.js'; 8 | import { machineReadableReport, convertScoresToObject } from '../reports/machine.js'; 9 | import { humanReadableReport } from '../reports/human.js'; 10 | import { identifyProduct } from '../lib/utils.js'; 11 | import { config } from 'dotenv'; 12 | import { defaultConfigOptions } from '../config/options.js'; 13 | 14 | /** 15 | * Evaluates the snippets of a library using 5 metrics 16 | * @param library - The library to evaluate 17 | * @param questions - The questions used to evaluate the snippets 18 | * @param configOptions - The options for evaluation 19 | */ 20 | export async function getScore( 21 | library: string, 22 | questions: string, 23 | configOptions?: EvalOptions 24 | ): Promise { 25 | config(); 26 | 27 | if (!process.env.CONTEXT7_API_TOKEN) { 28 | throw new Error("CONTEXT7_API_TOKEN environment variable is required for Context7 API authentication!"); 29 | } 30 | // Build header config for Context7 API 31 | const headerConfig = buildContext7Header(process.env.CONTEXT7_API_TOKEN); 32 | 33 | // Initialize clients 34 | let client: GoogleGenAI; 35 | if (process.env.VERTEX_AI) { 36 | if (!process.env.GOOGLE_CLOUD_PROJECT) { 37 | throw new Error("GOOGLE_CLOUD_PROJECT environment variable is required for Vertex AI authentication!"); 38 | } 39 | const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT; 40 | const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || "global"; 41 | 42 | if (!process.env.GOOGLE_APPLICATION_CREDENTIALS) { 43 | throw new Error("GOOGLE_APPLICATION_CREDENTIALS not set!"); 44 | } 45 | 46 | client = new GoogleGenAI({ 47 | vertexai: true, 48 | project: GOOGLE_CLOUD_PROJECT, 49 | location: GOOGLE_CLOUD_LOCATION, 50 | 51 | }); 52 | } else { 53 | if (!process.env.GEMINI_API_TOKEN) { 54 | throw new Error("If using Vertex AI, set VERTEX_AI to true, otherwise GEMINI_API_TOKEN environment variable is needed"); 55 | } 56 | client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_TOKEN }); 57 | } 58 | 59 | // Identify product of library and redirections 60 | const redirect = await checkRedirects(library, headerConfig); 61 | const product = identifyProduct(redirect); 62 | 63 | // Get questions file for product 64 | const questionEvaluator = new QuestionEvaluator(client, configOptions?.llm, configOptions?.prompts); 65 | 66 | // Generate search topics and fetch relevant snippets 67 | const searchTopics = await questionEvaluator.generateSearchTopics(product, questions); 68 | const contexts = await questionEvaluator.fetchRelevantSnippets(searchTopics, redirect, headerConfig); 69 | 70 | // Questions evaluation 71 | const questionResponse = await questionEvaluator.evaluateQuestions(questions, contexts); 72 | 73 | // Scrape Context7 snippets 74 | const snippets = await scrapeContext7Snippets(redirect, headerConfig); 75 | 76 | // LLM evaluation 77 | const llm_evaluator = new 
LLMEvaluator(client, configOptions?.llm, configOptions?.prompts); 78 | const llmResponse = await llm_evaluator.llmEvaluate(snippets); 79 | 80 | // Text analysis 81 | const { 82 | formatting, 83 | metadata, 84 | initialization, 85 | } = runTextAnalysis(snippets); 86 | 87 | // Calculate scores 88 | const scores = { 89 | question: questionResponse.questionAverageScore, 90 | llm: llmResponse.llmAverageScore, 91 | formatting: formatting, 92 | metadata: metadata, 93 | initialization: initialization 94 | } 95 | const averageScore = calculateAverageScore(scores, configOptions?.weights); 96 | const roundedAverageScore = Math.round(averageScore); 97 | 98 | // Write results 99 | const fullResults = { 100 | averageScore: roundedAverageScore, 101 | questionAverageScore: Math.round(questionResponse.questionAverageScore), 102 | questionExplanation: questionResponse.questionExplanation, 103 | llmAverageScore: Math.round(llmResponse.llmAverageScore), 104 | llmExplanation: llmResponse.llmExplanation, 105 | formattingAvgScore: Math.round(formatting), 106 | metadataAvgScore: Math.round(metadata), 107 | initializationAvgScore: Math.round(initialization), 108 | } 109 | await humanReadableReport(redirect, fullResults, configOptions?.report, false); 110 | const scoresObject = convertScoresToObject(redirect, scores, roundedAverageScore); 111 | await machineReadableReport(scoresObject, configOptions?.report, false); 112 | 113 | if (configOptions?.report?.returnScore ?? defaultConfigOptions.report.returnScore) { 114 | return roundedAverageScore; 115 | } 116 | } 117 | -------------------------------------------------------------------------------- /src/app/compareLib.ts: -------------------------------------------------------------------------------- 1 | import { GoogleGenAI } from '@google/genai'; 2 | import { buildContext7Header } from '../config/header.js'; 3 | import { EvalOptions } from '../lib/types.js'; 4 | import { runTextAnalysis, calculateAverageScore } from '../lib/utils.js'; 5 | import { QuestionEvaluator } from '../services/questionEval.js'; 6 | import { LLMEvaluator } from '../services/llmEval.js' 7 | import { checkRedirects, scrapeContext7Snippets } from '../services/context7.js'; 8 | import { machineReadableReport, convertScoresToObject } from '../reports/machine.js'; 9 | import { humanReadableReport } from '../reports/human.js'; 10 | import { identifyProduct } from '../lib/utils.js'; 11 | import { checkSameProduct } from '../lib/utils.js'; 12 | import { config } from 'dotenv'; 13 | import { defaultConfigOptions } from '../config/options.js'; 14 | 15 | /** 16 | * Compares the snippets of two libraries using 5 metrics 17 | * @param library1 - The first library to evaluate 18 | * @param library2 - The second library to evaluate 19 | * @param questions - The questions used to evaluate the snippets 20 | * @param configOptions - The options for evaluation 21 | */ 22 | export async function compareLibraries( 23 | library1: string, 24 | library2: string, 25 | questions: string, 26 | configOptions?: EvalOptions 27 | ): Promise<Record<string, number> | undefined> { 28 | config(); 29 | 30 | if (!process.env.CONTEXT7_API_TOKEN) { 31 | throw new Error("CONTEXT7_API_TOKEN environment variable is required for Context7 API authentication!"); 32 | } 33 | // Build header config for Context7 API 34 | const headerConfig = buildContext7Header(process.env.CONTEXT7_API_TOKEN); 35 | 36 | // Initialize clients 37 | let client: GoogleGenAI; 38 | if (process.env.VERTEX_AI) { 39 | if (!process.env.GOOGLE_CLOUD_PROJECT) { 40 | throw new Error("GOOGLE_CLOUD_PROJECT
environment variable is required for Vertex AI authentication!"); 41 | } 42 | const GOOGLE_CLOUD_PROJECT = process.env.GOOGLE_CLOUD_PROJECT; 43 | const GOOGLE_CLOUD_LOCATION = process.env.GOOGLE_CLOUD_LOCATION || "global"; 44 | 45 | if (!process.env.GOOGLE_APPLICATION_CREDENTIALS) { 46 | throw new Error("GOOGLE_APPLICATION_CREDENTIALS not set!"); 47 | } 48 | 49 | client = new GoogleGenAI({ 50 | vertexai: true, 51 | project: GOOGLE_CLOUD_PROJECT, 52 | location: GOOGLE_CLOUD_LOCATION, 53 | 54 | }); 55 | } else { 56 | if (!process.env.GEMINI_API_TOKEN) { 57 | throw new Error("If using Vertex AI, set VERTEX_AI to true, otherwise GEMINI_API_TOKEN environment variable is needed"); 58 | } 59 | client = new GoogleGenAI({ apiKey: process.env.GEMINI_API_TOKEN }); 60 | } 61 | 62 | // Identify products of libraries and redirections 63 | const libraryList = [library1, library2]; 64 | let prods = []; 65 | let newLibraryList = []; 66 | for (const library of libraryList) { 67 | const redirect = await checkRedirects(library, headerConfig); 68 | newLibraryList.push(redirect); 69 | const prod = identifyProduct(redirect); 70 | prods.push(prod); 71 | } 72 | 73 | // Check that the libraries have the same product 74 | const product = checkSameProduct(prods); 75 | 76 | const questionEvaluator = new QuestionEvaluator(client, configOptions?.llm, configOptions?.prompts); 77 | 78 | // Generate search topics and fetch relevant snippets 79 | const searchTopics = await questionEvaluator.generateSearchTopics(product, questions); 80 | const contexts = await Promise.all(newLibraryList.map(newLibrary => 81 | questionEvaluator.fetchRelevantSnippets(searchTopics, newLibrary, headerConfig) 82 | )); 83 | 84 | // Questions evaluation 85 | const questionResponse = await questionEvaluator.evaluateQuestionsPair(questions, contexts); 86 | 87 | // Scrape Context7 snippets 88 | const snippets = await Promise.all(newLibraryList.map(newLibrary => 89 | scrapeContext7Snippets(newLibrary, headerConfig) 90 | )); 91 | 92 | // LLM evaluation 93 | const llm_evaluator = new LLMEvaluator(client, configOptions?.llm, configOptions?.prompts); 94 | const llmResponse = await llm_evaluator.llmEvaluateCompare(snippets); 95 | 96 | let returnScores: Record = {}; 97 | for (let i = 0; i < newLibraryList.length; i++) { 98 | 99 | // Text analysis 100 | const { 101 | formatting, 102 | metadata, 103 | initialization, 104 | } = runTextAnalysis(snippets[i]); 105 | 106 | // Calculate scores 107 | const scores = { 108 | question: questionResponse.questionAverageScores[i], 109 | llm: llmResponse.llmAverageScores[i], 110 | formatting: formatting, 111 | metadata: metadata, 112 | initialization: initialization, 113 | } 114 | const averageScore = calculateAverageScore(scores, configOptions?.weights); 115 | const roundedAverageScore = Math.round(averageScore); 116 | 117 | // Write results 118 | const fullResults = { 119 | averageScore: roundedAverageScore, 120 | questionAverageScore: Math.round(questionResponse.questionAverageScores[i]), 121 | questionExplanation: questionResponse.questionExplanations[i], 122 | llmAverageScore: Math.round(llmResponse.llmAverageScores[i]), 123 | llmExplanation: llmResponse.llmExplanations[i], 124 | formattingAvgScore: Math.round(formatting), 125 | metadataAvgScore: Math.round(metadata), 126 | initializationAvgScore: Math.round(initialization), 127 | } 128 | returnScores[newLibraryList[i]] = roundedAverageScore; 129 | await humanReadableReport(newLibraryList[i], fullResults, configOptions?.report, true); 130 | const scoresObject = 
convertScoresToObject(newLibraryList[i], scores, roundedAverageScore); 131 | await machineReadableReport(scoresObject, configOptions?.report, true); 132 | } 133 | 134 | if (configOptions?.report?.returnScore ?? defaultConfigOptions.report.returnScore) { 135 | return returnScores as Record; 136 | } 137 | } 138 | -------------------------------------------------------------------------------- /src/services/prompts/templates.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * All LLM Prompts 3 | */ 4 | 5 | export const searchPrompt = ` 6 | Generate 15 questions, 10 of which should be common and practical 7 | questions that developers frequently ask when using the product {{product}}. 8 | These should represent real-world use cases and coding challenges. 9 | 10 | Add 5 more questions that might not be very common but relevant to edge cases and 11 | less common use cases. Format each question on a new line, numbered 1-15. 12 | Questions should be specific and actionable, the kind that a developer would ask an 13 | AI coding assistant. 14 | 15 | Focus on diverse topics like: 16 | - Component building (cards, navigation, forms, modals) 17 | - Responsive design patterns 18 | - Animation and transitions 19 | - Dark mode implementation 20 | - Custom styling and configuration 21 | - Performance optimization 22 | - Common UI patterns 23 | 24 | Example questions: 25 | 1. "Show me how to build a card component with shadow, hover effects, and truncated text in {{product}}" 26 | 2. "How to create a responsive navigation bar with dropdown menus in {{product}}" 27 | 28 | Do not include any headers in your response, only the list of questions. You may search 29 | Google for the questions. 30 | ` 31 | export const searchTopicsPrompt = ` 32 | For each question about {{product}}, generate 5 relevant search topics 33 | as comma-separated keywords/phrases. These topics should help find the most 34 | relevant documentation and code examples. 35 | 36 | Questions: {{questions}} 37 | `; 38 | 39 | export const questionEvaluationPrompt = ` 40 | You are evaluating documentation context for its quality and relevance in helping an AI 41 | coding assistant answer the following questions: 42 | 43 | Questions: {{questions}} 44 | 45 | Context: {{contexts}} 46 | 47 | For each question, evaluate and score the context from 0-100 based on the following criteria: 48 | 1. Relevance to the specific question (50%) 49 | 2. Practical applicability (15%) 50 | 3. Coverage of requested features (35%) 51 | 52 | Your response should contain the overall score and an explanation. 53 | Make sure that the explanation is 1-2 sentences. 54 | Separate each sentence in the explanation with a new line. 55 | `; 56 | 57 | export const scoreQAQuestionPrompt = ` 58 | You are evaluating documentation context for its quality and relevance in helping an AI 59 | coding assistant answer the following question: 60 | 61 | Questions: {{questions}} 62 | 63 | Context: {{context}} 64 | 65 | For each question, evaluate and score how well the snippets in the context answer the question from 0-100. 66 | If the snippets answer it completely, then the score should be 100. If the 67 | snippets only partially answer the question or are relevant to answering it, then a score of 68 | around 50 is given. If the snippets are completely unrelated to the question, then the score 69 | should be 0. 70 | 71 | Your response should contain the overall score for the context and an explanation of the score. 
72 | Make sure that the explanation is 1-2 sentences. 73 | Separate each sentence in the explanation with a new line. 74 | `; 75 | 76 | export const questionEvaluationPromptCompare = ` 77 | You are evaluating two different documentation contexts for their quality and relevance in helping an AI 78 | coding assistant answer the following questions: 79 | 80 | Questions: {{questions}} 81 | 82 | Contexts ({{contexts[0]}} and {{contexts[1]}}): 83 | 84 | For each question, evaluate and score each context from 0-100 based on the following criteria: 85 | 1. Relevance to the specific question (50%) 86 | 2. Practical applicability (15%) 87 | 3. Coverage of requested features (35%) 88 | 89 | Your response should include a list that contains the overall score for each context and a list of two explanations, 90 | one for each context. Make sure that each explanation is 1-2 sentences and each sentence is separated by a new line. 91 | `; 92 | 93 | export const llmEvaluationPrompt = ` 94 | Rate the quality of the snippets using the criteria. 95 | Your total score for the snippets should be between 0 and 100, 96 | where 0 indicates that the snippets did not meet the criteria 97 | at all, 50 indicates that the criteria were partially met, and 100 indicates that the 98 | criteria were fully met with no room for improvement. 99 | The snippets are separated by {{snippetDelimiter}} 100 | and the code blocks are enclosed in \`\`\`. 101 | Your scores should represent a ratio of how many 102 | snippets meet the criterion out of the total number of snippets. 103 | 104 | Criteria: 105 | 1. Unique Information (30%): Snippets contain unique information that is not already included in 106 | another snippet. There can be some overlap, but the snippets should not be identical. 107 | 2. Clarity (30%): There are no snippets that are confusingly worded or unclear. This could include grammatical 108 | or spelling errors. Titles and descriptions are sensible (e.g., the description shouldn't be about requests 109 | when the code is about visualizing data) and all the text, even in the code snippets, is in English. 110 | 3. Correct Syntax (40%): No snippets contain any obvious syntax errors. Snippets are formatted in such a way 111 | that you can easily isolate the code (e.g., no placeholders or ellipses). The programming language of 112 | the code snippet is correct. 113 | 114 | In your response, include the average score and no more than a 1-2 sentence explanation for the score. 115 | Make sure that each sentence is separated by a new line. 116 | 117 | Snippets: {{snippets}} 118 | `; 119 | 120 | export const llmEvaluationPromptCompare = ` 121 | Compare the quality of two different snippet sources using the criteria. 122 | Your total score for the snippets should be between 0 and 100, 123 | where 0 indicates that the snippets did not meet the criteria 124 | at all, 50 indicates that the criteria were partially met, and 100 indicates that the 125 | criteria were fully met with no room for improvement. 126 | The snippets are separated by {{snippetDelimiter}} 127 | and the code blocks are enclosed in \`\`\`. 128 | Your scores should represent a ratio of how many 129 | snippets meet the criterion out of the total number of snippets. 130 | 131 | Criteria: 132 | 1. Unique Information (30%): Snippets contain unique information that is not already included in 133 | another snippet. There can be some overlap, but the snippets should not be identical. 134 | 2. Clarity (30%): There are no snippets that are confusingly worded or unclear.
This could be due to grammatical
135 | or spelling errors. Titles and descriptions are sensible (e.g., the description shouldn't be about requests
136 | when the code is about visualizing data) and all the text, even in the code snippets, is in English.
137 | 3. Correct Syntax (40%): No snippets contain any obvious syntax errors. Snippets are formatted in such a way
138 | that you can easily isolate the code (e.g., no placeholders or ellipses). The programming language of
139 | the code snippet is correct.
140 | 
141 | In your response, include the average score and a 1-2 sentence explanation of the score for each snippet source.
142 | Make sure that each sentence is separated by a new line.
143 | 
144 | Snippets 1: {{snippets[0]}}
145 | Snippets 2: {{snippets[1]}}
146 | `;
147 | 
--------------------------------------------------------------------------------
/src/lib/textMetrics.ts:
--------------------------------------------------------------------------------
1 | import { Category } from "../lib/types.js";
2 | 
3 | /**
4 | * Accesses the specified category of the snippet
5 | * @param snippet - The snippet to access
6 | * @param category - The category to access. Must be one of the following: TITLE, DESCRIPTION, SOURCE, LANGUAGE, CODE (case-sensitive)
7 | * @returns The category content; if the category is LANGUAGE or CODE, everything that occurs after the category keyword
8 | */
9 | function accessCategory(snippet: string, category: Category): string | string[] {
10 | if (!snippet) {
11 | throw new Error("Snippet must be a non-empty string");
12 | }
13 | const snippetLines = snippet.split(/\r?\n/);
14 | if (category === "LANGUAGE" || category === "CODE") {
15 | // returns [content] if category exists, [] if it doesn't
16 | return snippet.split(`${category}:`).slice(1);
17 | }
18 | for (const line of snippetLines) {
19 | if (line.trim().startsWith(`${category}:`)) {
20 | return line.replace(`${category}:`, "").trim();
21 | }
22 | }
23 | return "";
24 | }
25 | 
26 | /**
27 | * Checks if all categories exist in the snippet
28 | * @returns a boolean indicating if the snippet is incomplete
29 | */
30 | export function snippetIncomplete(snippet: string): boolean {
31 | const components = ["TITLE:", "DESCRIPTION:", "LANGUAGE:", "SOURCE:", "CODE:"];
32 | return !components.every((c) => snippet.includes(c));
33 | }
34 | 
35 | /**
36 | * Checks if the code is too short (defined as less than 5 words)
37 | * This could indicate that the code is a simple command, parsing error, or empty.
38 | * @returns a boolean indicating if the code snippet is too short 39 | */ 40 | export function codeSnippetLength(snippet: string): boolean { 41 | const codes = accessCategory(snippet, "CODE") as string[]; 42 | return codes.some(code => { 43 | const codeSnippets = code.split("CODE:") 44 | const codeBlock = codeSnippets[codeSnippets.length - 1].replace(/```/g, "") 45 | const cleanedCode = codeBlock.trim().replace(/\r?\n/g, " "); 46 | return cleanedCode.split(" ").filter(token => token.trim() !== "").length < 5; 47 | }) 48 | } 49 | 50 | /** 51 | * Checks if there are multiple code or language categories in a snippet 52 | * @returns A boolean indicating if there are multiple instances of CODE: or LANGUAGE: 53 | */ 54 | export function multipleCode(snippet: string): boolean { 55 | return snippet.split("CODE:").length > 2 || snippet.split("LANGUAGE:").length > 2; 56 | } 57 | 58 | /** 59 | * Checks if the LANGUAGE category is actually a description of the code 60 | * @returns A boolean indicating if the language is a description of the code (e.g., "CLI Arguments") or not a proper language (e.g., "console") 61 | */ 62 | export function languageDesc(snippet: string): boolean { 63 | const langs = accessCategory(snippet, 'LANGUAGE') as string[]; 64 | return langs.some(lang => { 65 | const langSnippet = lang.split("CODE:")[0]; 66 | const cleanLang = langSnippet.trim().toLowerCase(); 67 | 68 | // Language contains multiple words 69 | if (cleanLang.split(" ").length > 1) { 70 | return true; 71 | } 72 | // Language contains keywords 73 | if (cleanLang.includes("none") || cleanLang.includes("console")) { 74 | return true 75 | } 76 | }) 77 | } 78 | 79 | /** 80 | * Checks if the code contains a list 81 | * @returns A boolean indicating that code is actually a list 82 | */ 83 | export function containsList(snippet: string): boolean { 84 | const codes = accessCategory(snippet, 'CODE') as string[]; 85 | 86 | // Unordered list: ◯, •, ☐, □, ○ 87 | // Ordered list: 1., 2., 3., ... 88 | const unorderedMarkers = ["◯", "•", "☐", "□"]; 89 | 90 | return codes.some(code => { 91 | const codeSnippet = code.split("CODE:") 92 | const cleanCode = codeSnippet[codeSnippet.length - 1].replace(/```/g, '').trim(); 93 | 94 | const containsUnordered = unorderedMarkers.some(marker => cleanCode.includes(marker)); 95 | 96 | const containsOrdered = cleanCode.includes('1. ') && cleanCode.includes('2. 
') 97 | return containsUnordered || containsOrdered; 98 | }); 99 | } 100 | 101 | /** 102 | * Checks if there are any common citation formats 103 | * @returns A boolean indicating that code is actually a citation 104 | */ 105 | export function citations(snippet: string): boolean { 106 | const citationFormats = ["bibtex", "biblatex", "ris", "mods", "marc", "csl json"] 107 | const langs = accessCategory(snippet, "LANGUAGE") as string[]; 108 | return langs.some(lang => { 109 | const langSnippet = lang.split("CODE:")[0]; 110 | const cleanLang = langSnippet.trim().replace(/\r?\n/g, "").toLowerCase(); 111 | return citationFormats.some(format => cleanLang.includes(format)) 112 | }) 113 | } 114 | 115 | /** 116 | * Checks if the snippet is about licensing 117 | * @returns A boolean indicating that code is about a license 118 | */ 119 | export function licenseInfo(snippet: string): boolean { 120 | const source = (accessCategory(snippet, "SOURCE") as string).toLowerCase(); 121 | return source.includes('license') 122 | } 123 | 124 | /** 125 | * Checks if the snippet is about the directory structure 126 | * @returns A boolean indicating that code is about a directory structure 127 | */ 128 | export function directoryStructure(snippet: string): boolean { 129 | const directoryKeywords = ["directory", "structure", "workflow", "filesystem"]; 130 | const title = (accessCategory(snippet, "TITLE") as string).toLowerCase(); 131 | const codes = accessCategory(snippet, "CODE") as string[]; 132 | const titleContainsDirectory = directoryKeywords.some((keyword) => title.includes(keyword)); 133 | 134 | const treeSymbols = ["├", "└", "|-"]; 135 | return titleContainsDirectory && 136 | codes.some(code => { 137 | const codeSnippet = code.split("CODE:") 138 | const cleanCode = codeSnippet[codeSnippet.length - 1].trim(); 139 | return treeSymbols.some(symbol => cleanCode.includes(symbol)); 140 | }) 141 | } 142 | 143 | /** 144 | * Checks if the snippet is about imports 145 | * @returns A boolean indicating that code is about imports 146 | */ 147 | export function imports(snippet: string): boolean { 148 | const importKeywords = ["import", "importing"] 149 | const title = (accessCategory(snippet, "TITLE") as string).toLowerCase(); 150 | const codes = accessCategory(snippet, "CODE") as string[]; 151 | return importKeywords.some((t) => title.includes(t)) && 152 | codes.some(code => { 153 | const codeSnippet = code.split("CODE:") 154 | const cleanedCode = codeSnippet[codeSnippet.length - 1].trim().replace(/```/g, ""); 155 | const singleLine = cleanedCode.split(/\r?\n/).filter(line => line.trim() !== "").length == 1; 156 | // Not a descriptive import statement such as a specific path 157 | const noPath = !cleanedCode.includes("/"); 158 | return singleLine && noPath; 159 | }) 160 | } 161 | 162 | /** 163 | * Checks if the snippet is about installations 164 | * @returns A boolean indicating that code is about installations 165 | */ 166 | export function installs(snippet: string): boolean { 167 | const installKeywords = ["install", "initialize", "initializing", "installation"]; 168 | const title = (accessCategory(snippet, "TITLE") as string).toLowerCase(); 169 | const codes = accessCategory(snippet, "CODE") as string[]; 170 | return installKeywords.some((t) => title.includes(t)) && 171 | codes.some(code => { 172 | const codeSnippet = code.split("CODE:") 173 | const cleanCode = codeSnippet[codeSnippet.length - 1].trim().replace(/```/g, ""); 174 | const singleLine = cleanCode.split(/\r?\n/).filter(line => line.trim() !== "").length === 
1;
175 | return singleLine;
176 | })
177 | }
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # `c7score`
2 | 
3 | The `c7score` package is used to evaluate the quality of Upstash's Context7 code snippets.
4 | 
5 | ## Metrics
6 | `c7score` uses the following five metrics to grade quality. The metrics can be divided into two groups: LLM analysis and rule-based text analysis.
7 | 1. LLM Analysis
8 | * Metric 1 (Question-Snippet Comparison): How well the snippets answer common developer questions.
9 | * Metric 2 (LLM substitute for human eval): Evaluates snippet relevancy, clarity, and correctness.
10 | 2. Text Analysis
11 | * Metric 3 (Formatting): Determines whether the snippets have the expected format.
12 | * Metric 4 (Project Metadata): Checks for irrelevant project information.
13 | * Metric 5 (Initialization): Looks for basic import and installation statements.
14 | 
15 | **Exports**
16 | 1. `getScore` - evaluates a single library based on 5 metrics
17 | 2. `compareLibraries` - evaluates two similar libraries based on 5 metrics
18 | 3. `scoreQA` - evaluates how well code snippets answer provided questions.
19 | 
20 | ## Usage
21 | **Requirements:**
22 | The `.env` file must always have the following:
23 | ```text
24 | CONTEXT7_API_TOKEN=...
25 | ```
26 | 
27 | This library can be used with both Vertex AI and the Gemini API. To use Vertex AI, the `.env` file must contain the following:
28 | 
29 | ```text
30 | VERTEX_AI=true
31 | GOOGLE_CLOUD_PROJECT=...
32 | GOOGLE_APPLICATION_CREDENTIALS=path_to_credentials
33 | ```
34 | 
35 | If using the Gemini API:
36 | ```text
37 | GEMINI_API_TOKEN=...
38 | ```
39 | 
40 | ```typescript
41 | import { getScore, compareLibraries, scoreQA } from "@upstash/c7score";
42 | 
43 | await getScore(
44 | "/websites/python_langchain",
45 | `1. What is a selector, and how do I use it?
46 | 2. What is the input token limit for Gemini 2.5 Pro?`,
47 | {
48 | report: {
49 | console: true,
50 | folderPath: "results",
51 | },
52 | weights: {
53 | question: 0.8,
54 | llm: 0.05,
55 | formatting: 0.05,
56 | metadata: 0.05,
57 | initialization: 0.05,
58 | },
59 | prompts: {
60 | questionEvaluation: `Evaluate ...`,
61 | },
62 | }
63 | );
64 | 
65 | await compareLibraries(
66 | "/tailwindlabs/tailwindcss.com",
67 | "/websites/tailwindcss",
68 | `1. How can I install Rust?
69 | 2. How can I install Tailwind CSS v4.1 using npm?`,
70 | {
71 | report: {
72 | console: true
73 | },
74 | llm: {
75 | temperature: 0.95,
76 | topP: 0.8,
77 | topK: 45
78 | },
79 | prompts: {
80 | questionEvaluation: `Evaluate ...`
81 | }
82 | }
83 | );
84 | 
85 | await scoreQA(
86 | "How can I install LangChain Core?",
87 | `
88 | ========================
89 | CODE SNIPPETS
90 | ========================
91 | TITLE: Install LangGraph and LangChain dependencies
92 | DESCRIPTION: This snippet demonstrates how to install
93 | the necessary Python and JavaScript/TypeScript packages
94 | for LangGraph and LangChain, including Anthropic
95 | integrations for model access.
96 | ...
97 | `
98 | );
99 | ```
100 | 
101 | ### Configuration
102 | 
103 | For `getScore` and `compareLibraries`:
104 | 
105 | ```typescript
106 | {
107 | report: {
108 | console: boolean;
109 | folderPath: string;
110 | humanReadable: boolean;
111 | returnScore: boolean;
112 | };
113 | weights: {
114 | question: number;
115 | llm: number;
116 | formatting: number;
117 | metadata: number;
118 | initialization: number;
119 | };
120 | llm: {
121 | temperature: number;
122 | topP: number;
123 | topK: number;
124 | candidateCount: number;
125 | seed: number;
126 | };
127 | prompts: {
128 | searchTopics: string;
129 | questionEvaluation: string;
130 | llmEvaluation: string;
131 | };
132 | }
133 | ```
134 | 
135 | For `scoreQA`:
136 | ```typescript
137 | {
138 | report: {
139 | console: boolean;
140 | };
141 | llm: {
142 | temperature: number;
143 | topP: number;
144 | topK: number;
145 | candidateCount: number;
146 | seed: number;
147 | };
148 | prompts: {
149 | questionEvaluation: string;
150 | };
151 | }
152 | 
153 | ```
154 | 
155 | **Configuration Details**
156 | * `compareLibraries`
157 | * must be given two libraries that cover the same product
158 | * will output results to `result-compare.json` and `result-compare-LIBRARY_NAME.txt`
159 | * `getScore`
160 | * will output machine-readable results to `result.json` and human-readable results to `result-LIBRARY_NAME.txt` in the specified directory
161 | * `scoreQA` only returns the score and explanation, or logs them to the console.
162 | * `report`
163 | * `console: true` prints results to the console.
164 | * `folderPath` specifies the folder for human-readable and machine-readable results (the folder must already exist).
165 | * The machine-readable file will add new libraries or update existing ones.
166 | * `humanReadable` writes the results to a txt file.
167 | * `returnScore` returns the average score as a number for `getScore` and as an object for `compareLibraries`.
168 | * `weights`
169 | * Specifies the weight breakdown for the evaluation metrics. If changing the weights, all must have an associated value (can be 0) and must sum to 1.
170 | * `llm`
171 | * LLM configuration options for Gemini.
172 | * The specific default values are used to make results more reproducible.
173 | * `prompts`
174 | * Replaces the default prompts. It is not recommended to change the final output instructions or the score maximum (e.g., 100 -> 10).
175 | * Each prompt accepts different placeholders, but they must be formatted as {{variableName}} with the correct associated variable name in the prompt (see Placeholder Reference and the example below).
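
For illustration, a custom `questionEvaluation` prompt for `getScore` might be wired up as follows. The questions and prompt wording here are examples only; keep the 0-100 scale and the short-explanation instructions so the response can still be parsed. Omitted configuration options use the defaults.

```typescript
import { getScore } from "@upstash/c7score";

// Custom prompt for the question metric. The placeholders must match the
// Placeholder Reference: {{questions}} and {{contexts}} for getScore.
const customQuestionEvaluation = `
You are grading documentation context for an AI coding assistant.

Questions: {{questions}}

Context: {{contexts}}

For each question, score the context from 0-100 and explain the score
in 1-2 sentences, with each sentence on a new line.
`;

await getScore(
  "/websites/python_langchain",
  `1. How do I create a prompt template?
2. How do I stream chat model output?`,
  {
    prompts: {
      questionEvaluation: customQuestionEvaluation,
    },
  }
);
```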
176 | 177 | ### Placeholder Reference 178 | | Prompt | For `getScore` | For `compareLibraries` | For `scoreQA` | 179 | |-----------------|---------------------------------------------------|----------------------------------------------------------------------------------------|-----------------------------------| 180 | | **searchTopics** | `{{product}}`, `{{questions}}` | – | – | 181 | | **questionEvaluation** | `{{contexts}}`, `{{questions}}` | `{{contexts[0]}}`, `{{contexts[1]}}`, `{{questions}}` | `{{context}}`, `{{question}}` | 182 | | **llmEvaluation** | `{{snippets}}`, `{{snippetDelimiter}}` | `{{snippets[0]}}`, `{{snippets[1]}}`, `{{snippetDelimiter}}` | – | 183 | 184 | 185 | 186 | ### Default Values 187 | 188 | For `getScore` and `compareLibraries`: 189 | ```typescript 190 | { 191 | report: { 192 | console: true, 193 | humanReadable: false, 194 | returnScore: false, 195 | }, 196 | weights: { 197 | question: 0.8, 198 | llm: 0.05, 199 | formatting: 0.05, 200 | metadata: 0.05, 201 | initialization: 0.05, 202 | }, 203 | llm: { 204 | temperature: 0, 205 | topP: 0.1, 206 | topK: 1, 207 | candidateCount: 1, 208 | seed: 42, 209 | } 210 | } 211 | ``` 212 | 213 | For `scoreQA`: 214 | ```typescript 215 | { 216 | report: { 217 | console: true, 218 | }, 219 | llm: { 220 | temperature: 0, 221 | topP: 0.1, 222 | topK: 1, 223 | candidateCount: 1, 224 | seed: 42, 225 | } 226 | } 227 | ``` 228 | * Note: `scoreQA` will always return the scores as objects 229 | 230 | 231 | ### Example Outputs 232 | Example output from `scoreQA`: 233 | 234 | ```text 235 | Score: 75 236 | Explanation: The provided context contains several code snippets that show how to install `@langchain/core` for JavaScript/TypeScript using npm, yarn, pnpm, and bun. 237 | However, it fails to provide the equivalent `pip` command for installing the `langchain-core` Python package, thus only partially answering the question. 238 | ``` 239 | 240 | Example output from `getScore` (`compareLibraries` is the same): 241 | ```text 242 | == Average Score == 243 | 244 | 92 245 | 246 | == Questions Score == 247 | 248 | 100 249 | 250 | == Questions Explanation == 251 | 252 | The context provides clear definitions and multiple comprehensive code examples that explain what a selector is in LangChain. 253 | It thoroughly demonstrates how to use various types of selectors, such as `SemanticSimilarityExampleSelector` and `LengthBasedExampleSelector`, by integrating them into few-shot prompt templates. 254 | 255 | == LLM Score == 256 | 257 | 46 258 | 259 | == LLM Explanation == 260 | 261 | The score is low due to a high rate of duplicate snippets, with over a quarter being identical copies. Additionally, a majority of snippets fail the syntax criterion due to errors, use of placeholders, and formatting issues that also impact clarity. 
262 | 
263 | == Formatting Score ==
264 | 
265 | 0
266 | 
267 | == Project Metadata Score ==
268 | 
269 | 100
270 | 
271 | == Initialization Score ==
272 | 
273 | 100
274 | ```
275 | 
276 | 
--------------------------------------------------------------------------------
/src/services/questionEval.ts:
--------------------------------------------------------------------------------
1 | import { Type, GoogleGenAI } from '@google/genai';
2 | import { QuestionEvaluationOutput, QuestionEvaluationPairOutput } from '../lib/types.js';
3 | import axios from 'axios';
4 | import { runLLM } from './llmUtils.js';
5 | import { questionEvaluationPromptHandler, questionEvaluationPromptCompareHandler, searchTopicsPromptHandler, scoreQAQuestionPromptHandler } from './prompts/handler.js';
6 | import { defaultConfigOptions } from '../config/options.js';
7 | import { searchPrompt } from './prompts/templates.js';
8 | 
9 | export class QuestionEvaluator {
10 | private client: GoogleGenAI;
11 | private llmConfig: Record<string, number>;
12 | private prompts?: Record<string, string>;
13 | 
14 | constructor(
15 | client: GoogleGenAI,
16 | llmConfig: Record<string, number> = defaultConfigOptions.llm,
17 | prompts?: Record<string, string>) {
18 | this.client = client;
19 | this.llmConfig = llmConfig;
20 | this.prompts = prompts;
21 | }
22 | 
23 | /**
24 | * Generates 15 questions about a product one might ask an AI coding assistant.
25 | * The search prompt is not customizable by the user.
26 | * @returns The 15 questions as a string
27 | */
28 | async generateQuestions(product: string): Promise<string> {
29 | const prompt = searchPrompt.replace("{{product}}", product);
30 | const searchTool = { googleSearch: {} };
31 | 
32 | const defaultConfig: object = {
33 | tools: [searchTool],
34 | ...{
35 | temperature: 1.0,
36 | topP: 0.95,
37 | topK: 64
38 | }
39 | }
40 | const response = await runLLM(prompt, defaultConfig, this.client);
41 | if (response == undefined) {
42 | throw new Error("Response is undefined");
43 | } else {
44 | return response;
45 | }
46 | }
47 | 
48 | /**
49 | * Generates 5 search topics for each question.
50 | * @param questions - The questions to generate search topics for
51 | * @returns 75 search topics
52 | */
53 | async generateSearchTopics(product: string, questions: string): Promise<string[][]> {
54 | const prompt = searchTopicsPromptHandler(product, questions, this.prompts?.searchTopics);
55 | 
56 | const config: object = {
57 | responseMimeType: "application/json",
58 | responseSchema: {
59 | type: Type.OBJECT,
60 | properties: {
61 | topics: { type: Type.ARRAY, items: { type: Type.ARRAY, items: { type: Type.STRING } } }
62 | },
63 | required: ["topics"],
64 | },
65 | ...this.llmConfig
66 | }
67 | const response = await runLLM(prompt, config, this.client);
68 | const jsonResponse = JSON.parse(response);
69 | if (jsonResponse.topics == undefined) {
70 | throw new Error("Topics are undefined");
71 | } else {
72 | return jsonResponse.topics;
73 | }
74 | }
75 | 
76 | /**
77 | * Fetches 1 context/code snippet per topic for the library from Context7.
78 | * @param topics - The search topics. 15 questions, 5 topics per question.
79 | * @param library - The library to fetch the context for
80 | * @param headerConfig - The header config to use for the Context7 API
81 | * @returns 75 context/code snippets
82 | */
83 | async fetchRelevantSnippets(topics: string[][], library: string, headerConfig: object): Promise<string[][]> {
84 | const snippet_title = "=".repeat(24) + "\nCODE SNIPPETS\n" + "=".repeat(24);
85 | const contexts = [];
86 | for (const questionTopics of topics) {
87 | const questionContexts = [];
88 | for (const topic of questionTopics) {
89 | let snippets = "";
90 | const topicUrl = encodeURIComponent(topic);
91 | const url = `https://context7.com/api/v1/${library}?tokens=10000&topic=${topicUrl}`;
92 | const response = await axios.get(url, headerConfig)
93 | 
94 | // Take only first snippet to avoid high token count downstream
95 | snippets = String(response.data).replace(snippet_title, "").split("\n" + "-".repeat(40) + "\n")[0];
96 | questionContexts.push(snippets);
97 | }
98 | contexts.push(questionContexts);
99 | }
100 | return contexts;
101 | }
102 | 
103 | /**
104 | * For compareLibraries only.
105 | * Evaluates how well the snippets answer the questions based on 3 criteria.
106 | * @param questions - The questions to evaluate
107 | * @param contexts - The context/code snippets per topic
108 | * @returns The average scores and explanations for each context collection
109 | */
110 | async evaluateQuestionsPair(questions: string, contexts: string[][][]): Promise<QuestionEvaluationPairOutput> {
111 | const prompt = questionEvaluationPromptCompareHandler(contexts, questions, this.prompts?.questionEvaluation);
112 | const config: object = {
113 | responseMimeType: "application/json",
114 | responseSchema: {
115 | type: Type.OBJECT,
116 | properties: {
117 | questionAverageScores: { type: Type.ARRAY, minItems: 2, maxItems: 2, items: { type: Type.NUMBER } },
118 | questionExplanations: { type: Type.ARRAY, minItems: 2, maxItems: 2, items: { type: Type.STRING } }
119 | },
120 | required: ["questionAverageScores", "questionExplanations"],
121 | },
122 | ...this.llmConfig
123 | }
124 | const response = await runLLM(prompt, config, this.client);
125 | const jsonResponse = JSON.parse(response);
126 | if (jsonResponse.questionAverageScores == undefined || jsonResponse.questionExplanations == undefined) {
127 | throw new Error("Question scores are undefined");
128 | } else {
129 | return {
130 | questionAverageScores: jsonResponse.questionAverageScores as number[],
131 | questionExplanations: jsonResponse.questionExplanations as string[]
132 | }
133 | }
134 | }
135 | 
136 | /**
137 | * For getScore only.
138 | * Evaluates how well the snippets answer the questions based on 3 criteria.
139 | * @param questions - The questions to evaluate 140 | * @param contexts - The context/code snippets per topic 141 | * @returns The average score and explanation for the context collection 142 | */ 143 | async evaluateQuestions(questions: string, contexts: string[][]): Promise { 144 | const prompt = questionEvaluationPromptHandler(contexts, questions, this.prompts?.questionEvaluation); 145 | const config: object = { 146 | responseMimeType: "application/json", 147 | responseSchema: { 148 | type: Type.OBJECT, 149 | properties: { 150 | questionAverageScore: { type: Type.NUMBER }, 151 | questionExplanation: { type: Type.STRING } 152 | }, 153 | required: ["questionAverageScore", "questionExplanation"], 154 | }, 155 | ...this.llmConfig 156 | } 157 | 158 | const response = await runLLM(prompt, config, this.client); 159 | const jsonResponse = JSON.parse(response); 160 | if (jsonResponse.questionAverageScore == undefined || jsonResponse.questionExplanation == undefined) { 161 | throw new Error("Question scores are undefined"); 162 | } else { 163 | return { 164 | questionAverageScore: jsonResponse.questionAverageScore as number, 165 | questionExplanation: jsonResponse.questionExplanation as string 166 | } 167 | } 168 | } 169 | 170 | /** 171 | * For scoreQA only. 172 | * Evaluates how well the snippets answer the question based on 3 criteria. 173 | * @param question - The question to evaluate 174 | * @param context - The context/code snippets 175 | * @returns The average score and explanation for the context collection 176 | */ 177 | async evaluateQuestion(question: string, context: string): Promise { 178 | const prompt = scoreQAQuestionPromptHandler(context, question, this.prompts?.questionEvaluation); 179 | const config: object = { 180 | responseMimeType: "application/json", 181 | responseSchema: { 182 | type: Type.OBJECT, 183 | properties: { 184 | questionAverageScore: { type: Type.NUMBER }, 185 | questionExplanation: { type: Type.STRING } 186 | }, 187 | required: ["questionAverageScore", "questionExplanation"], 188 | }, 189 | ...this.llmConfig 190 | } 191 | 192 | const response = await runLLM(prompt, config, this.client); 193 | const jsonResponse = JSON.parse(response); 194 | if (jsonResponse.questionAverageScore == undefined || jsonResponse.questionExplanation == undefined) { 195 | throw new Error("Question scores are undefined"); 196 | } else { 197 | return { 198 | questionAverageScore: jsonResponse.questionAverageScore as number, 199 | questionExplanation: jsonResponse.questionExplanation as string 200 | } 201 | } 202 | } 203 | } 204 | -------------------------------------------------------------------------------- /tests/testContext.txt: -------------------------------------------------------------------------------- 1 | ======================== 2 | CODE SNIPPETS 3 | ======================== 4 | TITLE: Install LangGraph and LangChain dependencies 5 | DESCRIPTION: This snippet demonstrates how to install the necessary Python and JavaScript/TypeScript packages for LangGraph and LangChain, including Anthropic integrations for model access. Ensure you have pip for Python and npm for JavaScript/TypeScript installed. 
6 | 7 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/agents/agents.md#_snippet_0 8 | 9 | LANGUAGE: bash 10 | CODE: 11 | ``` 12 | pip install -U langgraph "langchain[anthropic]" 13 | ``` 14 | 15 | LANGUAGE: bash 16 | CODE: 17 | ``` 18 | npm install @langchain/langgraph @langchain/core @langchain/anthropic 19 | ``` 20 | 21 | ---------------------------------------- 22 | 23 | TITLE: Install Documentation Build Requirements 24 | DESCRIPTION: This command uses `uv` to synchronize and install the necessary dependencies for building documentation, specifically targeting the 'test' group of requirements. It ensures all tools and libraries needed for documentation generation are in place. 25 | 26 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/README.md#_snippet_0 27 | 28 | LANGUAGE: bash 29 | CODE: 30 | ``` 31 | uv sync --group test 32 | ``` 33 | 34 | ---------------------------------------- 35 | 36 | TITLE: Run LangGraph Development Server (Python) 37 | DESCRIPTION: Installs local Python dependencies for the LangGraph project and starts the development server. This allows for local testing and development of the Python application. 38 | 39 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/auth/getting_started.md#_snippet_1 40 | 41 | LANGUAGE: shell 42 | CODE: 43 | ``` 44 | pip install -e . 45 | langgraph dev 46 | ``` 47 | 48 | ---------------------------------------- 49 | 50 | TITLE: Install LangGraph and LangChain Prerequisites 51 | DESCRIPTION: This snippet installs the necessary Python packages, including `langgraph`, `langchain-openai`, and `langchain`, required to run the examples in this guide. 52 | 53 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/memory/semantic-search.ipynb#_snippet_0 54 | 55 | LANGUAGE: python 56 | CODE: 57 | ``` 58 | %%capture --no-stderr 59 | %pip install -U langgraph langchain-openai langchain 60 | ``` 61 | 62 | ---------------------------------------- 63 | 64 | TITLE: Run LangGraph Development Server (JavaScript) 65 | DESCRIPTION: Installs local JavaScript dependencies for the LangGraph project and starts the development server. This allows for local testing and development of the JavaScript application. 66 | 67 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/auth/getting_started.md#_snippet_2 68 | 69 | LANGUAGE: shell 70 | CODE: 71 | ``` 72 | npm install 73 | npm run langgraph dev 74 | ``` 75 | 76 | ---------------------------------------- 77 | 78 | TITLE: Install Required Packages and Set API Keys 79 | DESCRIPTION: This section outlines the initial setup steps, including installing necessary Python packages for LangChain, LangGraph, and related libraries. It also provides a utility function to securely set environment variables for OpenAI and Anthropic API keys, prompting the user if they are not already defined. 80 | 81 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/code_assistant/langgraph_code_assistant.ipynb#_snippet_0 82 | 83 | LANGUAGE: bash 84 | CODE: 85 | ``` 86 | ! 
pip install -U langchain_community langchain-openai langchain-anthropic langchain langgraph bs4 87 | ``` 88 | 89 | LANGUAGE: python 90 | CODE: 91 | ``` 92 | import getpass 93 | import os 94 | 95 | def _set_env(var: str): 96 | if not os.environ.get(var): 97 | os.environ[var] = getpass.getpass(f"{var}: ") 98 | 99 | 100 | _set_env("OPENAI_API_KEY") 101 | _set_env("ANTHROPIC_API_KEY") 102 | ``` 103 | 104 | ---------------------------------------- 105 | 106 | TITLE: Install Dependencies for Self-RAG 107 | DESCRIPTION: Installs necessary Python packages for building a Self-RAG system, including `langchain-pinecone`, `langchain-openai`, `langchainhub`, and `langgraph`. The `-qU` flags ensure a quiet and upgraded installation. 108 | 109 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/examples/rag/langgraph_self_rag_pinecone_movies.ipynb#_snippet_0 110 | 111 | LANGUAGE: python 112 | CODE: 113 | ``` 114 | %pip install -qU langchain-pinecone langchain-openai langchainhub langgraph 115 | ``` 116 | 117 | ---------------------------------------- 118 | 119 | TITLE: Asynchronous LangGraph with Postgres Checkpointer Example 120 | DESCRIPTION: Presents a full asynchronous LangGraph application utilizing `AsyncPostgresSaver` for state persistence. This example demonstrates defining an async graph, compiling it, and asynchronously streaming messages with state checkpointing to PostgreSQL. Remember to call `await checkpointer.setup()` once for initial database setup. 121 | 122 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/memory/add-memory.md#_snippet_5 123 | 124 | LANGUAGE: python 125 | CODE: 126 | ``` 127 | from langchain.chat_models import init_chat_model 128 | from langgraph.graph import StateGraph, MessagesState, START 129 | from langgraph.checkpoint.postgres.aio import AsyncPostgresSaver 130 | 131 | model = init_chat_model(model="anthropic:claude-3-5-haiku-latest") 132 | 133 | DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable" 134 | async with AsyncPostgresSaver.from_conn_string(DB_URI) as checkpointer: 135 | # await checkpointer.setup() 136 | 137 | async def call_model(state: MessagesState): 138 | response = await model.ainvoke(state["messages"]) 139 | return {"messages": response} 140 | 141 | builder = StateGraph(MessagesState) 142 | builder.add_node(call_model) 143 | builder.add_edge(START, "call_model") 144 | 145 | graph = builder.compile(checkpointer=checkpointer) 146 | 147 | config = { 148 | "configurable": { 149 | "thread_id": "1" 150 | } 151 | } 152 | 153 | async for chunk in graph.astream( 154 | {"messages": [{"role": "user", "content": "hi! I'm bob"}]}, 155 | config, 156 | stream_mode="values" 157 | ): 158 | chunk["messages"][-1].pretty_print() 159 | 160 | async for chunk in graph.astream( 161 | {"messages": [{"role": "user", "content": "what's my name?"}]}, 162 | config, 163 | stream_mode="values" 164 | ): 165 | chunk["messages"][-1].pretty_print() 166 | ``` 167 | 168 | ---------------------------------------- 169 | 170 | TITLE: Create LangGraph Project (JavaScript CLI) 171 | DESCRIPTION: Installs the LangGraph CLI and initializes a new LangGraph project named 'custom-auth' using the TypeScript template. This command-line setup prepares the environment for a JavaScript/TypeScript-based LangGraph application. 
172 | 173 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/auth/getting_started.md#_snippet_0 174 | 175 | LANGUAGE: bash 176 | CODE: 177 | ``` 178 | npx @langchain/langgraph-cli new --template=new-langgraph-project-typescript custom-auth 179 | cd custom-auth 180 | ``` 181 | 182 | ---------------------------------------- 183 | 184 | TITLE: Install Dependencies and Configure OpenAI API Key 185 | DESCRIPTION: This snippet handles the initial setup for the project. It installs the necessary Python packages, `langgraph` and `langchain_openai`, using pip. Additionally, it provides a utility function to securely prompt the user for their `OPENAI_API_KEY` if it's not already set in the environment, ensuring the application can authenticate with the OpenAI LLM. 186 | 187 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/chatbots/information-gather-prompting.ipynb#_snippet_0 188 | 189 | LANGUAGE: python 190 | CODE: 191 | ``` 192 | %%capture --no-stderr 193 | % pip install -U langgraph langchain_openai 194 | ``` 195 | 196 | LANGUAGE: python 197 | CODE: 198 | ``` 199 | import getpass 200 | import os 201 | 202 | 203 | def _set_env(var: str): 204 | if not os.environ.get(var): 205 | os.environ[var] = getpass.getpass(f"{var}: ") 206 | 207 | 208 | _set_env("OPENAI_API_KEY") 209 | ``` 210 | 211 | ---------------------------------------- 212 | 213 | TITLE: Implement a Dynamic Prompt for LangGraph Agents 214 | DESCRIPTION: This example shows how to create a dynamic prompt for a LangGraph agent using a function. The function generates messages at runtime based on the agent's state and configuration, allowing for personalized or context-aware LLM interactions. This enables including information like user IDs or internal agent state in the prompt. 215 | 216 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/agents/agents.md#_snippet_4 217 | 218 | LANGUAGE: python 219 | CODE: 220 | ``` 221 | from langchain_core.messages import AnyMessage 222 | from langchain_core.runnables import RunnableConfig 223 | from langgraph.prebuilt.chat_agent_executor import AgentState 224 | from langgraph.prebuilt import create_react_agent 225 | 226 | # highlight-next-line 227 | def prompt(state: AgentState, config: RunnableConfig) -> list[AnyMessage]: # (1)! 228 | user_name = config["configurable"].get("user_name") 229 | system_msg = f"You are a helpful assistant. Address the user as {user_name}." 230 | return [{"role": "system", "content": system_msg}] + state["messages"] 231 | 232 | agent = create_react_agent( 233 | model="anthropic:claude-3-7-sonnet-latest", 234 | tools=[get_weather], 235 | # highlight-next-line 236 | prompt=prompt 237 | ) 238 | 239 | agent.invoke( 240 | {"messages": [{"role": "user", "content": "what is the weather in sf"}]}, 241 | # highlight-next-line 242 | config={"configurable": {"user_name": "John Smith"}} 243 | ) 244 | ``` 245 | 246 | LANGUAGE: typescript 247 | CODE: 248 | ``` 249 | import { type BaseMessageLike } from "@langchain/core/messages"; 250 | import { type RunnableConfig } from "@langchain/core/runnables"; 251 | import { createReactAgent } from "@langchain/langgraph/prebuilt"; 252 | 253 | // highlight-next-line 254 | const dynamicPrompt = (state: { messages: BaseMessageLike[] }, config: RunnableConfig): BaseMessageLike[] => { // (1)! 255 | const userName = config.configurable?.user_name; 256 | const systemMsg = `You are a helpful assistant. 
Address the user as ${userName}.`; 257 | return [{ role: "system", content: systemMsg }, ...state.messages]; 258 | }; 259 | 260 | const agent = createReactAgent({ 261 | llm: "anthropic:claude-3-5-sonnet-latest", 262 | tools: [getWeather], 263 | // highlight-next-line 264 | stateModifier: dynamicPrompt 265 | }); 266 | 267 | await agent.invoke( 268 | { messages: [{ role: "user", content: "what is the weather in sf" }] }, 269 | // highlight-next-line 270 | { configurable: { user_name: "John Smith" } } 271 | ); 272 | ``` 273 | 274 | ---------------------------------------- 275 | 276 | TITLE: Install Packages and Set API Keys for LangGraph ReAct Agent 277 | DESCRIPTION: This snippet provides the initial setup steps for building a ReAct agent. It includes installing necessary Python packages like `langgraph` and `langchain-openai` using `pip`, and a utility function to securely set environment variables, specifically `OPENAI_API_KEY`, which is required for interacting with OpenAI models. 278 | 279 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/react-agent-from-scratch-functional.ipynb#_snippet_0 280 | 281 | LANGUAGE: python 282 | CODE: 283 | ``` 284 | %%capture --no-stderr 285 | %pip install -U langgraph langchain-openai 286 | ``` 287 | 288 | LANGUAGE: python 289 | CODE: 290 | ``` 291 | import getpass 292 | import os 293 | 294 | 295 | def _set_env(var: str): 296 | if not os.environ.get(var): 297 | os.environ[var] = getpass.getpass(f"{var}: ") 298 | 299 | 300 | _set_env("OPENAI_API_KEY") 301 | ``` 302 | 303 | ---------------------------------------- 304 | 305 | TITLE: Initialize LangGraph StateGraph and User Information Node 306 | DESCRIPTION: This code snippet demonstrates the foundational setup for a LangGraph application by initializing a `StateGraph` instance. It then adds a crucial starting node named 'fetch_user_info', which is responsible for pre-populating the graph's state with relevant user-specific data, such as flight information. This node is directly connected to the `START` edge, ensuring user data is available at the beginning of the graph's execution. 307 | 308 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/customer-support/customer-support.ipynb#_snippet_48 309 | 310 | LANGUAGE: python 311 | CODE: 312 | ``` 313 | from typing import Literal 314 | 315 | from langgraph.checkpoint.memory import InMemorySaver 316 | from langgraph.graph import StateGraph 317 | from langgraph.prebuilt import tools_condition 318 | 319 | builder = StateGraph(State) 320 | 321 | def user_info(state: State): 322 | return {"user_info": fetch_user_flight_information.invoke({})} 323 | 324 | 325 | builder.add_node("fetch_user_info", user_info) 326 | builder.add_edge(START, "fetch_user_info") 327 | ``` 328 | 329 | ---------------------------------------- 330 | 331 | TITLE: Example pyproject.toml for LangGraph Application 332 | DESCRIPTION: Provides an example `pyproject.toml` file demonstrating how to configure build system requirements, define project metadata (name, version, description, authors, license, Python compatibility), and specify project-specific dependencies for a LangGraph application. 
333 | 334 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/cloud/deployment/setup_pyproject.md#_snippet_2 335 | 336 | LANGUAGE: toml 337 | CODE: 338 | ``` 339 | [build-system] 340 | requires = ["hatchling"] 341 | build-backend = "hatchling.build" 342 | 343 | [project] 344 | name = "my-agent" 345 | version = "0.0.1" 346 | description = "An excellent agent build for LangGraph Platform." 347 | authors = [ 348 | {name = "Polly the parrot", email = "1223+polly@users.noreply.github.com"} 349 | ] 350 | license = {text = "MIT"} 351 | readme = "README.md" 352 | requires-python = ">=3.9" 353 | dependencies = [ 354 | "langgraph>=0.2.0", 355 | "langchain-fireworks>=0.1.3" 356 | ] 357 | 358 | [tool.hatch.build.targets.wheel] 359 | packages = ["my_agent"] 360 | ``` 361 | 362 | ---------------------------------------- 363 | 364 | TITLE: Install LangGraph and Langchain Anthropic Packages 365 | DESCRIPTION: This command installs the necessary Python packages, `langgraph` and `langchain_anthropic`, required to run the examples and build LangGraph applications. The `%%capture` and `%pip` commands are specific to Jupyter/IPython environments. 366 | 367 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/persistence-functional.ipynb#_snippet_4 368 | 369 | LANGUAGE: python 370 | CODE: 371 | ``` 372 | %%capture --no-stderr 373 | %pip install --quiet -U langgraph langchain_anthropic 374 | ``` 375 | 376 | ---------------------------------------- 377 | 378 | TITLE: Start LangGraph Local Development Server 379 | DESCRIPTION: Run the LangGraph server locally in development mode. This command starts the server in watch mode, automatically restarting on code changes, and makes your local application accessible to LangGraph Studio for testing. 380 | 381 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/cloud/how-tos/studio/quick_start.md#_snippet_1 382 | 383 | LANGUAGE: bash 384 | CODE: 385 | ``` 386 | langgraph dev 387 | ``` 388 | 389 | ---------------------------------------- 390 | 391 | TITLE: Install LangGraph CLI for Local Development 392 | DESCRIPTION: Install the LangGraph Command Line Interface (CLI) with in-memory support. This tool is essential for running the LangGraph server locally and interacting with LangGraph Studio for local development. 393 | 394 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/cloud/how-tos/studio/quick_start.md#_snippet_0 395 | 396 | LANGUAGE: bash 397 | CODE: 398 | ``` 399 | pip install -U "langgraph-cli[inmem]" 400 | ``` 401 | 402 | ---------------------------------------- 403 | 404 | TITLE: Example `requirements.txt` for LangGraph Application 405 | DESCRIPTION: This example demonstrates a `requirements.txt` file, specifying core Python packages required for a LangGraph application. It includes `langgraph`, `langchain_anthropic`, `tavily-python`, `langchain_community`, and `langchain_openai`. 406 | 407 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/cloud/deployment/setup.md#_snippet_2 408 | 409 | LANGUAGE: text 410 | CODE: 411 | ``` 412 | langgraph 413 | langchain_anthropic 414 | tavily-python 415 | langchain_community 416 | langchain_openai 417 | 418 | ``` 419 | 420 | ---------------------------------------- 421 | 422 | TITLE: Install Packages and Configure Anthropic API Key 423 | DESCRIPTION: This snippet outlines the initial setup steps, including installing the necessary Python packages like 'langgraph' and 'langchain_anthropic'. 
It also provides code to securely set the 'ANTHROPIC_API_KEY' environment variable using 'getpass' for API authentication. 424 | 425 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/react-agent-structured-output.ipynb#_snippet_0 426 | 427 | LANGUAGE: python 428 | CODE: 429 | ``` 430 | %%capture --no-stderr 431 | %pip install -U langgraph langchain_anthropic 432 | ``` 433 | 434 | LANGUAGE: python 435 | CODE: 436 | ``` 437 | import getpass 438 | import os 439 | 440 | 441 | def _set_env(var: str): 442 | if not os.environ.get(var): 443 | os.environ[var] = getpass.getpass(f"{var}: ") 444 | 445 | 446 | _set_env("ANTHROPIC_API_KEY") 447 | ``` 448 | 449 | ---------------------------------------- 450 | 451 | TITLE: Define LangGraph Chatbot Structure 452 | DESCRIPTION: This code provides the complete definition for a basic LangGraph chatbot. It sets up the state, initializes a chat model, defines a 'chatbot' node, and compiles the graph with start and end edges, demonstrating a simple conversational flow. Examples are provided for both Python and TypeScript, showcasing the setup with different LLM providers and state management. 453 | 454 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/get-started/1-build-basic-chatbot.md#_snippet_10 455 | 456 | LANGUAGE: python 457 | CODE: 458 | ``` 459 | from typing import Annotated 460 | 461 | from langchain.chat_models import init_chat_model 462 | from typing_extensions import TypedDict 463 | 464 | from langgraph.graph import StateGraph, START, END 465 | from langgraph.graph.message import add_messages 466 | 467 | 468 | class State(TypedDict): 469 | messages: Annotated[list, add_messages] 470 | 471 | 472 | graph_builder = StateGraph(State) 473 | 474 | 475 | llm = init_chat_model("anthropic:claude-3-5-sonnet-latest") 476 | 477 | 478 | def chatbot(state: State): 479 | return {"messages": [llm.invoke(state["messages"])]} 480 | 481 | 482 | # The first argument is the unique node name 483 | # The second argument is the function or object that will be called whenever 484 | # the node is used. 485 | graph_builder.add_node("chatbot", chatbot) 486 | graph_builder.add_edge(START, "chatbot") 487 | graph_builder.add_edge("chatbot", END) 488 | graph = graph_builder.compile() 489 | ``` 490 | 491 | LANGUAGE: typescript 492 | CODE: 493 | ``` 494 | import { StateGraph, START, END, MessagesZodState } from "@langchain/langgraph"; 495 | import { z } from "zod"; 496 | import { ChatOpenAI } from "@langchain/openai"; 497 | 498 | const llm = new ChatOpenAI({ 499 | model: "gpt-4o", 500 | temperature: 0, 501 | }); 502 | 503 | const State = z.object({ messages: MessagesZodState.shape.messages }); 504 | 505 | const graph = new StateGraph(State) 506 | // The first argument is the unique node name 507 | // The second argument is the function or object that will be called whenever 508 | // the node is used. 509 | .addNode("chatbot", async (state) => { 510 | return { messages: [await llm.invoke(state.messages)] }; 511 | }) 512 | .addEdge(START, "chatbot") 513 | .addEdge("chatbot", END) 514 | .compile(); 515 | ``` 516 | 517 | ---------------------------------------- 518 | 519 | TITLE: Define LangGraph Node for Example Retrieval 520 | DESCRIPTION: This Python function, `retrieve_examples`, serves as a LangGraph node responsible for fetching relevant examples based on a candidate solution. It takes the current `State` and `RunnableConfig` (to adjust `top_k` examples) as input. 
It extracts the code from the candidate's tool calls, uses the configured retriever to find similar examples, formats them, and returns them as a string in the `examples` field of the state. 521 | 522 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/usaco/usaco.ipynb#_snippet_20 523 | 524 | LANGUAGE: python 525 | CODE: 526 | ``` 527 | from langchain_core.runnables import RunnableConfig 528 | 529 | 530 | def retrieve_examples(state: State, config: RunnableConfig): 531 | top_k = config["configurable"].get("k") or 2 532 | ai_message: AIMessage = state["candidate"] 533 | if not ai_message.tool_calls: 534 | # We err here. To make more robust, you could loop back 535 | raise ValueError("Draft agent did not produce a valid code block") 536 | code = ai_message.tool_calls[0]["args"]["code"] 537 | examples_str = "\n".join( 538 | [doc.page_content for doc in retriever.invoke(code)[:top_k]] 539 | ) 540 | examples_str = f""" 541 | You previously solved the following problems in this competition: 542 | 543 | {examples_str} 544 | 545 | Approach this new question with similar sophistication.""" 546 | return {"examples": examples_str} 547 | ``` 548 | 549 | ---------------------------------------- 550 | 551 | TITLE: Install LangGraph Ecosystem Packages via npm 552 | DESCRIPTION: This snippet provides the necessary `npm install` commands to set up various components of the LangGraph ecosystem. Each command installs a specific package designed for different functionalities, such as core agent creation, multi-agent system supervision, or performance evaluation. Ensure Node.js and npm are installed on your system before running these commands. 553 | 554 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/agents/overview.md#_snippet_3 555 | 556 | LANGUAGE: Shell 557 | CODE: 558 | ``` 559 | npm install @langchain/langgraph @langchain/core 560 | ``` 561 | 562 | LANGUAGE: Shell 563 | CODE: 564 | ``` 565 | npm install @langchain/langgraph-supervisor 566 | ``` 567 | 568 | LANGUAGE: Shell 569 | CODE: 570 | ``` 571 | npm install @langchain/langgraph-swarm 572 | ``` 573 | 574 | LANGUAGE: Shell 575 | CODE: 576 | ``` 577 | npm install @langchain/mcp-adapters 578 | ``` 579 | 580 | LANGUAGE: Shell 581 | CODE: 582 | ``` 583 | npm install agentevals 584 | ``` 585 | 586 | ---------------------------------------- 587 | 588 | TITLE: Install LangGraph and related ecosystem packages 589 | DESCRIPTION: Instructions for installing various LangGraph ecosystem packages using pip, enabling different functionalities like prebuilt agents, supervisors, swarms, MCP adapters, memory management, and evaluation tools. Each command installs a specific package or set of packages required for different aspects of agent development. 
590 | 591 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/agents/overview.md#_snippet_0 592 | 593 | LANGUAGE: shell 594 | CODE: 595 | ``` 596 | pip install -U langgraph langchain 597 | ``` 598 | 599 | LANGUAGE: shell 600 | CODE: 601 | ``` 602 | pip install -U langgraph-supervisor 603 | ``` 604 | 605 | LANGUAGE: shell 606 | CODE: 607 | ``` 608 | pip install -U langgraph-swarm 609 | ``` 610 | 611 | LANGUAGE: shell 612 | CODE: 613 | ``` 614 | pip install -U langchain-mcp-adapters 615 | ``` 616 | 617 | LANGUAGE: shell 618 | CODE: 619 | ``` 620 | pip install -U langmem 621 | ``` 622 | 623 | LANGUAGE: shell 624 | CODE: 625 | ``` 626 | pip install -U agentevals 627 | ``` 628 | 629 | ---------------------------------------- 630 | 631 | TITLE: Install required Python packages for LangGraph and OpenAI 632 | DESCRIPTION: This snippet installs `langgraph`, `langchain_openai`, and `numpy` using pip. It uses `%%capture --no-stderr` to suppress output and `%pip install --quiet -U` for quiet, upgrade installations, which is common in Jupyter/IPython environments. 633 | 634 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/many-tools.ipynb#_snippet_0 635 | 636 | LANGUAGE: python 637 | CODE: 638 | ``` 639 | %%capture --no-stderr 640 | %pip install --quiet -U langgraph langchain_openai numpy 641 | ``` 642 | 643 | ---------------------------------------- 644 | 645 | TITLE: Specify LangGraph.js Application Dependencies in package.json 646 | DESCRIPTION: Demonstrates how to define project dependencies for a LangGraph.js application using a `package.json` file. This example includes core LangChain and LangGraph packages, which will be installed by the chosen package manager during application deployment. 647 | 648 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/cloud/deployment/setup_javascript.md#_snippet_1 649 | 650 | LANGUAGE: json 651 | CODE: 652 | ``` 653 | { 654 | "name": "langgraphjs-studio-starter", 655 | "packageManager": "yarn@1.22.22", 656 | "dependencies": { 657 | "@langchain/community": "^0.2.31", 658 | "@langchain/core": "^0.2.31", 659 | "@langchain/langgraph": "^0.2.0", 660 | "@langchain/openai": "^0.2.8" 661 | } 662 | } 663 | ``` 664 | 665 | ---------------------------------------- 666 | 667 | TITLE: Execute LangGraph Notebooks Without Install Cells 668 | DESCRIPTION: This set of commands executes LangGraph notebooks while optionally commenting out `%pip install` cells. The `prepare_notebooks_for_ci.py` script, when run with the `--comment-install-cells` flag, modifies the notebooks before `execute_notebooks.sh` runs them, which is useful for environments where dependencies are already managed. 669 | 670 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/README.md#_snippet_3 671 | 672 | LANGUAGE: bash 673 | CODE: 674 | ``` 675 | python _scripts/prepare_notebooks_for_ci.py --comment-install-cells 676 | ./_scripts/execute_notebooks.sh 677 | ``` 678 | 679 | ---------------------------------------- 680 | 681 | TITLE: Install LangGraph and LangSmith Dependencies 682 | DESCRIPTION: This section provides commands to install the necessary `langgraph` and `langsmith` packages. It covers installation for Python environments using `pip` and for JavaScript environments using various package managers like `npm`, `yarn`, `pnpm`, and `bun`. 
683 | 684 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/get-started/1-build-basic-chatbot.md#_snippet_0 685 | 686 | LANGUAGE: bash 687 | CODE: 688 | ``` 689 | pip install -U langgraph langsmith 690 | ``` 691 | 692 | LANGUAGE: bash 693 | CODE: 694 | ``` 695 | npm install @langchain/langgraph @langchain/core zod 696 | ``` 697 | 698 | LANGUAGE: bash 699 | CODE: 700 | ``` 701 | yarn add @langchain/langgraph @langchain/core zod 702 | ``` 703 | 704 | LANGUAGE: bash 705 | CODE: 706 | ``` 707 | pnpm add @langchain/langgraph @langchain/core zod 708 | ``` 709 | 710 | LANGUAGE: bash 711 | CODE: 712 | ``` 713 | bun add @langchain/langgraph @langchain/core zod 714 | ``` 715 | 716 | ---------------------------------------- 717 | 718 | TITLE: Install LangGraph Library for Python and JavaScript 719 | DESCRIPTION: Provides commands to install the LangGraph library. For Python environments, use `pip`, and for JavaScript/TypeScript projects, use `npm`. 720 | 721 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/graph-api.md#_snippet_0 722 | 723 | LANGUAGE: bash 724 | CODE: 725 | ``` 726 | pip install -U langgraph 727 | ``` 728 | 729 | LANGUAGE: bash 730 | CODE: 731 | ``` 732 | npm install @langchain/langgraph 733 | ``` 734 | 735 | ---------------------------------------- 736 | 737 | TITLE: Initialize Graph State for `Command` Example in Python 738 | DESCRIPTION: Sets up the initial graph state definition using `TypedDict` for an end-to-end example demonstrating the `Command` object in LangGraph. This defines the structure of the state that nodes will interact with, ensuring type safety and clarity. 739 | 740 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/graph-api.md#_snippet_75 741 | 742 | LANGUAGE: python 743 | CODE: 744 | ``` 745 | import random 746 | from typing_extensions import TypedDict, Literal 747 | from langgraph.graph import StateGraph, START 748 | from langgraph.types import Command 749 | 750 | # Define graph state 751 | class State(TypedDict): 752 | foo: str 753 | ``` 754 | 755 | ---------------------------------------- 756 | 757 | TITLE: Install LangGraph CLI 758 | DESCRIPTION: Instructions for installing the LangGraph CLI package using pip for Python or npm/npx for JavaScript, along with a prerequisite check for Docker. 759 | 760 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/cloud/reference/cli.md#_snippet_0 761 | 762 | LANGUAGE: Bash 763 | CODE: 764 | ``` 765 | pip install langgraph-cli 766 | ``` 767 | 768 | LANGUAGE: Bash 769 | CODE: 770 | ``` 771 | npx @langchain/langgraph-cli 772 | 773 | # Install globally, will be available as `langgraphjs` 774 | npm install -g @langchain/langgraph-cli 775 | ``` 776 | 777 | ---------------------------------------- 778 | 779 | TITLE: Serve LangGraph Documentation Locally 780 | DESCRIPTION: This `make` command initiates a local web server to host the LangGraph documentation. Once executed, the documentation becomes accessible in a web browser, typically at `http://127.0.0.1:8000/langgraph/`, allowing for real-time preview of changes. 781 | 782 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/README.md#_snippet_1 783 | 784 | LANGUAGE: bash 785 | CODE: 786 | ``` 787 | make serve-docs 788 | ``` 789 | 790 | ---------------------------------------- 791 | 792 | TITLE: TypeScript LangGraph Postgres Checkpointer Setup 793 | DESCRIPTION: Shows the initial TypeScript setup for a LangGraph application with `PostgresSaver`. 
It includes importing necessary modules, configuring the database URI, and creating a checkpointer instance for state persistence. Note that `checkpointer.setup()` should be called once for database initialization. 794 | 795 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/memory/add-memory.md#_snippet_6 796 | 797 | LANGUAGE: typescript 798 | CODE: 799 | ``` 800 | import { ChatAnthropic } from "@langchain/anthropic"; 801 | import { StateGraph, MessagesZodState, START } from "@langchain/langgraph"; 802 | import { PostgresSaver } from "@langchain/langgraph-checkpoint-postgres"; 803 | 804 | const model = new ChatAnthropic({ model: "claude-3-5-haiku-20241022" }); 805 | 806 | const DB_URI = "postgresql://postgres:postgres@localhost:5442/postgres?sslmode=disable"; 807 | const checkpointer = PostgresSaver.fromConnString(DB_URI); 808 | // await checkpointer.setup(); 809 | ``` 810 | 811 | ---------------------------------------- 812 | 813 | TITLE: Illustrate LangGraph Project Directory Structure 814 | DESCRIPTION: This example shows a recommended directory structure for a LangGraph project. It organizes project code within a `my_agent` directory, separating utilities, tools, nodes, and state definitions. This structure helps manage dependencies and maintain a clean codebase for your graph application. 815 | 816 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/cloud/deployment/setup.md#_snippet_6 817 | 818 | LANGUAGE: bash 819 | CODE: 820 | ``` 821 | my-app/ 822 | ├── my_agent # all project code lies within here 823 | │ ├── utils # utilities for your graph 824 | │ │ ├── __init__.py 825 | │ │ ├── tools.py # tools for your graph 826 | │ │ ├── nodes.py # node functions for you graph 827 | │ │ └── state.py # state definition of your graph 828 | │ ├── requirements.txt # package dependencies 829 | │ ├── __init__.py 830 | │ └── agent.py # code for constructing your graph 831 | └── .env # environment variables 832 | ``` 833 | 834 | ---------------------------------------- 835 | 836 | TITLE: Build and Serve LangGraph Documentation Locally 837 | DESCRIPTION: This `make` command compiles the LangGraph documentation and starts a local web server. It allows developers to preview their changes in a browser, verifying the appearance and functionality of the documentation before committing. 838 | 839 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/CONTRIBUTING.md#_snippet_2 840 | 841 | LANGUAGE: bash 842 | CODE: 843 | ``` 844 | make serve-docs 845 | ``` 846 | 847 | ---------------------------------------- 848 | 849 | TITLE: Install Required Python Packages 850 | DESCRIPTION: This command installs the necessary Python libraries, `autogen` and `langgraph`, using pip. These packages are essential for running the multi-agent integration examples provided in the guide. 851 | 852 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/how-tos/autogen-integration-functional.ipynb#_snippet_1 853 | 854 | LANGUAGE: python 855 | CODE: 856 | ``` 857 | %pip install autogen langgraph 858 | ``` 859 | 860 | ---------------------------------------- 861 | 862 | TITLE: Install local LangGraph application dependencies 863 | DESCRIPTION: Navigate into your new LangGraph application directory and install its required dependencies. The Python command installs in editable mode for local development. 
864 | 865 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/langgraph-platform/local-server.md#_snippet_2 866 | 867 | LANGUAGE: shell 868 | CODE: 869 | ``` 870 | cd path/to/your/app 871 | pip install -e . 872 | ``` 873 | 874 | LANGUAGE: shell 875 | CODE: 876 | ``` 877 | cd path/to/your/app 878 | npm install 879 | ``` 880 | 881 | ---------------------------------------- 882 | 883 | TITLE: Set up and start an Express.js server for MCP 884 | DESCRIPTION: This snippet demonstrates how to configure and start an Express.js server. It includes an example of handling an unknown tool error, defining a POST route for '/mcp' to establish an SSE connection using `SSEServerTransport`, and listening on a specified port. This setup is typical for a backend service interacting with the Model Context Protocol. 885 | 886 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/agents/mcp.md#_snippet_8 887 | 888 | LANGUAGE: JavaScript 889 | CODE: 890 | ``` 891 | throw new Error(`Unknown tool: ${request.params.name}`); 892 | } 893 | }); 894 | 895 | app.post("/mcp", async (req, res) => { 896 | const transport = new SSEServerTransport("/mcp", res); 897 | await server.connect(transport); 898 | }); 899 | 900 | const PORT = process.env.PORT || 8000; 901 | app.listen(PORT, () => { 902 | console.log(`Weather MCP server running on port ${PORT}`); 903 | }); 904 | ``` 905 | 906 | ---------------------------------------- 907 | 908 | TITLE: Install LangGraph CLI for Python and JavaScript 909 | DESCRIPTION: Install the LangGraph command-line interface. Python users need Python 3.11+ and can use `pip`, while JavaScript users can use `npx`. 910 | 911 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/langgraph-platform/local-server.md#_snippet_0 912 | 913 | LANGUAGE: shell 914 | CODE: 915 | ``` 916 | pip install --upgrade "langgraph-cli[inmem]" 917 | ``` 918 | 919 | LANGUAGE: shell 920 | CODE: 921 | ``` 922 | npx @langchain/langgraph-cli 923 | ``` 924 | 925 | ---------------------------------------- 926 | 927 | TITLE: Example `.env` File for LangGraph Environment Variables 928 | DESCRIPTION: This example shows a `.env` file used to define environment variables for a LangGraph application. It includes generic variables like `MY_ENV_VAR_1`, `MY_ENV_VAR_2`, and a sensitive API key like `OPENAI_API_KEY`. 929 | 930 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/cloud/deployment/setup.md#_snippet_3 931 | 932 | LANGUAGE: text 933 | CODE: 934 | ``` 935 | MY_ENV_VAR_1=foo 936 | MY_ENV_VAR_2=bar 937 | OPENAI_API_KEY=key 938 | ``` 939 | 940 | ---------------------------------------- 941 | 942 | TITLE: Install required Python packages for LangGraph and LangChain 943 | DESCRIPTION: Installs `langgraph`, `langchain[openai]`, `langchain-community`, and `langchain-text-splitters` using pip. The `%%capture --no-stderr` magic command suppresses output, and `%pip install` is used for Jupyter/IPython environments. 
944 | 945 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/tutorials/rag/langgraph_agentic_rag.md#_snippet_0 946 | 947 | LANGUAGE: python 948 | CODE: 949 | ``` 950 | %%capture --no-stderr 951 | %pip install -U --quiet langgraph "langchain[openai]" langchain-community langchain-text-splitters 952 | ``` 953 | 954 | ---------------------------------------- 955 | 956 | TITLE: Install LangGraph CLI 957 | DESCRIPTION: Instructions for installing the LangGraph command-line interface using pip, including a standard installation and a development mode installation with in-memory features. 958 | 959 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/libs/cli/README.md#_snippet_0 960 | 961 | LANGUAGE: bash 962 | CODE: 963 | ``` 964 | pip install langgraph-cli 965 | ``` 966 | 967 | LANGUAGE: bash 968 | CODE: 969 | ``` 970 | pip install "langgraph-cli[inmem]" 971 | ``` 972 | 973 | ---------------------------------------- 974 | 975 | TITLE: Configure LLM parameters for LangGraph agents 976 | DESCRIPTION: This snippet demonstrates how to configure specific parameters, such as temperature, for the Language Model (LLM) used by a LangGraph agent. It shows examples for both Python (using `init_chat_model` from `langchain.chat_models`) and JavaScript/TypeScript (using a model instance like `ChatAnthropic`). The configured model is then passed to the agent creation function. 977 | 978 | SOURCE: https://github.com/langchain-ai/langgraph/blob/main/docs/docs/agents/agents.md#_snippet_2 979 | 980 | LANGUAGE: python 981 | CODE: 982 | ``` 983 | from langchain.chat_models import init_chat_model 984 | from langgraph.prebuilt import create_react_agent 985 | 986 | model = init_chat_model( 987 | "anthropic:claude-3-7-sonnet-latest", 988 | temperature=0 989 | ) 990 | 991 | agent = create_react_agent( 992 | model=model, 993 | tools=[get_weather], 994 | ) 995 | ``` 996 | 997 | LANGUAGE: typescript 998 | CODE: 999 | ``` 1000 | import { ChatAnthropic } from "@langchain/anthropic"; 1001 | import { createReactAgent } from "@langchain/langgraph/prebuilt"; 1002 | 1003 | const model = new ChatAnthropic({ 1004 | model: "claude-3-5-sonnet-latest", 1005 | temperature: 0, 1006 | }); 1007 | 1008 | const agent = createReactAgent({ 1009 | llm: model, 1010 | tools: [getWeather], 1011 | }); 1012 | ``` 1013 | 1014 | ---------------------------------------- --------------------------------------------------------------------------------