├── .codespellignore ├── static ├── config.png ├── studio.png ├── overview.png └── studio_example.png ├── langgraph.json ├── .gitignore ├── .env.example ├── jest.config.js ├── tests ├── configuration.test.ts ├── agent.test.ts └── agent.int.test.ts ├── tsconfig.json ├── src └── enrichment_agent │ ├── prompts.ts │ ├── utils.ts │ ├── configuration.ts │ ├── state.ts │ ├── tools.ts │ └── graph.ts ├── LICENSE ├── .github └── workflows │ ├── integration-tests.yml │ └── unit-tests.yml ├── .eslintrc.cjs ├── package.json ├── scripts └── checkLanggraphPaths.js └── README.md /.codespellignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /static/config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/data-enrichment-js/HEAD/static/config.png -------------------------------------------------------------------------------- /static/studio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/data-enrichment-js/HEAD/static/studio.png -------------------------------------------------------------------------------- /static/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/data-enrichment-js/HEAD/static/overview.png -------------------------------------------------------------------------------- /static/studio_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/data-enrichment-js/HEAD/static/studio_example.png -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "node_version": "20", 3 | "dockerfile_lines": [], 4 | "dependencies": ["."], 5 | "graphs": { 6 | "agent": "./src/enrichment_agent/graph.ts:graph" 7 | }, 8 | "env": ".env" 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | index.cjs 2 | index.js 3 | index.d.ts 4 | node_modules 5 | dist 6 | .yarn/* 7 | !.yarn/patches 8 | !.yarn/plugins 9 | !.yarn/releases 10 | !.yarn/sdks 11 | !.yarn/versions 12 | 13 | .turbo 14 | **/.turbo 15 | **/.eslintcache 16 | 17 | .env 18 | .ipynb_checkpoints 19 | 20 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | TAVILY_API_KEY=... 2 | 3 | # To separate your traces from other application 4 | LANGCHAIN_PROJECT=data-enrichment 5 | # LANGCHAIN_API_KEY=... 6 | # LANGCHAIN_TRACING_V2=true 7 | 8 | # The following depend on your selected configuration 9 | 10 | ## LLM choice: 11 | ANTHROPIC_API_KEY=.... 12 | FIREWORKS_API_KEY=... 13 | OPENAI_API_KEY=... 
14 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | preset: "ts-jest/presets/default-esm", 3 | moduleNameMapper: { 4 | "^(\\.{1,2}/.*)\\.js$": "$1", 5 | }, 6 | transform: { 7 | "^.+\\.tsx?$": [ 8 | "ts-jest", 9 | { 10 | useESM: true, 11 | }, 12 | ], 13 | }, 14 | extensionsToTreatAsEsm: [".ts"], 15 | setupFiles: ["dotenv/config"], 16 | passWithNoTests: true, 17 | testTimeout: 20_000, 18 | }; 19 | -------------------------------------------------------------------------------- /tests/configuration.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "@jest/globals"; 2 | import { ensureConfiguration } from "../src/enrichment_agent/configuration.js"; 3 | 4 | describe("Configuration", () => { 5 | it("should initialize configuration from an empty object", () => { 6 | const emptyConfig = {}; 7 | const result = ensureConfiguration(emptyConfig); 8 | expect(result).toBeDefined(); 9 | expect(typeof result).toBe("object"); 10 | }); 11 | }); 12 | -------------------------------------------------------------------------------- /tests/agent.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "@jest/globals"; 2 | import { graph } from "../src/enrichment_agent/graph.js"; 3 | 4 | describe("Web Research Agent", () => { 5 | beforeAll(() => { 6 | process.env.TAVILY_API_KEY = "dummy"; 7 | }); 8 | 9 | it("should initialize and compile the graph", () => { 10 | expect(graph).toBeDefined(); 11 | expect(graph.name).toBe("ResearchTopic"); 12 | }); 13 | 14 | // TODO: Add more test cases for individual nodes, routing logic, tool integration, and output validation 15 | }); 16 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@tsconfig/recommended", 3 | "compilerOptions": { 4 | "target": "ES2021", 5 | "lib": ["ES2021", "ES2022.Object", "DOM"], 6 | "module": "NodeNext", 7 | "moduleResolution": "nodenext", 8 | "esModuleInterop": true, 9 | "declaration": true, 10 | "noImplicitReturns": true, 11 | "noFallthroughCasesInSwitch": true, 12 | "noUnusedLocals": true, 13 | "noUnusedParameters": true, 14 | "useDefineForClassFields": true, 15 | "strictPropertyInitialization": false, 16 | "allowJs": true, 17 | "strict": true, 18 | "outDir": "dist", 19 | "types": ["jest", "node"], 20 | "resolveJsonModule": true 21 | }, 22 | "include": ["**/*.ts", "**/*.js"], 23 | "exclude": ["node_modules", "dist"] 24 | } 25 | -------------------------------------------------------------------------------- /src/enrichment_agent/prompts.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Main prompt template for the AI agent. 3 | * This prompt guides the AI in conducting the research and using the available tools. 4 | */ 5 | export const MAIN_PROMPT = `You are doing web research on behalf of a user. You are trying to figure out this information: 6 | 7 | <info> 8 | {info} 9 | </info> 10 | 11 | You have access to the following tools: 12 | 13 | - \`Search\`: call a search tool and get back some results 14 | - \`ScrapeWebsite\`: scrape a website and get relevant notes about the given request. This will update the notes above.
15 | - \`Info\`: call this when you are done and have gathered all the relevant info 16 | 17 | Here is the information you have about the topic you are researching: 18 | 19 | Topic: {topic}`; 20 | 21 | export const INFO_PROMPT = `You are doing web research on behalf of a user. You are trying to find out this information: 22 | 23 | <info> 24 | {info} 25 | </info> 26 | 27 | You just scraped the following website: {url} 28 | 29 | Based on the website content below, jot down some notes about the website. 30 | 31 | {content}`; 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LangChain 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/integration-tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run integration tests for the current project once per day 2 | 3 | name: Integration Tests 4 | 5 | on: 6 | schedule: 7 | - cron: "37 14 * * *" # Run at 7:37 AM Pacific Time (14:37 UTC) every day 8 | workflow_dispatch: # Allows triggering the workflow manually in GitHub UI 9 | 10 | # If another scheduled run starts while this workflow is still running, 11 | # cancel the earlier run in favor of the next run.
12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | integration-tests: 18 | name: Integration Tests 19 | strategy: 20 | matrix: 21 | os: [ubuntu-latest] 22 | node-version: [20.x] 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v4 26 | - name: Use Node.js ${{ matrix.node-version }} 27 | uses: actions/setup-node@v3 28 | with: 29 | node-version: ${{ matrix.node-version }} 30 | cache: "yarn" 31 | - name: Install dependencies 32 | run: yarn install --immutable 33 | - name: Build project 34 | run: yarn build 35 | - name: Run integration tests 36 | env: 37 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 38 | TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }} 39 | run: yarn test:int 40 | -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run unit tests for the current project 2 | 3 | name: CI 4 | 5 | on: 6 | push: 7 | branches: ["main"] 8 | pull_request: 9 | workflow_dispatch: # Allows triggering the workflow manually in GitHub UI 10 | 11 | # If another push to the same PR or branch happens while this workflow is still running, 12 | # cancel the earlier run in favor of the next run. 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | unit-tests: 19 | name: Unit Tests 20 | strategy: 21 | matrix: 22 | os: [ubuntu-latest] 23 | node-version: [18.x, 20.x] 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Use Node.js ${{ matrix.node-version }} 28 | uses: actions/setup-node@v3 29 | with: 30 | node-version: ${{ matrix.node-version }} 31 | cache: "yarn" 32 | - name: Install dependencies 33 | run: yarn install --immutable 34 | - name: Build project 35 | run: yarn build 36 | 37 | - name: Lint project 38 | run: yarn lint:all 39 | 40 | - name: Check README spelling 41 | uses: codespell-project/actions-codespell@v2 42 | with: 43 | ignore_words_file: .codespellignore 44 | path: README.md 45 | 46 | - name: Check code spelling 47 | uses: codespell-project/actions-codespell@v2 48 | with: 49 | ignore_words_file: .codespellignore 50 | path: src/ 51 | 52 | - name: Run tests 53 | env: 54 | ANTHROPIC_API_KEY: afakekey 55 | TAVILY_API_KEY: anotherfakekey 56 | run: yarn test 57 | -------------------------------------------------------------------------------- /src/enrichment_agent/utils.ts: -------------------------------------------------------------------------------- 1 | import { BaseChatModel } from "@langchain/core/language_models/chat_models"; 2 | import { 3 | MessageContent, 4 | MessageContentComplex, 5 | } from "@langchain/core/messages"; 6 | import { initChatModel } from "langchain/chat_models/universal"; 7 | 8 | /** 9 | * Helper function to extract text content from a complex message. 10 | * 11 | * @param content - The complex message content to process 12 | * @returns The extracted text content 13 | */ 14 | function getSingleTextContent(content: MessageContentComplex) { 15 | if (content?.type === "text") { 16 | return content.text; 17 | } else if (content.type === "array") { 18 | return content.content.map(getSingleTextContent).join(" "); 19 | } 20 | return ""; 21 | } 22 | 23 | /** 24 | * Helper function to extract text content from various message types. 
25 | * 26 | * @param content - The message content to process 27 | * @returns The extracted text content 28 | */ 29 | export function getTextContent(content: MessageContent): string { 30 | if (typeof content === "string") { 31 | return content; 32 | } else if (Array.isArray(content)) { 33 | return content.map(getSingleTextContent).join(" "); 34 | } 35 | return ""; 36 | } 37 | 38 | /** 39 | * Load a chat model from a fully specified name. 40 | * @param fullySpecifiedName - String in the format 'provider/model' or 'provider/account/provider/model'. 41 | * @returns A Promise that resolves to a BaseChatModel instance. 42 | */ 43 | export async function loadChatModel( 44 | fullySpecifiedName: string, 45 | ): Promise<BaseChatModel> { 46 | const index = fullySpecifiedName.indexOf("/"); 47 | if (index === -1) { 48 | // If there's no "/", assume it's just the model 49 | return await initChatModel(fullySpecifiedName); 50 | } else { 51 | const provider = fullySpecifiedName.slice(0, index); 52 | const model = fullySpecifiedName.slice(index + 1); 53 | return await initChatModel(model, { modelProvider: provider }); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: [ 3 | "eslint:recommended", 4 | "prettier", 5 | "plugin:@typescript-eslint/recommended", 6 | ], 7 | parserOptions: { 8 | ecmaVersion: 12, 9 | parser: "@typescript-eslint/parser", 10 | project: "./tsconfig.json", 11 | sourceType: "module", 12 | }, 13 | plugins: ["import", "@typescript-eslint", "no-instanceof"], 14 | ignorePatterns: [ 15 | ".eslintrc.cjs", 16 | "scripts", 17 | "node_modules", 18 | "dist", 19 | "dist-cjs", 20 | "*.js", 21 | "*.cjs", 22 | "*.d.ts", 23 | ], 24 | rules: { 25 | "no-process-env": 0, 26 | "no-instanceof/no-instanceof": 2, 27 | "@typescript-eslint/explicit-module-boundary-types": 0, 28 | "@typescript-eslint/no-empty-function": 0, 29 | "@typescript-eslint/no-non-null-assertion": 0, 30 | "@typescript-eslint/no-shadow": 0, 31 | "@typescript-eslint/no-empty-interface": 0, 32 | "@typescript-eslint/no-use-before-define": ["error", "nofunc"], 33 | "@typescript-eslint/no-unused-vars": ["warn", { args: "none" }], 34 | "@typescript-eslint/no-floating-promises": "error", 35 | "@typescript-eslint/no-misused-promises": "error", 36 | camelcase: 0, 37 | "class-methods-use-this": 0, 38 | "import/extensions": [2, "ignorePackages"], 39 | "import/no-extraneous-dependencies": [ 40 | "error", 41 | { devDependencies: ["**/*.test.ts"] }, 42 | ], 43 | "import/no-unresolved": 0, 44 | "import/prefer-default-export": 0, 45 | "keyword-spacing": "error", 46 | "max-classes-per-file": 0, 47 | "max-len": 0, 48 | "no-await-in-loop": 0, 49 | "no-bitwise": 0, 50 | "no-console": 0, 51 | "no-restricted-syntax": 0, 52 | "no-shadow": 0, 53 | "no-continue": 0, 54 | "no-underscore-dangle": 0, 55 | "no-use-before-define": 0, 56 | "no-useless-constructor": 0, 57 | "no-return-await": 0, 58 | "consistent-return": 0, 59 | "no-else-return": 0, 60 | "new-cap": ["error", { properties: false, capIsNew: false }], 61 | }, 62 | }; 63 | -------------------------------------------------------------------------------- /src/enrichment_agent/configuration.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Define the configurable parameters for the agent.
3 | */ 4 | 5 | import { RunnableConfig } from "@langchain/core/runnables"; 6 | import { Annotation } from "@langchain/langgraph"; 7 | import { MAIN_PROMPT } from "./prompts.js"; 8 | 9 | /** 10 | * The complete configuration for the agent. 11 | */ 12 | export const ConfigurationAnnotation = Annotation.Root({ 13 | /** 14 | * The name of the language model to use for the agent. 15 | * 16 | * Should be in the form: provider/model-name. 17 | */ 18 | model: Annotation<string>, 19 | 20 | /** 21 | * The main prompt template to use for the agent's interactions. 22 | * 23 | * Expects two placeholders: {info} and {topic}. 24 | */ 25 | prompt: Annotation<string>, 26 | 27 | /** 28 | * The maximum number of search results to return for each search query. 29 | */ 30 | maxSearchResults: Annotation<number>, 31 | 32 | /** 33 | * The maximum number of times the Info tool can be called during a single interaction. 34 | */ 35 | maxInfoToolCalls: Annotation<number>, 36 | 37 | /** 38 | * The maximum number of interaction loops allowed before the agent terminates. 39 | */ 40 | maxLoops: Annotation<number>, 41 | }); 42 | 43 | /** 44 | * Create a typeof ConfigurationAnnotation.State instance from a RunnableConfig object. 45 | * 46 | * @param config - The configuration object to use. 47 | * @returns An instance of typeof ConfigurationAnnotation.State with the specified configuration. 48 | */ 49 | export function ensureConfiguration( 50 | config?: RunnableConfig, 51 | ): typeof ConfigurationAnnotation.State { 52 | const configurable = (config?.configurable ?? {}) as Partial< 53 | typeof ConfigurationAnnotation.State 54 | >; 55 | 56 | return { 57 | model: configurable.model ?? "anthropic/claude-3-5-sonnet-20240620", 58 | prompt: configurable.prompt ?? MAIN_PROMPT, 59 | maxSearchResults: configurable.maxSearchResults ?? 5, 60 | maxInfoToolCalls: configurable.maxInfoToolCalls ?? 3, 61 | maxLoops: configurable.maxLoops ??
6, 62 | }; 63 | } 64 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "data-enrichment", 3 | "version": "0.0.1", 4 | "description": "A starter template for building a research agent that uses a web search tool to populate a user-provided schema.", 5 | "main": "src/enrichment_agent/graph.ts", 6 | "author": "William Fu-Hinthorn", 7 | "license": "MIT", 8 | "private": true, 9 | "type": "module", 10 | "packageManager": "yarn@1.22.22", 11 | "scripts": { 12 | "build": "tsc", 13 | "clean": "rm -rf dist", 14 | "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern=\\.test\\.ts$ --testPathIgnorePatterns=\\.int\\.test\\.ts$", 15 | "test:int": "node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern=\\.int\\.test\\.ts$", 16 | "format": "prettier --write .", 17 | "lint": "eslint src", 18 | "format:check": "prettier --check .", 19 | "lint:langgraph-json": "node scripts/checkLanggraphPaths.js", 20 | "lint:all": "yarn lint && yarn lint:langgraph-json && yarn format:check", 21 | "test:all": "yarn test && yarn test:int && yarn lint:langgraph-json" 22 | }, 23 | "dependencies": { 24 | "@langchain/anthropic": "^0.3.21", 25 | "@langchain/community": "^0.3.45", 26 | "@langchain/core": "^0.3.57", 27 | "@langchain/langgraph": "^0.3.0", 28 | "langchain": "^0.3.27", 29 | "langsmith": "^0.3.30", 30 | "ts-node": "^10.9.2", 31 | "zod": "^3.23.8" 32 | }, 33 | "devDependencies": { 34 | "@eslint/eslintrc": "^3.1.0", 35 | "@eslint/js": "^9.9.1", 36 | "@jest/globals": "^29.7.0", 37 | "@tsconfig/recommended": "^1.0.7", 38 | "@types/jest": "^29.5.0", 39 | "@types/node": "^20.14.8", 40 | "@typescript-eslint/eslint-plugin": "^5.59.8", 41 | "@typescript-eslint/parser": "^5.59.8", 42 | "dotenv": "^16.4.5", 43 | "eslint": "^8.41.0", 44 | "eslint-config-prettier": "^8.8.0", 45 | "eslint-plugin-import": "^2.27.5", 46 | "eslint-plugin-no-instanceof": "^1.0.1", 47 | "eslint-plugin-prettier": "^4.2.1", 48 | "jest": "^29.7.0", 49 | "prettier": "^3.3.3", 50 | "ts-jest": "^29.1.0", 51 | "typescript": "^5.3.3" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /scripts/checkLanggraphPaths.js: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { fileURLToPath } from "url"; 4 | 5 | // Function to check if a file exists 6 | function fileExists(filePath) { 7 | return fs.existsSync(filePath); 8 | } 9 | 10 | // Function to check if an object is exported from a file 11 | function isObjectExported(filePath, objectName) { 12 | try { 13 | const fileContent = fs.readFileSync(filePath, "utf8"); 14 | const exportRegex = new RegExp( 15 | `export\\s+(?:const|let|var)\\s+${objectName}\\s*=|export\\s+\\{[^}]*\\b${objectName}\\b[^}]*\\}`, 16 | ); 17 | return exportRegex.test(fileContent); 18 | } catch (error) { 19 | console.error(`Error reading file ${filePath}: ${error.message}`); 20 | return false; 21 | } 22 | } 23 | 24 | // Main function to check langgraph.json 25 | function checkLanggraphPaths() { 26 | const __filename = fileURLToPath(import.meta.url); 27 | const __dirname = path.dirname(__filename); 28 | const langgraphPath = path.join(__dirname, "..", "langgraph.json"); 29 | 30 | if (!fileExists(langgraphPath)) { 31 | console.error("langgraph.json not found in the root directory"); 32 | process.exit(1); 33 | } 34 |
35 | try { 36 | const langgraphContent = JSON.parse(fs.readFileSync(langgraphPath, "utf8")); 37 | const graphs = langgraphContent.graphs; 38 | 39 | if (!graphs || typeof graphs !== "object") { 40 | console.error('Invalid or missing "graphs" object in langgraph.json'); 41 | process.exit(1); 42 | } 43 | 44 | let hasError = false; 45 | 46 | for (const [key, value] of Object.entries(graphs)) { 47 | const [filePath, objectName] = value.split(":"); 48 | const fullPath = path.join(__dirname, "..", filePath); 49 | 50 | if (!fileExists(fullPath)) { 51 | console.error(`File not found: ${fullPath}`); 52 | hasError = true; 53 | continue; 54 | } 55 | 56 | if (!isObjectExported(fullPath, objectName)) { 57 | console.error( 58 | `Object "${objectName}" is not exported from ${fullPath}`, 59 | ); 60 | hasError = true; 61 | } 62 | } 63 | 64 | if (hasError) { 65 | process.exit(1); 66 | } else { 67 | console.log( 68 | "All paths in langgraph.json are valid and objects are exported correctly.", 69 | ); 70 | } 71 | } catch (error) { 72 | console.error(`Error parsing langgraph.json: ${error.message}`); 73 | process.exit(1); 74 | } 75 | } 76 | 77 | checkLanggraphPaths(); 78 | -------------------------------------------------------------------------------- /src/enrichment_agent/state.ts: -------------------------------------------------------------------------------- 1 | import { Annotation, messagesStateReducer } from "@langchain/langgraph"; 2 | import { type BaseMessage } from "@langchain/core/messages"; 3 | 4 | // eslint-disable-next-line 5 | export type AnyRecord = Record<string, any>; 6 | 7 | export const InputStateAnnotation = Annotation.Root({ 8 | topic: Annotation<string>, 9 | /** 10 | * The info state tracks the current extracted data for the given topic, 11 | * conforming to the provided schema. 12 | */ 13 | info: Annotation<AnyRecord>, 14 | /** 15 | * The schema defines the information the agent is tasked with filling out. 16 | */ 17 | extractionSchema: Annotation<AnyRecord>, 18 | // Feel free to add additional attributes to your state as needed. 19 | // Common examples include retrieved documents, extracted entities, API connections, etc. 20 | }); 21 | /** 22 | * A graph's StateAnnotation defines three main things: 23 | * 1. The structure of the data to be passed between nodes (which "channels" to read from/write to and their types) 24 | * 2. Default values for each field 25 | * 3. Reducers for the state's fields. Reducers are functions that determine how to apply updates to the state. 26 | * See [Reducers](https://langchain-ai.github.io/langgraphjs/concepts/low_level/#reducers) for more information. 27 | */ 28 | 29 | export const StateAnnotation = Annotation.Root({ 30 | /** 31 | * Messages track the primary execution state of the agent. 32 | * 33 | * Typically accumulates a pattern of: 34 | * 35 | * 1. HumanMessage - user input 36 | * 2. AIMessage with .tool_calls - agent picking tool(s) to use to collect 37 | * information 38 | * 3. ToolMessage(s) - the responses (or errors) from the executed tools 39 | * 40 | * (... repeat steps 2 and 3 as needed ...) 41 | * 4. AIMessage without .tool_calls - agent responding in unstructured 42 | * format to the user. 43 | * 44 | * 5. HumanMessage - user responds with the next conversational turn. 45 | * 46 | * (... repeat steps 2-5 as needed ... ) 47 | * 48 | * Merges two lists of messages, updating existing messages by ID. 49 | * 50 | * By default, this ensures the state is "append-only", unless the 51 | * new message has the same ID as an existing message.
52 | * 53 | * Returns: 54 | * A new list of messages with the messages from \`right\` merged into \`left\`. 55 | * If a message in \`right\` has the same ID as a message in \`left\`, the 56 | * message from \`right\` will replace the message from \`left\`. 57 | */ 58 | messages: Annotation<BaseMessage[]>({ 59 | reducer: messagesStateReducer, 60 | default: () => [], 61 | }), 62 | 63 | topic: Annotation<string>, 64 | /** 65 | * The info state tracks the current extracted data for the given topic, 66 | * conforming to the provided schema. 67 | */ 68 | info: Annotation<AnyRecord>, 69 | 70 | /** 71 | * The schema defines the information the agent is tasked with filling out. 72 | */ 73 | extractionSchema: Annotation<AnyRecord>, 74 | 75 | /** 76 | * Tracks the number of iterations the agent has gone through in the current session. 77 | * This can be used to limit the number of iterations or to track progress. 78 | */ 79 | loopStep: Annotation<number>({ 80 | reducer: (left: number, right: number) => left + right, 81 | default: () => 0, 82 | }), 83 | // Feel free to add additional attributes to your state as needed. 84 | // Common examples include retrieved documents, extracted entities, API connections, etc. 85 | }); 86 | -------------------------------------------------------------------------------- /tests/agent.int.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "@jest/globals"; 2 | import { graph } from "../src/enrichment_agent/graph.js"; 3 | 4 | describe("Researcher", () => { 5 | it("should initialize and compile the graph", () => { 6 | expect(graph).toBeDefined(); 7 | expect(graph.name).toBe("ResearchTopic"); 8 | }); 9 | 10 | const extractionSchema = { 11 | type: "object", 12 | properties: { 13 | founder: { 14 | type: "string", 15 | description: "The name of the company founder.", 16 | }, 17 | websiteUrl: { 18 | type: "string", 19 | description: 20 | "Website URL of the company, e.g.: https://openai.com/, or https://microsoft.com", 21 | }, 22 | products_sold: { 23 | type: "array", 24 | items: { type: "string" }, 25 | description: "A list of products sold by the company.", 26 | }, 27 | }, 28 | required: ["founder", "websiteUrl", "products_sold"], 29 | }; 30 | 31 | it("Simple runthrough", async () => { 32 | const res = await graph.invoke({ 33 | topic: "LangChain", 34 | extractionSchema: extractionSchema, 35 | }); 36 | 37 | expect(res.info).toBeDefined(); 38 | expect(res.info.founder.toLowerCase()).toContain("harrison"); 39 | }, 100_000); 40 | 41 | const arrayExtractionSchema = { 42 | type: "object", 43 | properties: { 44 | providers: { 45 | type: "array", 46 | items: { 47 | type: "object", 48 | properties: { 49 | name: { type: "string", description: "Company name" }, 50 | technology_summary: { 51 | type: "string", 52 | description: 53 | "Brief summary of their chip technology for LLM training", 54 | }, 55 | current_market_share: { 56 | type: "string", 57 | description: 58 | "Estimated current market share percentage or position", 59 | }, 60 | future_outlook: { 61 | type: "string", 62 | description: 63 | "Brief paragraph on future prospects and developments", 64 | }, 65 | }, 66 | required: [ 67 | "name", 68 | "technology_summary", 69 | "current_market_share", 70 | "future_outlook", 71 | ], 72 | }, 73 | description: "List of top chip providers for LLM Training", 74 | }, 75 | overall_market_trends: { 76 | type: "string", 77 | description: "Brief paragraph on general trends in the LLM chip market", 78 | }, 79 | }, 80 | required: ["providers",
"overall_market_trends"], 81 | }; 82 | 83 | it("Researcher list type", async () => { 84 | const res = await graph.invoke({ 85 | topic: "Top 5 chip providers for LLM training", 86 | extractionSchema: arrayExtractionSchema, 87 | }); 88 | 89 | const info = res.info; 90 | expect(info.providers).toBeDefined(); 91 | expect(Array.isArray(info.providers)).toBe(true); 92 | expect(info.providers.length).toBe(5); 93 | 94 | const nvidiaPresent = info.providers.some( 95 | (provider: { name: string }) => 96 | provider.name.toLowerCase().trim() === "nvidia", 97 | ); 98 | expect(nvidiaPresent).toBe(true); 99 | 100 | info.providers.forEach( 101 | (provider: { 102 | name: any; 103 | technology_summary: any; 104 | current_market_share: any; 105 | future_outlook: any; 106 | }) => { 107 | expect(provider.name).toBeDefined(); 108 | expect(provider.technology_summary).toBeDefined(); 109 | expect(provider.current_market_share).toBeDefined(); 110 | expect(provider.future_outlook).toBeDefined(); 111 | }, 112 | ); 113 | 114 | expect(info.overall_market_trends).toBeDefined(); 115 | expect(typeof info.overall_market_trends).toBe("string"); 116 | expect(info.overall_market_trends.length).toBeGreaterThan(0); 117 | }, 100_000); 118 | }); 119 | -------------------------------------------------------------------------------- /src/enrichment_agent/tools.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Tools for data enrichment. 3 | * 4 | * This module contains functions that are directly exposed to the LLM as tools. 5 | * These tools can be used for tasks such as web searching and scraping. 6 | * Users can edit and extend these tools as needed. 7 | */ 8 | import { TavilySearchResults } from "@langchain/community/tools/tavily_search"; 9 | import { RunnableConfig } from "@langchain/core/runnables"; 10 | import { tool } from "@langchain/core/tools"; 11 | 12 | import { INFO_PROMPT } from "./prompts.js"; 13 | import { ensureConfiguration } from "./configuration.js"; 14 | import { StateAnnotation } from "./state.js"; 15 | import { getTextContent, loadChatModel } from "./utils.js"; 16 | import { 17 | AIMessage, 18 | isBaseMessage, 19 | ToolMessage, 20 | } from "@langchain/core/messages"; 21 | import { z } from "zod"; 22 | 23 | /** 24 | * Initialize tools within a function so that they have access to the current 25 | * state and config at runtime. 26 | */ 27 | function initializeTools( 28 | state?: typeof StateAnnotation.State, 29 | config?: RunnableConfig, 30 | ) { 31 | /** 32 | * Search for general results. 33 | * 34 | * This function performs a search using the Tavily search engine, which is designed 35 | * to provide comprehensive, accurate, and trusted results. It's particularly useful 36 | * for answering questions about current events. 37 | */ 38 | const configuration = ensureConfiguration(config); 39 | const searchTool = new TavilySearchResults({ 40 | maxResults: configuration.maxSearchResults, 41 | }); 42 | 43 | async function scrapeWebsite({ url }: { url: string }): Promise { 44 | /** 45 | * Scrape and summarize content from a given URL. 
46 | */ 47 | const response = await fetch(url); 48 | const content = await response.text(); 49 | const truncatedContent = content.slice(0, 50000); 50 | const p = INFO_PROMPT.replace( 51 | "{info}", 52 | JSON.stringify(state?.extractionSchema, null, 2), 53 | ) 54 | .replace("{url}", url) 55 | .replace("{content}", truncatedContent); 56 | 57 | const rawModel = await loadChatModel(configuration.model); 58 | const result = await rawModel.invoke(p); 59 | return getTextContent(result.content); 60 | } 61 | 62 | const scraperTool = tool(scrapeWebsite, { 63 | name: "scrapeWebsite", 64 | description: "Scrape content from a given website URL", 65 | schema: z.object({ 66 | url: z.string().url().describe("The URL of the website to scrape"), 67 | }), 68 | }); 69 | 70 | return [searchTool, scraperTool]; 71 | } 72 | 73 | export const toolNode = async ( 74 | state: typeof StateAnnotation.State, 75 | config: RunnableConfig, 76 | ) => { 77 | const message = state.messages[state.messages.length - 1]; 78 | // Initialize the tools within the context of the node so that the tools 79 | // have the current state of the graph and the config in scope. 80 | // See: https://js.langchain.com/docs/how_to/tool_runtime 81 | const tools = initializeTools(state, config); 82 | const outputs = await Promise.all( 83 | (message as AIMessage).tool_calls?.map(async (call) => { 84 | const tool = tools.find((tool) => tool.name === call.name); 85 | try { 86 | if (tool === undefined) { 87 | throw new Error(`Tool "${call.name}" not found.`); 88 | } 89 | const newCall = { 90 | ...call, 91 | args: { 92 | __state: state, 93 | ...call.args, 94 | }, 95 | }; 96 | const output = await tool.invoke( 97 | { ...newCall, type: "tool_call" }, 98 | config, 99 | ); 100 | if (isBaseMessage(output) && output._getType() === "tool") { 101 | return output; 102 | } else { 103 | return new ToolMessage({ 104 | name: tool.name, 105 | content: 106 | typeof output === "string" ? output : JSON.stringify(output), 107 | tool_call_id: call.id ?? "", 108 | }); 109 | } 110 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 111 | } catch (e: any) { 112 | return new ToolMessage({ 113 | content: `Error: ${e.message}\n Please fix your mistakes.`, 114 | name: call.name, 115 | tool_call_id: call.id ?? "", 116 | status: "error", 117 | }); 118 | } 119 | }) ?? [], 120 | ); 121 | 122 | return { messages: outputs }; 123 | }; 124 | 125 | // No state or config required here since these are just bound to the chat model 126 | // and are only used to define schema. 127 | // The tool node above will actually call the functions. 128 | export const MODEL_TOOLS = initializeTools(); 129 | -------------------------------------------------------------------------------- /src/enrichment_agent/graph.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Define a data enrichment agent. 3 | * 4 | * Works with a chat model with tool calling support. 
5 | */ 6 | 7 | import { 8 | AIMessage, 9 | BaseMessage, 10 | HumanMessage, 11 | ToolMessage, 12 | } from "@langchain/core/messages"; 13 | import { RunnableConfig } from "@langchain/core/runnables"; 14 | import { tool } from "@langchain/core/tools"; 15 | import { StateGraph } from "@langchain/langgraph"; 16 | import { z } from "zod"; 17 | 18 | import { 19 | ConfigurationAnnotation, 20 | ensureConfiguration, 21 | } from "./configuration.js"; 22 | import { AnyRecord, InputStateAnnotation, StateAnnotation } from "./state.js"; 23 | import { MODEL_TOOLS, toolNode } from "./tools.js"; 24 | import { loadChatModel } from "./utils.js"; 25 | 26 | /** 27 | * Calls the primary Language Model (LLM) to decide on the next research action. 28 | * 29 | * This function performs the following steps: 30 | * 1. Initializes configuration and sets up the 'Info' tool, which is the user-defined extraction schema. 31 | * 2. Prepares the prompt and message history for the LLM. 32 | * 3. Initializes and configures the LLM with available tools. 33 | * 4. Invokes the LLM and processes its response. 34 | * 5. Handles the LLM's decision to either continue research or submit final info. 35 | * 36 | * @param state - The current state of the research process. 37 | * @param config - Optional configuration for the runnable. 38 | * @returns A Promise resolving to an object containing: 39 | * - messages: An array of BaseMessage objects representing the LLM's response. 40 | * - info: An optional AnyRecord containing the extracted information if the LLM decided to submit final info. 41 | * - loopStep: A number indicating the current step in the research loop. 42 | */ 43 | 44 | async function callAgentModel( 45 | state: typeof StateAnnotation.State, 46 | config: RunnableConfig, 47 | ): Promise<typeof StateAnnotation.Update> { 48 | const configuration = ensureConfiguration(config); 49 | // First, define the info tool. This uses the user-provided 50 | // json schema to define the research targets 51 | // We pass an empty function because we will not actually invoke this tool. 52 | // We are just using it for formatting. 53 | const infoTool = tool(async () => {}, { 54 | name: "Info", 55 | description: "Call this when you have gathered all the relevant info", 56 | schema: state.extractionSchema, 57 | }); 58 | // Next, load the model 59 | const rawModel = await loadChatModel(configuration.model); 60 | if (!rawModel.bindTools) { 61 | throw new Error("Chat model does not support tool binding"); 62 | } 63 | const model = rawModel.bindTools([...MODEL_TOOLS, infoTool], { 64 | tool_choice: "any", 65 | }); 66 | 67 | // Format the schema into the configurable system prompt 68 | const p = configuration.prompt 69 | .replace("{info}", JSON.stringify(state.extractionSchema, null, 2)) 70 | .replace("{topic}", state.topic); 71 | const messages = [{ role: "user", content: p }, ...state.messages]; 72 | 73 | // Next, we'll call the model. 74 | const response: AIMessage = await model.invoke(messages); 75 | const responseMessages = [response]; 76 | 77 | // If the model has collected enough information to fill out 78 | // the provided schema, great! It will call the "Info" tool 79 | // We've decided to track this as a separate state variable 80 | let info; 81 | if (response.tool_calls && response.tool_calls.length > 0) { 82 | for (const tool_call of response.tool_calls || []) { 83 | if (tool_call.name === "Info") { 84 | info = tool_call.args; 85 | // If info was called, the agent is submitting a response.
86 | // (it's not actually a function to call, it's a schema to extract) 87 | // To ensure that the graph doesn't end up in an invalid state 88 | // (where the AI has called tools but no tool message has been provided) 89 | // we will drop any extra tool_calls. 90 | response.tool_calls = response.tool_calls?.filter( 91 | (tool_call) => tool_call.name === "Info", 92 | ); 93 | break; 94 | } 95 | } 96 | } else { 97 | // If the LLM didn't respect the tool_choice 98 | responseMessages.push( 99 | new HumanMessage("Please respond by calling one of the provided tools."), 100 | ); 101 | } 102 | 103 | return { 104 | messages: responseMessages, 105 | info, 106 | // This increments the step counter. 107 | // We configure a max step count to avoid infinite research loops 108 | loopStep: 1, 109 | }; 110 | } 111 | 112 | /** 113 | * Validate whether the current extracted info is satisfactory and complete. 114 | */ 115 | const InfoIsSatisfactory = z.object({ 116 | reason: z 117 | .array(z.string()) 118 | .describe( 119 | "First, provide reasoning for why this is either good or bad as a final result. Must include at least 3 reasons.", 120 | ), 121 | is_satisfactory: z 122 | .boolean() 123 | .describe( 124 | "After providing your reasoning, provide a value indicating whether the result is satisfactory. If not, you will continue researching.", 125 | ), 126 | improvement_instructions: z 127 | .string() 128 | .optional() 129 | .describe( 130 | "If the result is not satisfactory, provide clear and specific instructions on what needs to be improved or added to make the information satisfactory. This should include details on missing information, areas that need more depth, or specific aspects to focus on in further research.", 131 | ), 132 | }); 133 | 134 | /** 135 | * Validates the quality of the data enrichment agent's output. 136 | * 137 | * This function performs the following steps: 138 | * 1. Prepares the initial prompt using the main prompt template. 139 | * 2. Constructs a message history for the model. 140 | * 3. Prepares a checker prompt to evaluate the presumed info. 141 | * 4. Initializes and configures a language model with structured output. 142 | * 5. Invokes the model to assess the quality of the gathered information. 143 | * 6. Processes the model's response and determines if the info is satisfactory. 144 | * 145 | * @param state - The current state of the research process. 146 | * @param config - Optional configuration for the runnable. 147 | * @returns A Promise resolving to an object containing either: 148 | * - messages: An array of BaseMessage objects if the info is not satisfactory. 149 | * - info: An AnyRecord containing the extracted information if it is satisfactory. 150 | */ 151 | async function reflect( 152 | state: typeof StateAnnotation.State, 153 | config: RunnableConfig, 154 | ): Promise<{ messages: BaseMessage[] } | { info: AnyRecord }> { 155 | const configuration = ensureConfiguration(config); 156 | const presumedInfo = state.info; // The current extracted result 157 | const lm = state.messages[state.messages.length - 1]; 158 | if (!(lm._getType() === "ai")) { 159 | throw new Error( 160 | `${reflect.name} expects the last message in the state to be an AI message with tool calls.
Got: ${lm._getType()}`, 161 | ); 162 | } 163 | const lastMessage = lm as AIMessage; 164 | 165 | // Load the configured model & provide the reflection/critique schema 166 | const rawModel = await loadChatModel(configuration.model); 167 | const boundModel = rawModel.withStructuredOutput(InfoIsSatisfactory); 168 | // Template in the conversation history: 169 | const p = configuration.prompt 170 | .replace("{info}", JSON.stringify(state.extractionSchema, null, 2)) 171 | .replace("{topic}", state.topic); 172 | const messages = [ 173 | { role: "user", content: p }, 174 | ...state.messages.slice(0, -1), 175 | ]; 176 | 177 | const checker_prompt = `I am thinking of calling the info tool with the info below. \ 178 | Is this good? Give your reasoning as well. \ 179 | You can encourage the Assistant to look at specific URLs if that seems relevant, or do more searches. 180 | If you don't think it is good, you should be very specific about what could be improved. 181 | 182 | {presumed_info}`; 183 | const p1 = checker_prompt.replace( 184 | "{presumed_info}", 185 | JSON.stringify(presumedInfo ?? {}, null, 2), 186 | ); 187 | messages.push({ role: "user", content: p1 }); 188 | 189 | // Call the model 190 | const response = await boundModel.invoke(messages); 191 | if (response.is_satisfactory && presumedInfo) { 192 | return { 193 | info: presumedInfo, 194 | messages: [ 195 | new ToolMessage({ 196 | tool_call_id: lastMessage.tool_calls?.[0]?.id || "", 197 | content: response.reason.join("\n"), 198 | name: "Info", 199 | artifact: response, 200 | status: "success", 201 | }), 202 | ], 203 | }; 204 | } else { 205 | return { 206 | messages: [ 207 | new ToolMessage({ 208 | tool_call_id: lastMessage.tool_calls?.[0]?.id || "", 209 | content: `Unsatisfactory response:\n${response.improvement_instructions}`, 210 | name: "Info", 211 | artifact: response, 212 | status: "error", 213 | }), 214 | ], 215 | }; 216 | } 217 | } 218 | 219 | /** 220 | * Determines the next step in the research process based on the agent's last action. 221 | * 222 | * @param state - The current state of the research process. 223 | * @returns "reflect" if the agent has called the "Info" tool to submit findings, 224 | * "tools" if the agent has called any other tool or no tool at all. 225 | */ 226 | function routeAfterAgent( 227 | state: typeof StateAnnotation.State, 228 | ): "callAgentModel" | "reflect" | "tools" | "__end__" { 229 | const lastMessage: AIMessage = state.messages[state.messages.length - 1]; 230 | 231 | // If for some reason the last message is not an AIMessage 232 | // (if you've modified this template and broken one of the assumptions) 233 | // ensure the system doesn't crash but instead tries to recover by calling the agent model again. 234 | if (lastMessage._getType() !== "ai") { 235 | return "callAgentModel"; 236 | } 237 | 238 | // If the "Info" tool was called, then the model provided its extraction output. Reflect on the result 239 | if (lastMessage.tool_calls && lastMessage.tool_calls[0]?.name === "Info") { 240 | return "reflect"; 241 | } 242 | 243 | // The last message is a tool call that is not "Info" (extraction output) 244 | return "tools"; 245 | } 246 | 247 | /** 248 | * Schedules the next node after the checker's evaluation. 249 | * 250 | * This function determines whether to continue the research process or end it 251 | * based on the checker's evaluation and the current state of the research. 252 | * 253 | * @param state - The current state of the research process. 
254 | * @param config - The configuration for the research process. 255 | * @returns "__end__" if the research should end, "callAgentModel" if it should continue. 256 | */ 257 | function routeAfterChecker( 258 | state: typeof StateAnnotation.State, 259 | config?: RunnableConfig, 260 | ): "__end__" | "callAgentModel" { 261 | const configuration = ensureConfiguration(config); 262 | const lastMessage = state.messages[state.messages.length - 1]; 263 | 264 | if (state.loopStep < configuration.maxLoops) { 265 | if (!state.info) { 266 | return "callAgentModel"; 267 | } 268 | if (lastMessage._getType() !== "tool") { 269 | throw new Error( 270 | `routeAfterChecker expected a tool message. Received: ${lastMessage._getType()}.`, 271 | ); 272 | } 273 | if ((lastMessage as ToolMessage).status === "error") { 274 | // Research deemed unsatisfactory 275 | return "callAgentModel"; 276 | } 277 | // It's great! 278 | return "__end__"; 279 | } else { 280 | return "__end__"; 281 | } 282 | } 283 | 284 | // Create the graph 285 | const workflow = new StateGraph( 286 | { 287 | stateSchema: StateAnnotation, 288 | input: InputStateAnnotation, 289 | }, 290 | ConfigurationAnnotation, 291 | ) 292 | .addNode("callAgentModel", callAgentModel) 293 | .addNode("reflect", reflect) 294 | .addNode("tools", toolNode) 295 | .addEdge("__start__", "callAgentModel") 296 | .addConditionalEdges("callAgentModel", routeAfterAgent) 297 | .addEdge("tools", "callAgentModel") 298 | .addConditionalEdges("reflect", routeAfterChecker); 299 | 300 | export const graph = workflow.compile(); 301 | graph.name = "ResearchTopic"; 302 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LangGraph Studio Data Enrichment Template 2 | 3 | [![CI](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/unit-tests.yml/badge.svg)](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/unit-tests.yml) 4 | [![Integration Tests](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/integration-tests.yml/badge.svg)](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/integration-tests.yml) 5 | [![Open in - LangGraph 
Studio](https://img.shields.io/badge/Open_in-LangGraph_Studio-00324d.svg?logo=data:image/svg%2bxml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI4NS4zMzMiIGhlaWdodD0iODUuMzMzIiB2ZXJzaW9uPSIxLjAiIHZpZXdCb3g9IjAgMCA2NCA2NCI+PHBhdGggZD0iTTEzIDcuOGMtNi4zIDMuMS03LjEgNi4zLTYuOCAyNS43LjQgMjQuNi4zIDI0LjUgMjUuOSAyNC41QzU3LjUgNTggNTggNTcuNSA1OCAzMi4zIDU4IDcuMyA1Ni43IDYgMzIgNmMtMTIuOCAwLTE2LjEuMy0xOSAxLjhtMzcuNiAxNi42YzIuOCAyLjggMy40IDQuMiAzLjQgNy42cy0uNiA0LjgtMy40IDcuNkw0Ny4yIDQzSDE2LjhsLTMuNC0zLjRjLTQuOC00LjgtNC44LTEwLjQgMC0xNS4ybDMuNC0zLjRoMzAuNHoiLz48cGF0aCBkPSJNMTguOSAyNS42Yy0xLjEgMS4zLTEgMS43LjQgMi41LjkuNiAxLjcgMS44IDEuNyAyLjcgMCAxIC43IDIuOCAxLjYgNC4xIDEuNCAxLjkgMS40IDIuNS4zIDMuMi0xIC42LS42LjkgMS40LjkgMS41IDAgMi43LS41IDIuNy0xIDAtLjYgMS4xLS44IDIuNi0uNGwyLjYuNy0xLjgtMi45Yy01LjktOS4zLTkuNC0xMi4zLTExLjUtOS44TTM5IDI2YzAgMS4xLS45IDIuNS0yIDMuMi0yLjQgMS41LTIuNiAzLjQtLjUgNC4yLjguMyAyIDEuNyAyLjUgMy4xLjYgMS41IDEuNCAyLjMgMiAyIDEuNS0uOSAxLjItMy41LS40LTMuNS0yLjEgMC0yLjgtMi44LS44LTMuMyAxLjYtLjQgMS42LS41IDAtLjYtMS4xLS4xLTEuNS0uNi0xLjItMS42LjctMS43IDMuMy0yLjEgMy41LS41LjEuNS4yIDEuNi4zIDIuMiAwIC43LjkgMS40IDEuOSAxLjYgMi4xLjQgMi4zLTIuMy4yLTMuMi0uOC0uMy0yLTEuNy0yLjUtMy4xLTEuMS0zLTMtMy4zLTMtLjUiLz48L3N2Zz4=)](https://langgraph-studio.vercel.app/templates/open?githubUrl=https://github.com/langchain-ai/data-enrichment-js) 6 | 7 | Producing structured results (e.g., to populate a database or spreadsheet) from open-ended research (e.g., web research) is a common use case that LLM-powered agents are well-suited to handle. Here, we provide a general template for this kind of "data enrichment agent" agent using [LangGraph](https://github.com/langchain-ai/langgraph) in [LangGraph Studio](https://github.com/langchain-ai/langgraph-studio). It contains an example graph exported from `src/enrichment_agent/graph.ts` that implements a research assistant capable of automatically gathering information on various topics from the web and structuring the results into a user-defined JSON format. 8 | 9 | ![Overview of agent](./static/overview.png) 10 | 11 | ![](/static/studio.png) 12 | 13 | # What it does 14 | 15 | The enrichment agent defined in `src/enrichment_agent/graph.ts` performs the following steps: 16 | 17 | 1. Takes a research **topic** and requested **extractionSchema** as input. 18 | 2. Searches the web for relevant information 19 | 3. Reads and extracts key details from websites 20 | 4. Organizes the findings into the requested structured format 21 | 5. Validates the gathered information for completeness and accuracy 22 | 23 | ![Graph view in LangGraph studio UI](./static/studio.png) 24 | 25 | ## Getting Started 26 | 27 | You will need the latest versions of `@langchain/langgraph` and `@langchain/core`. See these instructions for help upgrading an [existing project](https://langchain-ai.github.io/langgraphjs/how-tos/manage-ecosystem-dependencies/). 28 | 29 | Assuming you have already [installed LangGraph Studio](https://github.com/langchain-ai/langgraph-studio?tab=readme-ov-file#download), to set up: 30 | 31 | 1. Create a `.env` file. 32 | 33 | ```bash 34 | cp .env.example .env 35 | ``` 36 | 37 | 2. Define required API keys in your `.env` file. 38 | 39 | The primary [search tool](./src/enrichment_agent/tools.ts) [^1] used is [Tavily](https://tavily.com/). Create an API key [here](https://app.tavily.com/sign-in). 40 | 41 | 44 | 45 |
46 |  Setup for `model` 47 | The `llm` configuration defaults are shown below: 48 | 49 | ```yaml 50 | model: anthropic/claude-3-5-sonnet-20240620 51 | ``` 52 | 53 | Follow the instructions below to get set up, or pick one of the additional options. 54 | 55 | ### Anthropic Chat Models 56 | 57 | To use Anthropic's chat models: 58 | 59 | 1. Sign up for an [Anthropic API key](https://console.anthropic.com/) if you haven't already. 60 | 2. Once you have your API key, add it to your `.env` file: 61 | 62 | ``` 63 | ANTHROPIC_API_KEY=your-api-key 64 | ``` 65 | 66 | ### Fireworks Chat Models 67 | 68 | To use Fireworks AI's chat models: 69 | 70 | 1. Sign up for a [Fireworks AI account](https://app.fireworks.ai/signup) and obtain an API key. 71 | 2. Add your Fireworks AI API key to your `.env` file: 72 | 73 | ``` 74 | FIREWORKS_API_KEY=your-api-key 75 | ``` 76 | 77 | ### OpenAI Chat Models 78 | 79 | To use OpenAI's chat models: 80 | 81 | 1. Sign up for an [OpenAI API key](https://platform.openai.com/signup). 82 | 2. Once you have your API key, add it to your `.env` file: 83 | 84 | ``` 85 | OPENAI_API_KEY=your-api-key 86 | ``` 87 | 88 |
89 | 90 |  93 | 94 | 3. Consider a research topic and desired extraction schema. 95 | 96 | As an example, here is a research topic we can consider: 97 | 98 | ``` 99 | "Autonomous agents" 100 | ``` 101 | 102 | With an `extractionSchema` of: 103 | 104 | ```json 105 | { 106 | "type": "object", 107 | "properties": { 108 | "facts": { 109 | "type": "array", 110 | "description": "An array of facts retrieved from the provided sources", 111 | "items": { 112 | "type": "string" 113 | } 114 | } 115 | }, 116 | "required": ["facts"] 117 | } 118 | ``` 119 | 120 | Another example topic with a more complex schema is: 121 | 122 | ``` 123 | "Top 5 chip providers for LLM Training" 124 | ``` 125 | 126 | And here is a desired `extractionSchema`: 127 | 128 | ```json 129 | { 130 | "type": "object", 131 | "properties": { 132 | "companies": { 133 | "type": "array", 134 | "items": { 135 | "type": "object", 136 | "properties": { 137 | "name": { 138 | "type": "string", 139 | "description": "Company name" 140 | }, 141 | "technologies": { 142 | "type": "string", 143 | "description": "Brief summary of key technologies used by the company" 144 | }, 145 | "market_share": { 146 | "type": "string", 147 | "description": "Overview of market share for this company" 148 | }, 149 | "future_outlook": { 150 | "type": "string", 151 | "description": "Brief summary of future prospects and developments in the field for this company" 152 | }, 153 | "key_powers": { 154 | "type": "string", 155 | "description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage" 156 | } 157 | }, 158 | "required": ["name", "technologies", "market_share", "future_outlook"] 159 | }, 160 | "description": "List of companies" 161 | } 162 | }, 163 | "required": ["companies"] 164 | } 165 | ``` 166 | 167 | 4. Open the folder in LangGraph Studio, and input `topic` and `extractionSchema`. 168 | 169 | ## How to customize 170 | 171 | 1. **Customize research targets**: Provide a custom JSON `extractionSchema` when calling the graph to gather different types of information. 172 | 2. **Select a different model**: We default to Anthropic (`claude-3-5-sonnet-20240620`). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`. 173 | 3. **Customize the prompt**: We provide a default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts). You can easily update this via configuration. 174 | 175 | For quick prototyping, these configurations can be set in the studio UI. 176 | 177 | ![Config In Studio](./static/config.png) 178 | 179 | You can also quickly extend this template by: 180 | 181 | - Adding new tools and API connections in [src/enrichment_agent/tools.ts](./src/enrichment_agent/tools.ts). These are just any TypeScript functions. 182 | - Adding additional steps in [src/enrichment_agent/graph.ts](./src/enrichment_agent/graph.ts). 183 | 184 | ## Development 185 | 186 | While iterating on your graph, you can edit past state and rerun your app from past states to debug specific nodes. Local changes will be automatically applied via hot reload. Try adding an interrupt before the agent calls tools, updating the default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts) to take on a persona, or adding additional nodes and edges! 187 | 188 | Follow-up requests will be appended to the same thread.
You can create an entirely new thread, clearing previous history, using the `+` button in the top right. 189 | 190 | You can find the latest (under construction) docs on [LangGraph.js](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case. 191 | 192 | LangGraph Studio also integrates with [LangSmith](https://smith.langchain.com/) for more in-depth tracing and collaboration with teammates. 193 | 194 | [^1]: https://js.langchain.com/docs/concepts#tools 195 | 196 | 330 | --------------------------------------------------------------------------------
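For reference, here is a minimal usage sketch of invoking the compiled graph outside LangGraph Studio, e.g. from a small local script run with a TypeScript-aware runner of your choice. It is not part of the repository: the topic, schema, and override values below are illustrative assumptions, while the `graph` export, the `topic`/`extractionSchema` inputs, and the `configurable` keys mirror `src/enrichment_agent/graph.ts`, the integration tests, and `ensureConfiguration()`. `TAVILY_API_KEY` and an API key for the selected model provider must be set in the environment.

```ts
// Minimal sketch (illustrative, not part of the repo): invoke the compiled
// enrichment graph directly. Assumes TAVILY_API_KEY and ANTHROPIC_API_KEY are set.
import { graph } from "./src/enrichment_agent/graph.js";

// Any JSON Schema object works here; this one just collects a list of facts.
const extractionSchema = {
  type: "object",
  properties: {
    facts: {
      type: "array",
      description: "An array of facts retrieved from the provided sources",
      items: { type: "string" },
    },
  },
  required: ["facts"],
};

async function main() {
  const result = await graph.invoke(
    { topic: "Autonomous agents", extractionSchema },
    {
      // All overrides are optional; ensureConfiguration() supplies the defaults.
      configurable: {
        model: "anthropic/claude-3-5-sonnet-20240620",
        maxSearchResults: 5,
        maxLoops: 6,
      },
    },
  );
  // The reflected-and-approved extraction ends up on the `info` channel.
  console.log(JSON.stringify(result.info, null, 2));
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});
```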