├── .codespellignore ├── static ├── config.png ├── studio.png ├── overview.png └── studio_example.png ├── langgraph.json ├── .gitignore ├── .env.example ├── jest.config.js ├── tests ├── configuration.test.ts ├── agent.test.ts └── agent.int.test.ts ├── tsconfig.json ├── src └── enrichment_agent │ ├── prompts.ts │ ├── utils.ts │ ├── configuration.ts │ ├── state.ts │ ├── tools.ts │ └── graph.ts ├── LICENSE ├── .github └── workflows │ ├── integration-tests.yml │ └── unit-tests.yml ├── .eslintrc.cjs ├── package.json ├── scripts └── checkLanggraphPaths.js └── README.md /.codespellignore: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /static/config.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/data-enrichment-js/HEAD/static/config.png -------------------------------------------------------------------------------- /static/studio.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/data-enrichment-js/HEAD/static/studio.png -------------------------------------------------------------------------------- /static/overview.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/data-enrichment-js/HEAD/static/overview.png -------------------------------------------------------------------------------- /static/studio_example.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/langchain-ai/data-enrichment-js/HEAD/static/studio_example.png -------------------------------------------------------------------------------- /langgraph.json: -------------------------------------------------------------------------------- 1 | { 2 | "node_version": "20", 3 | "dockerfile_lines": [], 4 | "dependencies": ["."], 5 | "graphs": { 6 | "agent": "./src/enrichment_agent/graph.ts:graph" 7 | }, 8 | "env": ".env" 9 | } 10 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | index.cjs 2 | index.js 3 | index.d.ts 4 | node_modules 5 | dist 6 | .yarn/* 7 | !.yarn/patches 8 | !.yarn/plugins 9 | !.yarn/releases 10 | !.yarn/sdks 11 | !.yarn/versions 12 | 13 | .turbo 14 | **/.turbo 15 | **/.eslintcache 16 | 17 | .env 18 | .ipynb_checkpoints 19 | 20 | -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | TAVILY_API_KEY=... 2 | 3 | # To separate your traces from other application 4 | LANGCHAIN_PROJECT=data-enrichment 5 | # LANGCHAIN_API_KEY=... 6 | # LANGCHAIN_TRACING_V2=true 7 | 8 | # The following depend on your selected configuration 9 | 10 | ## LLM choice: 11 | ANTHROPIC_API_KEY=.... 12 | FIREWORKS_API_KEY=... 13 | OPENAI_API_KEY=... 
14 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | export default { 2 | preset: "ts-jest/presets/default-esm", 3 | moduleNameMapper: { 4 | "^(\\.{1,2}/.*)\\.js$": "$1", 5 | }, 6 | transform: { 7 | "^.+\\.tsx?$": [ 8 | "ts-jest", 9 | { 10 | useESM: true, 11 | }, 12 | ], 13 | }, 14 | extensionsToTreatAsEsm: [".ts"], 15 | setupFiles: ["dotenv/config"], 16 | passWithNoTests: true, 17 | testTimeout: 20_000, 18 | }; 19 | -------------------------------------------------------------------------------- /tests/configuration.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "@jest/globals"; 2 | import { ensureConfiguration } from "../src/enrichment_agent/configuration.js"; 3 | 4 | describe("Configuration", () => { 5 | it("should initialize configuration from an empty object", () => { 6 | const emptyConfig = {}; 7 | const result = ensureConfiguration(emptyConfig); 8 | expect(result).toBeDefined(); 9 | expect(typeof result).toBe("object"); 10 | }); 11 | }); 12 | -------------------------------------------------------------------------------- /tests/agent.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "@jest/globals"; 2 | import { graph } from "../src/enrichment_agent/graph.js"; 3 | 4 | describe("Web Research Agent", () => { 5 | beforeAll(() => { 6 | process.env.TAVILY_API_KEY = "dummy"; 7 | }); 8 | 9 | it("should initialize and compile the graph", () => { 10 | expect(graph).toBeDefined(); 11 | expect(graph.name).toBe("ResearchTopic"); 12 | }); 13 | 14 | // TODO: Add more test cases for individual nodes, routing logic, tool integration, and output validation 15 | }); 16 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "@tsconfig/recommended", 3 | "compilerOptions": { 4 | "target": "ES2021", 5 | "lib": ["ES2021", "ES2022.Object", "DOM"], 6 | "module": "NodeNext", 7 | "moduleResolution": "nodenext", 8 | "esModuleInterop": true, 9 | "declaration": true, 10 | "noImplicitReturns": true, 11 | "noFallthroughCasesInSwitch": true, 12 | "noUnusedLocals": true, 13 | "noUnusedParameters": true, 14 | "useDefineForClassFields": true, 15 | "strictPropertyInitialization": false, 16 | "allowJs": true, 17 | "strict": true, 18 | "outDir": "dist", 19 | "types": ["jest", "node"], 20 | "resolveJsonModule": true 21 | }, 22 | "include": ["**/*.ts", "**/*.js"], 23 | "exclude": ["node_modules", "dist"] 24 | } 25 | -------------------------------------------------------------------------------- /src/enrichment_agent/prompts.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Main prompt template for the AI agent. 3 | * This prompt guides the AI in conducting the research and using the available tools. 4 | */ 5 | export const MAIN_PROMPT = `You are doing web research on behalf of a user. You are trying to figure out this information: 6 | 7 | <info> 8 | {info} 9 | </info> 10 | 11 | You have access to the following tools: 12 | 13 | - \`Search\`: call a search tool and get back some results 14 | - \`ScrapeWebsite\`: scrape a website and get relevant notes about the given request. This will update the notes above.
15 | - \`Info\`: call this when you are done and have gathered all the relevant info 16 | 17 | Here is the information you have about the topic you are researching: 18 | 19 | Topic: {topic}`; 20 | 21 | export const INFO_PROMPT = `You are doing web research on behalf of a user. You are trying to find out this information: 22 | 23 | <info> 24 | {info} 25 | </info> 26 | 27 | You just scraped the following website: {url} 28 | 29 | Based on the website content below, jot down some notes about the website. 30 | 31 | {content}`; 32 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 LangChain 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.github/workflows/integration-tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run integration tests for the current project once per day 2 | 3 | name: Integration Tests 4 | 5 | on: 6 | schedule: 7 | - cron: "37 14 * * *" # Run at 7:37 AM Pacific Time (14:37 UTC) every day 8 | workflow_dispatch: # Allows triggering the workflow manually in GitHub UI 9 | 10 | # If another scheduled run starts while this workflow is still running, 11 | # cancel the earlier run in favor of the next run.
12 | concurrency: 13 | group: ${{ github.workflow }}-${{ github.ref }} 14 | cancel-in-progress: true 15 | 16 | jobs: 17 | integration-tests: 18 | name: Integration Tests 19 | strategy: 20 | matrix: 21 | os: [ubuntu-latest] 22 | node-version: [20.x] 23 | runs-on: ${{ matrix.os }} 24 | steps: 25 | - uses: actions/checkout@v4 26 | - name: Use Node.js ${{ matrix.node-version }} 27 | uses: actions/setup-node@v3 28 | with: 29 | node-version: ${{ matrix.node-version }} 30 | cache: "yarn" 31 | - name: Install dependencies 32 | run: yarn install --immutable 33 | - name: Build project 34 | run: yarn build 35 | - name: Run integration tests 36 | env: 37 | ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} 38 | TAVILY_API_KEY: ${{ secrets.TAVILY_API_KEY }} 39 | run: yarn test:int 40 | -------------------------------------------------------------------------------- /.github/workflows/unit-tests.yml: -------------------------------------------------------------------------------- 1 | # This workflow will run unit tests for the current project 2 | 3 | name: CI 4 | 5 | on: 6 | push: 7 | branches: ["main"] 8 | pull_request: 9 | workflow_dispatch: # Allows triggering the workflow manually in GitHub UI 10 | 11 | # If another push to the same PR or branch happens while this workflow is still running, 12 | # cancel the earlier run in favor of the next run. 13 | concurrency: 14 | group: ${{ github.workflow }}-${{ github.ref }} 15 | cancel-in-progress: true 16 | 17 | jobs: 18 | unit-tests: 19 | name: Unit Tests 20 | strategy: 21 | matrix: 22 | os: [ubuntu-latest] 23 | node-version: [18.x, 20.x] 24 | runs-on: ${{ matrix.os }} 25 | steps: 26 | - uses: actions/checkout@v4 27 | - name: Use Node.js ${{ matrix.node-version }} 28 | uses: actions/setup-node@v3 29 | with: 30 | node-version: ${{ matrix.node-version }} 31 | cache: "yarn" 32 | - name: Install dependencies 33 | run: yarn install --immutable 34 | - name: Build project 35 | run: yarn build 36 | 37 | - name: Lint project 38 | run: yarn lint:all 39 | 40 | - name: Check README spelling 41 | uses: codespell-project/actions-codespell@v2 42 | with: 43 | ignore_words_file: .codespellignore 44 | path: README.md 45 | 46 | - name: Check code spelling 47 | uses: codespell-project/actions-codespell@v2 48 | with: 49 | ignore_words_file: .codespellignore 50 | path: src/ 51 | 52 | - name: Run tests 53 | env: 54 | ANTHROPIC_API_KEY: afakekey 55 | TAVILY_API_KEY: anotherfakekey 56 | run: yarn test 57 | -------------------------------------------------------------------------------- /src/enrichment_agent/utils.ts: -------------------------------------------------------------------------------- 1 | import { BaseChatModel } from "@langchain/core/language_models/chat_models"; 2 | import { 3 | MessageContent, 4 | MessageContentComplex, 5 | } from "@langchain/core/messages"; 6 | import { initChatModel } from "langchain/chat_models/universal"; 7 | 8 | /** 9 | * Helper function to extract text content from a complex message. 10 | * 11 | * @param content - The complex message content to process 12 | * @returns The extracted text content 13 | */ 14 | function getSingleTextContent(content: MessageContentComplex) { 15 | if (content?.type === "text") { 16 | return content.text; 17 | } else if (content.type === "array") { 18 | return content.content.map(getSingleTextContent).join(" "); 19 | } 20 | return ""; 21 | } 22 | 23 | /** 24 | * Helper function to extract text content from various message types. 
25 | * 26 | * @param content - The message content to process 27 | * @returns The extracted text content 28 | */ 29 | export function getTextContent(content: MessageContent): string { 30 | if (typeof content === "string") { 31 | return content; 32 | } else if (Array.isArray(content)) { 33 | return content.map(getSingleTextContent).join(" "); 34 | } 35 | return ""; 36 | } 37 | 38 | /** 39 | * Load a chat model from a fully specified name. 40 | * @param fullySpecifiedName - String in the format 'provider/model' or 'provider/account/provider/model'. 41 | * @returns A Promise that resolves to a BaseChatModel instance. 42 | */ 43 | export async function loadChatModel( 44 | fullySpecifiedName: string, 45 | ): Promise<BaseChatModel> { 46 | const index = fullySpecifiedName.indexOf("/"); 47 | if (index === -1) { 48 | // If there's no "/", assume it's just the model 49 | return await initChatModel(fullySpecifiedName); 50 | } else { 51 | const provider = fullySpecifiedName.slice(0, index); 52 | const model = fullySpecifiedName.slice(index + 1); 53 | return await initChatModel(model, { modelProvider: provider }); 54 | } 55 | } 56 | -------------------------------------------------------------------------------- /.eslintrc.cjs: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | extends: [ 3 | "eslint:recommended", 4 | "prettier", 5 | "plugin:@typescript-eslint/recommended", 6 | ], 7 | parserOptions: { 8 | ecmaVersion: 12, 9 | parser: "@typescript-eslint/parser", 10 | project: "./tsconfig.json", 11 | sourceType: "module", 12 | }, 13 | plugins: ["import", "@typescript-eslint", "no-instanceof"], 14 | ignorePatterns: [ 15 | ".eslintrc.cjs", 16 | "scripts", 17 | "node_modules", 18 | "dist", 19 | "dist-cjs", 20 | "*.js", 21 | "*.cjs", 22 | "*.d.ts", 23 | ], 24 | rules: { 25 | "no-process-env": 0, 26 | "no-instanceof/no-instanceof": 2, 27 | "@typescript-eslint/explicit-module-boundary-types": 0, 28 | "@typescript-eslint/no-empty-function": 0, 29 | "@typescript-eslint/no-non-null-assertion": 0, 30 | "@typescript-eslint/no-shadow": 0, 31 | "@typescript-eslint/no-empty-interface": 0, 32 | "@typescript-eslint/no-use-before-define": ["error", "nofunc"], 33 | "@typescript-eslint/no-unused-vars": ["warn", { args: "none" }], 34 | "@typescript-eslint/no-floating-promises": "error", 35 | "@typescript-eslint/no-misused-promises": "error", 36 | camelcase: 0, 37 | "class-methods-use-this": 0, 38 | "import/extensions": [2, "ignorePackages"], 39 | "import/no-extraneous-dependencies": [ 40 | "error", 41 | { devDependencies: ["**/*.test.ts"] }, 42 | ], 43 | "import/no-unresolved": 0, 44 | "import/prefer-default-export": 0, 45 | "keyword-spacing": "error", 46 | "max-classes-per-file": 0, 47 | "max-len": 0, 48 | "no-await-in-loop": 0, 49 | "no-bitwise": 0, 50 | "no-console": 0, 51 | "no-restricted-syntax": 0, 52 | "no-shadow": 0, 53 | "no-continue": 0, 54 | "no-underscore-dangle": 0, 55 | "no-use-before-define": 0, 56 | "no-useless-constructor": 0, 57 | "no-return-await": 0, 58 | "consistent-return": 0, 59 | "no-else-return": 0, 60 | "new-cap": ["error", { properties: false, capIsNew: false }], 61 | }, 62 | }; 63 | -------------------------------------------------------------------------------- /src/enrichment_agent/configuration.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Define the configurable parameters for the agent.
3 | */ 4 | 5 | import { RunnableConfig } from "@langchain/core/runnables"; 6 | import { Annotation } from "@langchain/langgraph"; 7 | import { MAIN_PROMPT } from "./prompts.js"; 8 | 9 | /** 10 | * The complete configuration for the agent. 11 | */ 12 | export const ConfigurationAnnotation = Annotation.Root({ 13 | /** 14 | * The name of the language model to use for the agent. 15 | * 16 | * Should be in the form: provider/model-name. 17 | */ 18 | model: Annotation<string>, 19 | 20 | /** 21 | * The main prompt template to use for the agent's interactions. 22 | * 23 | * Expects two placeholders: {info} and {topic}. 24 | */ 25 | prompt: Annotation<string>, 26 | 27 | /** 28 | * The maximum number of search results to return for each search query. 29 | */ 30 | maxSearchResults: Annotation<number>, 31 | 32 | /** 33 | * The maximum number of times the Info tool can be called during a single interaction. 34 | */ 35 | maxInfoToolCalls: Annotation<number>, 36 | 37 | /** 38 | * The maximum number of interaction loops allowed before the agent terminates. 39 | */ 40 | maxLoops: Annotation<number>, 41 | }); 42 | 43 | /** 44 | * Create a typeof ConfigurationAnnotation.State instance from a RunnableConfig object. 45 | * 46 | * @param config - The configuration object to use. 47 | * @returns An instance of typeof ConfigurationAnnotation.State with the specified configuration. 48 | */ 49 | export function ensureConfiguration( 50 | config?: RunnableConfig, 51 | ): typeof ConfigurationAnnotation.State { 52 | const configurable = (config?.configurable ?? {}) as Partial< 53 | typeof ConfigurationAnnotation.State 54 | >; 55 | 56 | return { 57 | model: configurable.model ?? "anthropic/claude-3-5-sonnet-20240620", 58 | prompt: configurable.prompt ?? MAIN_PROMPT, 59 | maxSearchResults: configurable.maxSearchResults ?? 5, 60 | maxInfoToolCalls: configurable.maxInfoToolCalls ?? 3, 61 | maxLoops: configurable.maxLoops ??
6, 62 | }; 63 | } 64 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "data-enrichment", 3 | "version": "0.0.1", 4 | "description": "A starter template for building a research agent that uses a web search tool to populate a user-provided schema.", 5 | "main": "src/enrichment_agent/graph.ts", 6 | "author": "William Fu-Hinthorn", 7 | "license": "MIT", 8 | "private": true, 9 | "type": "module", 10 | "packageManager": "yarn@1.22.22", 11 | "scripts": { 12 | "build": "tsc", 13 | "clean": "rm -rf dist", 14 | "test": "node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern=\\.test\\.ts$ --testPathIgnorePatterns=\\.int\\.test\\.ts$", 15 | "test:int": "node --experimental-vm-modules node_modules/jest/bin/jest.js --testPathPattern=\\.int\\.test\\.ts$", 16 | "format": "prettier --write .", 17 | "lint": "eslint src", 18 | "format:check": "prettier --check .", 19 | "lint:langgraph-json": "node scripts/checkLanggraphPaths.js", 20 | "lint:all": "yarn lint && yarn lint:langgraph-json && yarn format:check", 21 | "test:all": "yarn test && yarn test:int && yarn lint:langgraph-json" 22 | }, 23 | "dependencies": { 24 | "@langchain/anthropic": "^0.3.21", 25 | "@langchain/community": "^0.3.45", 26 | "@langchain/core": "^0.3.57", 27 | "@langchain/langgraph": "^0.3.0", 28 | "langchain": "^0.3.27", 29 | "langsmith": "^0.3.30", 30 | "ts-node": "^10.9.2", 31 | "zod": "^3.23.8" 32 | }, 33 | "devDependencies": { 34 | "@eslint/eslintrc": "^3.1.0", 35 | "@eslint/js": "^9.9.1", 36 | "@jest/globals": "^29.7.0", 37 | "@tsconfig/recommended": "^1.0.7", 38 | "@types/jest": "^29.5.0", 39 | "@types/node": "^20.14.8", 40 | "@typescript-eslint/eslint-plugin": "^5.59.8", 41 | "@typescript-eslint/parser": "^5.59.8", 42 | "dotenv": "^16.4.5", 43 | "eslint": "^8.41.0", 44 | "eslint-config-prettier": "^8.8.0", 45 | "eslint-plugin-import": "^2.27.5", 46 | "eslint-plugin-no-instanceof": "^1.0.1", 47 | "eslint-plugin-prettier": "^4.2.1", 48 | "jest": "^29.7.0", 49 | "prettier": "^3.3.3", 50 | "ts-jest": "^29.1.0", 51 | "typescript": "^5.3.3" 52 | } 53 | } 54 | -------------------------------------------------------------------------------- /scripts/checkLanggraphPaths.js: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import path from "path"; 3 | import { fileURLToPath } from "url"; 4 | 5 | // Function to check if a file exists 6 | function fileExists(filePath) { 7 | return fs.existsSync(filePath); 8 | } 9 | 10 | // Function to check if an object is exported from a file 11 | function isObjectExported(filePath, objectName) { 12 | try { 13 | const fileContent = fs.readFileSync(filePath, "utf8"); 14 | const exportRegex = new RegExp( 15 | `export\\s+(?:const|let|var)\\s+${objectName}\\s*=|export\\s+\\{[^}]*\\b${objectName}\\b[^}]*\\}`, 16 | ); 17 | return exportRegex.test(fileContent); 18 | } catch (error) { 19 | console.error(`Error reading file ${filePath}: ${error.message}`); 20 | return false; 21 | } 22 | } 23 | 24 | // Main function to check langgraph.json 25 | function checkLanggraphPaths() { 26 | const __filename = fileURLToPath(import.meta.url); 27 | const __dirname = path.dirname(__filename); 28 | const langgraphPath = path.join(__dirname, "..", "langgraph.json"); 29 | 30 | if (!fileExists(langgraphPath)) { 31 | console.error("langgraph.json not found in the root directory"); 32 | process.exit(1); 33 | } 34 |
35 | try { 36 | const langgraphContent = JSON.parse(fs.readFileSync(langgraphPath, "utf8")); 37 | const graphs = langgraphContent.graphs; 38 | 39 | if (!graphs || typeof graphs !== "object") { 40 | console.error('Invalid or missing "graphs" object in langgraph.json'); 41 | process.exit(1); 42 | } 43 | 44 | let hasError = false; 45 | 46 | for (const [key, value] of Object.entries(graphs)) { 47 | const [filePath, objectName] = value.split(":"); 48 | const fullPath = path.join(__dirname, "..", filePath); 49 | 50 | if (!fileExists(fullPath)) { 51 | console.error(`File not found: ${fullPath}`); 52 | hasError = true; 53 | continue; 54 | } 55 | 56 | if (!isObjectExported(fullPath, objectName)) { 57 | console.error( 58 | `Object "${objectName}" is not exported from ${fullPath}`, 59 | ); 60 | hasError = true; 61 | } 62 | } 63 | 64 | if (hasError) { 65 | process.exit(1); 66 | } else { 67 | console.log( 68 | "All paths in langgraph.json are valid and objects are exported correctly.", 69 | ); 70 | } 71 | } catch (error) { 72 | console.error(`Error parsing langgraph.json: ${error.message}`); 73 | process.exit(1); 74 | } 75 | } 76 | 77 | checkLanggraphPaths(); 78 | -------------------------------------------------------------------------------- /src/enrichment_agent/state.ts: -------------------------------------------------------------------------------- 1 | import { Annotation, messagesStateReducer } from "@langchain/langgraph"; 2 | import { type BaseMessage } from "@langchain/core/messages"; 3 | 4 | // eslint-disable-next-line 5 | export type AnyRecord = Record<string, any>; 6 | 7 | export const InputStateAnnotation = Annotation.Root({ 8 | topic: Annotation<string>, 9 | /** 10 | * The info state tracks the current extracted data for the given topic, 11 | * conforming to the provided schema. 12 | */ 13 | info: Annotation<AnyRecord>, 14 | /** 15 | * The schema defines the information the agent is tasked with filling out. 16 | */ 17 | extractionSchema: Annotation<AnyRecord>, 18 | // Feel free to add additional attributes to your state as needed. 19 | // Common examples include retrieved documents, extracted entities, API connections, etc. 20 | }); 21 | /** 22 | * A graph's StateAnnotation defines three main things: 23 | * 1. The structure of the data to be passed between nodes (which "channels" to read from/write to and their types) 24 | * 2. Default values for each field 25 | * 3. Reducers for the state's fields. Reducers are functions that determine how to apply updates to the state. 26 | * See [Reducers](https://langchain-ai.github.io/langgraphjs/concepts/low_level/#reducers) for more information. 27 | */ 28 | 29 | export const StateAnnotation = Annotation.Root({ 30 | /** 31 | * Messages track the primary execution state of the agent. 32 | * 33 | * Typically accumulates a pattern of: 34 | * 35 | * 1. HumanMessage - user input 36 | * 2. AIMessage with .tool_calls - agent picking tool(s) to use to collect 37 | * information 38 | * 3. ToolMessage(s) - the responses (or errors) from the executed tools 39 | * 40 | * (... repeat steps 2 and 3 as needed ...) 41 | * 4. AIMessage without .tool_calls - agent responding in unstructured 42 | * format to the user. 43 | * 44 | * 5. HumanMessage - user responds with the next conversational turn. 45 | * 46 | * (... repeat steps 2-5 as needed ... ) 47 | * 48 | * Merges two lists of messages, updating existing messages by ID. 49 | * 50 | * By default, this ensures the state is "append-only", unless the 51 | * new message has the same ID as an existing message.
52 | * 53 | * Returns: 54 | * A new list of messages with the messages from \`right\` merged into \`left\`. 55 | * If a message in \`right\` has the same ID as a message in \`left\`, the 56 | * message from \`right\` will replace the message from \`left\`. 57 | */ 58 | messages: Annotation<BaseMessage[]>({ 59 | reducer: messagesStateReducer, 60 | default: () => [], 61 | }), 62 | 63 | topic: Annotation<string>, 64 | /** 65 | * The info state tracks the current extracted data for the given topic, 66 | * conforming to the provided schema. 67 | */ 68 | info: Annotation<AnyRecord>, 69 | 70 | /** 71 | * The schema defines the information the agent is tasked with filling out. 72 | */ 73 | extractionSchema: Annotation<AnyRecord>, 74 | 75 | /** 76 | * Tracks the number of iterations the agent has gone through in the current session. 77 | * This can be used to limit the number of iterations or to track progress. 78 | */ 79 | loopStep: Annotation<number>({ 80 | reducer: (left: number, right: number) => left + right, 81 | default: () => 0, 82 | }), 83 | // Feel free to add additional attributes to your state as needed. 84 | // Common examples include retrieved documents, extracted entities, API connections, etc. 85 | }); 86 | -------------------------------------------------------------------------------- /tests/agent.int.test.ts: -------------------------------------------------------------------------------- 1 | import { describe, it, expect } from "@jest/globals"; 2 | import { graph } from "../src/enrichment_agent/graph.js"; 3 | 4 | describe("Researcher", () => { 5 | it("should initialize and compile the graph", () => { 6 | expect(graph).toBeDefined(); 7 | expect(graph.name).toBe("ResearchTopic"); 8 | }); 9 | 10 | const extractionSchema = { 11 | type: "object", 12 | properties: { 13 | founder: { 14 | type: "string", 15 | description: "The name of the company founder.", 16 | }, 17 | websiteUrl: { 18 | type: "string", 19 | description: 20 | "Website URL of the company, e.g.: https://openai.com/, or https://microsoft.com", 21 | }, 22 | products_sold: { 23 | type: "array", 24 | items: { type: "string" }, 25 | description: "A list of products sold by the company.", 26 | }, 27 | }, 28 | required: ["founder", "websiteUrl", "products_sold"], 29 | }; 30 | 31 | it("Simple runthrough", async () => { 32 | const res = await graph.invoke({ 33 | topic: "LangChain", 34 | extractionSchema: extractionSchema, 35 | }); 36 | 37 | expect(res.info).toBeDefined(); 38 | expect(res.info.founder.toLowerCase()).toContain("harrison"); 39 | }, 100_000); 40 | 41 | const arrayExtractionSchema = { 42 | type: "object", 43 | properties: { 44 | providers: { 45 | type: "array", 46 | items: { 47 | type: "object", 48 | properties: { 49 | name: { type: "string", description: "Company name" }, 50 | technology_summary: { 51 | type: "string", 52 | description: 53 | "Brief summary of their chip technology for LLM training", 54 | }, 55 | current_market_share: { 56 | type: "string", 57 | description: 58 | "Estimated current market share percentage or position", 59 | }, 60 | future_outlook: { 61 | type: "string", 62 | description: 63 | "Brief paragraph on future prospects and developments", 64 | }, 65 | }, 66 | required: [ 67 | "name", 68 | "technology_summary", 69 | "current_market_share", 70 | "future_outlook", 71 | ], 72 | }, 73 | description: "List of top chip providers for LLM Training", 74 | }, 75 | overall_market_trends: { 76 | type: "string", 77 | description: "Brief paragraph on general trends in the LLM chip market", 78 | }, 79 | }, 80 | required: ["providers",
"overall_market_trends"], 81 | }; 82 | 83 | it("Researcher list type", async () => { 84 | const res = await graph.invoke({ 85 | topic: "Top 5 chip providers for LLM training", 86 | extractionSchema: arrayExtractionSchema, 87 | }); 88 | 89 | const info = res.info; 90 | expect(info.providers).toBeDefined(); 91 | expect(Array.isArray(info.providers)).toBe(true); 92 | expect(info.providers.length).toBe(5); 93 | 94 | const nvidiaPresent = info.providers.some( 95 | (provider: { name: string }) => 96 | provider.name.toLowerCase().trim() === "nvidia", 97 | ); 98 | expect(nvidiaPresent).toBe(true); 99 | 100 | info.providers.forEach( 101 | (provider: { 102 | name: any; 103 | technology_summary: any; 104 | current_market_share: any; 105 | future_outlook: any; 106 | }) => { 107 | expect(provider.name).toBeDefined(); 108 | expect(provider.technology_summary).toBeDefined(); 109 | expect(provider.current_market_share).toBeDefined(); 110 | expect(provider.future_outlook).toBeDefined(); 111 | }, 112 | ); 113 | 114 | expect(info.overall_market_trends).toBeDefined(); 115 | expect(typeof info.overall_market_trends).toBe("string"); 116 | expect(info.overall_market_trends.length).toBeGreaterThan(0); 117 | }, 100_000); 118 | }); 119 | -------------------------------------------------------------------------------- /src/enrichment_agent/tools.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Tools for data enrichment. 3 | * 4 | * This module contains functions that are directly exposed to the LLM as tools. 5 | * These tools can be used for tasks such as web searching and scraping. 6 | * Users can edit and extend these tools as needed. 7 | */ 8 | import { TavilySearchResults } from "@langchain/community/tools/tavily_search"; 9 | import { RunnableConfig } from "@langchain/core/runnables"; 10 | import { tool } from "@langchain/core/tools"; 11 | 12 | import { INFO_PROMPT } from "./prompts.js"; 13 | import { ensureConfiguration } from "./configuration.js"; 14 | import { StateAnnotation } from "./state.js"; 15 | import { getTextContent, loadChatModel } from "./utils.js"; 16 | import { 17 | AIMessage, 18 | isBaseMessage, 19 | ToolMessage, 20 | } from "@langchain/core/messages"; 21 | import { z } from "zod"; 22 | 23 | /** 24 | * Initialize tools within a function so that they have access to the current 25 | * state and config at runtime. 26 | */ 27 | function initializeTools( 28 | state?: typeof StateAnnotation.State, 29 | config?: RunnableConfig, 30 | ) { 31 | /** 32 | * Search for general results. 33 | * 34 | * This function performs a search using the Tavily search engine, which is designed 35 | * to provide comprehensive, accurate, and trusted results. It's particularly useful 36 | * for answering questions about current events. 37 | */ 38 | const configuration = ensureConfiguration(config); 39 | const searchTool = new TavilySearchResults({ 40 | maxResults: configuration.maxSearchResults, 41 | }); 42 | 43 | async function scrapeWebsite({ url }: { url: string }): Promise { 44 | /** 45 | * Scrape and summarize content from a given URL. 
46 | */ 47 | const response = await fetch(url); 48 | const content = await response.text(); 49 | const truncatedContent = content.slice(0, 50000); 50 | const p = INFO_PROMPT.replace( 51 | "{info}", 52 | JSON.stringify(state?.extractionSchema, null, 2), 53 | ) 54 | .replace("{url}", url) 55 | .replace("{content}", truncatedContent); 56 | 57 | const rawModel = await loadChatModel(configuration.model); 58 | const result = await rawModel.invoke(p); 59 | return getTextContent(result.content); 60 | } 61 | 62 | const scraperTool = tool(scrapeWebsite, { 63 | name: "scrapeWebsite", 64 | description: "Scrape content from a given website URL", 65 | schema: z.object({ 66 | url: z.string().url().describe("The URL of the website to scrape"), 67 | }), 68 | }); 69 | 70 | return [searchTool, scraperTool]; 71 | } 72 | 73 | export const toolNode = async ( 74 | state: typeof StateAnnotation.State, 75 | config: RunnableConfig, 76 | ) => { 77 | const message = state.messages[state.messages.length - 1]; 78 | // Initialize the tools within the context of the node so that the tools 79 | // have the current state of the graph and the config in scope. 80 | // See: https://js.langchain.com/docs/how_to/tool_runtime 81 | const tools = initializeTools(state, config); 82 | const outputs = await Promise.all( 83 | (message as AIMessage).tool_calls?.map(async (call) => { 84 | const tool = tools.find((tool) => tool.name === call.name); 85 | try { 86 | if (tool === undefined) { 87 | throw new Error(`Tool "${call.name}" not found.`); 88 | } 89 | const newCall = { 90 | ...call, 91 | args: { 92 | __state: state, 93 | ...call.args, 94 | }, 95 | }; 96 | const output = await tool.invoke( 97 | { ...newCall, type: "tool_call" }, 98 | config, 99 | ); 100 | if (isBaseMessage(output) && output._getType() === "tool") { 101 | return output; 102 | } else { 103 | return new ToolMessage({ 104 | name: tool.name, 105 | content: 106 | typeof output === "string" ? output : JSON.stringify(output), 107 | tool_call_id: call.id ?? "", 108 | }); 109 | } 110 | // eslint-disable-next-line @typescript-eslint/no-explicit-any 111 | } catch (e: any) { 112 | return new ToolMessage({ 113 | content: `Error: ${e.message}\n Please fix your mistakes.`, 114 | name: call.name, 115 | tool_call_id: call.id ?? "", 116 | status: "error", 117 | }); 118 | } 119 | }) ?? [], 120 | ); 121 | 122 | return { messages: outputs }; 123 | }; 124 | 125 | // No state or config required here since these are just bound to the chat model 126 | // and are only used to define schema. 127 | // The tool node above will actually call the functions. 128 | export const MODEL_TOOLS = initializeTools(); 129 | -------------------------------------------------------------------------------- /src/enrichment_agent/graph.ts: -------------------------------------------------------------------------------- 1 | /** 2 | * Define a data enrichment agent. 3 | * 4 | * Works with a chat model with tool calling support. 
5 | */ 6 | 7 | import { 8 | AIMessage, 9 | BaseMessage, 10 | HumanMessage, 11 | ToolMessage, 12 | } from "@langchain/core/messages"; 13 | import { RunnableConfig } from "@langchain/core/runnables"; 14 | import { tool } from "@langchain/core/tools"; 15 | import { StateGraph } from "@langchain/langgraph"; 16 | import { z } from "zod"; 17 | 18 | import { 19 | ConfigurationAnnotation, 20 | ensureConfiguration, 21 | } from "./configuration.js"; 22 | import { AnyRecord, InputStateAnnotation, StateAnnotation } from "./state.js"; 23 | import { MODEL_TOOLS, toolNode } from "./tools.js"; 24 | import { loadChatModel } from "./utils.js"; 25 | 26 | /** 27 | * Calls the primary Language Model (LLM) to decide on the next research action. 28 | * 29 | * This function performs the following steps: 30 | * 1. Initializes configuration and sets up the 'Info' tool, which is the user-defined extraction schema. 31 | * 2. Prepares the prompt and message history for the LLM. 32 | * 3. Initializes and configures the LLM with available tools. 33 | * 4. Invokes the LLM and processes its response. 34 | * 5. Handles the LLM's decision to either continue research or submit final info. 35 | * 36 | * @param state - The current state of the research process. 37 | * @param config - Optional configuration for the runnable. 38 | * @returns A Promise resolving to an object containing: 39 | * - messages: An array of BaseMessage objects representing the LLM's response. 40 | * - info: An optional AnyRecord containing the extracted information if the LLM decided to submit final info. 41 | * - loopStep: A number indicating the current step in the research loop. 42 | */ 43 | 44 | async function callAgentModel( 45 | state: typeof StateAnnotation.State, 46 | config: RunnableConfig, 47 | ): Promise<typeof StateAnnotation.Update> { 48 | const configuration = ensureConfiguration(config); 49 | // First, define the info tool. This uses the user-provided 50 | // json schema to define the research targets 51 | // We pass an empty function because we will not actually invoke this tool. 52 | // We are just using it for formatting. 53 | const infoTool = tool(async () => {}, { 54 | name: "Info", 55 | description: "Call this when you have gathered all the relevant info", 56 | schema: state.extractionSchema, 57 | }); 58 | // Next, load the model 59 | const rawModel = await loadChatModel(configuration.model); 60 | if (!rawModel.bindTools) { 61 | throw new Error("Chat model does not support tool binding"); 62 | } 63 | const model = rawModel.bindTools([...MODEL_TOOLS, infoTool], { 64 | tool_choice: "any", 65 | }); 66 | 67 | // Format the schema into the configurable system prompt 68 | const p = configuration.prompt 69 | .replace("{info}", JSON.stringify(state.extractionSchema, null, 2)) 70 | .replace("{topic}", state.topic); 71 | const messages = [{ role: "user", content: p }, ...state.messages]; 72 | 73 | // Next, we'll call the model. 74 | const response: AIMessage = await model.invoke(messages); 75 | const responseMessages = [response]; 76 | 77 | // If the model has collected enough information to fill out 78 | // the provided schema, great! It will call the "Info" tool 79 | // We've decided to track this as a separate state variable 80 | let info; 81 | if (response.tool_calls && response.tool_calls.length > 0) { 82 | for (const tool_call of response.tool_calls || []) { 83 | if (tool_call.name === "Info") { 84 | info = tool_call.args; 85 | // If info was called, the agent is submitting a response.
86 | // (it's not actually a function to call, it's a schema to extract) 87 | // To ensure that the graph doesn't end up in an invalid state 88 | // (where the AI has called tools but no tool message has been provided) 89 | // we will drop any extra tool_calls. 90 | response.tool_calls = response.tool_calls?.filter( 91 | (tool_call) => tool_call.name === "Info", 92 | ); 93 | break; 94 | } 95 | } 96 | } else { 97 | // If the LLM didn't respect the tool_choice 98 | responseMessages.push( 99 | new HumanMessage("Please respond by calling one of the provided tools."), 100 | ); 101 | } 102 | 103 | return { 104 | messages: responseMessages, 105 | info, 106 | // This increments the step counter. 107 | // We configure a max step count to avoid infinite research loops 108 | loopStep: 1, 109 | }; 110 | } 111 | 112 | /** 113 | * Validate whether the current extracted info is satisfactory and complete. 114 | */ 115 | const InfoIsSatisfactory = z.object({ 116 | reason: z 117 | .array(z.string()) 118 | .describe( 119 | "First, provide reasoning for why this is either good or bad as a final result. Must include at least 3 reasons.", 120 | ), 121 | is_satisfactory: z 122 | .boolean() 123 | .describe( 124 | "After providing your reasoning, provide a value indicating whether the result is satisfactory. If not, you will continue researching.", 125 | ), 126 | improvement_instructions: z 127 | .string() 128 | .optional() 129 | .describe( 130 | "If the result is not satisfactory, provide clear and specific instructions on what needs to be improved or added to make the information satisfactory. This should include details on missing information, areas that need more depth, or specific aspects to focus on in further research.", 131 | ), 132 | }); 133 | 134 | /** 135 | * Validates the quality of the data enrichment agent's output. 136 | * 137 | * This function performs the following steps: 138 | * 1. Prepares the initial prompt using the main prompt template. 139 | * 2. Constructs a message history for the model. 140 | * 3. Prepares a checker prompt to evaluate the presumed info. 141 | * 4. Initializes and configures a language model with structured output. 142 | * 5. Invokes the model to assess the quality of the gathered information. 143 | * 6. Processes the model's response and determines if the info is satisfactory. 144 | * 145 | * @param state - The current state of the research process. 146 | * @param config - Optional configuration for the runnable. 147 | * @returns A Promise resolving to an object containing either: 148 | * - messages: An array of BaseMessage objects if the info is not satisfactory. 149 | * - info: An AnyRecord containing the extracted information if it is satisfactory. 150 | */ 151 | async function reflect( 152 | state: typeof StateAnnotation.State, 153 | config: RunnableConfig, 154 | ): Promise<{ messages: BaseMessage[] } | { info: AnyRecord }> { 155 | const configuration = ensureConfiguration(config); 156 | const presumedInfo = state.info; // The current extracted result 157 | const lm = state.messages[state.messages.length - 1]; 158 | if (!(lm._getType() === "ai")) { 159 | throw new Error( 160 | `${reflect.name} expects the last message in the state to be an AI message with tool calls.
Got: ${lm._getType()}`, 161 | ); 162 | } 163 | const lastMessage = lm as AIMessage; 164 | 165 | // Load the configured model & provide the reflection/critique schema 166 | const rawModel = await loadChatModel(configuration.model); 167 | const boundModel = rawModel.withStructuredOutput(InfoIsSatisfactory); 168 | // Template in the conversation history: 169 | const p = configuration.prompt 170 | .replace("{info}", JSON.stringify(state.extractionSchema, null, 2)) 171 | .replace("{topic}", state.topic); 172 | const messages = [ 173 | { role: "user", content: p }, 174 | ...state.messages.slice(0, -1), 175 | ]; 176 | 177 | const checker_prompt = `I am thinking of calling the info tool with the info below. \ 178 | Is this good? Give your reasoning as well. \ 179 | You can encourage the Assistant to look at specific URLs if that seems relevant, or do more searches. 180 | If you don't think it is good, you should be very specific about what could be improved. 181 | 182 | {presumed_info}`; 183 | const p1 = checker_prompt.replace( 184 | "{presumed_info}", 185 | JSON.stringify(presumedInfo ?? {}, null, 2), 186 | ); 187 | messages.push({ role: "user", content: p1 }); 188 | 189 | // Call the model 190 | const response = await boundModel.invoke(messages); 191 | if (response.is_satisfactory && presumedInfo) { 192 | return { 193 | info: presumedInfo, 194 | messages: [ 195 | new ToolMessage({ 196 | tool_call_id: lastMessage.tool_calls?.[0]?.id || "", 197 | content: response.reason.join("\n"), 198 | name: "Info", 199 | artifact: response, 200 | status: "success", 201 | }), 202 | ], 203 | }; 204 | } else { 205 | return { 206 | messages: [ 207 | new ToolMessage({ 208 | tool_call_id: lastMessage.tool_calls?.[0]?.id || "", 209 | content: `Unsatisfactory response:\n${response.improvement_instructions}`, 210 | name: "Info", 211 | artifact: response, 212 | status: "error", 213 | }), 214 | ], 215 | }; 216 | } 217 | } 218 | 219 | /** 220 | * Determines the next step in the research process based on the agent's last action. 221 | * 222 | * @param state - The current state of the research process. 223 | * @returns "reflect" if the agent has called the "Info" tool to submit findings, 224 | * "tools" if the agent has called any other tool or no tool at all. 225 | */ 226 | function routeAfterAgent( 227 | state: typeof StateAnnotation.State, 228 | ): "callAgentModel" | "reflect" | "tools" | "__end__" { 229 | const lastMessage: AIMessage = state.messages[state.messages.length - 1]; 230 | 231 | // If for some reason the last message is not an AIMessage 232 | // (if you've modified this template and broken one of the assumptions) 233 | // ensure the system doesn't crash but instead tries to recover by calling the agent model again. 234 | if (lastMessage._getType() !== "ai") { 235 | return "callAgentModel"; 236 | } 237 | 238 | // If the "Info" tool was called, then the model provided its extraction output. Reflect on the result 239 | if (lastMessage.tool_calls && lastMessage.tool_calls[0]?.name === "Info") { 240 | return "reflect"; 241 | } 242 | 243 | // The last message is a tool call that is not "Info" (extraction output) 244 | return "tools"; 245 | } 246 | 247 | /** 248 | * Schedules the next node after the checker's evaluation. 249 | * 250 | * This function determines whether to continue the research process or end it 251 | * based on the checker's evaluation and the current state of the research. 252 | * 253 | * @param state - The current state of the research process. 
254 | * @param config - The configuration for the research process. 255 | * @returns "__end__" if the research should end, "callAgentModel" if it should continue. 256 | */ 257 | function routeAfterChecker( 258 | state: typeof StateAnnotation.State, 259 | config?: RunnableConfig, 260 | ): "__end__" | "callAgentModel" { 261 | const configuration = ensureConfiguration(config); 262 | const lastMessage = state.messages[state.messages.length - 1]; 263 | 264 | if (state.loopStep < configuration.maxLoops) { 265 | if (!state.info) { 266 | return "callAgentModel"; 267 | } 268 | if (lastMessage._getType() !== "tool") { 269 | throw new Error( 270 | `routeAfterChecker expected a tool message. Received: ${lastMessage._getType()}.`, 271 | ); 272 | } 273 | if ((lastMessage as ToolMessage).status === "error") { 274 | // Research deemed unsatisfactory 275 | return "callAgentModel"; 276 | } 277 | // It's great! 278 | return "__end__"; 279 | } else { 280 | return "__end__"; 281 | } 282 | } 283 | 284 | // Create the graph 285 | const workflow = new StateGraph( 286 | { 287 | stateSchema: StateAnnotation, 288 | input: InputStateAnnotation, 289 | }, 290 | ConfigurationAnnotation, 291 | ) 292 | .addNode("callAgentModel", callAgentModel) 293 | .addNode("reflect", reflect) 294 | .addNode("tools", toolNode) 295 | .addEdge("__start__", "callAgentModel") 296 | .addConditionalEdges("callAgentModel", routeAfterAgent) 297 | .addEdge("tools", "callAgentModel") 298 | .addConditionalEdges("reflect", routeAfterChecker); 299 | 300 | export const graph = workflow.compile(); 301 | graph.name = "ResearchTopic"; 302 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # LangGraph Studio Data Enrichment Template 2 | 3 | [![CI](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/unit-tests.yml/badge.svg)](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/unit-tests.yml) 4 | [![Integration Tests](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/integration-tests.yml/badge.svg)](https://github.com/langchain-ai/data-enrichment-js/actions/workflows/integration-tests.yml) 5 | [![Open in - LangGraph 
Studio](https://img.shields.io/badge/Open_in-LangGraph_Studio-00324d.svg?logo=data:image/svg%2bxml;base64,PHN2ZyB4bWxucz0iaHR0cDovL3d3dy53My5vcmcvMjAwMC9zdmciIHdpZHRoPSI4NS4zMzMiIGhlaWdodD0iODUuMzMzIiB2ZXJzaW9uPSIxLjAiIHZpZXdCb3g9IjAgMCA2NCA2NCI+PHBhdGggZD0iTTEzIDcuOGMtNi4zIDMuMS03LjEgNi4zLTYuOCAyNS43LjQgMjQuNi4zIDI0LjUgMjUuOSAyNC41QzU3LjUgNTggNTggNTcuNSA1OCAzMi4zIDU4IDcuMyA1Ni43IDYgMzIgNmMtMTIuOCAwLTE2LjEuMy0xOSAxLjhtMzcuNiAxNi42YzIuOCAyLjggMy40IDQuMiAzLjQgNy42cy0uNiA0LjgtMy40IDcuNkw0Ny4yIDQzSDE2LjhsLTMuNC0zLjRjLTQuOC00LjgtNC44LTEwLjQgMC0xNS4ybDMuNC0zLjRoMzAuNHoiLz48cGF0aCBkPSJNMTguOSAyNS42Yy0xLjEgMS4zLTEgMS43LjQgMi41LjkuNiAxLjcgMS44IDEuNyAyLjcgMCAxIC43IDIuOCAxLjYgNC4xIDEuNCAxLjkgMS40IDIuNS4zIDMuMi0xIC42LS42LjkgMS40LjkgMS41IDAgMi43LS41IDIuNy0xIDAtLjYgMS4xLS44IDIuNi0uNGwyLjYuNy0xLjgtMi45Yy01LjktOS4zLTkuNC0xMi4zLTExLjUtOS44TTM5IDI2YzAgMS4xLS45IDIuNS0yIDMuMi0yLjQgMS41LTIuNiAzLjQtLjUgNC4yLjguMyAyIDEuNyAyLjUgMy4xLjYgMS41IDEuNCAyLjMgMiAyIDEuNS0uOSAxLjItMy41LS40LTMuNS0yLjEgMC0yLjgtMi44LS44LTMuMyAxLjYtLjQgMS42LS41IDAtLjYtMS4xLS4xLTEuNS0uNi0xLjItMS42LjctMS43IDMuMy0yLjEgMy41LS41LjEuNS4yIDEuNi4zIDIuMiAwIC43LjkgMS40IDEuOSAxLjYgMi4xLjQgMi4zLTIuMy4yLTMuMi0uOC0uMy0yLTEuNy0yLjUtMy4xLTEuMS0zLTMtMy4zLTMtLjUiLz48L3N2Zz4=)](https://langgraph-studio.vercel.app/templates/open?githubUrl=https://github.com/langchain-ai/data-enrichment-js) 6 | 7 | Producing structured results (e.g., to populate a database or spreadsheet) from open-ended research (e.g., web research) is a common use case that LLM-powered agents are well-suited to handle. Here, we provide a general template for this kind of "data enrichment agent" agent using [LangGraph](https://github.com/langchain-ai/langgraph) in [LangGraph Studio](https://github.com/langchain-ai/langgraph-studio). It contains an example graph exported from `src/enrichment_agent/graph.ts` that implements a research assistant capable of automatically gathering information on various topics from the web and structuring the results into a user-defined JSON format. 8 | 9 | ![Overview of agent](./static/overview.png) 10 | 11 | ![](/static/studio.png) 12 | 13 | # What it does 14 | 15 | The enrichment agent defined in `src/enrichment_agent/graph.ts` performs the following steps: 16 | 17 | 1. Takes a research **topic** and requested **extractionSchema** as input. 18 | 2. Searches the web for relevant information 19 | 3. Reads and extracts key details from websites 20 | 4. Organizes the findings into the requested structured format 21 | 5. Validates the gathered information for completeness and accuracy 22 | 23 | ![Graph view in LangGraph studio UI](./static/studio.png) 24 | 25 | ## Getting Started 26 | 27 | You will need the latest versions of `@langchain/langgraph` and `@langchain/core`. See these instructions for help upgrading an [existing project](https://langchain-ai.github.io/langgraphjs/how-tos/manage-ecosystem-dependencies/). 28 | 29 | Assuming you have already [installed LangGraph Studio](https://github.com/langchain-ai/langgraph-studio?tab=readme-ov-file#download), to set up: 30 | 31 | 1. Create a `.env` file. 32 | 33 | ```bash 34 | cp .env.example .env 35 | ``` 36 | 37 | 2. Define required API keys in your `.env` file. 38 | 39 | The primary [search tool](./src/enrichment_agent/tools.ts) [^1] used is [Tavily](https://tavily.com/). Create an API key [here](https://app.tavily.com/sign-in). 40 | 41 | 44 | 45 |
46 |  Setup for `model` 47 | The `llm` configuration defaults are shown below: 48 | 49 | ```yaml 50 | model: anthropic/claude-3-5-sonnet-20240620 51 | ``` 52 | 53 | Follow the instructions below to get set up, or pick one of the additional options. 54 | 55 | ### Anthropic Chat Models 56 | 57 | To use Anthropic's chat models: 58 | 59 | 1. Sign up for an [Anthropic API key](https://console.anthropic.com/) if you haven't already. 60 | 2. Once you have your API key, add it to your `.env` file: 61 | 62 | ``` 63 | ANTHROPIC_API_KEY=your-api-key 64 | ``` 65 | 66 | ### Fireworks Chat Models 67 | 68 | To use Fireworks AI's chat models: 69 | 70 | 1. Sign up for a [Fireworks AI account](https://app.fireworks.ai/signup) and obtain an API key. 71 | 2. Add your Fireworks AI API key to your `.env` file: 72 | 73 | ``` 74 | FIREWORKS_API_KEY=your-api-key 75 | ``` 76 | 77 | ### OpenAI Chat Models 78 | 79 | To use OpenAI's chat models: 80 | 81 | 1. Sign up for an [OpenAI API key](https://platform.openai.com/signup). 82 | 2. Once you have your API key, add it to your `.env` file: 83 | 84 | ``` 85 | OPENAI_API_KEY=your-api-key 86 | ``` 87 | 88 |
89 | 90 |  93 | 94 | 3. Consider a research topic and desired extraction schema. 95 | 96 | As an example, here is a research topic we can consider: 97 | 98 | ``` 99 | "Autonomous agents" 100 | ``` 101 | 102 | With an `extractionSchema` of: 103 | 104 | ```json 105 | { 106 | "type": "object", 107 | "properties": { 108 | "facts": { 109 | "type": "array", 110 | "description": "An array of facts retrieved from the provided sources", 111 | "items": { 112 | "type": "string" 113 | } 114 | } 115 | }, 116 | "required": ["facts"] 117 | } 118 | ``` 119 | 120 | Another example topic with a more complex schema is: 121 | 122 | ``` 123 | "Top 5 chip providers for LLM Training" 124 | ``` 125 | 126 | And here is a desired `extractionSchema`: 127 | 128 | ```json 129 | { 130 | "type": "object", 131 | "properties": { 132 | "companies": { 133 | "type": "array", 134 | "items": { 135 | "type": "object", 136 | "properties": { 137 | "name": { 138 | "type": "string", 139 | "description": "Company name" 140 | }, 141 | "technologies": { 142 | "type": "string", 143 | "description": "Brief summary of key technologies used by the company" 144 | }, 145 | "market_share": { 146 | "type": "string", 147 | "description": "Overview of market share for this company" 148 | }, 149 | "future_outlook": { 150 | "type": "string", 151 | "description": "Brief summary of future prospects and developments in the field for this company" 152 | }, 153 | "key_powers": { 154 | "type": "string", 155 | "description": "Which of the 7 Powers (Scale Economies, Network Economies, Counter Positioning, Switching Costs, Branding, Cornered Resource, Process Power) best describe this company's competitive advantage" 156 | } 157 | }, 158 | "required": ["name", "technologies", "market_share", "future_outlook"] 159 | }, 160 | "description": "List of companies" 161 | } 162 | }, 163 | "required": ["companies"] 164 | } 165 | ``` 166 | 167 | 4. Open the folder in LangGraph Studio, and input `topic` and `extractionSchema`. 168 | 169 | ## How to customize 170 | 171 | 1. **Customize research targets**: Provide a custom JSON `extractionSchema` when calling the graph to gather different types of information. 172 | 2. **Select a different model**: We default to Anthropic (`claude-3-5-sonnet-20240620`). You can select a compatible chat model using `provider/model-name` via configuration. Example: `openai/gpt-4o-mini`. 173 | 3. **Customize the prompt**: We provide a default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts). You can easily update this via configuration. 174 | 175 | For quick prototyping, these configurations can be set in the studio UI. 176 | 177 | ![Config In Studio](./static/config.png) 178 | 179 | You can also quickly extend this template by: 180 | 181 | - Adding new tools and API connections in [src/enrichment_agent/tools.ts](./src/enrichment_agent/tools.ts). These are just any TypeScript functions. 182 | - Adding additional steps in [src/enrichment_agent/graph.ts](./src/enrichment_agent/graph.ts). 183 | 184 | ## Development 185 | 186 | While iterating on your graph, you can edit past state and rerun your app from past states to debug specific nodes. Local changes will be automatically applied via hot reload. Try adding an interrupt before the agent calls tools, updating the default prompt in [src/enrichment_agent/prompts.ts](./src/enrichment_agent/prompts.ts) to take on a persona, or adding additional nodes and edges! 187 | 188 | Follow-up requests will be appended to the same thread.
You can create an entirely new thread, clearing previous history, using the `+` button in the top right. 189 | 190 | You can find the latest (under construction) docs on [LangGraph.js](https://langchain-ai.github.io/langgraphjs/) here, including examples and other references. Using those guides can help you pick the right patterns to adapt here for your use case. 191 | 192 | LangGraph Studio also integrates with [LangSmith](https://smith.langchain.com/) for more in-depth tracing and collaboration with teammates. 193 | 194 | [^1]: https://js.langchain.com/docs/concepts#tools 195 | 196 | 330 | --------------------------------------------------------------------------------
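For reference, here is a minimal usage sketch of invoking the compiled graph outside LangGraph Studio, e.g. from a small local script run with a TypeScript-aware runner of your choice. It is not part of the repository: the topic, schema, and override values below are illustrative assumptions, while the `graph` export, the `topic`/`extractionSchema` inputs, and the `configurable` keys mirror `src/enrichment_agent/graph.ts`, the integration tests, and `ensureConfiguration()`. `TAVILY_API_KEY` and an API key for the selected model provider must be set in the environment.

```ts
// Minimal sketch (illustrative, not part of the repo): invoke the compiled
// enrichment graph directly. Assumes TAVILY_API_KEY and ANTHROPIC_API_KEY are set.
import { graph } from "./src/enrichment_agent/graph.js";

// Any JSON Schema object works here; this one just collects a list of facts.
const extractionSchema = {
  type: "object",
  properties: {
    facts: {
      type: "array",
      description: "An array of facts retrieved from the provided sources",
      items: { type: "string" },
    },
  },
  required: ["facts"],
};

async function main() {
  const result = await graph.invoke(
    { topic: "Autonomous agents", extractionSchema },
    {
      // All overrides are optional; ensureConfiguration() supplies the defaults.
      configurable: {
        model: "anthropic/claude-3-5-sonnet-20240620",
        maxSearchResults: 5,
        maxLoops: 6,
      },
    },
  );
  // The reflected-and-approved extraction ends up on the `info` channel.
  console.log(JSON.stringify(result.info, null, 2));
}

main().catch((err) => {
  console.error(err);
  process.exit(1);
});
```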