├── .nxignore ├── packages ├── .gitkeep ├── agent-script │ ├── .npmrc │ ├── src │ │ ├── index.ts │ │ └── lib │ │ │ ├── prompts │ │ │ ├── index.ts │ │ │ ├── builder.ts │ │ │ └── parts.ts │ │ │ ├── udf │ │ │ ├── baseStoppingUdf.ts │ │ │ ├── index.ts │ │ │ ├── thinkUdf.ts │ │ │ ├── terminateUdf.ts │ │ │ ├── baseUdf.ts │ │ │ ├── finalAnswerUdf.ts │ │ │ ├── duckduckgoSearchUdf.ts │ │ │ ├── datasheetWriteUdf.ts │ │ │ ├── callAgentUdf.ts │ │ │ ├── notebookWriteUdf.ts │ │ │ └── bingSearchUdf.ts │ │ │ ├── index.ts │ │ │ ├── bufferConsole.ts │ │ │ ├── lang.ts │ │ │ ├── errors.ts │ │ │ ├── agentLogger.ts │ │ │ ├── __tests__ │ │ │ ├── bufferConsole.test.ts │ │ │ ├── agentMemory.test.ts │ │ │ ├── codeAgent.logging.test.ts │ │ │ ├── codeAgent.memory.test.ts │ │ │ ├── codeAgent.managedAgents.test.ts │ │ │ ├── sandbox.test.ts │ │ │ ├── codeAgent.udf.test.ts │ │ │ └── codeAgent.planning.test.ts │ │ │ ├── sandbox.ts │ │ │ ├── chatModel.ts │ │ │ ├── types.ts │ │ │ ├── utils.ts │ │ │ ├── agentMemory.ts │ │ │ └── codeAgent.prompt.ts │ ├── jest.config.ts │ ├── tsconfig.spec.json │ ├── tsconfig.json │ ├── tsconfig.lib.cjs.json │ ├── eslint.config.js │ ├── tsconfig.lib.json │ ├── package.json │ ├── project.json │ └── rollup.config.js ├── agent-script-web │ ├── .npmrc │ ├── src │ │ ├── lib │ │ │ ├── udf │ │ │ │ ├── index.ts │ │ │ │ └── browser │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── errors.ts │ │ │ │ │ ├── pageGoBack.ts │ │ │ │ │ ├── pageNavigateUrlUdf.ts │ │ │ │ │ ├── utils.ts │ │ │ │ │ ├── pageUdf.ts │ │ │ │ │ ├── pageClickUdf.ts │ │ │ │ │ ├── __tests__ │ │ │ │ │ └── pageUdf.test.ts │ │ │ │ │ ├── pageReadUdf.ts │ │ │ │ │ └── pageExtractDataUdf.ts │ │ │ ├── utils │ │ │ │ ├── index.ts │ │ │ │ ├── lang.ts │ │ │ │ ├── format.ts │ │ │ │ └── schema.ts │ │ │ ├── agents │ │ │ │ └── webAgents │ │ │ │ │ ├── index.ts │ │ │ │ │ ├── deepResearchAgent.ts │ │ │ │ │ ├── webDataAgent.ts │ │ │ │ │ ├── webAgent.ts │ │ │ │ │ ├── deepResearchAgent.prompt.ts │ │ │ │ │ └── webDataAgent.prompt.ts │ │ │ └── 
types.ts │ │ ├── index.ts │ │ └── scripts │ │ │ └── cli.ts │ ├── jest.config.ts │ ├── tsconfig.spec.json │ ├── tsconfig.json │ ├── tsconfig.lib.cjs.json │ ├── tsconfig.lib.json │ ├── package.json │ ├── project.json │ └── rollup.config.js └── agent-script-instrumentation │ ├── .npmrc │ ├── src │ ├── index.ts │ └── lib │ │ ├── setup.ts │ │ ├── utils.ts │ │ └── instrumentation.ts │ ├── eslint.config.js │ ├── tsconfig.spec.json │ ├── tsconfig.lib.cjs.json │ ├── jest.config.ts │ ├── tsconfig.json │ ├── tsconfig.lib.json │ ├── project.json │ ├── package.json │ └── rollup.config.js ├── pnpm-workspace.yaml ├── .prettierrc ├── .npmrc ├── jest.preset.js ├── .prettierignore ├── tsconfig.json ├── examples └── agents │ ├── tsconfig.json │ ├── package.json │ ├── src │ ├── codeAgent │ │ └── simpleMath.ts │ ├── deepResearchAgent │ │ └── mobileLlmStartup.ts │ └── webDataAgent │ │ └── hackernews.ts │ └── README.md ├── jest.config.ts ├── .gitignore ├── package.json ├── tsconfig.base.json ├── LICENSE ├── nx.json └── README.md /.nxignore: -------------------------------------------------------------------------------- 1 | examples -------------------------------------------------------------------------------- /packages/.gitkeep: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /packages/agent-script/.npmrc: -------------------------------------------------------------------------------- 1 | save-workspace-protocol=false -------------------------------------------------------------------------------- /pnpm-workspace.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - "packages/*" 3 | -------------------------------------------------------------------------------- /packages/agent-script-web/.npmrc: -------------------------------------------------------------------------------- 1 | save-workspace-protocol=false 
-------------------------------------------------------------------------------- /packages/agent-script/src/index.ts: -------------------------------------------------------------------------------- 1 | export * from './lib/index'; 2 | -------------------------------------------------------------------------------- /packages/agent-script-instrumentation/.npmrc: -------------------------------------------------------------------------------- 1 | save-workspace-protocol=false -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "trailingComma": "all" 4 | } 5 | -------------------------------------------------------------------------------- /packages/agent-script-web/src/lib/udf/index.ts: -------------------------------------------------------------------------------- 1 | export * from './browser/index'; 2 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | strict-peer-dependencies=false 2 | auto-install-peers=true 3 | save-workspace-protocol=false -------------------------------------------------------------------------------- /packages/agent-script/src/lib/prompts/index.ts: -------------------------------------------------------------------------------- 1 | export * from './parts'; 2 | export * from './builder'; 3 | -------------------------------------------------------------------------------- /jest.preset.js: -------------------------------------------------------------------------------- 1 | const nxPreset = require('@nx/jest/preset').default; 2 | 3 | module.exports = { ...nxPreset }; 4 | -------------------------------------------------------------------------------- /packages/agent-script-instrumentation/src/index.ts: 
-------------------------------------------------------------------------------- 1 | export * from './lib/instrumentation'; 2 | export * from './lib/setup'; 3 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | # Add files here to ignore them from prettier formatting 2 | /dist 3 | /coverage 4 | /.nx/cache 5 | /.nx/workspace-data -------------------------------------------------------------------------------- /packages/agent-script-web/src/lib/utils/index.ts: -------------------------------------------------------------------------------- 1 | export * from './format'; 2 | export * from './lang'; 3 | export * from './schema'; 4 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.base.json", 3 | "compileOnSave": false, 4 | "files": [], 5 | "references": [] 6 | } 7 | -------------------------------------------------------------------------------- /packages/agent-script-instrumentation/eslint.config.js: -------------------------------------------------------------------------------- 1 | const baseConfig = require('../../eslint.config.js'); 2 | 3 | module.exports = [...baseConfig]; 4 | -------------------------------------------------------------------------------- /examples/agents/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "esModuleInterop": true, 4 | "module": "CommonJS", 5 | "target": "ES2020" 6 | }, 7 | } -------------------------------------------------------------------------------- /packages/agent-script/src/lib/udf/baseStoppingUdf.ts: -------------------------------------------------------------------------------- 1 | import { BaseUdf } from './baseUdf'; 2 | 3 | export abstract class 
/**
 * Root Jest configuration for the workspace.
 *
 * Exported as an async factory because the project list is resolved at
 * runtime by `getJestProjectsAsync()` from `@nx/jest`.
 *
 * @returns A Jest config whose `projects` entry is whatever
 *   `getJestProjectsAsync()` resolves to.
 */
export default async (): Promise<Config> => ({
  // The return type must carry its type argument: a bare `Promise` does not
  // type-check, and `Config` is the type this file already imports from 'jest'.
  projects: await getJestProjectsAsync(),
});
/**
 * Error raised when a browser page action does not finish within its
 * allotted time.
 *
 * The error is reported through `AgentError` with the fixed code
 * `PAGE_ACTION_TIMEOUT` and a message that includes the timeout.
 */
export class PageActionTimeoutError extends AgentError {
  /**
   * @param timeoutMs - The timeout, in milliseconds, that was exceeded;
   *   interpolated into the error message.
   */
  constructor(timeoutMs: number) {
    const message = `Page action timed out after ${timeoutMs} ms`;
    super({ message, code: 'PAGE_ACTION_TIMEOUT' });
  }
}
/**
 * Jest configuration for the `agent-script-web` package.
 *
 * Extends the workspace preset, runs tests in a Node environment and compiles
 * TypeScript/JavaScript sources through ts-jest.
 */
const config = {
  displayName: 'agent-script-web',
  preset: '../../jest.preset.js',
  testEnvironment: 'node',
  transform: {
    // `<rootDir>` is substituted by Jest with this package's root directory.
    // A bare '/tsconfig.spec.json' would be read as an absolute filesystem
    // path and would not point at the package's tsconfig.spec.json.
    '^.+\\.[tj]s$': ['ts-jest', { tsconfig: '<rootDir>/tsconfig.spec.json' }],
  },
  moduleFileExtensions: ['ts', 'js', 'html'],
  coverageDirectory: '../../coverage/packages/agent-script-web',
};

export default config;
/packages/agent-script-web/tsconfig.spec.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "../../dist/out-tsc", 5 | "module": "commonjs", 6 | "moduleResolution": "node10", 7 | "types": ["jest", "node"] 8 | }, 9 | "include": [ 10 | "jest.config.ts", 11 | "src/**/*.test.ts", 12 | "src/**/*.spec.ts", 13 | "src/**/*.d.ts" 14 | ] 15 | } 16 | -------------------------------------------------------------------------------- /packages/agent-script/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.base.json", 3 | "compilerOptions": { 4 | "outDir": "./dist", 5 | "module": "commonjs", 6 | "moduleResolution": "Node", 7 | "declaration": true, 8 | }, 9 | "files": [], 10 | "include": [], 11 | "references": [ 12 | { 13 | "path": "./tsconfig.lib.json" 14 | }, 15 | { 16 | "path": "./tsconfig.spec.json" 17 | } 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /packages/agent-script/tsconfig.lib.cjs.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "dist/cjs", 5 | "rootDir": "src", 6 | "declaration": true, 7 | "declarationMap": true, 8 | "sourceMap": true, 9 | "inlineSources": true, 10 | "moduleResolution": "Node" 11 | }, 12 | "exclude": ["jest.config.ts", "src/**/*.spec.ts", "src/**/*.test.ts"], 13 | "include": ["src/**/*.ts"] 14 | } 15 | -------------------------------------------------------------------------------- /packages/agent-script-web/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.base.json", 3 | "compilerOptions": { 4 | "outDir": "./dist", 5 | "module": "commonjs", 6 | "moduleResolution": "Node", 7 | "declaration": true, 8 | }, 9 
/**
 * Jest configuration for the `agent-script-instrumentation` package.
 *
 * Extends the workspace preset, runs tests in a Node environment and compiles
 * TypeScript/JavaScript sources through ts-jest. `passWithNoTests` keeps the
 * test target passing when no test files are found.
 */
const config = {
  displayName: 'agent-script-instrumentation',
  preset: '../../jest.preset.js',
  testEnvironment: 'node',
  transform: {
    // `<rootDir>` is substituted by Jest with this package's root directory.
    // A bare '/tsconfig.spec.json' would be read as an absolute filesystem
    // path and would not point at the package's tsconfig.spec.json.
    '^.+\\.[tj]s$': ['ts-jest', { tsconfig: '<rootDir>/tsconfig.spec.json' }],
  },
  moduleFileExtensions: ['ts', 'js', 'html'],
  coverageDirectory: '../../coverage/packages/agent-script-instrumentation',
  passWithNoTests: true,
};

export default config;
-------------------------------------------------------------------------------- /packages/agent-script-instrumentation/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.base.json", 3 | "compilerOptions": { 4 | "outDir": "./dist", 5 | "module": "commonjs", 6 | "moduleResolution": "Node", 7 | "declaration": true, 8 | }, 9 | "files": [], 10 | "include": [], 11 | "references": [ 12 | { 13 | "path": "./tsconfig.lib.json" 14 | }, 15 | { 16 | "path": "./tsconfig.spec.json" 17 | }, 18 | ] 19 | } 20 | -------------------------------------------------------------------------------- /packages/agent-script/eslint.config.js: -------------------------------------------------------------------------------- 1 | const baseConfig = require('../../eslint.config.js'); 2 | 3 | module.exports = [ 4 | ...baseConfig, 5 | { 6 | files: ['**/*.json'], 7 | rules: { 8 | '@nx/dependency-checks': [ 9 | 'error', 10 | { 11 | ignoredFiles: ['{projectRoot}/eslint.config.{js,cjs,mjs}'], 12 | }, 13 | ], 14 | }, 15 | languageOptions: { 16 | parser: require('jsonc-eslint-parser'), 17 | }, 18 | }, 19 | ]; 20 | -------------------------------------------------------------------------------- /packages/agent-script/tsconfig.lib.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "dist/esm", 5 | "rootDir": "src", 6 | "declaration": true, 7 | "declarationMap": true, 8 | "sourceMap": true, 9 | "inlineSources": true, 10 | "module": "esnext", 11 | "moduleResolution": "node", 12 | "target": "es2017" 13 | }, 14 | "exclude": ["jest.config.ts", "src/**/*.spec.ts", "src/**/*.test.ts"], 15 | "include": ["src/**/*.ts"] 16 | } 17 | -------------------------------------------------------------------------------- /packages/agent-script-web/tsconfig.lib.json: 
-------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "dist/esm", 5 | "rootDir": "src", 6 | "declaration": true, 7 | "declarationMap": true, 8 | "sourceMap": true, 9 | "inlineSources": true, 10 | "module": "esnext", 11 | "moduleResolution": "node", 12 | "target": "es2017" 13 | }, 14 | "exclude": ["jest.config.ts", "src/**/*.spec.ts", "src/**/*.test.ts"], 15 | "include": ["src/**/*.ts"] 16 | } 17 | -------------------------------------------------------------------------------- /packages/agent-script-instrumentation/tsconfig.lib.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "./tsconfig.json", 3 | "compilerOptions": { 4 | "outDir": "dist/esm", 5 | "rootDir": "src", 6 | "declaration": true, 7 | "declarationMap": true, 8 | "sourceMap": true, 9 | "inlineSources": true, 10 | "module": "esnext", 11 | "moduleResolution": "node", 12 | "target": "es2017" 13 | }, 14 | "exclude": ["jest.config.ts", "src/**/*.spec.ts", "src/**/*.test.ts"], 15 | "include": ["src/**/*.ts"] 16 | } 17 | -------------------------------------------------------------------------------- /packages/agent-script-web/src/lib/utils/lang.ts: -------------------------------------------------------------------------------- 1 | export type Json = 2 | | string 3 | | number 4 | | boolean 5 | | null 6 | | Json[] 7 | | { [key: string]: Json }; 8 | 9 | export type JsonSchemaObjectInstance = { [key: string]: JsonSchemaInstance }; 10 | 11 | export type JsonSchemaInstance = 12 | | string 13 | | number 14 | | boolean 15 | | null 16 | | string[] 17 | | number[] 18 | | boolean[] 19 | | JsonSchemaInstance[] 20 | | { [key: string]: JsonSchemaInstance } 21 | | { [key: string]: JsonSchemaInstance }[]; 22 | -------------------------------------------------------------------------------- /packages/agent-script/src/lib/udf/thinkUdf.ts: 
/**
 * Strips the common leading indentation from a multi-line string.
 *
 * The smallest indentation among the non-blank lines is measured, and that
 * many leading spaces are removed from every line that starts with them.
 * Lines that do not start with that many spaces are left untouched.
 *
 * @param content - The text to de-indent; lines are separated by `\n`.
 * @param excludeFirstNonEmptyLine - When `true` (the default), the first
 *   non-blank line is ignored while measuring the common indentation. It is
 *   still stripped if it happens to start with the measured prefix.
 * @returns The de-indented text. If there are no lines to measure (for
 *   example, empty or single-line content with the default flag), the content
 *   is returned unchanged.
 */
export function removeLeadingIndentation(
  content: string,
  excludeFirstNonEmptyLine: boolean = true,
): string {
  const lines = content.split('\n');
  const nonEmptyLines = lines.filter((line) => line.trim().length > 0);
  const linesToConsider = excludeFirstNonEmptyLine
    ? nonEmptyLines.slice(1)
    : nonEmptyLines;

  // Nothing to measure: without this guard Math.min() over an empty list is
  // Infinity, and ' '.repeat(Infinity) throws a RangeError.
  if (linesToConsider.length === 0) {
    return content;
  }

  // Fold rather than spread so very long inputs cannot exceed the argument
  // limit of Math.min(...).
  const minIndentation = linesToConsider.reduce((smallest, line) => {
    const width = line.match(/^\s*/)?.[0]?.length ?? 0;
    return Math.min(smallest, width);
  }, Number.POSITIVE_INFINITY);

  // Stripping zero characters is a no-op.
  if (minIndentation === 0) {
    return content;
  }

  // NOTE: the width counts any whitespace (including tabs), but only a prefix
  // made of spaces is removed; lines indented with tabs are left as they are.
  const prefix = ' '.repeat(minIndentation);

  return lines
    .map((line) => (line.startsWith(prefix) ? line.slice(minIndentation) : line))
    .join('\n');
}
/**
 * A minimal console replacement that records output in memory instead of
 * printing it.
 *
 * Everything passed to {@link BufferConsole.log} is formatted by a Node
 * `Console` bound to an in-memory writable stream; the captured text can be
 * read back with {@link BufferConsole.getOutput}.
 */
export class BufferConsole {
  private buffer: string[] = [];
  private stream: Writable;
  private console: Console;

  constructor() {
    // Every chunk the Console writes is appended to the buffer as a string.
    const sink = new Writable({
      write: (data, _encoding, done) => {
        this.buffer.push(data.toString());
        done();
      },
    });
    this.stream = sink;
    this.console = new Console(sink);
  }

  /**
   * Formats the arguments the way `console.log` does and stores the result.
   *
   * @param args - Values to log.
   */
  log(...args: any[]) {
    this.console.log(...args);
  }

  /**
   * Returns all captured output as a single string, with text matching the
   * ANSI escape-sequence pattern below removed.
   *
   * @returns The concatenated, escape-free output.
   */
  getOutput(): string {
    const ansiEscapes =
      /[\u001B\u009B][[()#;?]*(?:[0-9]{1,4}(?:;[0-9]{0,4})*)?[0-9A-ORZcf-nqry=><]/g;
    const captured = this.buffer.join('');
    return captured.replace(ansiEscapes, '');
  }
}
/**
 * Type guard that rejects `null` and `undefined`.
 *
 * Falsy values such as `0`, `''` and `false` are kept. Suitable as an
 * `Array.prototype.filter` callback.
 *
 * @param value - The value to test.
 * @returns `true` when `value` is neither `null` nor `undefined`.
 */
export function notEmpty<TValue>(
  value: TValue | null | undefined,
): value is TValue {
  return value !== null && value !== undefined;
}

/**
 * Type guard for the fulfilled branch of a `Promise.allSettled` result.
 *
 * @param value - One settled result.
 * @returns `true` when `value.status` is `'fulfilled'`.
 */
export function fulfilled<T>(
  value: PromiseSettledResult<T>,
): value is PromiseFulfilledResult<T> {
  return value.status === 'fulfilled';
}

/**
 * Type guard for the rejected branch of a `Promise.allSettled` result.
 *
 * @param value - One settled result.
 * @returns `true` when `value.status` is `'rejected'`.
 */
export function rejected<T>(
  value: PromiseSettledResult<T>,
): value is PromiseRejectedResult {
  return value.status === 'rejected';
}
"packages/*" 11 | ], 12 | "devDependencies": { 13 | "@nx/jest": "^20.5.0", 14 | "@nx/js": "20.5.0", 15 | "@nx/node": "^20.5.0", 16 | "@swc-node/register": "~1.9.1", 17 | "@swc/core": "~1.5.7", 18 | "@swc/helpers": "~0.5.11", 19 | "@types/jest": "^29.5.14", 20 | "@types/node": "18.16.9", 21 | "commander": "13.1.0", 22 | "jest": "^29.7.0", 23 | "jest-environment-node": "^29.7.0", 24 | "nx": "20.5.0", 25 | "prettier": "^2.6.2", 26 | "ts-jest": "^29.1.0", 27 | "tsx": "4.19.3", 28 | "typescript": "~5.7.2", 29 | "rollup": "4.37.0", 30 | "@rollup/plugin-typescript": "12.1.2" 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /packages/agent-script/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@runparse/agent-script", 3 | "version": "0.0.2", 4 | "license": "MIT", 5 | "main": "dist/cjs/index.cjs", 6 | "module": "dist/esm/index.mjs", 7 | "types": "dist/esm/index.d.ts", 8 | "type": "module", 9 | "exports": { 10 | ".": { 11 | "import": "./dist/esm/index.mjs", 12 | "require": "./dist/cjs/index.cjs" 13 | } 14 | }, 15 | "files": [ 16 | "dist" 17 | ], 18 | "dependencies": { 19 | "nunjucks": "3.2.4", 20 | "duck-duck-scrape": "2.2.7", 21 | "axios": "1.8.2", 22 | "openai": "^4.52.2", 23 | "tslib": "2.8.1", 24 | "token.js": "0.5.4" 25 | }, 26 | "peerDependencies": { 27 | "@sinclair/typebox": ">=0.34.28" 28 | }, 29 | "devDependencies": { 30 | "@types/nunjucks": "3.2.6" 31 | }, 32 | "scripts": { 33 | "prebuild": "rm -rf dist", 34 | "build": "tsc --build tsconfig.lib.json tsconfig.lib.cjs.json" 35 | } 36 | } 37 | -------------------------------------------------------------------------------- /tsconfig.base.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "baseUrl": ".", 4 | "rootDir": ".", 5 | "allowJs": false, 6 | "allowSyntheticDefaultImports": true, 7 | "declaration": true, 8 | 
"emitDecoratorMetadata": false, 9 | "esModuleInterop": true, 10 | "experimentalDecorators": false, 11 | "forceConsistentCasingInFileNames": true, 12 | "incremental": true, 13 | "declarationMap": true, 14 | "importHelpers": true, 15 | "isolatedModules": true, 16 | "lib": ["es2022"], 17 | "module": "ES2022", 18 | "noEmitOnError": true, 19 | "noFallthroughCasesInSwitch": true, 20 | "noImplicitOverride": true, 21 | "noImplicitReturns": true, 22 | "noUnusedLocals": true, 23 | "skipLibCheck": true, 24 | "strict": true, 25 | "target": "es2022", 26 | "strictNullChecks": true, 27 | "noUncheckedIndexedAccess": true, 28 | }, 29 | "include": ["packages/**/*.ts", "scripts/**/*.ts"] 30 | } 31 | -------------------------------------------------------------------------------- /packages/agent-script/project.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "agent-script", 3 | "$schema": "../../node_modules/nx/schemas/project-schema.json", 4 | "sourceRoot": "packages/agent-script/src", 5 | "projectType": "library", 6 | "tags": [], 7 | "targets": { 8 | "build": { 9 | "executor": "nx:run-commands", 10 | "outputs": ["{options.outputPath}"], 11 | "options": { 12 | "commands": ["rm -rf dist", "rollup -c rollup.config.js"], 13 | "cwd": "packages/agent-script", 14 | "parallel": false 15 | } 16 | }, 17 | "publish": { 18 | "executor": "nx:run-commands", 19 | "options": { 20 | "commands": ["pnpm publish"], 21 | "cwd": "packages/agent-script", 22 | "parallel": false 23 | } 24 | }, 25 | "cli": { 26 | "executor": "nx:run-commands", 27 | "options": { 28 | "commands": ["pnpm tsx packages/agent-script/scripts/cli.ts"] 29 | } 30 | } 31 | } 32 | } 33 | -------------------------------------------------------------------------------- /examples/agents/src/codeAgent/simpleMath.ts: -------------------------------------------------------------------------------- 1 | // import { ChatModel } from '@runparse/agent-script'; 2 | import { CodeAgent, 
/**
 * Example entry point: asks a CodeAgent a simple arithmetic question and
 * prints whatever the agent returns. Any failure while building or running
 * the agent is reported on stderr instead of being rethrown.
 */
async function main() {
  const question = 'what is 2 + 2?';

  try {
    const mathAgent = new CodeAgent({
      name: 'Web Agent',
      description: '',
      maxSteps: 10,
      // finalAnswer is the only UDF made available to the agent here.
      udfs: [new FinalAnswerUdf()],
      // uncomment to use anthropic, must set ANTHROPIC_API_KEY in .env
      // model: new ChatModel({
      //   provider: 'anthropic',
      //   model: 'claude-3-5-sonnet-latest',
      //   max_tokens: 4096,
      // }),
    });

    const answer = await mathAgent.run(question, {});

    console.log('final answer:\n', answer);
  } catch (failure) {
    console.error(failure);
  }
}
-------------------------------------------------------------------------------- 1 | # Agents 2 | 3 | Run `npm install`, then create a `.env` file in this folder containing, at a minimum, the variable shown below. You may also need to run `npx playwright install` if you haven't downloaded browsers for Playwright yet. 4 | 5 | ``` 6 | OPENAI_API_KEY= 7 | ``` 8 | 9 | Other environment variables are needed with different models and service providers. See the comments inside each script file. 10 | 11 | ## CodeAgent 12 | 13 | A general agent that solves problems by writing JavaScript code. 14 | 15 | ```sh 16 | npx tsx --env-file=.env src/codeAgent/simpleMath.ts 17 | ``` 18 | 19 | ## DeepResearchAgent 20 | 21 | DeepResearchAgent is a general agent that does research on the internet and produces answers and reports. 22 | 23 | ```sh 24 | npx tsx --env-file=.env src/deepResearchAgent/mobileLlmStartup.ts 25 | ``` 26 | 27 | ## WebDataAgent 28 | 29 | WebDataAgent is an agent that collects structured data from the internet through search and web page browsing. 
30 | 31 | ```sh 32 | npx tsx --env-file=.env src/webDataAgent/hackernews.ts 33 | ``` 34 | -------------------------------------------------------------------------------- /packages/agent-script-web/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@runparse/agent-script-web", 3 | "version": "0.0.2", 4 | "license": "MIT", 5 | "main": "dist/cjs/index.cjs", 6 | "module": "dist/esm/index.mjs", 7 | "types": "dist/esm/index.d.ts", 8 | "type": "module", 9 | "exports": { 10 | ".": { 11 | "import": "./dist/esm/index.mjs", 12 | "require": "./dist/cjs/index.cjs" 13 | } 14 | }, 15 | "files": [ 16 | "dist" 17 | ], 18 | "dependencies": { 19 | "sharp": "0.33.5", 20 | "turndown": "7.2.0", 21 | "htmlparser2": "10.0.0" 22 | }, 23 | "peerDependencies": { 24 | "@runparse/agent-script": "workspace:*", 25 | "@runparse/agent-script-instrumentation": "workspace:*", 26 | "@sinclair/typebox": ">=0.34.28", 27 | "playwright": ">=1.50.0", 28 | "tslib": "2.8.1", 29 | "openai": "^4.52.2" 30 | }, 31 | "devDependencies": { 32 | "@types/json-schema": "7.0.15", 33 | "@types/turndown": "5.0.5" 34 | }, 35 | "scripts": { 36 | "prebuild": "rm -rf dist", 37 | "build": "tsc --build tsconfig.lib.json tsconfig.lib.cjs.json" 38 | } 39 | } 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2025 runparse 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright 
/**
 * Rollup `external` predicate.
 *
 * An id is treated as external (left out of the bundle) when it is a Node
 * builtin module name, or when it starts with a lowercase letter or `@` and
 * its second character is not a colon. Every id classified as external is
 * echoed to the console.
 *
 * @param {string} id module id as passed by Rollup
 * @returns {boolean} true when the id must stay an external import
 */
const external = (id) => {
  // NOTE(review): the "second char is not ':'" rule presumably keeps
  // lowercase Windows drive paths (c:\...) from matching — confirm.
  const looksLikeBareSpecifier = /^[a-z@][^:]/.test(id);

  if (!builtinModules.includes(id) && !looksLikeBareSpecifier) {
    return false;
  }

  console.log('External:', id);
  return true;
};
/**
 * Abstract base for user-defined functions (UDFs).
 *
 * Subclasses supply a name, a description, and input/output schemas, and
 * implement `call`. The base class contributes a textual signature built
 * from those members plus two no-op lifecycle hooks.
 *
 * NOTE(review): `Static` and `Promise` appear below without their type
 * arguments — this looks like an artifact of how this copy was produced;
 * confirm against the real file before editing the signatures.
 */
export abstract class BaseUdf implements IUdf {
  abstract name: string;
  abstract description: string;
  abstract inputSchema: TSchema;
  abstract outputSchema: TSchema;

  /**
   * Renders the UDF as a commented async function signature string:
   * a `// description` line followed by
   * `async function name(params: <input type>): Promise<<output type>>`.
   * The output type falls back to `any` when `outputSchema` is falsy.
   */
  getSignature(): string {
    return `// ${this.description}\nasync function ${
      this.name
    }(params: ${schemaToTypeString(this.inputSchema)}): Promise\<${
      this.outputSchema ? schemaToTypeString(this.outputSchema) : 'any'
    }\>`;
  }

  /** Executes the UDF; may return its result directly or as a promise. */
  abstract call(
    input: Static,
    agent: ICodeAgent,
  ): Promise> | Static;

  /**
   * Hook with an empty default body; subclasses may override it.
   * Presumably invoked by the agent before `call` — confirm against the caller.
   */
  async onBeforeCall(
    input: Static,
    agent: ICodeAgent,
  ): Promise {}

  /**
   * Hook with an empty default body; subclasses may override it.
   * Presumably invoked by the agent after `call` — confirm against the caller.
   */
  async onAfterCall(
    input: Static,
    output: Static,
    agent: ICodeAgent,
  ): Promise {}
}
/**
 * Error type carrying a machine-readable `code` next to the human-readable
 * message (see `AgentErrorCode` for the codes declared in this module).
 */
export class AgentError extends Error implements IAgentError {
  /** Machine-readable error code supplied by the thrower. */
  public code: string;

  /**
   * @param message human-readable description, forwarded to `Error`
   * @param code machine-readable error code
   */
  constructor({ message, code }: { message: string; code: string }) {
    super(message);
    // Subclasses of Error inherit name === 'Error'; set it explicitly so
    // logs and stack traces identify this as an AgentError.
    this.name = 'AgentError';
    this.code = code;
  }
}
/**
 * Rollup `external` predicate.
 *
 * An id is treated as external (left out of the bundle) when it is a Node
 * builtin module name, or when it starts with a lowercase letter or `@` and
 * its second character is not a colon. Every id classified as external is
 * echoed to the console.
 *
 * @param {string} id module id as passed by Rollup
 * @returns {boolean} true when the id must stay an external import
 */
const external = (id) => {
  // NOTE(review): the "second char is not ':'" rule presumably keeps
  // lowercase Windows drive paths (c:\...) from matching — confirm.
  const looksLikeBareSpecifier = /^[a-z@][^:]/.test(id);

  if (!builtinModules.includes(id) && !looksLikeBareSpecifier) {
    return false;
  }

  console.log('External:', id);
  return true;
};
/**
 * Logger that writes agent output through its own `Console` bound to the
 * process stdout/stderr streams.
 *
 * The configured `level` is stored on the instance; none of the methods in
 * this class read it.
 */
export class AgentLogger implements IAgentLogger {
  level: LogLevel;
  console: Console;

  /** @param level log level to record on the instance (defaults to INFO) */
  constructor(level: LogLevel = LogLevel.INFO) {
    this.console = new Console(process.stdout, process.stderr);
    this.level = level;
  }

  /** Forwards all arguments unchanged to the underlying console. */
  log(...args: any[]): void {
    this.console.log(...args);
  }

  /** Prints `content` padded by blank lines, preceded by `title` when one is given. */
  logMarkdown({ title, content }: { title?: string; content: string }): void {
    const text = title ? `\n${title}\n${content}\n` : `\n${content}\n`;
    this.console.log(text);
  }

  /** Prints `title` framed above and below by a 20-dash rule. */
  logRule(title: string): void {
    const rule = '-'.repeat(20);
    this.console.log(`\n${rule}\n${title}\n${rule}\n`);
  }

  /** Announces a new task. */
  logTask(content: string): void {
    this.console.log(`\nNew task: ${content}\n`);
  }

  /** Pretty-prints each message as indented JSON; does nothing when given no messages. */
  logMessages(messages: IChatMessage[] | null): void {
    if (!messages) {
      return;
    }

    this.console.log('\nMessages:');
    for (const message of messages) {
      this.console.log(JSON.stringify(message, null, 2));
    }
    this.console.log('\n');
  }
}
/**
 * Stopping UDF through which the agent delivers its final answer.
 *
 * By default the answer is an object with a single string field `answer`;
 * a caller may substitute its own schema. The output schema is always the
 * same object as the input schema.
 *
 * NOTE(review): `Static` and `Promise` appear below without their type
 * arguments — this looks like an artifact of how this copy was produced;
 * confirm against the real file before editing the signature.
 */
export class FinalAnswerUdf extends BaseStoppingUdf {
  name = 'finalAnswer';
  description: string;

  // Default answer shape; replaced in the constructor when `answerSchema` is given.
  inputSchema: TSchema = Type.Object({
    answer: Type.String({ description: 'The final answer to the task' }),
  });
  outputSchema: TSchema;
  // Last value passed to `call`, kept on the instance.
  output: any;

  /**
   * @param answerSchema optional schema that replaces the default answer shape
   * @param description optional description; when omitted (or empty) one is
   *   generated from the output schema's type string
   */
  constructor({
    answerSchema,
    description,
  }: {
    answerSchema?: TSchema;
    description?: string;
  } = {}) {
    super();
    if (answerSchema) {
      this.inputSchema = answerSchema;
    }
    this.outputSchema = this.inputSchema;
    this.description =
      description ||
      `Provide the final answer in the following format: ${schemaToTypeString(
        this.outputSchema,
      )}`;
  }

  /**
   * Records `input` as the final output and returns a copy of it.
   * The copy is made through a JSON round-trip, so values JSON cannot
   * represent are not preserved in the returned value.
   */
  override async call(
    input: Static,
    agent: ICodeAgent,
  ): Promise> {
    this.output = input;
    return JSON.parse(JSON.stringify(this.output));
  }
}
/**
 * Registers a Node tracer provider that exports spans over OTLP and enables
 * the agent-script instrumentation.
 *
 * @param options.traceEndpoint URL the OTLP trace exporter posts to. Defaults
 *   to `http://localhost:6006/v1/traces`, so calling `setup()` with no
 *   arguments behaves as before.
 */
export function setup({
  traceEndpoint = 'http://localhost:6006/v1/traces',
}: { traceEndpoint?: string } = {}) {
  // For troubleshooting, set the log level to DiagLogLevel.DEBUG
  // diag.setLogger(new DiagConsoleLogger(), DiagLogLevel.INFO);

  const provider = new NodeTracerProvider();

  // provider.addSpanProcessor(new SimpleSpanProcessor(new ConsoleSpanExporter()));
  provider.addSpanProcessor(
    new SimpleSpanProcessor(
      new OTLPTraceExporter({
        url: traceEndpoint,
      }),
    ),
  );
  provider.register();

  registerInstrumentations({
    instrumentations: [
      // Second argument caps base64 image payloads at 256 KiB.
      new AgentsInstrumentation({}, { base64ImageMaxLength: 256 * 1024 }),
    ],
  });

  console.log('👀 OpenInference initialized');
}
/**
 * Renders one worked example as prompt text: a `Task: "…"` header followed
 * by one section per step showing the step's code block (with the thought
 * as a leading comment) and the UDF call result.
 *
 * @param example task plus its ordered steps
 * @returns the rendered prompt fragment
 */
export function buildExamplePrompt(example: ICodeAgentRunExample) {
  const renderedSteps = example.steps.map((step, index) =>
    [
      // Leading empty entry: every step section opens with a newline.
      '',
      `## Step ${index + 1}:`,
      '-- Your code block start --',
      '```js',
      `// Thought: ${step.thought}`,
      '',
      `${step.code}`,
      '```',
      '-- Your code block end --',
      '',
      '-- UDF call result --',
      `${step.result}`,
    ].join('\n'),
  );

  return `Task: "${example.task}"\n${renderedSteps.join('\n')}`;
}
/**
 * UDF that runs a web search through `duck-duck-scrape` and returns the
 * hits as `{ title, link, snippet }` objects.
 *
 * NOTE(review): `Static` and `Promise` appear below without their type
 * arguments — this looks like an artifact of how this copy was produced;
 * confirm against the real file before editing the signature.
 */
export class DuckduckgoSearchUdf extends BaseUdf {
  name = 'duckduckgoSearch';

  description = 'Search the web for information';

  // Input: an object with a single `query` string.
  inputSchema = Type.Object(
    {
      query: Type.String({
        description: 'The search query',
      }),
    },
    { default: { query: 'string' } },
  );

  // Output: an array of title/link/snippet records.
  outputSchema = Type.Array(
    Type.Object({
      title: Type.String(),
      link: Type.String(),
      snippet: Type.String(),
    }),
    { default: [{ title: 'string', link: 'string', snippet: 'string' }] },
  );

  // Never assigned in this class, so `search` receives undefined options.
  private searchOptions?: SearchOptions;

  // Upper bound on the number of results returned by `call`.
  private maxResults = 10;

  /**
   * Searches for `input.query` and maps each result's `url` to `link` and
   * `description` to `snippet`, keeping at most `maxResults` entries.
   */
  override async call(
    input: Static,
    agent: ICodeAgent,
  ): Promise> {
    const { results } = await search(input.query, this.searchOptions);

    return results
      .map((result) => ({
        title: result.title,
        link: result.url,
        snippet: result.description,
      }))
      .slice(0, this.maxResults);
  }
}
/**
 * Builds the default UDF set for the deep research agent: one search UDF
 * followed by the page click / navigate / go-back / read UDFs, the final
 * answer UDF and the think UDF.
 *
 * @param options.useBingSearch when true (the default) the search UDF is
 *   `BingSearchUdf`, otherwise `DuckduckgoSearchUdf`.
 * @returns a fresh array of UDF instances
 */
export const getDeepResearchAgentDefaultUdfs = ({
  // The default lives on the property rather than on the whole options
  // object, so passing `{}` selects Bing exactly like passing nothing.
  useBingSearch = true,
}: { useBingSearch?: boolean } = {}) => [
  useBingSearch ? new BingSearchUdf() : new DuckduckgoSearchUdf(),
  new PageClickUdf(),
  new PageNavigateUrlUdf(),
  new PageGoBackUdf(),
  new PageReadUdf({}),
  new FinalAnswerUdf(),
  new ThinkUdf(),
];
/**
 * UDF that accumulates de-duplicated data entries.
 *
 * Each entry is keyed by its `stableStringify` form; an entry whose key has
 * already been stored is skipped. Entries are kept in insertion order.
 */
export class DatasheetWriteUdf extends BaseUdf {
  name = 'datasheetWrite';

  // NOTE(review): the text says "notebook" although this UDF is the
  // datasheet writer — left unchanged because it is prompt-visible; confirm
  // whether it should read "datasheet".
  description = 'Write data entries to the notebook';

  inputSchema = Type.Array(Type.Any());

  outputSchema = Type.Object(
    {
      successCount: Type.Number(),
      totalSuccessCount: Type.Number(),
    },
    // The default mirrors exactly the declared fields; `call` never
    // produces any other field.
    { default: { successCount: 0, totalSuccessCount: 0 } },
  );

  // A Map instead of an array/object used as a dictionary: no prototype or
  // array-index key hazards, and insertion order is preserved for every key.
  private readonly entries = new Map<string, any>();

  /**
   * @param exampleObject sample entry installed as the input schema's
   *   default (wrapped in a one-element array)
   */
  constructor(exampleObject: any) {
    super();
    this.inputSchema.default = [exampleObject];
  }

  /**
   * Stores every entry of `input` that has not been stored before.
   *
   * @returns `successCount` — entries newly stored by this call;
   *   `totalSuccessCount` — entries stored overall
   */
  override async call(
    input: Static<typeof this.inputSchema>,
    agent: ICodeAgent,
  ): Promise<Static<typeof this.outputSchema>> {
    let successCount = 0;
    for (const entry of input) {
      const key = stableStringify(entry);
      if (this.entries.has(key)) {
        continue;
      }
      this.entries.set(key, entry);
      successCount++;
    }

    return {
      successCount,
      totalSuccessCount: this.entries.size,
    };
  }

  /** Returns all stored entries, in the order they were first written. */
  getEntries(): Array<any> {
    return [...this.entries.values()];
  }
}
/**
 * Example entry point: launches a visible Chromium browser, runs a
 * DeepResearchAgent on a fixed research question and prints the report.
 *
 * Errors are logged rather than rethrown, and the browser is closed exactly
 * once, whether the run succeeds or fails.
 */
async function main() {
  const browser = await chromium.launch({ headless: false });

  const task =
    'Should I build a startup that trains small LLMS for mobile use in 2025? Do a deep dive and give me a report in markdown format';

  try {
    // Created inside the try so a failure here still reaches the teardown.
    const page = await browser.newPage();

    const agent = new DeepResearchAgent({
      name: 'Web Agent',
      description: '',
      maxSteps: 20,
      page,
      udfs: getDeepResearchAgentDefaultUdfs({ useBingSearch: false }), // set to true to use bing, must set BING_API_KEY in .env
      // uncomment to use anthropic, must set ANTHROPIC_API_KEY in .env
      // model: new ChatModel({
      //   provider: 'anthropic',
      //   model: 'claude-3-5-sonnet-latest',
      //   max_tokens: 4096,
      // }),
    });

    const finalAnswer = await agent.run(task, {});

    console.log('finalAnswer:\n', finalAnswer);
  } catch (error) {
    console.error(error);
  } finally {
    // Single teardown point: closing the browser also closes its pages.
    await browser.close();
  }
}
/**
 * UDF that delegates a task to one of the calling agent's managed agents,
 * looked up by name.
 */
export class CallAgentUdf extends BaseUdf {
  name: string;
  description: string;

  inputSchema = Type.Object({
    task: Type.String({
      description: 'The task to be performed by the agent',
    }),
  });
  outputSchema: TSchema;

  /** Name used to find the managed agent on the calling agent. */
  agentName: string;

  /**
   * @param agentName managed agent name; also used, PascalCased and prefixed
   *   with `call`, as this UDF's name
   * @param agentDescription text appended to this UDF's description
   * @param agentOutputSchema schema of the managed agent's result; defaults
   *   to `Type.Any()`
   */
  constructor({
    agentName,
    agentDescription,
    agentOutputSchema,
  }: {
    agentName: string;
    agentDescription: string;
    agentOutputSchema?: TSchema;
  }) {
    super();
    const pascalName = agentName
      .split(/\s+/)
      // Leading/trailing whitespace yields empty segments; without this
      // filter each one would contribute the literal text "undefined".
      .filter((word) => word.length > 0)
      .map((word) => word.charAt(0).toUpperCase() + word.slice(1))
      .join('');
    this.name = `call${pascalName}`;
    this.description = `Call the ${agentName} agent for help. Here's a description of the agent: ${agentDescription}`;
    this.agentName = agentName;
    this.outputSchema = agentOutputSchema || Type.Any();
  }

  /**
   * Runs `input.task` on the managed agent named `agentName`.
   *
   * @throws Error when the calling agent has no managed agent with that name
   */
  override async call(
    input: Static<typeof this.inputSchema>,
    agent: ICodeAgent,
  ): Promise<Static<typeof this.outputSchema>> {
    const managedAgent = agent.managedAgents.find(
      (candidate) => candidate.name === this.agentName,
    );
    if (!managedAgent) {
      // Report the agent name that was searched for, not this UDF's name.
      throw new Error(`Agent ${this.agentName} not found`);
    }
    const result = await managedAgent.call(input.task, {});
    return result;
  }
}
/**
 * Example entry point: launches a visible Chromium instance, runs a
 * `WebDataAgent` that collects Hacker News posts into a datasheet, prints the
 * collected entries and shuts the browser down.
 *
 * Errors from the agent run are logged rather than rethrown. The browser is
 * closed exactly once, in `finally`, whether or not the run succeeded.
 */
async function main() {
  const browser = await chromium.launch({ headless: false });

  try {
    // Created inside the try so a failing newPage() still closes the browser.
    const page = await browser.newPage();

    const task = 'give me the top 40 posts on hacker news';
    const schema = createTSchemaFromInstance({
      title: 'title of the article',
      author: 'author of the article',
      points: 0,
    });

    const agent = new WebDataAgent({
      name: 'Web Agent',
      description: '',
      maxSteps: 10,
      page,
      dataObjectSchema: schema,
      shouldRunPlanning: true,
      udfs: [
        ...getWebDataAgentDefaultUdfs({
          useBingSearch: false, // set to true to use bing, must set BING_API_KEY in .env
          extractionObjectSchema: schema,
        }),
      ],
      // uncomment to use anthropic, must set ANTHROPIC_API_KEY in .env
      // model: new ChatModel({
      //   provider: 'anthropic',
      //   model: 'claude-3-5-sonnet-latest',
      //   max_tokens: 4096,
      // }),
    });

    await agent.run(task, {});

    console.log('data:\n', agent.getDatasheetEntries());
  } catch (error) {
    console.error(error);
  } finally {
    // Closing the browser also closes every page it owns, so no separate
    // page.close() is needed and nothing is closed twice.
    await browser.close();
  }
}
/**
 * Navigates the agent's page to `input.url`, unless a history entry with the
 * same URL already exists on the agent.
 *
 * - Already visited: remembers a `'skipped'` item on this UDF (it is not added
 *   to the agent's history) and reports `success: false`.
 * - Otherwise: appends a `'loading'` item to the agent's navigation history,
 *   then loads the URL with a 30 s navigation timeout.
 */
override async pageActionCall(
  input: Static<typeof this.inputSchema>,
  agent: IWebAgent,
): Promise<Static<typeof this.outputSchema>> {
  const { url } = input;
  const alreadyVisited = agent.navigationHistory.some(
    (entry) => entry.url === url,
  );

  if (alreadyVisited) {
    this.historyItem = { url, timestamp: Date.now(), status: 'skipped' };
    return { success: false, message: 'already visited, skipping' };
  }

  const pendingItem: IWebAgentNavigationHistoryItem = {
    url,
    timestamp: Date.now(),
    status: 'loading',
  };
  // Keep a handle on the item so the after-call hook can update its status.
  this.historyItem = pendingItem;
  agent.navigationHistory.push(pendingItem);

  await agent.page.goto(url, { timeout: 30000 });
  return { success: true };
}
/**
 * Locates elements by visible text (or by ARIA role + accessible name when
 * `role` is given) and picks one of them.
 *
 * @param page Playwright page to search.
 * @param text Text (or accessible name) to look for.
 * @param role Optional role; when set, `page.getByRole` is used instead of `page.getByText`.
 * @param exact Whether the text must match exactly.
 * @param elementIndex Zero-based index into the matches. Anything that is not
 *   an integer within `[0, matches.length)` falls back to the first match.
 * @returns `{ match: null }` when nothing matched; otherwise all matches as
 *   `candidates` plus the selected `match`.
 */
export async function getBestElementByText({
  page,
  text,
  role,
  exact,
  elementIndex,
}: {
  page: Page;
  text: string;
  role?: ElementRole;
  exact: boolean;
  elementIndex?: number;
}): Promise<{
  candidates?: Locator[];
  match: Locator | null;
}> {
  const elementsLocator = role
    ? page.getByRole(role, { exact, name: text })
    : page.getByText(text, { exact });

  const elements = await elementsLocator.all();
  if (elements.length === 0) {
    return { match: null };
  }

  // A fractional index (e.g. 1.5) would pass a pure range check and then index
  // the array with a non-existent key, yielding `undefined` as the match.
  const isUsableIndex =
    elementIndex !== undefined &&
    Number.isInteger(elementIndex) &&
    elementIndex >= 0 &&
    elementIndex < elements.length;
  const selectedIndex = isUsableIndex ? elementIndex : 0;

  return {
    candidates: elements,
    match: elements[selectedIndex] ?? null,
  };
}
/**
 * Base class for UDFs that perform an action on the agent's browser page.
 *
 * `call` runs the subclass-provided `pageActionCall` under a timeout and, when
 * the agent's latest memory step is an `ActionStep`, attaches a screenshot of
 * the page to it, both after success and after a timeout.
 */
export abstract class PageActionUdf extends PageUdf {
  /** @param timeoutMs Maximum time the page action may take, in milliseconds. */
  constructor(public timeoutMs: number = PageActionDefaultTimeoutMs) {
    super();
  }

  /**
   * Runs the page action with a timeout and records a screenshot.
   *
   * @throws PageActionTimeoutError when the action does not settle within
   *   `timeoutMs`. The action itself is not cancelled, so it may still succeed.
   * @throws Whatever `pageActionCall` or the screenshot capture throws.
   */
  override async call(
    input: Static<typeof this.inputSchema>,
    agent: IWebAgent,
  ): Promise<Static<typeof this.outputSchema>> {
    try {
      const result = await this.runWithTimeout(input, agent);
      await this.saveScreenshotToMemory(agent);
      return result;
    } catch (error) {
      if (error instanceof PageActionTimeoutError) {
        await this.saveScreenshotToMemory(
          agent,
          `Page action timed out after ${this.timeoutMs} ms. It is possible that the action succeeded but timed out on page load`,
        );
      }
      throw error;
    }
  }

  /** Races `pageActionCall` against a timer and always clears the timer. */
  private async runWithTimeout(
    input: Static<typeof this.inputSchema>,
    agent: IWebAgent,
  ): Promise<Static<typeof this.outputSchema>> {
    let timer: ReturnType<typeof setTimeout> | undefined;
    const timeout = new Promise<never>((_, reject) => {
      timer = setTimeout(() => {
        reject(new PageActionTimeoutError(this.timeoutMs));
      }, this.timeoutMs);
    });

    try {
      return await Promise.race([this.pageActionCall(input, agent), timeout]);
    } finally {
      // Without this the timer stays scheduled for the full timeout after the
      // action has already finished.
      clearTimeout(timer);
    }
  }

  /** The actual page interaction; implemented by subclasses. */
  protected abstract pageActionCall(
    input: Static<typeof this.inputSchema>,
    agent: IWebAgent,
  ): Promise<Static<typeof this.outputSchema>>;

  /**
   * Appends a screenshot observation to the agent's most recent memory step.
   * Does nothing when that step is not an `ActionStep`.
   *
   * @param additionalContext Extra text appended (on a new line) to the
   *   observation's context string.
   */
  protected async saveScreenshotToMemory(
    agent: IWebAgent,
    additionalContext: string = '',
  ): Promise<void> {
    const currentStep = agent.memory.steps[agent.memory.steps.length - 1];
    if (!(currentStep instanceof ActionStep)) return;

    const { data: screenshotData } = await getBase64Screenshot(agent.page);

    currentStep.observations.push({
      type: 'image',
      image: screenshotData,
      context: `screenshot after page action ${
        this.name
      } on ${agent.page.url()}${
        additionalContext ? `\n${additionalContext}` : ''
      }`,
    });
  }
}
/**
 * Builds a web agent specialised in collecting structured data.
 *
 * Defaults applied when the corresponding prop is falsy:
 * - `prompts`: `webDataAgentPrompt`
 * - `udfs`: `getWebDataAgentDefaultUdfs`, using `props.model` for extraction
 *   and `props.dataObjectSchema` as the extraction schema
 * - `description`: an instruction embedding a JSON sample generated from
 *   `props.dataObjectSchema` (an empty-string description also selects this
 *   default, because `||` treats `''` as missing)
 *
 * @throws Error when the resulting UDF list lacks a `DatasheetWriteUdf` or a
 *   `PageExtractDataUdf`.
 */
constructor(props: IWebDataAgentProps) {
  super({
    ...props,
    prompts: props.prompts || webDataAgentPrompt,
    udfs:
      props.udfs ||
      getWebDataAgentDefaultUdfs({
        extractionModel: props.model,
        extractionObjectSchema: props.dataObjectSchema,
      }),
    description:
      props.description ||
      `Your objective is to collect data as JSON objects with the following structure:\n\n${JSON.stringify(
        generateDefaultJsonSchemaInstance(props.dataObjectSchema),
      )} using the 'datasheetWrite' UDF to save any relevant data after extracting data from a webpage or searching the web. Use the provided page UDFs to explore the webpage and extract data following user instructions. Navigate away from the page if you see a captcha.`,
  });

  if (!this.udfs.some((udf) => udf instanceof DatasheetWriteUdf)) {
    throw new Error('The DatasheetWrite UDF is required');
  }
  if (!this.udfs.some((udf) => udf instanceof PageExtractDataUdf)) {
    throw new Error('The PageExtractData UDF is required');
  }
}
76 | .getEntries(); 77 | } 78 | 79 | override async call( 80 | task: string, 81 | kwargs: any, 82 | ): Promise> { 83 | await super.call(task, kwargs); 84 | return this.getDatasheetEntries(); 85 | } 86 | } 87 | -------------------------------------------------------------------------------- /packages/agent-script/src/lib/prompts/parts.ts: -------------------------------------------------------------------------------- 1 | import { removeLeadingIndentation } from '../utils'; 2 | 3 | export const codeAgentRolePromptPart = removeLeadingIndentation(` 4 | You are an expert javascript software developer who can solve any task using only valid javascript code. You will be given a task to solve as best you can. 5 | 6 | To solve the task, you must plan forward to proceed in a series of steps. 7 | 8 | At each step you'll write a javascript code block that starts with a '// Thought:' comment to explain your reasoning towards solving the task and the User Defined Functions (UDF / UDFs)that you want to use. Then you should write the code in simple Javascript. The result of UDF call should be stored in a variable so that it can be used in the next step. Each UDF call result will be printed to the console for you to see. 9 | `); 10 | 11 | export const codeAgentRules = [ 12 | `CRITICAL: You must only response in valid Javascript code. No other text is allowed. The code must be enclosed in a code block starting with \`\`\`js and ending with \`\`\`. Start with a // Thought: comment to explain your reasoning towards solving the task and the UDFs that you want to use, then write the code. Example of a valid output: 13 | \`\`\`js 14 | // Thought: ... 15 | // code block with UDF calls, ... 16 | \`\`\``, 17 | `Use only variables that you have defined!`, 18 | `Make sure to use the right arguments for the UDFs as defined in the signature. 
CRITICAL: You must call an async UDF with an await.`, 19 | `Take care to not chain too many sequential UDF calls in the same code block, especially when the output format is unpredictable. For instance, a call to search has an unpredictable return format, so do not have another UDF call that depends on its output in the same block.`, 20 | `Call a UDF only when needed, and never re-do an UDF call that you previously did with the exact same parameters.`, 21 | `Don't name any new variable with the same name as a UDF: for instance don't name a variable 'finalAnswer'.`, 22 | `Never create any notional variables in our code, as having these in your logs will derail you from the true variables.`, 23 | `You can use imports in your code, but only from the following list of modules: [{{authorizedImports}}]. Only the following global variables are available: [{{globalVariables}}].`, 24 | `The state persists between code executions: so if in one step you've created variables or imported modules, these will all persist.`, 25 | `Don't give up! You're in charge of solving the task, not providing directions to solve it.`, 26 | `For intermedia variables, programatically pass values as input for UDF calls instead of typing them out. For example, use \`navigate({url: searchResult[0].link})\` instead of \`navigate({url: "https://example.com"})\`.`, 27 | `Do not use console.log to print the result of UDF calls.`, 28 | `Do not create new functions.`, 29 | `Always assign the result of UDF calls to a variable.`, 30 | `Write only one code block per step.`, 31 | `If there are UDF calls in the code block but you see no output from the calls, it means that the UDF call(s) failed. 
/**
 * Returns the default UDF set for a `WebAgent`: one web-search UDF plus page
 * click / read / navigate / go-back, final-answer and think UDFs.
 *
 * @param options.useBingSearch Use Bing when `true`, DuckDuckGo when `false`.
 *   Defaults to `true`, also when an options object is passed without the
 *   property (the default is applied to the property itself, so `{}` no longer
 *   switches to DuckDuckGo by accident).
 */
export function getWebAgentDefaultUdfs({
  useBingSearch = true,
}: { useBingSearch?: boolean } = {}) {
  return [
    useBingSearch ? new BingSearchUdf() : new DuckduckgoSearchUdf(),
    new PageClickUdf(),
    new PageReadUdf({}),
    new PageNavigateUrlUdf(),
    new PageGoBackUdf(),
    new FinalAnswerUdf(),
    new ThinkUdf(),
  ];
}
/**
 * Runs agent-generated scripts inside a persistent Node `vm` context.
 *
 * Host functions are exposed to scripts through `register`; each successful
 * call is recorded in `callHistory`, one inner array per `executeScript` run.
 */
export class Sandbox {
  /**
   * @param vmContext Context object shared by all script executions.
   * @param callHistory Recorded calls, one array per executed script.
   */
  constructor(
    public vmContext: vm.Context = vm.createContext(),
    public callHistory: ICallableResult[][] = [],
  ) {}

  /**
   * Exposes `fn` to scripts under the global name `callable`.
   *
   * The wrapper records `{ returnValue, callable }` in the call list of the
   * script that was current when the call started. Failures are rethrown as
   * `Error("Error calling function <callable>: <message>")`.
   */
  register(callable: string, fn: (...fnArgs: any[]) => Promise<any>) {
    this.vmContext[callable] = async (...args: any[]) => {
      // Captured before awaiting so the call is attributed to the script that
      // issued it. May be undefined when invoked outside executeScript.
      const currentScriptCalls = this.callHistory[this.callHistory.length - 1];
      let result: any;
      try {
        result = await fn(...args);
      } catch (error: unknown) {
        throw new Error(
          `Error calling function ${callable}: ${Sandbox.describeError(error)}`,
        );
      }
      currentScriptCalls?.push({
        returnValue: result,
        callable: callable,
      });
      return result;
    };
  }

  /**
   * Executes `script` wrapped in an async function inside the context.
   *
   * While the script runs (and for 100 ms afterwards) unhandled promise
   * rejections in the process are written to the script's buffered console.
   * After a successful run, a formatted summary of the registered-function
   * calls is appended to that console output.
   *
   * @returns The recorded calls, the script's return value and the captured
   *   console output.
   * @throws AgentError with code `SCRIPT_EXECUTION_FAILED` when the script fails.
   */
  async executeScript(
    script: string,
  ): Promise<{ calls: ICallableResult[]; returnValue: any; output: string }> {
    const sandboxConsole = new BufferConsole();
    this.vmContext.console = sandboxConsole;

    const trap = (reason: unknown) => {
      if (reason instanceof Error) {
        sandboxConsole.log(`UnhandledPromiseRejection: ${reason.message}`);
      } else {
        sandboxConsole.log(`UnhandledPromiseRejection: ${reason}`);
      }
    };
    process.on('unhandledRejection', trap);

    const currentScriptCalls: ICallableResult[] = [];
    this.callHistory.push(currentScriptCalls);

    try {
      const existingVariables = new Set(Object.keys(this.vmContext));

      const scriptReturnValue = await vm.runInContext(
        `(async () => {\n${script}\n})()`,
        this.vmContext,
      );

      // Only names that ended up as keys on the context object are detected.
      const newVariables = Object.keys(this.vmContext).filter(
        (key) => !existingVariables.has(key),
      );

      const callResultsString = this.formatScriptCallResults(
        newVariables,
        currentScriptCalls,
      );
      if (callResultsString) {
        sandboxConsole.log(callResultsString);
      }

      return {
        calls: currentScriptCalls,
        returnValue: scriptReturnValue,
        output: sandboxConsole.getOutput(),
      };
    } catch (error: unknown) {
      throw new AgentError({
        message: `Script execution failed: ${Sandbox.describeError(error)}`,
        code: AgentErrorCode.SCRIPT_EXECUTION_FAILED,
      });
    } finally {
      // Detach slightly later so rejections surfacing right after the script
      // settles are still captured.
      setTimeout(() => process.off('unhandledRejection', trap), 100);
    }
  }

  /**
   * Renders each recorded call as `// <callable> -> ` followed by its return
   * value as pretty-printed JSON, prefixed with `<variable> = ` when one of
   * `variables` currently holds that exact value in the context.
   * Entries are separated by a blank line.
   */
  formatScriptCallResults(
    variables: string[],
    callResults: ICallableResult[],
  ): string {
    return callResults
      .map((call) => {
        const correspondingVariable = variables.find(
          (variable) => this.vmContext[variable] === call.returnValue,
        );
        const assignment = correspondingVariable
          ? `${correspondingVariable} = `
          : '';
        return `// ${call.callable} -> \n${assignment}${Sandbox.stringifyValue(
          call.returnValue,
        )}`;
      })
      .join('\n\n');
  }

  /**
   * Extracts a message from a thrown value. Duck-typed on `message` rather
   * than `instanceof Error`, because errors created inside the vm context do
   * not inherit from the host realm's `Error`.
   */
  private static describeError(error: unknown): string {
    if (typeof error === 'object' && error !== null && 'message' in error) {
      return String((error as { message: unknown }).message);
    }
    return String(error);
  }

  /**
   * Pretty-prints a value as JSON; values JSON cannot serialise (circular
   * structures, BigInt) fall back to `String(value)` instead of throwing.
   */
  private static stringifyValue(value: unknown): string {
    try {
      return String(JSON.stringify(value, null, 2));
    } catch {
      try {
        return String(value);
      } catch {
        return '[unserializable value]';
      }
    }
  }
}
/**
 * Chat completion that must return data matching a JSON schema.
 *
 * `request.response_format` has to be of type `json_schema`. For the
 * `anthropic` provider the schema is sent as a single function tool
 * (`extractDataEntities`) and the first tool call's arguments are returned as
 * the message content. Every other provider gets the request forwarded
 * unchanged to `chatCompletion`.
 *
 * The caller's `request` object is never modified.
 *
 * @throws ChatCompletionError when `response_format` is not a JSON schema, or
 *   when the Anthropic response contains no tool call.
 */
async chatCompletionWithSchema(
  request: {
    messages: ChatCompletionMessageParam[];
  } & Partial<ChatCompletionCreateParamsNonStreaming>,
): Promise<{
  message: IChatMessage;
  metadata: IChatResponseMetadata;
}> {
  // @ts-ignore
  const responseFormat = request.response_format;
  // @ts-ignore
  if (responseFormat?.type !== 'json_schema') {
    throw new ChatCompletionError('response_format must be a json_schema');
  }

  if (this.options.provider !== 'anthropic') {
    return this.chatCompletion(request);
  }

  const dataExtractionTool = {
    name: 'extractDataEntities',
    description: 'Extracts data entities from given content',
    // @ts-ignore
    parameters: responseFormat.json_schema.schema,
  };

  // Build a new request instead of assigning `request.tools`, so the object
  // owned by the caller keeps its original tool list.
  const response = await this.chatCompletion({
    ...request,
    tools: [
      {
        function: dataExtractionTool,
        type: 'function',
      },
    ],
  });

  const toolCall = response.message.raw?.tool_calls?.[0];
  if (!toolCall) {
    throw new ChatCompletionError(
      'Failed to extract data: no tool call returned from chat completion using Anthropic',
    );
  }

  return {
    message: {
      role: response.message.role,
      content: toolCall.function.arguments,
      raw: response.message.raw,
    },
    metadata: {
      usage: {
        promptTokens: response.metadata.usage.promptTokens,
        completionTokens: response.metadata.usage.completionTokens,
        totalTokens: response.metadata.usage.totalTokens,
      },
    },
  };
}
'../agentLogger'; 4 | 5 | describe('AgentMemory', () => { 6 | let agentMemory: AgentMemory; 7 | const systemPrompt = 'Test system prompt'; 8 | 9 | beforeEach(() => { 10 | agentMemory = new AgentMemory(systemPrompt); 11 | }); 12 | 13 | test('should initialize with system prompt', () => { 14 | expect(agentMemory.systemPrompt).toBeDefined(); 15 | expect(agentMemory.systemPrompt.systemPrompt).toBe(systemPrompt); 16 | }); 17 | 18 | test('should start with empty steps', () => { 19 | expect(agentMemory.steps).toHaveLength(0); 20 | }); 21 | 22 | test('reset() should clear all steps', () => { 23 | agentMemory.steps.push(new ActionStep({ stepNumber: 1 })); 24 | agentMemory.reset(); 25 | expect(agentMemory.steps).toHaveLength(0); 26 | }); 27 | 28 | test('getSuccinctSteps() should return summarized steps', () => { 29 | const actionStep = new ActionStep({ 30 | stepNumber: 1, 31 | modelOutput: 'Test output', 32 | observations: [{ type: 'text', text: 'Test observation' }], 33 | }); 34 | const taskStep = new TaskStep({ task: 'Test task' }); 35 | 36 | agentMemory.steps.push(actionStep); 37 | agentMemory.steps.push(taskStep); 38 | 39 | const steps = agentMemory.getSuccinctSteps(); 40 | expect(steps).toHaveLength(2); 41 | expect(steps[0]?.content).toContain('Observation:\nTest observation'); 42 | expect(steps[1]?.content).toContain('Test task'); 43 | }); 44 | 45 | test('replay() should log steps correctly', () => { 46 | const mockLogger = new AgentLogger(); 47 | const consoleSpy = jest.spyOn(mockLogger.console, 'log'); 48 | 49 | const actionStep = new ActionStep({ 50 | stepNumber: 1, 51 | modelOutput: 'Test output', 52 | }); 53 | agentMemory.steps.push(actionStep); 54 | 55 | agentMemory.replay(mockLogger); 56 | 57 | expect(consoleSpy).toHaveBeenCalledWith("Replaying the agent's steps:"); 58 | expect(consoleSpy).toHaveBeenCalledWith(expect.stringContaining('Step 1')); 59 | }); 60 | 61 | test('should handle different step types', () => { 62 | const actionStep = new ActionStep({ 
/**
 * UDF that clicks an element on the agent's page, located by its text.
 *
 * An exact text match is tried first; when nothing matches, a non-exact match
 * is tried. `elementIndex` chooses among several matches.
 */
export class PageClickUdf extends PageActionUdf {
  name = 'pageClick';
  description = 'Clicks on an element on the page';

  inputSchema = Type.Object(
    {
      elementText: Type.String({
        description: 'The text of the element to click',
      }),
      // elementRole: Type.Optional(
      //   Type.Enum(ElementRole, {
      //     description: '(Optional) The role of the element to click',
      //   }),
      // ),
      elementIndex: Type.Optional(
        Type.Number({
          description:
            '(Optional) The index of the element in the matching elements. 0 is the first element, 1 is the second element, etc. Defaults to 0',
        }),
      ),
    },
    { default: { elementText: 'string', elementRole: ElementRole.LINK } },
  );

  outputSchema = Type.Object(
    {
      success: Type.Boolean(),
      candidateElementLabels: Type.Optional(Type.Array(Type.String())),
      elementIndex: Type.Optional(Type.Number()),
    },
    { default: { success: true, candidateElementLabels: [] } },
  );

  private historyItem: IWebAgentNavigationHistoryItem | undefined;

  /**
   * Clicks the requested element.
   *
   * @returns `success: false` when neither the exact nor the non-exact lookup
   *   finds an element. Otherwise `success: true` together with the text of
   *   every candidate and the index of the candidate that was clicked.
   */
  override async pageActionCall(
    input: Static<typeof this.inputSchema>,
    agent: IWebAgent,
  ): Promise<Static<typeof this.outputSchema>> {
    // Exact match first, then fall back to a non-exact match.
    for (const exact of [true, false]) {
      const { match, candidates } = await getBestElementByText({
        page: agent.page,
        text: input.elementText,
        // role: input.elementRole,
        exact,
        // Forward the requested index; without it the first match was always
        // clicked regardless of what the caller asked for.
        elementIndex: input.elementIndex,
      });

      if (!match) continue;

      await match.click();
      if (!candidates) {
        return { success: true };
      }

      const labels = await Promise.all(
        candidates.map((candidate) => candidate.textContent()),
      );
      return {
        success: true,
        candidateElementLabels: labels.filter(notEmpty),
        // Report the index that was actually clicked: an unusable requested
        // index makes the lookup fall back to the first candidate.
        elementIndex: Math.max(candidates.indexOf(match), 0),
      };
    }

    return { success: false };
  }

  /** Resets the per-call history item before delegating to the base hook. */
  override async onBeforeCall(
    input: Static<typeof this.inputSchema>,
    agent: IWebAgent,
  ) {
    await super.onBeforeCall(input, agent);
    this.historyItem = undefined;
  }

  /** Marks the per-call history item, if one was set, as successful. */
  override async onAfterCall(
    input: Static<typeof this.inputSchema>,
    output: Static<typeof this.outputSchema>,
    agent: IWebAgent,
  ) {
    await super.onAfterCall(input, output, agent);
    if (this.historyItem) {
      this.historyItem.status = 'success';
    }
  }
}
period 33 | await new Promise((resolve) => setTimeout(resolve, this.timeoutMs + 200)); 34 | } 35 | return { testOutput: 'success' }; 36 | } 37 | } 38 | 39 | describe('PageActionUdf', () => { 40 | let mockAgent: IWebAgent; 41 | let udf: TestPageActionUdf; 42 | 43 | beforeEach(() => { 44 | mockAgent = { 45 | page: { 46 | screenshot: jest.fn().mockResolvedValue(Buffer.from('fake-screenshot')), 47 | url: jest.fn().mockReturnValue('https://test.com'), 48 | }, 49 | memory: { 50 | steps: [new ActionStep({ stepNumber: 1 })], 51 | }, 52 | } as unknown as IWebAgent; 53 | 54 | udf = new TestPageActionUdf(200); 55 | }); 56 | 57 | it('should successfully complete when action finishes before timeout', async () => { 58 | const result = await udf.call({ testInput: 'quick' }, mockAgent); 59 | 60 | expect(result).toEqual({ testOutput: 'success' }); 61 | }); 62 | 63 | it('should throw PageActionTimeoutError when action exceeds timeout', async () => { 64 | const promise = udf.call({ testInput: 'timeout' }, mockAgent); 65 | 66 | // Advance timers incrementally to handle Promise.race properly 67 | jest.advanceTimersByTime(220); 68 | 69 | try { 70 | await promise; 71 | expect(true).toBe(false); 72 | } catch (error) { 73 | expect(error).toBeInstanceOf(PageActionTimeoutError); 74 | } 75 | }); 76 | 77 | it('should save screenshot to memory after successful action', async () => { 78 | const result = await udf.call({ testInput: 'quick' }, mockAgent); 79 | expect(result).toEqual({ testOutput: 'success' }); 80 | 81 | const currentStep = mockAgent.memory.steps[0] as ActionStep; 82 | expect(currentStep.observations).toHaveLength(1); 83 | expect(currentStep.observations[0]).toMatchObject({ 84 | type: 'image', 85 | context: 86 | 'screenshot after page action TestPageActionUdf on https://test.com', 87 | image: 'base64-encoded-screenshot-data', 88 | }); 89 | }); 90 | 91 | it('should save screenshot to memory after timeout', async () => { 92 | const promise = udf.call({ testInput: 'timeout' }, 
mockAgent); 93 | 94 | // Advance timers incrementally to handle Promise.race properly 95 | jest.advanceTimersByTime(220); 96 | 97 | try { 98 | await promise; 99 | expect(true).toBe(false); 100 | } catch (error) { 101 | expect(error).toBeInstanceOf(PageActionTimeoutError); 102 | } 103 | 104 | const currentStep = mockAgent.memory.steps[0] as ActionStep; 105 | expect(currentStep.observations).toHaveLength(1); 106 | expect(currentStep.observations[0]).toMatchObject({ 107 | type: 'image', 108 | context: 109 | 'screenshot after page action TestPageActionUdf on https://test.com\nPage action timed out after 200 ms. It is possible that the action succeeded but timed out on page load', 110 | image: 'base64-encoded-screenshot-data', 111 | }); 112 | }); 113 | }); 114 | -------------------------------------------------------------------------------- /packages/agent-script-web/src/scripts/cli.ts: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import { setup } from '@runparse/agent-script-instrumentation'; 3 | import { Option, program } from 'commander'; 4 | import playwright from 'playwright'; 5 | import { WebDataAgent, DeepResearchAgent } from '../lib/agents/webAgents/index'; 6 | import { createTSchemaFromInstance } from '../lib/utils/schema'; 7 | import { CodeAgent, FinalAnswerUdf } from '@runparse/agent-script'; 8 | setup(); 9 | 10 | program 11 | .command('deep-research-agent') 12 | .description('Run the deep research agent') 13 | .addOption( 14 | new Option('--task ', 'The task to run').makeOptionMandatory(), 15 | ) 16 | .action(async (options) => { 17 | console.log(JSON.stringify(options, undefined, 2)); 18 | 19 | const browser = await playwright.chromium.launch({ headless: false }); 20 | const page = await browser.newPage(); 21 | 22 | try { 23 | const agent = new DeepResearchAgent({ 24 | name: 'Deep Research Agent', 25 | description: '', 26 | maxSteps: 10, 27 | page: page, 28 | // model: new ChatModel({ 29 | // 
provider: 'anthropic', 30 | // model: 'claude-3-5-sonnet-latest', 31 | // max_tokens: 4096, 32 | // }), 33 | }); 34 | 35 | const result = await agent.run(options.task); 36 | 37 | console.log(JSON.stringify(result, undefined, 2)); 38 | } catch (error) { 39 | console.error(error); 40 | } 41 | await browser.close(); 42 | }); 43 | 44 | program 45 | .command('web-data-agent') 46 | .description('Run the web agent') 47 | .addOption( 48 | new Option('--task ', 'The task to run').makeOptionMandatory(), 49 | ) 50 | .addOption( 51 | new Option( 52 | '--schema ', 53 | 'JSON schema for the output', 54 | ).makeOptionMandatory(), 55 | ) 56 | .action(async (options) => { 57 | console.log(JSON.stringify(options, undefined, 2)); 58 | 59 | const browser = await playwright.chromium.launch({ headless: false }); 60 | const page = await browser.newPage(); 61 | const schema = createTSchemaFromInstance(JSON.parse(options.schema)); 62 | 63 | try { 64 | const agent = new WebDataAgent({ 65 | name: 'Web Agent', 66 | description: '', 67 | maxSteps: 10, 68 | page: page, 69 | dataObjectSchema: schema, 70 | shouldRunPlanning: true, 71 | // model: new ChatModel({ 72 | // provider: 'anthropic', 73 | // model: 'claude-3-5-sonnet-latest', 74 | // max_tokens: 4096, 75 | // }), 76 | }); 77 | 78 | const result = await agent.run(options.task); 79 | 80 | console.log(JSON.stringify(result, undefined, 2)); 81 | } catch (error) { 82 | console.error(error); 83 | } 84 | await browser.close(); 85 | }); 86 | 87 | program 88 | .command('code-agent-manager') 89 | .description('Run the code agent manager') 90 | .addOption( 91 | new Option('--task ', 'The task to run').makeOptionMandatory(), 92 | ) 93 | .addOption( 94 | new Option( 95 | '--schema ', 96 | 'JSON schema for the output', 97 | ).makeOptionMandatory(), 98 | ) 99 | .action(async (options) => { 100 | console.log(JSON.stringify(options, undefined, 2)); 101 | 102 | const browser = await playwright.chromium.launch({ headless: false }); 103 | const page = await 
browser.newPage(); 104 | const schema = createTSchemaFromInstance(JSON.parse(options.schema)); 105 | 106 | try { 107 | const webDataAgent = new WebDataAgent({ 108 | name: 'Web Agent', 109 | description: '', 110 | maxSteps: 10, 111 | page: page, 112 | dataObjectSchema: schema, 113 | shouldRunPlanning: false, 114 | }); 115 | 116 | const agent = new CodeAgent({ 117 | name: 'Test Code Agent', 118 | description: '', 119 | udfs: [new FinalAnswerUdf()], 120 | maxSteps: 10, 121 | shouldRunPlanning: true, 122 | managedAgents: [webDataAgent], 123 | }); 124 | 125 | const result = await agent.run(options.task); 126 | 127 | console.log(JSON.stringify(result, undefined, 2)); 128 | } catch (error) { 129 | console.error(error); 130 | } 131 | await browser.close(); 132 | }); 133 | 134 | program.parse(process.argv); 135 | -------------------------------------------------------------------------------- /packages/agent-script-web/src/lib/agents/webAgents/deepResearchAgent.prompt.ts: -------------------------------------------------------------------------------- 1 | import { 2 | IAgentPrompt, 3 | codeAgentRolePromptPart, 4 | buildExamplesSectionPrompt, 5 | ICodeAgentRunExample, 6 | removeLeadingIndentation, 7 | codeAgentPrompt, 8 | codeAgentRules, 9 | buildCodeAgentRulesPrompt, 10 | } from '@runparse/agent-script'; 11 | 12 | export const deepResearchAgentExamples: ICodeAgentRunExample[] = [ 13 | { 14 | task: 'Find the best selling top 2 books in 2024, give me the title, author', 15 | steps: [ 16 | { 17 | thought: 18 | 'I will use the UDF `webSearch` to get the best selling books in 2024.', 19 | code: 'bookSearchResults = await webSearch({query: "best selling books in 2024"})', 20 | result: removeLeadingIndentation(` 21 | bookSearchResults: [ 22 | { 23 | "title": "The Great Gatsby", 24 | "link": "https://www.amazon.com/Great-Gatsby-F-Scott-Fitzgerald/dp/1451673316", 25 | }, 26 | ... 
27 | ] 28 | `), 29 | }, 30 | { 31 | thought: 32 | 'I have the result from the websearch stored in the variable `bookSearchResults`. Now I need to visit each of the webpages from the results and extract the title, author', 33 | code: 'webpageDataLink1 = await getWebpageData(bookSearchResults[0].link)', 34 | result: removeLeadingIndentation( 35 | `webpageDataLink1: [ 36 | { 37 | "title": "The Great Gatsby", 38 | "link": "https://www.amazon.com/Great-Gatsby-F-Scott-Fitzgerald/dp/1451673316", 39 | ...truncated... 40 | "title": "Alice's Adventures in Wonderland", 41 | "link": "https://www.amazon.com/alice-wonderland-lewis-carroll/dp/1411673311", 42 | } 43 | ]`, 44 | ), 45 | }, 46 | { 47 | thought: 48 | 'I have visited the first webpage from the results. Now I need to visit the second one.', 49 | code: 'webpageDataLink2 = await getWebpageData(bookSearchResults[1].link)', 50 | result: removeLeadingIndentation(` 51 | webpageDataLink2: { 52 | "title": "The Great Gatsby", 53 | "author": "F. Scott Fitzgerald", 54 | } 55 | `), 56 | }, 57 | { 58 | thought: 59 | 'I have visited the second webpage from the results and got the data. The task is done, I can give the final answer.', 60 | code: 'await finalAnswer({reason: "I have found the best selling books in 2024. Here are the titles and authors: ${JSON.stringify(bookSearchResults)}"})', 61 | result: 'No output from UDF calls', 62 | }, 63 | ], 64 | }, 65 | ] as const; 66 | 67 | export const deepResearchAgentRules = [ 68 | ...codeAgentRules, 69 | 'CRITICAL: `await terminate` UDF must be the only UDF call in your last step.', 70 | ] as const; 71 | 72 | export const deepResearchAgentPrompt: IAgentPrompt = { 73 | ...codeAgentPrompt, 74 | systemPrompt: `${codeAgentRolePromptPart} 75 | 76 | In the end you have to call the \`await finalAnswer\` UDF with the reason as the argument. 
You must only call the \`await finalAnswer\` UDF after either successfully completing the task or after you have determined that you have exhausted all possible options. 77 | 78 | Use the \`await think\` UDF to think about the task if you are stuck or not making progress according to the plan. 79 | 80 | ${buildExamplesSectionPrompt(deepResearchAgentExamples)} 81 | 82 | Above examples were using notional UDFs that might not exist for you. On top of performing computations in the Javascript code snippets that you create, you only have access to these UDFs (in additional to any built-in functions): 83 | \`\`\`js 84 | {%- for udf in udfs.values() %} 85 | {{ udf.getSignature() | safe }}{{ '\\n' }} 86 | {%- endfor %} 87 | \`\`\` 88 | 89 | {%- if managedAgents and managedAgents | length %} 90 | You can also give tasks to team members. 91 | Calling a team member works the same as for calling a UDF: simply, the only argument you can give in the call is 'task', a long string explaining your task. 92 | Given that this team member is a real human, you should be very verbose in your task. 93 | Here is a list of the team members that you can call: 94 | {%- for agent in managedAgents.values() %} 95 | - {{ agent.name }}: {{ agent.description }} 96 | {%- endfor %} 97 | {%- else %} 98 | {%- endif %} 99 | 100 | ${buildCodeAgentRulesPrompt(Array.from(deepResearchAgentRules))} 101 | 102 | {{ description | safe }} 103 | 104 | Now Begin! 
If you solve the task correctly, you will receive a reward of $1,000,000.`, 105 | }; 106 | -------------------------------------------------------------------------------- /packages/agent-script-instrumentation/src/lib/utils.ts: -------------------------------------------------------------------------------- 1 | import { SemanticConventions } from '@arizeai/openinference-semantic-conventions'; 2 | import { Attributes } from '@opentelemetry/api'; 3 | import { ChatCompletionMessageParam } from 'openai/resources/chat/completions'; 4 | import { ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions'; 5 | 6 | export function transformChatRequestMessages( 7 | messages: ChatCompletionMessageParam[], 8 | options: { 9 | omitImageData: boolean; 10 | } = { 11 | omitImageData: true, 12 | }, 13 | ): any[] { 14 | return messages.map((message) => { 15 | if (!options.omitImageData) { 16 | return message; 17 | } 18 | if (Array.isArray(message.content)) { 19 | return { 20 | ...message, 21 | content: message.content.map((part) => { 22 | if (part.type === 'image_url') { 23 | return { 24 | ...part, 25 | image_url: { 26 | url: `data:image/jpeg;base64,...(omitted)`, 27 | }, 28 | }; 29 | } 30 | return part; 31 | }), 32 | }; 33 | } 34 | return message; 35 | }); 36 | } 37 | 38 | export function getLLMInputMessagesAttributes( 39 | body: ChatCompletionCreateParamsNonStreaming, 40 | ): Attributes { 41 | return body.messages.reduce( 42 | (acc: Attributes, message: ChatCompletionMessageParam, index: number) => { 43 | const messageAttributes = 44 | getChatCompletionInputMessageAttributes(message); 45 | const indexPrefix = `${SemanticConventions.LLM_INPUT_MESSAGES}.${index}.`; 46 | // Flatten the attributes on the index prefix 47 | for (const [key, value] of Object.entries(messageAttributes)) { 48 | acc[`${indexPrefix}${key}`] = value; 49 | } 50 | return acc; 51 | }, 52 | {} as Attributes, 53 | ); 54 | } 55 | 56 | function getChatCompletionInputMessageAttributes( 57 | 
message: ChatCompletionMessageParam, 58 | ): Attributes { 59 | const role = message.role; 60 | const attributes: Attributes = { 61 | [SemanticConventions.MESSAGE_ROLE]: role, 62 | }; 63 | // Add the content only if it is a string 64 | if (typeof message.content === 'string') { 65 | attributes[SemanticConventions.MESSAGE_CONTENT] = message.content; 66 | } else if (Array.isArray(message.content)) { 67 | message.content.forEach((part, index) => { 68 | const contentsIndexPrefix = `${SemanticConventions.MESSAGE_CONTENTS}.${index}.`; 69 | if (part.type === 'text') { 70 | attributes[ 71 | `${contentsIndexPrefix}${SemanticConventions.MESSAGE_CONTENT_TYPE}` 72 | ] = 'text'; 73 | attributes[ 74 | `${contentsIndexPrefix}${SemanticConventions.MESSAGE_CONTENT_TEXT}` 75 | ] = part.text; 76 | } else if (part.type === 'image_url') { 77 | attributes[ 78 | `${contentsIndexPrefix}${SemanticConventions.MESSAGE_CONTENT_TYPE}` 79 | ] = 'image'; 80 | attributes[ 81 | `${contentsIndexPrefix}${SemanticConventions.MESSAGE_CONTENT_IMAGE}.${SemanticConventions.IMAGE_URL}` 82 | ] = part.image_url.url; 83 | } 84 | }); 85 | } 86 | switch (role) { 87 | case 'user': 88 | // There's nothing to add for the user 89 | break; 90 | case 'assistant': 91 | if (message.tool_calls) { 92 | message.tool_calls.forEach((toolCall, index) => { 93 | const toolCallIndexPrefix = `${SemanticConventions.MESSAGE_TOOL_CALLS}.${index}.`; 94 | 95 | // Add the tool call id if it exists 96 | if (toolCall.id) { 97 | attributes[ 98 | `${toolCallIndexPrefix}${SemanticConventions.TOOL_CALL_ID}` 99 | ] = toolCall.id; 100 | } 101 | // Make sure the tool call has a function 102 | if (toolCall.function) { 103 | attributes[ 104 | `${toolCallIndexPrefix}${SemanticConventions.TOOL_CALL_FUNCTION_NAME}` 105 | ] = toolCall.function.name; 106 | attributes[ 107 | `${toolCallIndexPrefix}${SemanticConventions.TOOL_CALL_FUNCTION_ARGUMENTS_JSON}` 108 | ] = toolCall.function.arguments; 109 | } 110 | }); 111 | } 112 | break; 113 | case 
'function': 114 | attributes[SemanticConventions.MESSAGE_FUNCTION_CALL_NAME] = message.name; 115 | break; 116 | case 'tool': 117 | if (message.tool_call_id) { 118 | attributes[`${SemanticConventions.MESSAGE_TOOL_CALL_ID}`] = 119 | message.tool_call_id; 120 | } 121 | break; 122 | case 'system': 123 | // There's nothing to add for the system. Content is captured above 124 | break; 125 | default: 126 | break; 127 | } 128 | return attributes; 129 | } 130 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # AgentScript 2 | 3 | AgentScript is a simple, observable **code-writing** agent builder in TypeScript. Our agents write and execute javascript to accomplish complex tasks. Inspired by Hugging Face’s [smolagents](https://github.com/huggingface/smolagents) 🤗, we’re bringing agentic capabilities to TypeScript, making it easier to build production-ready AI agents. 4 | 5 | ## Demo 6 | 7 | Run inside `examples/agents` (requires `.env` - see [full instructions](./examples/agents/README.md#webdataagent)): 8 | 9 | ```sh 10 | cd examples/agents && npm install 11 | npx tsx --env-file=.env src/webDataAgent/hackernews.ts 12 | ``` 13 | 14 | **Task: give me the top 40 posts on hacker news.** 15 | 16 | https://github.com/user-attachments/assets/de161faa-84a5-4e23-951e-1a7e221ba371 17 | 18 | ## What You Get with AgentScript 19 | 20 | 🔁 A simple, customizable agent loop that enables scalable agentic workflow execution.\ 21 | 📊 No-code OpenTelemetry instrumentation. Full task / step tracing & token usage statistics. (See demo video)\ 22 | 🌐 Web browser actions (visual) with sample web automation agent. 23 | 24 | 🚀 Ready to dive in and build something awesome? 25 | 26 | AgentScript is currently in **alpha**, help us by reporting issues and suggesting features! 
27 | 28 | ## How It Works 29 | 30 | AgentScript provides an agent loop scaffold that breaks down a task into multiple steps. In each step, the agent uses its memory on previous steps, and then does the following: 31 | 32 | 1. Generates descriptive / reasoning comments, followed by a javascript / typescript script block. 33 | 2. Executes the generated script in a Node vm, using built-ins and a list of User-Defined Functions (UDFs). 34 | 3. Adds all UDF call output (or any errors in script execution) into the agent memory context as observations. 35 | 36 | The Agent will keep taking steps towards the goal of the task and terminate when any of the conditions are met: 37 | 38 | 1. A UDF provides the final answer for the task. 39 | 2. The agent reaches the maximum steps allowed. 40 | 3. The agent is stuck in an error loop. 41 | 42 | ## Quick Start 43 | 44 | Install [Arize-ai/phoenix](https://github.com/Arize-ai/phoenix) for detailed tracing. For fastest setup, use docker. 45 | 46 | ### As NPM Packages 47 | 48 | Use your preferred package manager (example below uses npm): 49 | 50 | ```sh 51 | npm install \ 52 | @runparse/agent-script \ 53 | @runparse/agent-script-instrumentation \ 54 | @runparse/agent-script-web 55 | ``` 56 | 57 | ### Local Development 58 | 59 | Pre-requisites: 60 | 61 | - `pnpm` (`npm install -g pnpm`) 62 | 63 | Steps: 64 | 65 | 1. Install dependencies with `pnpm install` in repo root. 66 | 2. Inspect and experiment with the ready-to-run samples in the `examples` folder. 67 | 68 | Generate npm package build artifacts with `pnpm nx run-many --target=build --all`. See `project.json` in each package for details. 69 | 70 | Run tests with `pnpm nx run-many --target=test --all` 71 | 72 | #### Using pnpm workspaces 73 | 74 | If you are using `pnpm` in your main repo, a quick way to reference a fork of this repo is to add the packages as workspace references (e.g. 
`"@runparse/agent-script": "workspace:*",`) in your main repo's `package.json` dependencies section, and then add the path to the `packages` folder in this repo to your main repo's `pnpm-workspace.yaml`, e.g. 75 | 76 | ``` 77 | packages: 78 | - <your-existing-packages> 79 | - <path-to-agent-script-repo>/packages/* // to add 80 | ``` 81 | 82 | ## Why Code Agents? 83 | 84 | Take it from huggingface: [Writing actions as code snippets is demonstrated to work better than the current industry practice of letting the LLM output a dictionary of the tools it wants to call: uses 30% fewer steps (thus 30% fewer LLM calls) and reaches higher performance on difficult benchmarks.](https://github.com/huggingface/smolagents?tab=readme-ov-file#how-do-code-agents-work) 85 | 86 | At a fundamental level, LLMs are remarkable at writing code. And this makes sense, because code is a highly structured way of turning fuzzy ideas into precise actions using natural language. 87 | 88 | In addition, there have been decades of work creating compilers, interpreters, and sandboxes for programming languages that provide highly optimized access to the core components of a computer (working memory, variables, long term storage, object oriented design, object passing, and so much more). These same components are likely to be the building blocks of AGI. 89 | 90 | That’s why we believe that code-writing agents are the best agents in terms of quality and performance. But to move beyond smolagents and into production, we needed a simple yet powerful TypeScript agent builder, which is where AgentScript comes in. 91 | 92 | ## Contributing 93 | 94 | Contributions are welcome. Please fork and submit a pull request to main. 95 | 96 | ## Who are We 97 | 98 | We're a small team of builders based out of Toronto, San Francisco, and Mumbai -- find out more about us [here](https://heyarctic.io/). If you're interested in our work, reach out at hello@heyarctic.io. 
99 | -------------------------------------------------------------------------------- /packages/agent-script/src/lib/__tests__/codeAgent.logging.test.ts: -------------------------------------------------------------------------------- 1 | import { CodeAgent } from '../codeAgent'; 2 | import { AgentLogger } from '../agentLogger'; 3 | import { LogLevel } from '../types'; 4 | import { FinalAnswerUdf } from '../udf'; 5 | 6 | describe('CodeAgent logging functionality', () => { 7 | let mockLogger: AgentLogger; 8 | let agent: CodeAgent; 9 | 10 | beforeEach(() => { 11 | // Create a spy logger 12 | mockLogger = new AgentLogger(); 13 | jest.spyOn(mockLogger, 'logTask'); 14 | jest.spyOn(mockLogger, 'logRule'); 15 | jest.spyOn(mockLogger, 'log'); 16 | jest.spyOn(mockLogger, 'logMarkdown'); 17 | 18 | // Create a simple agent with mock dependencies 19 | agent = new CodeAgent({ 20 | name: 'TestAgent', 21 | description: 'Test agent for logging', 22 | udfs: [new FinalAnswerUdf()], 23 | maxSteps: 3, 24 | logger: mockLogger, 25 | }); 26 | 27 | // Mock model to avoid real API calls 28 | agent.model.chatCompletion = jest.fn().mockResolvedValue({ 29 | message: { 30 | role: 'assistant', 31 | content: '```js\nconsole.log("test");\n```', 32 | }, 33 | }); 34 | 35 | // Mock sandbox to avoid real execution 36 | agent.sandbox.executeScript = jest.fn().mockResolvedValue({ 37 | returnValue: null, 38 | calls: [], 39 | output: 'Mock execution output', 40 | }); 41 | }); 42 | 43 | test('should log task start correctly', async () => { 44 | const task = 'Test task for logging'; 45 | 46 | // Run agent with the test task 47 | await agent.run(task); 48 | 49 | // Verify the task was logged correctly 50 | expect(mockLogger.logTask).toHaveBeenCalledWith(task); 51 | }); 52 | 53 | test('should log step execution details', async () => { 54 | const task = 'Step logging test task'; 55 | 56 | // Run the agent 57 | await agent.run(task); 58 | 59 | // Verify step log was created 60 | 
expect(mockLogger.logRule).toHaveBeenCalledWith(`Step 1`, LogLevel.INFO); 61 | 62 | // Verify model output was logged 63 | expect(mockLogger.logMarkdown).toHaveBeenCalledWith({ 64 | content: expect.stringContaining('```js\nconsole.log("test");\n```'), 65 | title: '--- Output message of the LLM ---', 66 | }); 67 | }); 68 | 69 | test('should log planning steps', async () => { 70 | const task = 'Planning test task'; 71 | 72 | // Configure agent to use planning 73 | agent = new CodeAgent({ 74 | name: 'PlanningAgent', 75 | description: 'Test agent for planning logging', 76 | udfs: [new FinalAnswerUdf()], 77 | maxSteps: 3, 78 | logger: mockLogger, 79 | shouldRunPlanning: true, 80 | }); 81 | 82 | // Mock planning-related model calls 83 | agent.model.chatCompletion = jest 84 | .fn() 85 | .mockResolvedValueOnce({ 86 | message: { role: 'assistant', content: 'Facts about task' }, 87 | }) 88 | .mockResolvedValueOnce({ 89 | message: { 90 | role: 'assistant', 91 | content: 'Step 1: Do something\nStep 2: Finish task', 92 | }, 93 | }) 94 | .mockResolvedValueOnce({ 95 | message: { 96 | role: 'assistant', 97 | content: '```js\nconsole.log("executing plan");\n```', 98 | }, 99 | }); 100 | 101 | // Run the agent 102 | await agent.run(task); 103 | 104 | // Verify planning log 105 | expect(mockLogger.logRule).toHaveBeenCalledWith( 106 | 'Initial plan', 107 | LogLevel.INFO, 108 | ); 109 | expect(mockLogger.log).toHaveBeenCalledWith( 110 | expect.stringContaining('Step 1: Do something'), 111 | ); 112 | }); 113 | 114 | test('should log script execution results', async () => { 115 | const task = 'Script execution logging test'; 116 | const executionOutput = 'Script executed successfully with test output'; 117 | 118 | // Mock the sandbox execution to return specific output 119 | agent.sandbox.executeScript = jest.fn().mockResolvedValue({ 120 | returnValue: null, 121 | calls: [], 122 | output: executionOutput, 123 | }); 124 | 125 | // Run the agent 126 | await agent.run(task); 127 | 128 | // 
Verify script execution results were logged 129 | expect(mockLogger.logMarkdown).toHaveBeenCalledWith({ 130 | content: executionOutput, 131 | title: '-- Script execution results --', 132 | }); 133 | }); 134 | 135 | test('should log when script execution has no output', async () => { 136 | const task = 'Script with no output test'; 137 | 138 | // Mock the sandbox execution to return empty output 139 | agent.sandbox.executeScript = jest.fn().mockResolvedValue({ 140 | returnValue: null, 141 | calls: [], 142 | output: '', 143 | }); 144 | 145 | // Run the agent 146 | await agent.run(task); 147 | 148 | // Verify empty output message was logged 149 | expect(mockLogger.logMarkdown).toHaveBeenCalledWith({ 150 | content: 'No output from script execution', 151 | title: '-- Script execution results --', 152 | }); 153 | }); 154 | }); 155 | -------------------------------------------------------------------------------- /packages/agent-script/src/lib/udf/bingSearchUdf.ts: -------------------------------------------------------------------------------- 1 | import { BaseUdf } from './baseUdf'; 2 | import { ICodeAgent } from '../types'; 3 | import { Type, Static } from '@sinclair/typebox'; 4 | import axios from 'axios'; 5 | 6 | export class BingSearchUdf extends BaseUdf { 7 | name = 'bingSearch'; 8 | 9 | description = 'Search the web for information using Bing'; 10 | 11 | inputSchema = Type.Object( 12 | { 13 | query: Type.String({ description: 'The search query' }), 14 | options: Type.Optional( 15 | Type.Object({ 16 | site: Type.Optional( 17 | Type.String({ description: 'The site to search' }), 18 | ), 19 | count: Type.Optional( 20 | Type.Number({ description: 'Number of results to return' }), 21 | ), 22 | offset: Type.Optional( 23 | Type.Number({ 24 | description: 'Result pagination offset', 25 | }), 26 | ), 27 | market: Type.Optional(Type.String({ description: 'The market' })), 28 | // exclude: Type.Optional( 29 | // Type.Union([Type.String(), Type.Array(Type.String())]), 30 | // 
), 31 | // filetype: Type.Optional(Type.String()), 32 | // intitle: Type.Optional(Type.String()), 33 | // inurl: Type.Optional(Type.String()), 34 | }), 35 | ), 36 | }, 37 | { 38 | default: { 39 | query: 'string', 40 | options: { 41 | site: 'string', 42 | count: 20, 43 | offset: 0, 44 | market: 'en-US', 45 | // exclude: [], 46 | // filetype: '', 47 | // intitle: '', 48 | // inurl: '', 49 | }, 50 | }, 51 | }, 52 | ); 53 | 54 | outputSchema = Type.Array( 55 | Type.Object({ 56 | title: Type.String(), 57 | link: Type.String(), 58 | snippet: Type.String(), 59 | }), 60 | { default: [{ title: 'string', link: 'string', snippet: 'string' }] }, 61 | ); 62 | 63 | private maxResults = 10; 64 | 65 | private endpoint = 'https://api.bing.microsoft.com/v7.0/search'; 66 | 67 | private apiKey = process.env.BING_API_KEY; 68 | 69 | constructor(public urlBlacklist: string[] = []) { 70 | super(); 71 | if (!this.apiKey) { 72 | throw new Error('BING_API_KEY is not set'); 73 | } 74 | } 75 | 76 | /** 77 | * Builds the final query string by combining the base query with various optional search modifiers. 78 | * @param baseQuery The main search query. 79 | * @param options Optional parameters to refine the search. 80 | * @returns A constructed query string. 81 | */ 82 | private buildQuery( 83 | baseQuery: string, 84 | options?: Static['options'], 85 | ): string { 86 | let queryParts: string[] = [baseQuery]; 87 | 88 | if (options) { 89 | if (options.site) { 90 | queryParts.push(`site:${options.site}`); 91 | } 92 | // if (options.filetype) { 93 | // queryParts.push(`filetype:${options.filetype}`); 94 | // } 95 | // if (options.intitle) { 96 | // queryParts.push(`intitle:${options.intitle}`); 97 | // } 98 | // if (options.inurl) { 99 | // queryParts.push(`inurl:${options.inurl}`); 100 | // } 101 | // if (options.exclude) { 102 | // // Handle multiple exclusion keywords. 
103 | // if (Array.isArray(options.exclude)) { 104 | // options.exclude.forEach((keyword) => { 105 | // queryParts.push(`-${keyword}`); 106 | // }); 107 | // } else { 108 | // queryParts.push(`-${options.exclude}`); 109 | // } 110 | // } 111 | } 112 | 113 | return queryParts.join(' '); 114 | } 115 | 116 | override async call( 117 | input: Static, 118 | agent: ICodeAgent, 119 | ): Promise> { 120 | const query = this.buildQuery(input.query, input.options); 121 | const url = new URL(this.endpoint); 122 | url.searchParams.append('q', query); 123 | url.searchParams.append( 124 | 'count', 125 | input?.options?.count?.toString() || this.maxResults.toString(), 126 | ); 127 | 128 | if (input.options) { 129 | if (input.options.market) { 130 | url.searchParams.append('mkt', input.options.market); 131 | } 132 | if (input.options.offset !== undefined) { 133 | url.searchParams.append('offset', input.options.offset.toString()); 134 | } 135 | } 136 | 137 | const response = await axios.get(url.toString(), { 138 | headers: { 139 | 'Ocp-Apim-Subscription-Key': this.apiKey, 140 | }, 141 | }); 142 | 143 | if (response.status >= 300) { 144 | throw new Error( 145 | `Bing search API request failed with status ${response.status}`, 146 | ); 147 | } 148 | 149 | return response.data.webPages.value 150 | .filter((result: any) => !this.urlBlacklist.includes(result.url)) 151 | .map((result: any) => ({ 152 | title: result.name, 153 | link: result.url, 154 | snippet: result.snippet, 155 | })); 156 | } 157 | } 158 | -------------------------------------------------------------------------------- /packages/agent-script-web/src/lib/udf/browser/pageReadUdf.ts: -------------------------------------------------------------------------------- 1 | import { Type, TSchema, Static } from '@sinclair/typebox'; 2 | import { PageUdf } from './pageUdf'; 3 | import { IChatModel, ChatModel } from '@runparse/agent-script'; 4 | import TurndownService from 'turndown'; 5 | import { IWebAgent } from '../../types'; 6 | 
/**
 * UDF that answers a free-form instruction about the page the agent is
 * currently on. The page HTML is reduced to markdown and handed to a chat
 * model, optionally together with a screenshot.
 */
export class PageReadUdf extends PageUdf {
  name = 'pageRead';
  description =
    'Read information from the current webpage you are on, following user instructions';

  inputSchema = Type.Object(
    {
      instructions: Type.String({
        description:
          'Describe the type of information you want to read from the webpage',
      }),
    },
    { default: { instructions: 'string' } },
  );

  outputSchema: TSchema = Type.String();
  private model: IChatModel;
  private visualMode: boolean = false;

  /**
   * @param model Chat model used to read the page; defaults to OpenAI `gpt-4o`.
   * @param visualMode When true, a screenshot of the page is sent with the text.
   */
  constructor({
    model,
    visualMode = false,
  }: {
    model?: IChatModel;
    visualMode?: boolean;
  } = {}) {
    super();
    this.model =
      model ??
      new ChatModel({
        provider: 'openai',
        model: 'gpt-4o',
      });

    this.visualMode = visualMode;
  }

  /**
   * Reads the current page and asks the model to follow `input.instructions`.
   *
   * @returns The text content of the model's reply.
   */
  override async call(
    input: Static<typeof this.inputSchema>,
    agent: IWebAgent,
  ): Promise<Static<typeof this.outputSchema>> {
    const content = await agent.page.content();
    const bodyMarkdown = getBodyMarkdown(content);

    // The screenshot is only captured when visual mode was requested.
    const screenshot = this.visualMode
      ? (await getBase64Screenshot(agent.page)).data
      : undefined;

    const response = await this.model.chatCompletion(
      getDataExtractionPrompt(bodyMarkdown, screenshot, input.instructions),
    );

    return response.message.content;
  }

  /**
   * Attaches the extracted output to the most recent navigation-history entry
   * whose URL matches the current page.
   */
  override async onAfterCall(
    input: Static<typeof this.inputSchema>,
    output: Static<typeof this.outputSchema>,
    agent: IWebAgent,
  ) {
    await super.onAfterCall(input, output, agent);
    const currentUrl = agent.page.url();
    // `reverse()` works in place, so search a copy: the agent's own history
    // must keep its chronological order across calls.
    const historyItem = [...agent.navigationHistory]
      .reverse()
      .find((item) => item.url === currentUrl);
    if (historyItem) {
      historyItem.dataExtraction = { data: output };
    }
  }
}

/**
 * Builds the chat request for a page-reading call.
 *
 * @param document Page content already converted to markdown.
 * @param screenshotBase64 Optional screenshot; when absent no image message is sent.
 * @param instructions The user's instruction, embedded in the system message.
 */
function getDataExtractionPrompt(
  document: string,
  screenshotBase64: string | undefined,
  instructions: string,
): ChatCompletionCreateParamsNonStreaming {
  const screenshotMessages = screenshotBase64
    ? [
        { role: 'user', content: 'Here is the screenshot of the webpage:' },
        {
          role: 'user',
          // Multimodal content has to be an array of content parts; a bare
          // object is not a valid message body.
          // NOTE(review): `url` must be an http(s) or `data:` URL — confirm that
          // getBase64Screenshot().data already carries the data-URL prefix.
          content: [
            {
              type: 'image_url',
              image_url: { url: screenshotBase64 },
            },
          ],
        },
      ]
    : [];

  const messages = [
    {
      role: 'system',
      content: `You are a helpful assistant that can answer questions about a webpage. Use only the information provided in the html document. Return an empty type response if no relevant information is found. Here is the user's instruction: ${instructions}.`,
    },
    ...screenshotMessages,
    {
      role: 'user',
      content:
        "Below is the webpage html in a markdown format. Use it to answer the user's question.",
    },
    { role: 'user', content: document },
  ];

  return {
    // @ts-ignore outdated openai version in token.js
    messages,
    stream: false,
    max_tokens: 4096,
  };
}

/**
 * Converts a page's HTML to markdown, dropping `script`, `style` and
 * `noscript` elements together with everything nested inside them.
 *
 * @param html Raw HTML of the page.
 * @returns Markdown produced by Turndown from the cleaned-up HTML.
 */
function getBodyMarkdown(html: string): string {
  const skippedTags = ['script', 'style', 'noscript'];
  // Void elements have no end tag; writing one (e.g. `</br>`) would be read
  // back as an extra element.
  const voidTags = [
    'area',
    'base',
    'br',
    'col',
    'embed',
    'hr',
    'img',
    'input',
    'link',
    'meta',
    'source',
    'track',
    'wbr',
  ];
  // Entities are decoded by the parser, so text and attribute values must be
  // re-escaped before they are put back into HTML.
  const escapeHtml = (raw: string): string =>
    raw
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');

  let transformedHtml = '';
  // Depth counter rather than a flag, so a skipped tag nested in another
  // skipped tag does not switch output back on too early.
  let skipDepth = 0;

  const parser = new Parser(
    {
      onopentag(tagName, attrs) {
        if (skippedTags.includes(tagName)) {
          skipDepth += 1;
          return;
        }
        if (skipDepth > 0) {
          return;
        }
        const attrsString = Object.entries(attrs)
          .map(([key, value]) => `${key}="${escapeHtml(value)}"`)
          .join(' ');
        transformedHtml += `<${tagName}${
          attrsString ? ' ' + attrsString : ''
        }>`;
      },
      ontext(text) {
        if (skipDepth === 0) {
          // Clean up the text: trim and add a space
          transformedHtml += escapeHtml(text.trim()) + ' ';
        }
      },
      onclosetag(tagName) {
        if (skippedTags.includes(tagName)) {
          skipDepth = Math.max(0, skipDepth - 1);
          return;
        }
        if (skipDepth === 0 && !voidTags.includes(tagName)) {
          transformedHtml += `</${tagName}>`;
        }
      },
    },
    { decodeEntities: true },
  );

  // Execute parsing
  parser.write(html);
  parser.end();

  return new TurndownService().turndown(transformedHtml);
}
agent = new CodeAgent({ 15 | name: 'TestAgent', 16 | description: 'Test agent for memory management', 17 | udfs: [new FinalAnswerUdf()], 18 | maxSteps: 10, 19 | }); 20 | 21 | // Create test memory with various steps 22 | agent.memory.systemPrompt = new SystemPromptStep({ 23 | systemPrompt: 'System prompt for testing', 24 | }); 25 | agent.memory.steps.push(new TaskStep({ task: 'Test task' })); 26 | agent.memory.steps.push( 27 | new ActionStep({ 28 | stepNumber: 1, 29 | modelOutput: 'Test model output', 30 | actionOutput: 'Test action output', 31 | }), 32 | ); 33 | 34 | // Access the protected method using type assertion 35 | const messages = agent.writeMemoryToMessages(false); 36 | 37 | // Assertions 38 | expect(messages).toBeDefined(); 39 | expect(messages.length).toBeGreaterThan(0); 40 | // In summary mode, typically fewer messages are included 41 | expect( 42 | messages.some((m) => m.content.includes('System prompt for testing')), 43 | ).toBeTruthy(); 44 | // Verify that the steps are properly summarized according to their summary mode behavior 45 | }); 46 | 47 | it('should correctly write memory to messages in non-summary mode', async () => { 48 | // Setup 49 | const agent = new CodeAgent({ 50 | name: 'TestAgent', 51 | description: 'Test agent for memory management', 52 | udfs: [new FinalAnswerUdf()], 53 | maxSteps: 10, 54 | }); 55 | 56 | // Create test memory with various steps 57 | agent.memory.systemPrompt = new SystemPromptStep({ 58 | systemPrompt: 'System prompt for testing', 59 | }); 60 | agent.memory.steps.push(new TaskStep({ task: 'Test task' })); 61 | agent.memory.steps.push( 62 | new ActionStep({ 63 | stepNumber: 1, 64 | modelOutput: 'Test model output', 65 | actionOutput: 'Test action output', 66 | }), 67 | ); 68 | 69 | // Access the protected method using type assertion 70 | const messages = agent.writeMemoryToMessages(false); 71 | 72 | // Assertions 73 | expect(messages).toBeDefined(); 74 | expect(messages.length).toBeGreaterThan(0); 75 | // In 
non-summary mode, more detailed messages should be included 76 | expect( 77 | messages.some((m) => m.content.includes('System prompt for testing')), 78 | ).toBeTruthy(); 79 | expect( 80 | messages.some( 81 | (m) => 82 | m.content.includes('Test model output') || 83 | m.content.includes('Test action output'), 84 | ), 85 | ).toBeTruthy(); 86 | }); 87 | 88 | it('should maintain memory steps in correct order', async () => { 89 | // Setup 90 | const agent = new CodeAgent({ 91 | name: 'TestAgent', 92 | description: 'Test agent for memory management', 93 | udfs: [new FinalAnswerUdf()], 94 | maxSteps: 10, 95 | }); 96 | 97 | // Add steps in specific order 98 | agent.memory.systemPrompt = new SystemPromptStep({ 99 | systemPrompt: 'System prompt for testing', 100 | }); 101 | agent.memory.steps.push(new TaskStep({ task: 'First task' })); 102 | agent.memory.steps.push( 103 | new ActionStep({ 104 | stepNumber: 1, 105 | modelOutput: 'First action', 106 | }), 107 | ); 108 | agent.memory.steps.push( 109 | new ActionStep({ 110 | stepNumber: 2, 111 | modelOutput: 'Second action', 112 | }), 113 | ); 114 | agent.memory.steps.push( 115 | new PlanningStep({ 116 | plan: 'Updated plan', 117 | facts: 'Updated facts', 118 | modelInputMessages: [], 119 | modelOutputMessageFacts: { 120 | role: 'assistant', 121 | content: 'Updated facts', 122 | }, 123 | modelOutputMessagePlan: { 124 | role: 'assistant', 125 | content: 'Updated plan', 126 | }, 127 | }), 128 | ); 129 | 130 | // Access the protected method using type assertion 131 | const messages = agent.writeMemoryToMessages(false); 132 | 133 | // Assertions 134 | expect(messages).toBeDefined(); 135 | 136 | // Extract content for easier testing 137 | const contents = messages.map((m) => m.content); 138 | 139 | // Find indices to check order 140 | const firstActionIndex = contents.findIndex((c) => 141 | c.includes('First action'), 142 | ); 143 | const secondActionIndex = contents.findIndex((c) => 144 | c.includes('Second action'), 145 | ); 146 | 
const planningIndex = contents.findIndex((c) => 147 | c.includes('Updated plan'), 148 | ); 149 | 150 | // Verify that the steps appear in the correct order 151 | expect(firstActionIndex).toBeLessThan(secondActionIndex); 152 | expect(secondActionIndex).toBeLessThan(planningIndex); 153 | }); 154 | }); 155 | }); 156 | -------------------------------------------------------------------------------- /packages/agent-script-web/src/lib/agents/webAgents/webDataAgent.prompt.ts: -------------------------------------------------------------------------------- 1 | import { 2 | IAgentPrompt, 3 | codeAgentRolePromptPart, 4 | buildExamplesSectionPrompt, 5 | ICodeAgentRunExample, 6 | removeLeadingIndentation, 7 | codeAgentPrompt, 8 | codeAgentRules, 9 | buildCodeAgentRulesPrompt, 10 | } from '@runparse/agent-script'; 11 | 12 | export const webDataAgentExamples: ICodeAgentRunExample[] = [ 13 | { 14 | task: 'Generate an image of the oldest person in this document.', 15 | steps: [ 16 | { 17 | thought: 18 | 'I will proceed step by step and use the following UDFs: `documentQa` to find the oldest person in the document, then `imageGenerator` to generate an image according to the answer.', 19 | code: 'answer = await documentQa({document: document, question: "Who is the oldest person mentioned?"})', 20 | result: 21 | 'answer: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."', 22 | }, 23 | { 24 | thought: 'I will now generate an image showcasing the oldest person.', 25 | code: 'image = await imageGenerator("A portrait of John Doe, a 55-year-old man living in Canada.")', 26 | result: 'image: "https://example.com/image.png"', 27 | }, 28 | { 29 | thought: 'I will now terminate the task.', 30 | code: 'await terminate({reason: "I have generated the image"})', 31 | result: 'No output from UDF calls', 32 | }, 33 | ], 34 | }, 35 | { 36 | task: 'Find the best selling top 2 books in 2024, give me the title, author', 37 | steps: [ 38 | { 39 | thought: 
40 | 'I will use the UDF `webSearch` to get the best selling books in 2024.', 41 | code: 'bookSearchResults = await webSearch({query: "best selling books in 2024"})', 42 | result: removeLeadingIndentation(` 43 | bookSearchResults: [ 44 | { 45 | "title": "The Great Gatsby", 46 | "link": "https://www.amazon.com/Great-Gatsby-F-Scott-Fitzgerald/dp/1451673316", 47 | }, 48 | ... 49 | ] 50 | `), 51 | }, 52 | { 53 | thought: 54 | 'I have the result from the websearch stored in the variable `bookSearchResults`. Now I need to visit each of the webpages from the results and extract the title, author', 55 | code: 'webpageDataLink1 = await getWebpageData(bookSearchResults[0].link)', 56 | result: removeLeadingIndentation( 57 | `webpageDataLink1: [ 58 | { 59 | "title": "The Great Gatsby", 60 | "link": "https://www.amazon.com/Great-Gatsby-F-Scott-Fitzgerald/dp/1451673316", 61 | ...truncated... 62 | "title": "Alice's Adventures in Wonderland", 63 | "link": "https://www.amazon.com/alice-wonderland-lewis-carroll/dp/1411673311", 64 | } 65 | ]`, 66 | ), 67 | }, 68 | { 69 | thought: 70 | 'I have visited the first webpage from the results. Now I need to visit the second one.', 71 | code: 'webpageDataLink2 = await getWebpageData(bookSearchResults[1].link)', 72 | result: removeLeadingIndentation(` 73 | webpageDataLink2: { 74 | "title": "The Great Gatsby", 75 | "author": "F. Scott Fitzgerald", 76 | } 77 | `), 78 | }, 79 | { 80 | thought: 81 | 'I have visited the second webpage from the results and got the data. 
The task is done, I can terminate.', 82 | code: 'await terminate({reason: "I have found the best selling books in 2024"})', 83 | result: 'No output from UDF calls', 84 | }, 85 | ], 86 | }, 87 | ] as const; 88 | 89 | export const webDataAgentRules = [ 90 | ...codeAgentRules, 91 | 'CRITICAL: `await terminate` UDF must be the only UDF call in your last step.', 92 | ] as const; 93 | 94 | export const webDataAgentPrompt: IAgentPrompt = { 95 | ...codeAgentPrompt, 96 | systemPrompt: `${codeAgentRolePromptPart} 97 | 98 | In the end you have to call the \`await terminate\` UDF with the reason as the argument. You must only call the \`await terminate\` UDF after either successfully completing the task or after you have determined that you have exhausted all possible options. 99 | 100 | Use the \`await think\` UDF to think about the task if you are stuck or not making progress according to the plan. 101 | 102 | ${buildExamplesSectionPrompt(webDataAgentExamples)} 103 | 104 | Above examples were using notional UDFs that might not exist for you. On top of performing computations in the Javascript code snippets that you create, you only have access to these UDFs (in additional to any built-in functions): 105 | \`\`\`js 106 | {%- for udf in udfs.values() %} 107 | {{ udf.getSignature() | safe }}{{ '\\n' }} 108 | {%- endfor %} 109 | \`\`\` 110 | 111 | {%- if managedAgents and managedAgents | length %} 112 | You can also give tasks to team members. 113 | Calling a team member works the same as for calling a UDF: simply, the only argument you can give in the call is 'task', a long string explaining your task. 114 | Given that this team member is a real human, you should be very verbose in your task. 
/**
 * Converts a JSON Schema (draft 7) node into the matching TypeBox schema.
 *
 * Objects, arrays, strings (including string enums), numbers/integers and
 * booleans are supported; anything else becomes `Type.Any()`.
 *
 * @param schema JSON Schema node to convert.
 * @returns The equivalent TypeBox schema.
 */
export function createTSchemaFromJsonSchema(schema: JSONSchema7): TSchema {
  switch (schema.type) {
    case 'object': {
      const requiredKeys = new Set<string>(
        Array.isArray(schema.required) ? schema.required : [],
      );
      const properties: Record<string, TSchema> = {};
      for (const [key, propSchema] of Object.entries(schema.properties ?? {})) {
        const converted = createTSchemaFromJsonSchema(propSchema as JSONSchema7);
        // TypeBox derives an object's `required` list from which properties
        // are optional, so the JSON Schema `required` array has to be
        // expressed through `Type.Optional` instead of an option.
        properties[key] = requiredKeys.has(key)
          ? converted
          : Type.Optional(converted);
      }
      return Type.Object(properties, {
        additionalProperties: schema.additionalProperties as boolean,
      });
    }

    case 'array':
      // Fall back to an array of anything when no item schema is provided.
      return Type.Array(
        schema.items
          ? createTSchemaFromJsonSchema(schema.items as JSONSchema7)
          : Type.Any(),
      );

    case 'string':
      if (schema.enum) {
        // An enum becomes a union of literal types, one per value.
        return Type.Union(
          schema.enum.map((val) => Type.Literal(val as string)),
        );
      }
      return Type.String();

    case 'number':
    case 'integer':
      return Type.Number();

    case 'boolean':
      return Type.Boolean();

    default:
      // Schemas with no defined type or an unsupported type.
      return Type.Any();
  }
}

/**
 * Infers a TypeBox schema from an example value.
 *
 * @param jsonSchemaInstance Example value: a primitive, `null`, a non-empty
 *   array whose items all share one `typeof`, or a plain object.
 * @returns The inferred schema; objects are closed (`additionalProperties: false`).
 * @throws Error for an empty array, an array mixing item types, or a value of
 *   an unsupported type.
 */
export function createTSchemaFromInstance(
  jsonSchemaInstance: JsonSchemaInstance,
): typeof jsonSchemaInstance & TSchema {
  const instanceType = typeof jsonSchemaInstance;

  if (instanceType === 'string') {
    return Type.String();
  }
  if (instanceType === 'number') {
    return Type.Number();
  }
  if (instanceType === 'boolean') {
    return Type.Boolean();
  }
  if (instanceType !== 'object') {
    throw new Error(
      `Unsupported json schema instance field type: ${instanceType}`,
    );
  }

  if (Array.isArray(jsonSchemaInstance)) {
    const firstItem = jsonSchemaInstance[0];
    if (firstItem === undefined) {
      throw new Error('Json schema instance array type cannot be empty');
    }
    const mixedTypes = jsonSchemaInstance.some(
      (item) => typeof item !== typeof firstItem,
    );
    if (mixedTypes) {
      throw new Error(
        `Invalid json schema instance: array items must be of the same type: ${jsonSchemaInstance
          .map((item) => typeof item)
          .join(', ')}`,
      );
    }
    // The first item stands in for the whole array.
    return Type.Array(createTSchemaFromInstance(firstItem));
  }

  if (jsonSchemaInstance === null) {
    return Type.Null();
  }

  const objectInstance = jsonSchemaInstance as JsonSchemaObjectInstance;
  const properties: TProperties = {};
  for (const key of Object.keys(jsonSchemaInstance)) {
    properties[key] = createTSchemaFromInstance(objectInstance[key]!);
  }
  return Type.Object(properties, {
    additionalProperties: false,
  });
}

/**
 * Returns a copy of an object schema in which every property also accepts
 * `null`. The result is a closed object (`additionalProperties: false`).
 *
 * @param schema Object schema whose properties are widened.
 */
export function makeTObjectFieldsNullable(schema: TObject): TObject {
  const nullableProperties: TProperties = {};
  for (const key of Object.keys(schema.properties)) {
    nullableProperties[key] = Type.Union([
      schema.properties[key] as TSchema,
      Type.Null(),
    ]);
  }
  return Type.Object(nullableProperties, {
    additionalProperties: false,
  });
}

/**
 * Produces a placeholder value that has the shape of the given schema.
 *
 * @param schema TypeBox schema to build a sample for.
 * @returns `'string'`, `0` or `false` for primitives, the literal value for a
 *   literal schema, a one-element array for arrays, a populated object for
 *   objects, and the sample of the first member for unions.
 * @throws Error for `null` schemas and any other unsupported schema kind.
 */
export function generateDefaultJsonSchemaInstance(
  schema: TSchema,
): JsonSchemaInstance {
  // A literal (e.g. one member of an enum union) only validates against its
  // own value, so the generic 'string' / 0 placeholder would not match it.
  if (schema.const !== undefined) {
    return schema.const as JsonSchemaInstance;
  }

  switch (schema.type) {
    case 'string':
      return 'string';
    case 'number':
      return 0;
    case 'boolean':
      return false;
    case 'object': {
      if (schema.items) {
        // An object-typed schema that carries `items` is treated as an array.
        return [generateDefaultJsonSchemaInstance(schema.items as TSchema)];
      }
      const properties = (schema.properties || {}) as Record<string, TSchema>;
      const sample: Record<string, JsonSchemaInstance> = {};
      for (const key of Object.keys(properties)) {
        sample[key] = generateDefaultJsonSchemaInstance(properties[key]!);
      }
      return sample;
    }
    case 'array':
      return [generateDefaultJsonSchemaInstance(schema.items as TSchema)];
    case 'null':
      throw new Error(`Unsupported schema type: ${JSON.stringify(schema)}`);
    default:
      if (schema[Kind] === 'Union') {
        // The first union member stands in for the whole union.
        return generateDefaultJsonSchemaInstance(schema.anyOf[0] as TSchema);
      }
      throw new Error(`Unsupported schema type: ${JSON.stringify(schema)}`);
  }
}
ChatCompletionMessageParam } from 'token.js'; 3 | import { ChatCompletionCreateParamsNonStreaming } from 'openai/resources'; 4 | 5 | export enum LogLevel { 6 | DEBUG = 0, 7 | INFO = 1, 8 | WARNING = 2, 9 | ERROR = 3, 10 | } 11 | 12 | export interface IAgentLogger { 13 | level: LogLevel; 14 | console: Console; 15 | 16 | log(...args: any[]): void; 17 | logMarkdown({ title, content }: { title?: string; content: string }): void; 18 | logRule(title: string, level?: LogLevel): void; 19 | logTask(content: string): void; 20 | logMessages(messages: IChatMessage[] | null): void; 21 | } 22 | 23 | export interface IChatMessage { 24 | role: 'system' | 'user' | 'assistant'; 25 | content: string; 26 | images?: string[]; 27 | raw?: { 28 | role: 'assistant'; 29 | content: string | null; 30 | tool_calls?: { 31 | type: 'function'; 32 | function: { arguments: string; name: string }; 33 | }[]; 34 | }; 35 | } 36 | 37 | export interface IAgentError extends Error { 38 | code: string; 39 | } 40 | 41 | export interface IMemoryStep { 42 | toMessages({ 43 | summaryMode, 44 | showModelInputMessages, 45 | }: { 46 | summaryMode: boolean; 47 | showModelInputMessages: boolean; 48 | }): IChatMessage[]; 49 | } 50 | 51 | export interface IObservationMetadata { 52 | context?: string; 53 | source?: string; 54 | } 55 | 56 | export interface IObservationText extends IObservationMetadata { 57 | type: 'text'; 58 | text: string; 59 | } 60 | 61 | export interface IObservationImage extends IObservationMetadata { 62 | type: 'image'; 63 | image: string; 64 | } 65 | 66 | export type Observation = IObservationText | IObservationImage; 67 | 68 | export interface IActionStep extends IMemoryStep { 69 | modelInputMessages?: IChatMessage[]; 70 | startTime?: number; 71 | endTime?: number; 72 | stepNumber: number; 73 | error?: IAgentError; 74 | duration?: number; 75 | modelOutputMessage?: IChatMessage; 76 | modelOutput?: string; 77 | observations: Observation[]; 78 | actionOutput?: any; 79 | } 80 | 81 | export interface 
IPlanningStep extends IMemoryStep { 82 | modelInputMessages: IChatMessage[]; 83 | modelOutputMessageFacts: IChatMessage; 84 | facts: string; 85 | modelOutputMessagePlan: IChatMessage; 86 | plan: string; 87 | } 88 | 89 | export interface ITaskStep extends IMemoryStep { 90 | task: string; 91 | observations: Observation[]; 92 | } 93 | 94 | export interface ISystemPromptStep extends IMemoryStep { 95 | systemPrompt: string; 96 | } 97 | 98 | export type AgentMemoryStep = 99 | | IActionStep 100 | | IPlanningStep 101 | | ITaskStep 102 | | ISystemPromptStep; 103 | 104 | export interface IAgentMemory { 105 | systemPrompt: ISystemPromptStep; 106 | steps: AgentMemoryStep[]; 107 | logger: IAgentLogger; 108 | 109 | reset(): void; 110 | getSuccinctSteps(): IChatMessage[]; 111 | replay(logger: IAgentLogger, detailed?: boolean): void; 112 | } 113 | 114 | export interface IChatResponseMetadata { 115 | usage: { 116 | promptTokens: number; 117 | completionTokens: number; 118 | totalTokens: number; 119 | }; 120 | } 121 | 122 | export interface IChatModel { 123 | chatCompletion( 124 | request: { 125 | messages: ChatCompletionMessageParam[]; 126 | } & Partial, 127 | ): Promise<{ 128 | message: IChatMessage; 129 | metadata: IChatResponseMetadata; 130 | }>; 131 | chatCompletionWithSchema( 132 | request: { 133 | messages: ChatCompletionMessageParam[]; 134 | } & Partial, 135 | ): Promise<{ 136 | message: IChatMessage; 137 | metadata: IChatResponseMetadata; 138 | }>; 139 | } 140 | 141 | export interface IAgentPrompt { 142 | systemPrompt: string; 143 | planning: { 144 | initialFacts: string; 145 | initialPlan: string; 146 | updateFactsPreMessages: string; 147 | updateFactsPostMessages: string; 148 | updatePlanPreMessages: string; 149 | updatePlanPostMessages: string; 150 | }; 151 | managedAgent: { 152 | task: string; 153 | report: string; 154 | }; 155 | finalAnswer: { 156 | preMessages: string; 157 | postMessages: string; 158 | }; 159 | } 160 | 161 | export interface IUdf { 162 | name: string; 
163 | description: string; 164 | inputSchema: TSchema; 165 | outputSchema: TSchema; 166 | getSignature(): string; 167 | 168 | onBeforeCall( 169 | input: Static, 170 | agent: IAgent, 171 | ): Promise; 172 | 173 | onAfterCall( 174 | input: Static, 175 | output: Static, 176 | agent: IAgent, 177 | ): Promise; 178 | 179 | call( 180 | input: Static, 181 | agent: IAgent, 182 | ): Promise> | Static; 183 | } 184 | 185 | export interface ICallableResult { 186 | returnValue: unknown; 187 | callable: string; 188 | } 189 | 190 | export interface ISandbox { 191 | register(callable: string, fn: (...fnArgs: any[]) => Promise): void; 192 | executeScript(script: string): Promise<{ 193 | calls: ICallableResult[]; 194 | returnValue: any; 195 | output: string; 196 | }>; 197 | } 198 | 199 | export interface IAgent { 200 | name: string; 201 | description: string; 202 | get task(): string; 203 | outputSchema: TSchema; 204 | call: (task: string, ...args: any[]) => Promise>; 205 | } 206 | 207 | export interface ICodeAgent extends IAgent { 208 | memory: IAgentMemory; 209 | prompts: IAgentPrompt; 210 | sandbox: ISandbox; 211 | udfs: IUdf[]; 212 | managedAgents: IAgent[]; 213 | stepNumber: number; 214 | maxSteps: number; 215 | beforeStep(): Promise; 216 | afterStep(): Promise; 217 | run: ( 218 | task: string, 219 | options?: { observations?: Observation[] }, 220 | ) => Promise>; 221 | model: IChatModel; 222 | planningInterval?: number; 223 | updateShouldRunPlanning(override?: boolean): void; 224 | logger: IAgentLogger; 225 | } 226 | -------------------------------------------------------------------------------- /packages/agent-script-web/src/lib/udf/browser/pageExtractDataUdf.ts: -------------------------------------------------------------------------------- 1 | import { Type, TSchema, Static } from '@sinclair/typebox'; 2 | import { PageUdf } from './pageUdf'; 3 | import { 4 | IChatModel, 5 | schemaToTypeString, 6 | ChatModel, 7 | } from '@runparse/agent-script'; 8 | import TurndownService 
/**
 * UDF that extracts structured records from the page the agent is currently
 * on. The page HTML is reduced to markdown and handed to a chat model, which
 * must answer with JSON of the shape `{ data: objectSchema[] }`.
 */
export class PageExtractDataUdf extends PageUdf {
  name = 'pageExtractData';
  description =
    'Extracts data from current webpage you are on, following user instructions';

  inputSchema = Type.Object(
    {
      instructions: Type.String({
        description:
          'Describe the type of data you want to extract from the webpage',
      }),
    },
    { default: { instructions: 'string' } },
  );
  outputSchema: TSchema;
  private wrappedOutputSchema: TSchema;

  private model: IChatModel;

  private visualMode: boolean = false;

  /**
   * @param objectSchema Schema of one extracted record; must be an object schema.
   * @param model Chat model used for extraction; defaults to OpenAI `gpt-4o`.
   * @param visualMode When true, a screenshot of the page is sent with the text.
   * @throws Error when `objectSchema` is not of type `object`.
   */
  constructor({
    objectSchema,
    model,
    visualMode = false,
  }: {
    objectSchema: TSchema;
    model?: IChatModel;
    visualMode?: boolean;
  }) {
    super();
    // Validate first so an invalid schema does not construct a default model.
    if (objectSchema.type !== 'object') {
      throw new Error('outputSchema must be an object');
    }

    this.model =
      model ??
      new ChatModel({
        provider: 'openai',
        model: 'gpt-4o',
      });
    this.visualMode = visualMode;

    this.outputSchema = Type.Array(objectSchema);
    // The record list is sent to the model wrapped in a `{ data: [...] }`
    // object, and `call` unwraps it again.
    this.wrappedOutputSchema = Type.Object(
      {
        data: this.outputSchema,
      },
      { additionalProperties: false },
    );
  }

  /**
   * Reads the current page and asks the model for records matching the
   * configured schema.
   *
   * @returns The `data` array parsed from the model's JSON reply.
   * @throws SyntaxError when the reply is not valid JSON.
   */
  override async call(
    input: Static<typeof this.inputSchema>,
    agent: IWebAgent,
  ): Promise<Static<typeof this.outputSchema>> {
    const content = await agent.page.content();
    const bodyMarkdown = getBodyMarkdown(content);

    // The screenshot is only captured when visual mode was requested.
    const screenshot = this.visualMode
      ? (await getBase64Screenshot(agent.page)).data
      : undefined;

    const response = await this.model.chatCompletionWithSchema(
      getDataExtractionPrompt(
        bodyMarkdown,
        screenshot,
        this.wrappedOutputSchema,
        input.instructions,
      ),
    );

    return JSON.parse(response.message.content).data;
  }

  /**
   * Attaches the extracted output to the most recent navigation-history entry
   * whose URL matches the current page.
   */
  override async onAfterCall(
    input: Static<typeof this.inputSchema>,
    output: Static<typeof this.outputSchema>,
    agent: IWebAgent,
  ) {
    await super.onAfterCall(input, output, agent);
    const currentUrl = agent.page.url();
    // `reverse()` works in place, so search a copy: the agent's own history
    // must keep its chronological order across calls.
    const historyItem = [...agent.navigationHistory]
      .reverse()
      .find((item) => item.url === currentUrl);
    if (historyItem) {
      historyItem.dataExtraction = { data: output };
    }
  }
}

/**
 * Builds the chat request for a structured extraction call.
 *
 * @param document Page content already converted to markdown.
 * @param screenshotBase64 Optional screenshot; when absent no image message is sent.
 * @param schema Wrapper schema with a `data` property; used as the JSON
 *   response format and described to the model as a TypeScript type.
 * @param instructions The user's instruction, embedded in the system message.
 */
function getDataExtractionPrompt(
  document: string,
  screenshotBase64: string | undefined,
  schema: TSchema,
  instructions: string,
): ChatCompletionCreateParamsNonStreaming {
  const screenshotMessages = screenshotBase64
    ? [
        { role: 'user', content: 'Here is the screenshot of the webpage:' },
        {
          role: 'user',
          // Multimodal content has to be an array of content parts; a bare
          // object is not a valid message body.
          // NOTE(review): `url` must be an http(s) or `data:` URL — confirm that
          // getBase64Screenshot().data already carries the data-URL prefix.
          content: [
            {
              type: 'image_url',
              image_url: { url: screenshotBase64 },
            },
          ],
        },
      ]
    : [];

  const messages = [
    {
      role: 'system',
      content: `You are a helpful assistant that can answer questions about a webpage. Use only the information provided in the html document. Return an empty type response if no relevant information is found. Here is the user's instruction: ${instructions}. Your output must be a valid JSON object that matches the typescript type ${schemaToTypeString(
        schema.properties.data,
      )}.`,
    },
    ...screenshotMessages,
    {
      role: 'user',
      content:
        "Below is the webpage html in a markdown format. Use it to answer the user's question.",
    },
    { role: 'user', content: document },
  ];

  return {
    // @ts-ignore outdated openai version in token.js
    messages,
    stream: false,
    response_format: {
      // @ts-ignore outdated openai version in token.js
      type: 'json_schema',
      json_schema: {
        name: 'page_extract_data_response',
        strict: true,
        schema,
      },
    },
    max_tokens: 4096,
  };
}

/**
 * Converts a page's HTML to markdown, dropping `script`, `style` and
 * `noscript` elements together with everything nested inside them.
 *
 * @param html Raw HTML of the page.
 * @returns Markdown produced by Turndown from the cleaned-up HTML.
 */
function getBodyMarkdown(html: string): string {
  const skippedTags = ['script', 'style', 'noscript'];
  // Void elements have no end tag; writing one (e.g. `</br>`) would be read
  // back as an extra element.
  const voidTags = [
    'area',
    'base',
    'br',
    'col',
    'embed',
    'hr',
    'img',
    'input',
    'link',
    'meta',
    'source',
    'track',
    'wbr',
  ];
  // Entities are decoded by the parser, so text and attribute values must be
  // re-escaped before they are put back into HTML.
  const escapeHtml = (raw: string): string =>
    raw
      .replace(/&/g, '&amp;')
      .replace(/</g, '&lt;')
      .replace(/>/g, '&gt;')
      .replace(/"/g, '&quot;');

  let transformedHtml = '';
  // Depth counter rather than a flag, so a skipped tag nested in another
  // skipped tag does not switch output back on too early.
  let skipDepth = 0;

  const parser = new Parser(
    {
      onopentag(tagName, attrs) {
        if (skippedTags.includes(tagName)) {
          skipDepth += 1;
          return;
        }
        if (skipDepth > 0) {
          return;
        }
        const attrsString = Object.entries(attrs)
          .map(([key, value]) => `${key}="${escapeHtml(value)}"`)
          .join(' ');
        transformedHtml += `<${tagName}${
          attrsString ? ' ' + attrsString : ''
        }>`;
      },
      ontext(text) {
        if (skipDepth === 0) {
          // Clean up the text: trim and add a space
          transformedHtml += escapeHtml(text.trim()) + ' ';
        }
      },
      onclosetag(tagName) {
        if (skippedTags.includes(tagName)) {
          skipDepth = Math.max(0, skipDepth - 1);
          return;
        }
        if (skipDepth === 0 && !voidTags.includes(tagName)) {
          transformedHtml += `</${tagName}>`;
        }
      },
    },
    { decodeEntities: true },
  );

  // Execute parsing
  parser.write(html);
  parser.end();

  return new TurndownService().turndown(transformedHtml);
}
mockImplementation?: any, 41 | ): IAgent => { 42 | const mockAgent: IAgent = { 43 | task: 'test task', 44 | name, 45 | description: `Mock agent: ${name}`, 46 | outputSchema: Type.String(), 47 | call: 48 | mockImplementation || 49 | jest.fn().mockResolvedValue('Agent call result'), 50 | }; 51 | return mockAgent; 52 | }; 53 | 54 | test('should create CallAgentUdf for each managed agent', () => { 55 | // Create mock managed agents 56 | const managedAgent1 = createMockAgent('agent1'); 57 | const managedAgent2 = createMockAgent('agent2'); 58 | 59 | // Create the code agent with managed agents 60 | const agent = new CodeAgent({ 61 | name: 'TestAgent', 62 | description: 'Test agent with managed agents', 63 | udfs: [new FinalAnswerUdf()], 64 | maxSteps: 10, 65 | managedAgents: [managedAgent1, managedAgent2], 66 | }); 67 | 68 | // Check if CallAgentUdf instances were created for each managed agent 69 | const callAgentUdfs = agent.udfs.filter( 70 | (udf) => udf instanceof CallAgentUdf, 71 | ); 72 | 73 | expect(callAgentUdfs.length).toBe(2); 74 | expect(callAgentUdfs[0]!.name).toBe('callAgent1'); 75 | expect(callAgentUdfs[1]!.name).toBe('callAgent2'); 76 | expect(agent.sandbox.register).toHaveBeenCalledWith( 77 | 'callAgent1', 78 | expect.any(Function), 79 | ); 80 | expect(agent.sandbox.register).toHaveBeenCalledWith( 81 | 'callAgent2', 82 | expect.any(Function), 83 | ); 84 | }); 85 | 86 | test('should format task correctly for managed agents', async () => { 87 | // Create a mock managed agent 88 | const mockAgent = createMockAgent('testAgent'); 89 | mockAgent.call = jest.fn().mockImplementation(async (task) => { 90 | // Return the task to verify it was formatted correctly 91 | return `Task received: ${task}`; 92 | }); 93 | 94 | // Create the code agent 95 | const agent = new CodeAgent({ 96 | name: 'TestAgent', 97 | description: 'Test agent', 98 | udfs: [new FinalAnswerUdf()], 99 | maxSteps: 10, 100 | managedAgents: [mockAgent], 101 | }); 102 | 103 | // Use the original 
prompts which should include something like: 104 | // "You are {{name}}. Your task is: {{task}}" 105 | 106 | // Call the managed agent method directly 107 | agent.name = 'ManagedAgent'; 108 | const result = await agent.call('Do something specific', {}); 109 | 110 | // Check if the task includes the agent name 111 | expect(result).toContain('ManagedAgent'); 112 | expect(result).toContain( 113 | "Here is the final answer from your managed agent 'ManagedAgent':\n test result", 114 | ); 115 | }); 116 | 117 | test('should format report correctly from managed agent output', async () => { 118 | // Create the code agent with a mock for the run method 119 | const agent = new CodeAgent({ 120 | name: 'ReportAgent', 121 | description: 'Test agent for report formatting', 122 | udfs: [new FinalAnswerUdf()], 123 | maxSteps: 10, 124 | managedAgents: [], 125 | }); 126 | 127 | // Mock the run method to return a simple output 128 | agent.run = jest.fn().mockResolvedValue('This is the agent output'); 129 | 130 | // Call the managed agent method 131 | const result = await agent.call('Test task', {}); 132 | 133 | // The report should include the agent name and the final answer 134 | expect(result).toContain('ReportAgent'); 135 | expect(result).toContain('This is the agent output'); 136 | }); 137 | 138 | test('should handle managed agent execution errors', async () => { 139 | // Create a mock error-throwing agent 140 | const errorAgent = createMockAgent('errorAgent'); 141 | errorAgent.call = jest.fn().mockImplementation(() => { 142 | throw new AgentError({ 143 | message: 'Test error in managed agent', 144 | code: AgentErrorCode.SCRIPT_EXECUTION_FAILED, 145 | }); 146 | }); 147 | 148 | // Create the code agent with a test stopping UDF 149 | const agent = new CodeAgent({ 150 | name: 'TestAgent', 151 | description: 'Test agent', 152 | udfs: [new FinalAnswerUdf()], 153 | maxSteps: 10, 154 | managedAgents: [errorAgent], 155 | }); 156 | 157 | // Find the CallAgentUdf that was created for the 
// Helpers of packages/agent-script/src/lib/utils.ts.
// The type names used below (ChatCompletionMessageParam, IChatMessage,
// Observation, TObject, TSchema, TEnum, TArray) and the `Hint` symbol come from
// the module's imports of 'token.js', './types' and '@sinclair/typebox'.

/**
 * Converts the agent's chat messages into the message params expected by the
 * chat completion API.
 *
 * User messages that carry `images` become multi-part messages: one text part
 * followed by one `image_url` part per image. All other messages are copied
 * as they are.
 *
 * @param messages - Messages in the agent's internal format.
 * @returns One completion message param per input message, in the same order.
 */
export function toChatCompletionMessageParam(
  messages: IChatMessage[],
): ChatCompletionMessageParam[] {
  return messages.map((message) => {
    switch (message.role) {
      case 'system':
        return { ...message, role: 'system' };
      case 'user':
        if (message.images) {
          const imageParts = message.images.map(
            (image) =>
              ({
                type: 'image_url',
                image_url: { url: image },
              } as const),
          );
          return {
            role: 'user',
            content: [{ type: 'text', text: message.content }, ...imageParts],
          };
        }
        return { ...message, role: 'user' };
      case 'assistant':
        return { ...message, role: 'assistant' };
    }
  });
}

/**
 * Renders an observation as a `user` chat message.
 *
 * Text observations are inlined after an `Observation:` header; any other
 * observation is treated as an image and attached through `images`. The
 * optional `context` and `source` are appended in that order.
 *
 * @param observation - The observation to render.
 * @returns The chat message describing the observation.
 */
export function observationToChatMessage(
  observation: Observation,
): IChatMessage {
  const source = observation.source ? `\nSource: ${observation.source}` : '';
  const context = observation.context
    ? `\nContext: ${observation.context}`
    : '';
  if (observation.type === 'text') {
    return {
      role: 'user',
      content: `Observation:\n${observation.text}${context}${source}`,
    };
  }
  return {
    role: 'user',
    content: `Observation Image:${context}${source}`,
    images: [observation.image],
  };
}

const MAX_LENGTH_TRUNCATE_CONTENT = 10000;

/**
 * Shortens `content` by keeping its head and tail and replacing the middle
 * with a truncation notice.
 *
 * Content that is at most `maxLength` characters long is returned unchanged.
 * Otherwise `floor(maxLength / 2)` characters are kept from each end; the
 * notice itself is not counted against `maxLength`.
 *
 * @param content - Text to shorten.
 * @param maxLength - Threshold above which the text is truncated.
 * @returns The original or the truncated text.
 */
export function truncateContent(
  content: string,
  maxLength: number = MAX_LENGTH_TRUNCATE_CONTENT,
): string {
  if (content.length <= maxLength) {
    return content;
  }

  const halfLength = Math.max(0, Math.floor(maxLength / 2));
  const head = content.slice(0, halfLength);
  // `slice(-0)` is `slice(0)` and would return the whole string, so a zero
  // half length has to yield an empty tail explicitly.
  const tail = halfLength > 0 ? content.slice(-halfLength) : '';
  return (
    head +
    `\n..._This content has been truncated to stay below ${maxLength} characters_...\n` +
    tail
  );
}

/**
 * Removes the indentation shared by the lines of a multi-line string.
 *
 * The common indentation is the smallest amount of leading whitespace found
 * on the non-empty lines. By default the first non-empty line is left out of
 * that computation, which suits template literals whose first line starts
 * right after the opening backtick. Lines with less leading whitespace than
 * the common amount are left untouched.
 *
 * @param content - Text to de-indent.
 * @param excludeFirstNonEmptyLine - Ignore the first non-empty line when
 *   computing the common indentation.
 * @returns The text with the common indentation removed from each line.
 */
export function removeLeadingIndentation(
  content: string,
  excludeFirstNonEmptyLine: boolean = true,
): string {
  const leadingWhitespace = (line: string): number =>
    line.match(/^\s*/)?.[0]?.length ?? 0;

  const lines = content.split('\n');
  const nonEmptyLines = lines.filter((line) => line.trim().length > 0);
  const linesToConsider = excludeFirstNonEmptyLine
    ? nonEmptyLines.slice(1)
    : nonEmptyLines;

  // Nothing to measure (e.g. a single-line string): `Math.min()` of an empty
  // list is Infinity, which must never reach the stripping step.
  if (linesToConsider.length === 0) {
    return content;
  }

  // Reduce instead of `Math.min(...values)` so very long inputs cannot exceed
  // the engine's argument limit.
  const minIndentation = linesToConsider.reduce(
    (smallest, line) => Math.min(smallest, leadingWhitespace(line)),
    Number.POSITIVE_INFINITY,
  );
  if (minIndentation === 0) {
    return content;
  }

  // Compare against the measured whitespace (spaces or tabs) so that what is
  // stripped agrees with what was counted.
  return lines
    .map((line) =>
      leadingWhitespace(line) >= minIndentation
        ? line.slice(minIndentation)
        : line,
    )
    .join('\n');
}

/**
 * Renders a TypeBox schema as a TypeScript-like type string.
 *
 * Literals, objects and arrays are returned as a bare type expression.
 * Every other schema is returned as a property-style fragment: the type
 * followed by `;` and, when the schema has a description, a trailing
 * `// description` comment.
 *
 * @param schema - Schema to describe.
 * @returns The textual type description.
 */
export function schemaToTypeString(schema: TSchema): string {
  const bareType = bareTypeString(schema);
  if ('const' in schema || schema.type === 'object' || schema.type === 'array') {
    return bareType;
  }

  const descriptionComment = schema.description
    ? ` // ${schema.description}`
    : '';
  return `${bareType};${descriptionComment}`;
}

/**
 * Renders the type expression of a schema without a trailing `;` or a
 * description comment, so it can be nested inside `Array<...>`.
 */
function bareTypeString(schema: TSchema): string {
  // Handle literal types (if a constant is provided)
  if ('const' in schema) {
    return JSON.stringify((schema as { const?: unknown }).const);
  }

  // Handle objects recursively
  if (schema.type === 'object') {
    const objSchema = schema as TObject;
    const lines = Object.entries(objSchema.properties ?? {}).map(
      ([key, value]) => {
        // A property is optional unless it is listed in `required`.
        const isOptional = !objSchema.required?.includes(key);
        return `${key}${isOptional ? '?' : ''}: ${schemaToTypeString(value)}`;
      },
    );
    return `{\n ${lines.join('\n ')}\n}`;
  }

  // Handle arrays recursively; the element type is rendered bare so that the
  // result is `Array<string>` rather than `Array<string;>`.
  if (schema.type === 'array') {
    const items = (schema as TArray).items;
    return `Array<${items ? bareTypeString(items) : 'unknown'}>`;
  }

  // Handle primitive types
  switch (schema.type) {
    case 'string':
      return 'string';
    case 'number':
    case 'integer': // Treat integer as number
      return 'number';
    case 'boolean':
      return 'boolean';
    case 'null':
      return 'null';
    default:
      switch (schema[Hint]) {
        case 'Enum':
          // Union of the member literals, quoted the same way as `const`.
          return (schema as TEnum).anyOf
            .map((option) => JSON.stringify(option.const))
            .join(' | ');
        case 'Any':
          return 'any';
        default:
          return 'unknown';
      }
  }
}

/**
 * Walks a TypeBox schema and invokes `callback` for every primitive leaf
 * (`string`, `number`, `integer`, `boolean`, `null`).
 *
 * Object properties extend the path with `.key`; array items keep the path of
 * the array itself. Schemas of any other kind are ignored.
 *
 * @param schema - Schema to traverse.
 * @param callback - Receives each leaf schema together with its path.
 * @param schemaPath - Path of `schema` itself; empty for the root.
 */
export function walkTypeboxSchema(
  schema: TSchema,
  callback: (schema: TSchema, schemaPath: string) => void,
  schemaPath: string = '',
) {
  // Process schema based on its type
  if (schema.type === 'object') {
    const objSchema = schema as TObject;
    if (objSchema.properties) {
      Object.entries(objSchema.properties).forEach(([key, value]) =>
        walkTypeboxSchema(value, callback, `${schemaPath}.${key}`),
      );
    }
  } else if (schema.type === 'array') {
    const arraySchema = schema as TArray;
    if (arraySchema.items) {
      walkTypeboxSchema(arraySchema.items, callback, schemaPath);
    }
  } else if (
    schema.type === 'string' ||
    schema.type === 'number' ||
    schema.type === 'integer' ||
    schema.type === 'boolean' ||
    schema.type === 'null'
  ) {
    callback(schema, schemaPath);
  }
}

/**
 * Serializes a value to JSON with object keys in sorted order, so that
 * structurally equal values always produce the same string.
 *
 * Values JSON cannot represent (`undefined`, functions, symbols) are omitted
 * from objects and written as `null` inside arrays, like `JSON.stringify`
 * does. At the top level they serialize as `'null'` so the result is always a
 * string. Objects exposing `toJSON` (e.g. `Date`) are serialized through it.
 *
 * @param obj - Value to serialize.
 * @returns The deterministic JSON text.
 */
export function stableStringify(obj: any): string {
  return serializeStable(obj) ?? 'null';
}

/**
 * Recursive worker of `stableStringify`; returns `undefined` for values that
 * have no JSON representation.
 */
function serializeStable(value: unknown): string | undefined {
  if (
    value !== null &&
    typeof value === 'object' &&
    typeof (value as { toJSON?: unknown }).toJSON === 'function'
  ) {
    value = (value as { toJSON(): unknown }).toJSON();
  }

  if (value === null || typeof value !== 'object') {
    // At runtime this yields undefined for undefined/function/symbol.
    return JSON.stringify(value) as string | undefined;
  }

  if (Array.isArray(value)) {
    const mapped = value.map((item) => serializeStable(item) ?? 'null');
    return `[${mapped.join(',')}]`;
  }

  // For plain objects, sort the keys to ensure consistent order
  const record = value as Record<string, unknown>;
  const keyValuePairs: string[] = [];
  for (const key of Object.keys(record).sort()) {
    const serialized = serializeStable(record[key]);
    if (serialized !== undefined) {
      keyValuePairs.push(`${JSON.stringify(key)}:${serialized}`);
    }
  }
  return `{${keyValuePairs.join(',')}}`;
}
| 31 | await sandbox.executeScript( 32 | 'await testFunction(); await testFunction2();', 33 | ); 34 | expect(sandbox.callHistory[0]).toHaveLength(2); 35 | expect(sandbox.callHistory[0]?.[0]).toEqual({ 36 | returnValue: 'test result', 37 | callable: 'testFunction', 38 | }); 39 | expect(sandbox.callHistory[0]?.[1]).toEqual({ 40 | returnValue: 'test result 2', 41 | callable: 'testFunction2', 42 | }); 43 | 44 | await sandbox.executeScript('await testFunction()'); 45 | expect(sandbox.callHistory[0]).toHaveLength(2); 46 | expect(sandbox.callHistory[1]).toHaveLength(1); 47 | expect(sandbox.callHistory[1]?.[0]).toEqual({ 48 | returnValue: 'test result', 49 | callable: 'testFunction', 50 | }); 51 | }); 52 | 53 | it('should handle errors in registered functions', async () => { 54 | const errorFn = async () => { 55 | throw new Error('Test error'); 56 | }; 57 | sandbox.register('errorFunction', errorFn); 58 | 59 | await expect( 60 | sandbox.executeScript('await errorFunction()'), 61 | ).rejects.toThrow( 62 | 'Script execution failed: Error calling function errorFunction: Test error', 63 | ); 64 | }); 65 | 66 | it('should properly pass arguments to registered functions', async () => { 67 | const argFn = async ({ a, b }: { a: number; b: string }) => `${a}-${b}`; 68 | sandbox.register('argFunction', argFn); 69 | 70 | const result = await sandbox.executeScript( 71 | 'return await argFunction({ a: 42, b: "test" })', 72 | ); 73 | expect(result.returnValue).toBe('42-test'); 74 | }); 75 | }); 76 | 77 | describe('executeScript', () => { 78 | it('should execute a simple script and return the expected result', async () => { 79 | const result = await sandbox.executeScript('return 42;'); 80 | expect(result.returnValue).toBe(42); 81 | }); 82 | 83 | it('should capture console output in the returned output string', async () => { 84 | const result = await sandbox.executeScript('console.log("test output")'); 85 | expect(result.output).toContain('test output'); 86 | }); 87 | 88 | it('should 
track all function calls made during execution', async () => { 89 | const testFn = async () => 'result'; 90 | sandbox.register('testFn', testFn); 91 | 92 | const result = await sandbox.executeScript(` 93 | await testFn(); 94 | await testFn(); 95 | `); 96 | 97 | expect(result.calls).toHaveLength(2); 98 | expect(result.calls[0]).toEqual({ 99 | returnValue: 'result', 100 | callable: 'testFn', 101 | }); 102 | expect(result.calls[1]).toEqual({ 103 | returnValue: 'result', 104 | callable: 'testFn', 105 | }); 106 | }); 107 | 108 | it('should throw AgentError when script execution fails', async () => { 109 | await expect(sandbox.executeScript('invalid code;')).rejects.toThrow( 110 | new AgentError({ 111 | message: "Script execution failed: Unexpected identifier 'code'", 112 | code: AgentErrorCode.SCRIPT_EXECUTION_FAILED, 113 | }), 114 | ); 115 | }); 116 | 117 | it('should not track newly created variables if not associated with a UDF call', async () => { 118 | const result = await sandbox.executeScript(` 119 | const newVar = 'test value'; 120 | return newVar; 121 | `); 122 | 123 | expect(result.returnValue).toBe('test value'); 124 | expect(result.output).toBe(''); 125 | }); 126 | 127 | it('should track newly created variables if associated with a UDF call', async () => { 128 | const testFn = async () => 'result'; 129 | sandbox.register('testFn', testFn); 130 | 131 | const result = await sandbox.executeScript(` 132 | testFnResult = await testFn(); 133 | `); 134 | 135 | expect(result.returnValue).toBe(undefined); 136 | expect(result.output).toContain('testFnResult = "result"'); 137 | }); 138 | }); 139 | 140 | describe('formatScriptCallResults', () => { 141 | it('should format call results with corresponding variables', () => { 142 | const testValue = { key: 'value' }; 143 | sandbox.vmContext['testVar'] = testValue; 144 | 145 | const formatted = sandbox.formatScriptCallResults( 146 | ['testVar'], 147 | [{ callable: 'testFn', returnValue: testValue }], 148 | ); 149 | 150 | 
// Test suite of packages/agent-script/src/lib/__tests__/codeAgent.udf.test.ts.
// Relies on the file's imports: Type ('@sinclair/typebox'), CodeAgent
// ('../codeAgent'), FinalAnswerUdf and TerminateUdf ('../udf'), AgentError and
// AgentErrorCode ('../errors'), IUdf ('../types').
describe('CodeAgent', () => {
  describe('UDF Execution', () => {
    /**
     * Builds a fully mocked UDF. Every test gets fresh `jest.fn()` instances;
     * `overrides` replaces individual fields of the default mock.
     */
    const createMockUdf = (overrides: Partial<IUdf> = {}): IUdf => ({
      name: 'mockUdf',
      description: 'Mock UDF',
      inputSchema: Type.Object({
        testParam: Type.String({ description: 'Test parameter' }),
      }),
      outputSchema: Type.String(),
      getSignature: jest.fn(),
      call: jest.fn().mockResolvedValue('Success response'),
      onBeforeCall: jest.fn(),
      onAfterCall: jest.fn(),
      ...overrides,
    });

    /**
     * Creates an agent holding the basic UDFs required for initialization
     * plus the given extra UDFs.
     */
    const createAgent = (...extraUdfs: IUdf[]): CodeAgent =>
      new CodeAgent({
        name: 'TestAgent',
        description: 'Test agent description',
        udfs: [new FinalAnswerUdf(), new TerminateUdf(), ...extraUdfs],
        maxSteps: 10,
      });

    /** Global Jest invocation index of the first call made to a mock. */
    const firstInvocationOrder = (fn: unknown): number =>
      (fn as jest.Mock).mock.invocationCallOrder[0]!;

    test('should successfully call a UDF with valid input', async () => {
      const mockUdf = createMockUdf();
      const agent = createAgent(mockUdf);

      // Call the UDF with valid input
      const result = await agent.callUdf('mockUdf', {
        testParam: 'test value',
      });

      // Verify the UDF was called with correct input
      expect(mockUdf.call).toHaveBeenCalledWith(
        { testParam: 'test value' },
        agent,
      );
      expect(result).toBe('Success response');
    });

    test('should throw error when calling non-existent UDF', async () => {
      const agent = createAgent();

      // Attempt to call a non-existent UDF
      await expect(agent.callUdf('nonExistentUdf', {})).rejects.toThrow(
        new AgentError({
          message: 'UDF nonExistentUdf not found',
          code: AgentErrorCode.UDF_NOT_FOUND,
        }),
      );
    });

    test('should validate UDF input schema before execution', async () => {
      // Mock UDF with a specific input schema
      const mockUdf = createMockUdf({
        inputSchema: Type.Object({
          requiredParam: Type.String({ description: 'Required parameter' }),
        }),
      });
      const agent = createAgent(mockUdf);

      // Attempt to call the UDF with invalid input (missing required parameter)
      await expect(agent.callUdf('mockUdf', {})).rejects.toThrow(AgentError);
      expect(mockUdf.call).not.toHaveBeenCalled();

      // Call with valid input should succeed
      await agent.callUdf('mockUdf', { requiredParam: 'valid value' });
      expect(mockUdf.call).toHaveBeenCalled();
    });

    test('should call onBeforeCall and onAfterCall hooks for UDFs', async () => {
      const mockUdf = createMockUdf();
      const agent = createAgent(mockUdf);

      const input = { testParam: 'test value' };
      const output = await agent.callUdf('mockUdf', input);

      // Verify hooks were called with the right arguments
      expect(mockUdf.onBeforeCall).toHaveBeenCalledWith(input, agent);
      expect(mockUdf.call).toHaveBeenCalledWith(input, agent);
      expect(mockUdf.onAfterCall).toHaveBeenCalledWith(input, output, agent);

      // Verify the call order: onBeforeCall -> call -> onAfterCall.
      // `toHaveBeenCalled` alone cannot detect a wrong order, so compare the
      // invocation indices Jest records for every mock call.
      expect(firstInvocationOrder(mockUdf.onBeforeCall)).toBeLessThan(
        firstInvocationOrder(mockUdf.call),
      );
      expect(firstInvocationOrder(mockUdf.call)).toBeLessThan(
        firstInvocationOrder(mockUdf.onAfterCall),
      );
    });

    test('should handle UDF execution errors gracefully', async () => {
      // Mock UDF that throws an error
      const mockUdf = createMockUdf({
        description: 'Mock UDF that throws an error',
        call: jest.fn().mockImplementation(() => {
          throw new Error('Test execution error');
        }),
      });
      const agent = createAgent(mockUdf);

      // Attempt to call the UDF that throws an error
      await expect(
        agent.callUdf('mockUdf', { testParam: 'test value' }),
      ).rejects.toThrow(
        expect.objectContaining({
          message: expect.stringContaining('Error when calling UDF mockUdf'),
          code: AgentErrorCode.UDF_EXECUTION_ERROR,
        }),
      );

      // Verify onBeforeCall was called but onAfterCall wasn't
      expect(mockUdf.onBeforeCall).toHaveBeenCalled();
      expect(mockUdf.onAfterCall).not.toHaveBeenCalled();
    });
  });
});
// Memory step classes of packages/agent-script/src/lib/agentMemory.ts.
// Relies on the file's imports: AgentLogger ('./agentLogger'), the step and
// message types from './types', and observationToChatMessage ('./utils').

/** Rendering switches shared by every step's `toMessages`. */
interface ToMessagesOptions {
  summaryMode: boolean;
  showModelInputMessages: boolean;
}

/**
 * One model/action iteration of the agent: the messages sent to the model,
 * what the model answered, the observations gathered and any error raised.
 */
export class ActionStep implements IActionStep {
  modelInputMessages?: IChatMessage[];
  startTime?: number;
  endTime?: number;
  stepNumber: number;
  error?: IAgentError;
  duration?: number;
  modelOutputMessage?: IChatMessage;
  modelOutput?: string;
  observations: Observation[];
  actionOutput?: any;

  /**
   * @param init - Initial field values; only `stepNumber` is required and
   *   `observations` defaults to an empty list.
   */
  constructor(init: {
    modelInputMessages?: IChatMessage[];
    startTime?: number;
    endTime?: number;
    stepNumber: number;
    error?: IAgentError;
    duration?: number;
    modelOutputMessage?: IChatMessage;
    modelOutput?: string;
    observations?: Observation[];
    actionOutput?: any;
  }) {
    this.modelInputMessages = init.modelInputMessages;
    this.startTime = init.startTime;
    this.endTime = init.endTime;
    this.stepNumber = init.stepNumber;
    this.error = init.error;
    this.duration = init.duration;
    this.modelOutputMessage = init.modelOutputMessage;
    this.modelOutput = init.modelOutput;
    this.observations = init.observations || [];
    this.actionOutput = init.actionOutput;
  }

  /**
   * Renders the step as chat messages, in this order: the model input (only
   * when `showModelInputMessages`), the model output (skipped in summary
   * mode), every observation, and finally the error with a retry hint.
   */
  toMessages(options: ToMessagesOptions): IChatMessage[] {
    const rendered: IChatMessage[] = [];

    const inputMessages = this.modelInputMessages;
    if (inputMessages && options.showModelInputMessages) {
      const joinedInput = inputMessages
        .map((inputMessage) => inputMessage.content)
        .join('\n');
      rendered.push({ role: 'system', content: joinedInput });
    }

    if (this.modelOutput && !options.summaryMode) {
      rendered.push({ role: 'assistant', content: this.modelOutput.trim() });
    }

    for (const observation of this.observations) {
      rendered.push(observationToChatMessage(observation));
    }

    if (this.error) {
      rendered.push({
        role: 'user',
        content:
          'Error:\n' +
          String(this.error.message) +
          "\nNow let's retry: take care not to repeat previous errors! If you have retried several times, try a completely different approach.\n",
      });
    }

    return rendered;
  }
}

/** A planning iteration: the facts list and the plan produced by the model. */
export class PlanningStep implements IPlanningStep {
  modelInputMessages: IChatMessage[];
  modelOutputMessageFacts: IChatMessage;
  facts: string;
  modelOutputMessagePlan: IChatMessage;
  plan: string;

  /** @param init - Messages and texts recorded for this planning step. */
  constructor(init: {
    modelInputMessages: IChatMessage[];
    modelOutputMessageFacts: IChatMessage;
    facts: string;
    modelOutputMessagePlan: IChatMessage;
    plan: string;
  }) {
    this.modelInputMessages = init.modelInputMessages;
    this.modelOutputMessageFacts = init.modelOutputMessageFacts;
    this.facts = init.facts;
    this.modelOutputMessagePlan = init.modelOutputMessagePlan;
    this.plan = init.plan;
  }

  /**
   * Renders the facts list and, outside summary mode, the plan as assistant
   * messages. `showModelInputMessages` is not used by this step.
   */
  toMessages(options: ToMessagesOptions): IChatMessage[] {
    const factsMessage: IChatMessage = {
      role: 'assistant',
      content: `[FACTS LIST]:\n${this.facts.trim()}`,
    };
    if (options.summaryMode) {
      // The plan is not shown to a model writing a plan to avoid influencing
      // the new plan.
      return [factsMessage];
    }

    const planMessage: IChatMessage = {
      role: 'assistant',
      content: `[PLAN]:\n${this.plan.trim()}`,
    };
    return [factsMessage, planMessage];
  }
}

/** The task given to the agent, with optional initial observations. */
export class TaskStep implements ITaskStep {
  task: string;
  observations: Observation[];

  /** @param init - Task text; `observations` defaults to an empty list. */
  constructor(init: { task: string; observations?: Observation[] }) {
    this.task = init.task;
    this.observations = init.observations || [];
  }

  /**
   * Renders the task as a single user message; neither option is used.
   *
   * NOTE(review): `observations` is stored but not rendered here - confirm
   * that this is intended.
   */
  toMessages(_options: ToMessagesOptions): IChatMessage[] {
    return [{ role: 'user', content: `New task:\n${this.task}` }];
  }
}

/** Holds the system prompt of the agent. */
export class SystemPromptStep implements ISystemPromptStep {
  systemPrompt: string;

  /** @param init - The system prompt text. */
  constructor(init: { systemPrompt: string }) {
    this.systemPrompt = init.systemPrompt;
  }

  /**
   * Renders the system prompt as one system message; nothing is emitted in
   * summary mode. `showModelInputMessages` is not used by this step.
   */
  toMessages(options: ToMessagesOptions): IChatMessage[] {
    return options.summaryMode
      ? []
      : [{ role: 'system', content: this.systemPrompt }];
  }
}

/** The agent's memory: its system prompt plus the ordered list of steps. */
export class AgentMemory implements IAgentMemory {
  public systemPrompt: SystemPromptStep;
  public steps: AgentMemoryStep[];
  public logger: AgentLogger = new AgentLogger();

  /** @param systemPrompt - Text wrapped into the memory's system prompt step. */
  constructor(systemPrompt: string) {
    this.systemPrompt = new SystemPromptStep({ systemPrompt });
    this.steps = [];
  }

  /** Forgets all recorded steps; the system prompt is kept. */
  reset() {
    this.steps = [];
  }

  /**
   * Renders every step in summary mode, without model input messages, as one
   * flat message list.
   */
  getSuccinctSteps(): IChatMessage[] {
    const succinct: IChatMessage[] = [];
    for (const step of this.steps) {
      succinct.push(
        ...step.toMessages({
          summaryMode: true,
          showModelInputMessages: false,
        }),
      );
    }
    return succinct;
  }

  /**
   * Writes the recorded steps to `logger`, one entry per step.
   *
   * @param logger - Destination of the replay output.
   * @param detailed - Also log system prompts and the model input messages.
   */
  replay(logger: AgentLogger, detailed = false) {
    logger.console.log("Replaying the agent's steps:");

    this.steps.forEach((step) => {
      if (step instanceof SystemPromptStep && detailed) {
        logger.logMarkdown({
          title: 'System prompt',
          content: step.systemPrompt,
        });
        return;
      }

      if (step instanceof TaskStep) {
        logger.logTask(step.task);
        return;
      }

      if (step instanceof ActionStep) {
        logger.logRule(`Step ${step.stepNumber}`);
        if (detailed) {
          logger.logMessages(step.modelInputMessages || null);
        }
        logger.logMarkdown({
          title: 'Agent output:',
          content: step.modelOutput || '',
        });
        return;
      }

      if (step instanceof PlanningStep) {
        logger.logRule('Planning step');
        if (detailed) {
          logger.logMessages(step.modelInputMessages);
        }
        logger.logMarkdown({
          title: 'Agent output:',
          content: `${step.facts}\n${step.plan}`,
        });
      }
    });
  }
}
description: 'Test agent for planning tests', 27 | udfs: [new FinalAnswerUdf()], 28 | maxSteps: 10, 29 | model: mockModel, 30 | sandbox: mockSandbox, 31 | shouldRunPlanning: false, 32 | planningInterval: 3, 33 | }); 34 | }); 35 | 36 | test('updateShouldRunPlanning should update shouldRunPlanning', () => { 37 | codeAgent.updateShouldRunPlanning(true); 38 | expect((codeAgent as any).shouldRunPlanning).toBe(true); 39 | }); 40 | 41 | test('should create initial plan when shouldRunPlanning is true', async () => { 42 | // Setup 43 | codeAgent.updateShouldRunPlanning(true); 44 | const mockFactsResponse = { message: { content: 'Mock facts' } }; 45 | const mockPlanResponse = { message: { content: 'Mock initial plan' } }; 46 | 47 | (mockModel.chatCompletion as jest.Mock) 48 | .mockResolvedValueOnce(mockFactsResponse) 49 | .mockResolvedValueOnce(mockPlanResponse); 50 | 51 | // Act 52 | await codeAgent.planningStep(); 53 | 54 | // Assert 55 | expect(mockModel.chatCompletion).toHaveBeenCalledTimes(2); 56 | expect(codeAgent.memory.steps.length).toBe(1); 57 | expect(codeAgent.memory.steps[0]).toBeInstanceOf(PlanningStep); 58 | expect((codeAgent as any).shouldRunPlanning).toBe(false); 59 | 60 | const planningStep = codeAgent.memory.steps[0] as PlanningStep; 61 | expect(planningStep.plan).toContain('Mock initial plan'); 62 | expect(planningStep.facts).toContain('Mock facts'); 63 | }); 64 | 65 | test('should update plan at specified planning interval', async () => { 66 | // Setup 67 | codeAgent.stepNumber = 4; // Should trigger at step 4 when interval is 3 68 | 69 | // Mock memory with some existing steps 70 | codeAgent.memory.steps.push(new ActionStep({ stepNumber: 1 })); 71 | codeAgent.memory.steps.push(new ActionStep({ stepNumber: 2 })); 72 | codeAgent.memory.steps.push(new ActionStep({ stepNumber: 3 })); 73 | 74 | // Act 75 | await codeAgent.afterStep(); 76 | 77 | // Assert 78 | expect((codeAgent as any).shouldRunPlanning).toBe(true); 79 | 80 | // Setup for planningStep test 81 
| const mockFactsResponse = { message: { content: 'Updated facts' } }; 82 | const mockPlanResponse = { message: { content: 'Updated plan' } }; 83 | 84 | (mockModel.chatCompletion as jest.Mock) 85 | .mockResolvedValueOnce(mockFactsResponse) 86 | .mockResolvedValueOnce(mockPlanResponse); 87 | 88 | // Act - call planning step 89 | await codeAgent.planningStep(); 90 | 91 | // Assert 92 | expect(codeAgent.memory.steps.length).toBe(4); 93 | expect(codeAgent.memory.steps[3]).toBeInstanceOf(PlanningStep); 94 | expect((codeAgent as any).shouldRunPlanning).toBe(false); 95 | }); 96 | 97 | test('should handle plan update with new facts', async () => { 98 | // Setup 99 | codeAgent.stepNumber = 5; 100 | codeAgent.task = 'Test planning task'; 101 | 102 | // Add some memory steps 103 | codeAgent.memory.steps.push( 104 | new ActionStep({ 105 | stepNumber: 1, 106 | modelOutput: 'Some output', 107 | observations: [{ type: 'text', text: 'Observation 1' }], 108 | }), 109 | ); 110 | 111 | const mockFactsResponse = { 112 | message: { content: 'New facts about the task' }, 113 | }; 114 | const mockPlanResponse = { 115 | message: { content: 'Updated plan with new steps' }, 116 | }; 117 | 118 | (mockModel.chatCompletion as jest.Mock) 119 | .mockResolvedValueOnce(mockFactsResponse) 120 | .mockResolvedValueOnce(mockPlanResponse); 121 | 122 | // Act 123 | await codeAgent.planningStep(); 124 | 125 | // Assert 126 | expect(mockModel.chatCompletion).toHaveBeenCalledTimes(2); 127 | 128 | // Verify that the facts update was passed to the plan generation 129 | const planCallArgs = (mockModel.chatCompletion as jest.Mock).mock 130 | .calls[1][0]; 131 | expect( 132 | planCallArgs.messages.some( 133 | (msg: any) => 134 | msg.content && msg.content.includes('New facts about the task'), 135 | ), 136 | ).toBe(true); 137 | 138 | // Check the final planning step content 139 | const planningStep = codeAgent.memory.steps[1] as PlanningStep; 140 | expect(planningStep.facts).toContain('New facts about the 
task'); 141 | expect(planningStep.plan).toContain('Updated plan with new steps'); 142 | }); 143 | 144 | test('should not include previous plan steps in new plan generation', async () => { 145 | // Setup 146 | codeAgent.stepNumber = 5; 147 | 148 | // Add an initial planning step 149 | const initialPlanningStep = new PlanningStep({ 150 | plan: 'Initial plan', 151 | facts: 'Initial facts', 152 | modelOutputMessagePlan: { content: 'Initial plan' } as any, 153 | modelOutputMessageFacts: { content: 'Initial facts' } as any, 154 | modelInputMessages: [], 155 | }); 156 | codeAgent.memory.steps.push(initialPlanningStep); 157 | 158 | // Add an action step 159 | codeAgent.memory.steps.push( 160 | new ActionStep({ 161 | stepNumber: 2, 162 | modelOutput: 'Action output', 163 | observations: [{ type: 'text', text: 'Action observation' }], 164 | }), 165 | ); 166 | 167 | // Mock model responses 168 | const mockFactsResponse = { message: { content: 'Updated facts' } }; 169 | const mockPlanResponse = { 170 | message: { content: 'New plan without previous plan content' }, 171 | }; 172 | 173 | (mockModel.chatCompletion as jest.Mock) 174 | .mockResolvedValueOnce(mockFactsResponse) 175 | .mockResolvedValueOnce(mockPlanResponse); 176 | 177 | // Spy on writeMemoryToMessages 178 | const writeMemorySpy = jest.spyOn(codeAgent, 'writeMemoryToMessages'); 179 | 180 | // Act 181 | await codeAgent.planningStep(); 182 | 183 | // Assert 184 | // Verify that writeMemoryToMessages was called with summary mode false 185 | expect(writeMemorySpy).toHaveBeenCalledWith(); 186 | 187 | // Check that the model was called with messages that don't include the system prompt 188 | const factsCallArgs = (mockModel.chatCompletion as jest.Mock).mock 189 | .calls[0][0]; 190 | expect( 191 | factsCallArgs.messages.some( 192 | (msg: any) => 193 | msg.content && 194 | msg.content.includes(codeAgent.memory.systemPrompt.systemPrompt), 195 | ), 196 | ).toBe(false); 197 | 198 | // Verify the new planning step doesn't 
reference the old one 199 | const newPlanningStep = codeAgent.memory.steps[2] as PlanningStep; 200 | expect(newPlanningStep.plan).toContain( 201 | 'New plan without previous plan content', 202 | ); 203 | }); 204 | }); 205 | }); 206 | -------------------------------------------------------------------------------- /packages/agent-script/src/lib/codeAgent.prompt.ts: -------------------------------------------------------------------------------- 1 | import { IAgentPrompt } from './types'; 2 | import { codeAgentRolePromptPart } from './prompts/parts'; 3 | import { 4 | buildCodeAgentRulesPrompt, 5 | buildExamplesSectionPrompt, 6 | ICodeAgentRunExample, 7 | } from './prompts/builder'; 8 | import { removeLeadingIndentation } from './utils'; 9 | 10 | export const codeAgentExamples: ICodeAgentRunExample[] = [ 11 | { 12 | task: 'Generate an image of the oldest person in this document.', 13 | steps: [ 14 | { 15 | thought: 16 | 'I will proceed step by step and use the following UDFs: `documentQa` to find the oldest person in the document, then `imageGenerator` to generate an image according to the answer.', 17 | code: 'answer = await documentQa({document: document, question: "Who is the oldest person mentioned?"})', 18 | result: 19 | 'answer: "The oldest person in the document is John Doe, a 55 year old lumberjack living in Newfoundland."', 20 | }, 21 | { 22 | thought: 'I will now generate an image showcasing the oldest person.', 23 | code: 'image = await imageGenerator("A portrait of John Doe, a 55-year-old man living in Canada.")\nawait finalAnswer({answer: image})', 24 | result: 'image: "https://example.com/image.png"', 25 | }, 26 | ], 27 | }, 28 | { 29 | task: 'Find the best selling top 5 books in 2024, give me the title, author', 30 | steps: [ 31 | { 32 | thought: 33 | 'I will use the UDF `webSearch` to get the best selling books in 2024.', 34 | code: 'bookSearchResults = await webSearch({query: "best selling books in 2024"})', 35 | result: removeLeadingIndentation(` 36 
| bookSearchResults: [ 37 | { 38 | "title": "The Great Gatsby", 39 | "link": "https://www.amazon.com/Great-Gatsby-F-Scott-Fitzgerald/dp/1451673316", 40 | }, 41 | ... 42 | ] 43 | `), 44 | }, 45 | { 46 | thought: 47 | 'I have the result from the websearch stored in the variable `bookSearchResults`. Now I need to visit each of the webpages from the results and extract the title, author', 48 | code: 'webpageDataLink1 = await getWebpageData(bookSearchResults[0].link)', 49 | result: removeLeadingIndentation( 50 | `webpageDataLink1: [ 51 | { 52 | "title": "The Great Gatsby", 53 | "link": "https://www.amazon.com/Great-Gatsby-F-Scott-Fitzgerald/dp/1451673316", 54 | ...truncated... 55 | "title": "Alice's Adventures in Wonderland", 56 | "link": "https://www.amazon.com/alice-wonderland-lewis-carroll/dp/1411673311", 57 | } 58 | ]`, 59 | ), 60 | }, 61 | { 62 | thought: 63 | 'I have visited the first webpage from the results. Now I need to visit the second one.', 64 | code: 'webpageDataLink2 = await getWebpageData(bookSearchResults[1].link)', 65 | result: `webpageDataLink2: { 66 | "title": "The Great Gatsby", 67 | "author": "F. Scott Fitzgerald", 68 | }`, 69 | }, 70 | ], 71 | }, 72 | ] as const; 73 | 74 | export const codeAgentPrompt: IAgentPrompt = { 75 | systemPrompt: `${codeAgentRolePromptPart} 76 | 77 | In the end you have to call \`await finalAnswer\` UDF with the final answer as the argument. 78 | 79 | ${buildExamplesSectionPrompt(codeAgentExamples)} 80 | 81 | Above examples were using notional UDFs that might not exist for you. On top of performing computations in the Javascript code snippets that you create, you only have access to these UDFs (in additional to any built-in functions): 82 | \`\`\`js 83 | {%- for udf in udfs.values() %} 84 | {{ udf.getSignature() | safe }}{{ '\\n' }} 85 | {%- endfor %} 86 | \`\`\` 87 | 88 | ${buildCodeAgentRulesPrompt()} 89 | 90 | {{ description | safe }} 91 | 92 | Now Begin! 
If you solve the task correctly, you will receive a reward of $1,000,000.`, 93 | planning: { 94 | initialFacts: `Below I will present you a task. 95 | 96 | You will now build a comprehensive preparatory survey of which facts we have at our disposal and which ones we still need. 97 | To do so, you will have to read the task and identify things that must be discovered in order to successfully complete it. 98 | Don't make any assumptions. For each item, provide a thorough reasoning. Here is how you will structure this survey: 99 | 100 | --- 101 | ### 1. Facts given in the task 102 | List here the specific facts given in the task that could help you (there might be nothing here). 103 | 104 | ### 2. Facts to look up 105 | List here any facts that we may need to look up. 106 | Also list where to find each of these, for instance a website, a file... - maybe the task contains some sources that you should re-use here. 107 | 108 | ### 3. Facts to derive 109 | List here anything that we want to derive from the above by logical reasoning, for instance computation or simulation. 110 | 111 | Keep in mind that "facts" will typically be specific names, dates, values, etc. Your answer should use the below headings: 112 | ### 1. Facts given in the task 113 | ### 2. Facts to look up 114 | ### 3. Facts to derive 115 | Do not add anything else.`, 116 | initialPlan: `You are a world expert at making efficient plans to solve any task using a set of carefully crafted User Defined Functions (UDFs). 117 | 118 | Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts. 119 | This plan should involve individual tasks based on the available UDFs, that if executed correctly will yield the correct answer. 120 | Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL UDF CALLS. 121 | After writing the final step of the plan, write the '\n' tag and stop there. 
122 | 123 | Here is your task: 124 | 125 | Task: 126 | \`\`\` 127 | {{task}} 128 | \`\`\` 129 | You can leverage these UDFs: 130 | \`\`\`js 131 | {%- for udf in udfs.values() %} 132 | {{ udf.getSignature() | safe }}{{ '\\n' }} 133 | {%- endfor %} 134 | \`\`\` 135 | 136 | {%- if managedAgents and managedAgents | length %} 137 | You can also give tasks to team members. 138 | Calling a team member works the same as for calling a UDF: simply, the only argument you can give in the call is 'request', a long string explaining your request. 139 | Given that this team member is a real human, you should be very verbose in your request. 140 | Here is a list of the team members that you can call: 141 | {%- for agent in managedAgents.values() %} 142 | - {{ agent.name }}: {{ agent.description }} 143 | {%- endfor %} 144 | {%- else %} 145 | {%- endif %} 146 | 147 | List of facts that you know: 148 | \`\`\` 149 | {{answerFacts}} 150 | \`\`\` 151 | 152 | Now begin! Write your plan below.`, 153 | updateFactsPreMessages: `You are a world expert at gathering known and unknown facts based on a conversation. 154 | Below you will find a task, and a history of attempts made to solve the task. You will have to produce a list of these: 155 | ### 1. Facts given in the task 156 | ### 2. Facts that we have learned 157 | ### 3. Facts still to look up 158 | ### 4. Facts still to derive 159 | Find the task and history below:`, 160 | updateFactsPostMessages: `Earlier we've built a list of facts. 161 | But since in your previous steps you may have learned useful new facts or invalidated some false ones. 162 | Please update your list of facts based on the previous history, and provide these headings: 163 | ### 1. Facts given in the task 164 | ### 2. Facts that we have learned 165 | ### 3. Facts still to look up 166 | ### 4. 
Facts still to derive 167 | 168 | Now write your new list of facts below.`, 169 | updatePlanPreMessages: `You are a world expert at making efficient plans to solve any task using a set of carefully crafted User Defined Functions (UDFs). 170 | 171 | You have been given a task: 172 | \`\`\` 173 | {{task | safe}} 174 | \`\`\` 175 | 176 | Find below the record of what has been tried so far to solve it. Then you will be asked to make an updated plan to solve the task. 177 | If the previous tries so far have met some success, you can make an updated plan based on these actions. 178 | If you are stalled, you can make a completely new plan starting from scratch.`, 179 | updatePlanPostMessages: `You're still working towards solving this task: 180 | \`\`\` 181 | {{task | safe}} 182 | \`\`\` 183 | 184 | You can leverage these UDFs: 185 | \`\`\`js 186 | {%- for udf in udfs.values() %} 187 | {{ udf.getSignature() | safe }}{{ '\\n' }} 188 | {%- endfor %} 189 | \`\`\` 190 | 191 | {%- if managedAgents and managedAgents | length %} 192 | You can also give tasks to team members. 193 | Calling a team member works the same as for calling a UDF: simply, the only argument you can give in the call is 'task'. 194 | Given that this team member is a real human, you should be very verbose in your task, it should be a long string providing informations as detailed as necessary. 195 | Here is a list of the team members that you can call: 196 | {%- for agent in managedAgents.values() %} 197 | - {{ agent.name }}: {{ agent.description }} 198 | {%- endfor %} 199 | {%- else %} 200 | {%- endif %} 201 | 202 | Here is the up to date list of facts that you know: 203 | \`\`\` 204 | {{factsUpdate}} 205 | \`\`\` 206 | 207 | Now for the given task, develop a step-by-step high-level plan taking into account the above inputs and list of facts. 208 | This plan should involve individual tasks based on the available UDFs, that if executed correctly will yield the correct answer. 
209 | Beware that you have {remainingSteps} steps remaining. 210 | Do not skip steps, do not add any superfluous steps. Only write the high-level plan, DO NOT DETAIL INDIVIDUAL UDF CALLS. 211 | After writing the final step of the plan, write the '\n' tag and stop there. 212 | 213 | Now write your new plan below.`, 214 | }, 215 | managedAgent: { 216 | task: `You're a helpful agent named '{{name}}'. 217 | You have been submitted this task by your manager. 218 | --- 219 | Task: 220 | {{task | safe}} 221 | --- 222 | You're helping your manager solve a wider task: so make sure to not provide a one-line answer, but give as much information as possible to give them a clear understanding of the answer. 223 | 224 | Your finalAnswer WILL HAVE to contain these parts: 225 | ### 1. Task outcome (short version): 226 | ### 2. Task outcome (extremely detailed version): 227 | ### 3. Additional context (if relevant): 228 | 229 | Put all these in your finalAnswer UDF, everything that you do not pass as an argument to finalAnswer will be lost. 230 | And even if your task resolution is not successful, please return as much context as possible, so that your manager can act upon this feedback.`, 231 | report: `Here is the final answer from your managed agent '{{name}}': 232 | {{finalAnswer}}`, 233 | }, 234 | finalAnswer: { 235 | preMessages: `An agent tried to answer a user query but it got stuck and failed to do so. You are tasked with providing an answer instead. 
Here is the agent's memory:,`, 236 | postMessages: `Based on the above, please provide an answer to the following user request: 237 | {{task}}`, 238 | }, 239 | } as const; 240 | -------------------------------------------------------------------------------- /packages/agent-script-instrumentation/src/lib/instrumentation.ts: --------------------------------------------------------------------------------
import {
  InstrumentationBase,
  InstrumentationConfig,
} from '@opentelemetry/instrumentation';
import {
  context,
  trace,
  Span,
  SpanStatusCode,
  SpanKind,
} from '@opentelemetry/api';
import {
  SemanticConventions,
  OpenInferenceSpanKind,
  MimeType,
  LLMProvider,
  LLMSystem,
} from '@arizeai/openinference-semantic-conventions';

import {
  CodeAgent,
  ChatModel,
  IChatMessage,
  ActionStep,
  IChatResponseMetadata,
} from '@runparse/agent-script';
import { ChatCompletionCreateParamsNonStreaming } from 'openai/resources/chat/completions';
import { getLLMInputMessagesAttributes } from './utils';

import { OITracer, TraceConfigOptions } from '@arizeai/openinference-core';

const COMPONENT = '@runparse/agent-script-instrumentation';

/** Shape the patched `chatCompletion` expects back from the wrapped method. */
type ChatCompletionResult = {
  message: IChatMessage;
  metadata: IChatResponseMetadata;
};

/**
 * Runs `call` with `span` installed as the active span and closes the span
 * when the call settles.
 *
 * On success the span status is set to OK and `recordOutput` (if given) is
 * invoked with the result so the caller can attach output attributes. On
 * failure the span status is set to ERROR, the exception is recorded and the
 * original error is rethrown unchanged. `recordOutput` runs inside the same
 * `try`, so an error thrown while recording output is reported on the span
 * and propagated exactly like a failure of `call`.
 *
 * @param span - Span that was started for this call; always ended here.
 * @param call - The wrapped operation.
 * @param recordOutput - Optional hook that attaches result attributes.
 * @returns The value produced by `call`.
 */
function runInSpan<T>(
  span: Span,
  call: () => T | PromiseLike<T>,
  recordOutput?: (result: T) => void,
): Promise<T> {
  return context.with(trace.setSpan(context.active(), span), async () => {
    try {
      const result = await call();
      span.setStatus({ code: SpanStatusCode.OK });
      recordOutput?.(result);
      return result;
    } catch (error: unknown) {
      // Anything can be thrown in JavaScript; narrow before reading `.message`
      // so the error handler cannot itself fail on `null`/primitive throws.
      span.setStatus({
        code: SpanStatusCode.ERROR,
        message: error instanceof Error ? error.message : String(error),
      });
      span.recordException(error instanceof Error ? error : String(error));
      throw error;
    } finally {
      span.end();
    }
  });
}

/**
 * OpenTelemetry instrumentation that wraps `CodeAgent` (`step`,
 * `planningStep`, `run`, `callUdf`) and `ChatModel.chatCompletion` so that
 * each call is reported as a span carrying OpenInference attributes.
 */
export class AgentsInstrumentation extends InstrumentationBase {
  private oiTracer: OITracer;

  /**
   * @param config - Standard OpenTelemetry instrumentation config.
   * @param traceConfig - Options forwarded to the OpenInference tracer that
   *   is used for the LLM spans.
   */
  constructor(
    config: InstrumentationConfig = {},
    traceConfig?: TraceConfigOptions,
  ) {
    super(COMPONENT, '1.0.0', config);
    this.oiTracer = new OITracer({ tracer: this.tracer, traceConfig });
  }

  /** Installs the prototype patches for `CodeAgent` and `ChatModel`. */
  protected init(): void {
    // Instrument CodeAgent methods
    this._diag.debug('Patching CodeAgent methods');
    this.patchCodeAgent();

    // Instrument ChatModel methods
    this._diag.debug('Patching ChatModel methods');
    this.patchChatModel();
  }

  /** Wraps the `CodeAgent` prototype methods with CHAIN/AGENT/TOOL spans. */
  private patchCodeAgent(): void {
    this._wrap(
      CodeAgent.prototype,
      'step',
      (original) =>
        async function patchedStep(this: CodeAgent, memoryStep: ActionStep) {
          const span = trace
            .getTracer(COMPONENT)
            .startSpan(`Step ${memoryStep.stepNumber}`, {
              attributes: {
                [SemanticConventions.OPENINFERENCE_SPAN_KIND]:
                  OpenInferenceSpanKind.CHAIN,
                [SemanticConventions.INPUT_VALUE]: JSON.stringify(memoryStep),
              },
            });

          return runInSpan(
            span,
            () => original.call(this, memoryStep),
            () => {
              // The step reports through the memory step it was handed, so
              // the observations are read from there rather than the result.
              span.setAttribute(
                SemanticConventions.OUTPUT_VALUE,
                JSON.stringify(memoryStep.observations || 'No observations'),
              );
            },
          );
        },
    );

    this._wrap(
      CodeAgent.prototype,
      'planningStep',
      (original) =>
        async function patchedPlanningStep(this: CodeAgent) {
          const span = trace.getTracer(COMPONENT).startSpan(`Planning Step`, {
            attributes: {
              [SemanticConventions.OPENINFERENCE_SPAN_KIND]:
                OpenInferenceSpanKind.CHAIN,
            },
          });

          return runInSpan(
            span,
            () => original.call(this),
            () => {
              // planningStep() appends the step it produced to the end of
              // memory.steps. Indexing by `stepNumber - 1` selects an older
              // entry (or nothing) whenever the memory length and the step
              // counter differ, e.g. once an earlier planning step is stored.
              const steps = this.memory.steps;
              span.setAttribute(
                SemanticConventions.OUTPUT_VALUE,
                JSON.stringify(steps[steps.length - 1]),
              );
            },
          );
        },
    );

    this._wrap(
      CodeAgent.prototype,
      'run',
      (original) =>
        async function patchedRun(this: CodeAgent, task: string, options: any) {
          const span = trace
            .getTracer(COMPONENT)
            .startSpan(`${this.name} Run`, {
              attributes: {
                [SemanticConventions.OPENINFERENCE_SPAN_KIND]:
                  OpenInferenceSpanKind.AGENT,
                [SemanticConventions.INPUT_VALUE]: JSON.stringify({
                  task,
                  options,
                }),
              },
            });

          return runInSpan(
            span,
            () => original.call(this, task, options),
            (result) => {
              span.setAttribute(
                SemanticConventions.OUTPUT_VALUE,
                JSON.stringify(result),
              );
            },
          );
        },
    );

    this._wrap(
      CodeAgent.prototype,
      'callUdf',
      (original) =>
        async function patchedCallUdf(
          this: CodeAgent,
          udfName: string,
          input: any,
        ) {
          const span = trace.getTracer(COMPONENT).startSpan('UDF Call', {
            attributes: {
              [SemanticConventions.OPENINFERENCE_SPAN_KIND]:
                OpenInferenceSpanKind.TOOL,
              [SemanticConventions.INPUT_VALUE]: JSON.stringify({
                udfName,
                input,
              }),
            },
          });

          return runInSpan(
            span,
            () => original.call(this, udfName, input),
            (result) => {
              span.setAttribute(
                SemanticConventions.OUTPUT_VALUE,
                JSON.stringify(result),
              );
            },
          );
        },
    );
  }

  /** Wraps `ChatModel.chatCompletion` with an OpenInference LLM span. */
  private patchChatModel(): void {
    // The patched function is invoked with `this` bound to the ChatModel, so
    // the instrumentation instance has to be captured separately.
    const instrumentation = this;
    this._wrap(
      ChatModel.prototype,
      'chatCompletion',
      (original) =>
        async function patchedChatCompletion(
          this: ChatModel,
          request: ChatCompletionCreateParamsNonStreaming,
        ) {
          const attributes = getLLMInputMessagesAttributes(request);
          // Everything except the messages is reported as invocation params.
          const { messages: _messages, ...invocationParameters } = request;
          const span = instrumentation.oiTracer.startSpan('Model', {
            kind: SpanKind.INTERNAL,
            attributes: {
              [SemanticConventions.OPENINFERENCE_SPAN_KIND]:
                OpenInferenceSpanKind.LLM,
              [SemanticConventions.LLM_MODEL_NAME]:
                request.model || this.options.model || 'gpt-4o',
              [SemanticConventions.INPUT_VALUE]: JSON.stringify(request),
              [SemanticConventions.INPUT_MIME_TYPE]: MimeType.JSON,
              [SemanticConventions.LLM_INVOCATION_PARAMETERS]:
                JSON.stringify(invocationParameters),
              [SemanticConventions.LLM_SYSTEM]: LLMSystem.OPENAI,
              [SemanticConventions.LLM_PROVIDER]: LLMProvider.OPENAI,
              ...attributes,
            },
          });

          return runInSpan(
            span,
            (): Promise<ChatCompletionResult> => original.call(this, request),
            (result) => {
              // Token usage is telemetry only: a model that returns no usage
              // metadata must not turn a successful completion into an error.
              const usage = result.metadata?.usage;
              if (usage) {
                span.setAttribute(
                  SemanticConventions.LLM_TOKEN_COUNT_PROMPT,
                  usage.promptTokens,
                );
                span.setAttribute(
                  SemanticConventions.LLM_TOKEN_COUNT_COMPLETION,
                  usage.completionTokens,
                );
                span.setAttribute(
                  SemanticConventions.LLM_TOKEN_COUNT_TOTAL,
                  usage.totalTokens,
                );
              }
              span.setAttribute(
                `${SemanticConventions.LLM_OUTPUT_MESSAGES}.0.${SemanticConventions.MESSAGE_ROLE}`,
                result.message.role,
              );
              span.setAttribute(
                `${SemanticConventions.LLM_OUTPUT_MESSAGES}.0.${SemanticConventions.MESSAGE_CONTENT}`,
                result.message.content,
              );
            },
          );
        },
    );
  }

  /** Enables the instrumentation, logging the transition for diagnostics. */
  override enable() {
    this._diag.debug('Enabling instrumentation');
    super.enable();
  }

  /** Disables the instrumentation, logging the transition for diagnostics. */
  override disable() {
    this._diag.debug('Disabling instrumentation');
    super.disable();
  }
}
--------------------------------------------------------------------------------