├── .editorconfig ├── .eslintrc.json ├── .gitattributes ├── .github └── workflows │ ├── publish.yml │ └── test.yml ├── .gitignore ├── .husky └── pre-commit ├── .prettierrc ├── LICENSE ├── README.md ├── jest.config.js ├── package.json ├── patches └── @anthropic-ai__sdk@0.16.1.patch ├── playground.ts ├── pnpm-lock.yaml ├── src ├── config.ts ├── index.ts ├── models │ ├── anthropic-bedrock.ts │ ├── anthropic.ts │ ├── errors.ts │ ├── groq.ts │ ├── interface.ts │ ├── openai.mock.ts │ ├── openai.ts │ └── tokenizer.ts ├── types.ts └── utils.ts └── tsconfig.json /.editorconfig: -------------------------------------------------------------------------------- 1 | root = true 2 | 3 | [*] 4 | end_of_line = lf 5 | insert_final_newline = true 6 | 7 | [*.{js,json,yml}] 8 | charset = utf-8 9 | indent_style = space 10 | indent_size = 2 11 | -------------------------------------------------------------------------------- /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "root": true, 3 | "plugins": ["prettier", "@typescript-eslint"], 4 | "extends": [ 5 | "universe/node", 6 | "plugin:prettier/recommended", 7 | "plugin:import/recommended", 8 | "plugin:import/typescript", 9 | "plugin:@typescript-eslint/recommended" 10 | ], 11 | "parser": "@typescript-eslint/parser", 12 | "parserOptions": { 13 | "ecmaVersion": 2020, 14 | "sourceType": "module" 15 | }, 16 | "settings": { 17 | "import/extensions": [".ts", ".tsx", ".js", ".jsx"], 18 | "import/resolver": { 19 | "typescript": { 20 | "project": "tsconfig.json" 21 | } 22 | } 23 | }, 24 | "rules": { 25 | "@typescript-eslint/no-explicit-any": "off", 26 | "@typescript-eslint/ban-ts-comment": "off", 27 | "@typescript-eslint/no-empty-interface": "off", 28 | "no-shadow": "off", 29 | "no-console": ["error", { "allow": ["warn", "error", "info"] }], 30 | "react/react-in-jsx-scope": "off", 31 | "react/jsx-props-no-spreading": "off", 32 | "jsx-a11y/anchor-is-valid": "off", 33 | "jsx-a11y/alt-text": "off", 34 | "jsx-a11y/click-events-have-key-events": "off", 35 | "jsx-a11y/no-static-element-interactions": "off", 36 | "jsx-a11y/interactive-supports-focus": "off", 37 | "react/require-default-props": "off", 38 | "no-param-reassign": "off", 39 | "import/no-anonymous-default-export": "off", 40 | "complexity": ["warn", 40], 41 | "import/no-named-as-default": "off", 42 | "import/no-named-as-default-member": "off", 43 | "import/order": [ 44 | "error", 45 | { 46 | "newlines-between": "always", 47 | "groups": [ 48 | ["builtin", "external"], 49 | "internal", 50 | "parent", 51 | "sibling", 52 | "index" 53 | ], 54 | "pathGroups": [ 55 | { 56 | "pattern": "~/**", 57 | "group": "parent", 58 | "position": "before" 59 | } 60 | ], 61 | "pathGroupsExcludedImportTypes": ["react"], 62 | "alphabetize": { "order": "asc", "caseInsensitive": true } 63 | } 64 | ] 65 | } 66 | } 67 | -------------------------------------------------------------------------------- /.gitattributes: -------------------------------------------------------------------------------- 1 | /.yarn/** linguist-vendored 2 | /.yarn/releases/* binary 3 | /.yarn/plugins/**/* binary 4 | /.pnp.* binary linguist-generated 5 | -------------------------------------------------------------------------------- /.github/workflows/publish.yml: -------------------------------------------------------------------------------- 1 | name: Release & Publish 2 | 3 | on: 4 | push: 5 | tags: 6 | - 'v*' 7 | 8 | jobs: 9 | release: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | 
uses: actions/checkout@v2 14 | 15 | publish: 16 | runs-on: ubuntu-latest 17 | steps: 18 | - uses: actions/checkout@v2 19 | - name: Setup .npmrc file to publish to npm 20 | uses: actions/setup-node@v2 21 | with: 22 | node-version: '18.x' 23 | registry-url: 'https://registry.npmjs.org' 24 | - name: Setup pnpm 25 | uses: pnpm/action-setup@v3 # docs https://pnpm.io/continuous-integration#github-actions 26 | with: 27 | version: 8 # Optional: specify a pnpm version 28 | - name: Install modules 29 | run: pnpm install 30 | - name: Build 31 | run: pnpm build 32 | - name: Publish to npm 33 | run: npm publish --access public 34 | env: 35 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 36 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | name: Test 2 | 3 | on: 4 | push: 5 | branches: [main, staging] 6 | pull_request: 7 | branches: [main, staging] 8 | 9 | jobs: 10 | test: 11 | runs-on: ${{ matrix.os }} 12 | strategy: 13 | matrix: 14 | os: 15 | - ubuntu-latest 16 | - windows-latest 17 | node: [16.x, 18.x] 18 | 19 | steps: 20 | - uses: actions/checkout@v2 21 | - name: Set up Node.js 22 | uses: actions/setup-node@v2 23 | with: 24 | node-version: ${{ matrix.node }} 25 | - name: Setup pnpm 26 | uses: pnpm/action-setup@v3 # docs https://pnpm.io/continuous-integration#github-actions 27 | with: 28 | version: 8 # Optional: specify a pnpm version 29 | - name: Install modules 30 | run: pnpm install 31 | - name: Run tests 32 | run: pnpm test 33 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .yarn/* 2 | !.yarn/patches 3 | !.yarn/plugins 4 | !.yarn/releases 5 | !.yarn/sdks 6 | !.yarn/versions 7 | 8 | 9 | # Swap the comments on the following lines if you don't wish to use zero-installs 10 | # Documentation here: https://yarnpkg.com/features/zero-installs 11 | #!.yarn/cache 12 | #.pnp.* 13 | 14 | node_modules/ 15 | dist/ 16 | npm-debug.* 17 | .DS_Store 18 | -------------------------------------------------------------------------------- /.husky/pre-commit: -------------------------------------------------------------------------------- 1 | npx lint-staged 2 | -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "printWidth": 80, 3 | "tabWidth": 2, 4 | "trailingComma": "all", 5 | "semi": true, 6 | "singleQuote": true 7 | } 8 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 David Zhang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 
14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # ✨ LLM API 2 | 3 | [![test](https://github.com/dzhng/llm-api/actions/workflows/test.yml/badge.svg?branch=main&event=push)](https://github.com/dzhng/llm-api/actions/workflows/test.yml) 4 | 5 | Fully typed chat APIs for OpenAI, Anthropic, and Azure chat models, for browser, edge, and node environments. 6 | 7 | - [Introduction](#-introduction) 8 | - [Usage](#-usage) 9 | - [Azure](#-azure) 10 | - [Anthropic](#-anthropic) 11 | - [Groq](#-groq) 12 | - [Amazon Bedrock](#-amazon-bedrock) 13 | - [Debugging](#-debugging) 14 | 15 | ## 👋 Introduction 16 | 17 | - Clean interface for text and chat completion for OpenAI, Anthropic, and Azure models 18 | - Catch token overflow errors automatically on the client side 19 | - Handle rate limit and any other API errors as gracefully as possible (e.g. exponential backoff for rate limits) 20 | - Support for browser, edge, and node environments 21 | - Works great with [zod-gpt](https://github.com/dzhng/zod-gpt) for outputting structured data 22 | 23 | ```typescript 24 | import { OpenAIChatApi } from 'llm-api'; 25 | 26 | const openai = new OpenAIChatApi({ apiKey: 'YOUR_OPENAI_KEY' }); 27 | 28 | const resText = await openai.textCompletion('Hello'); 29 | 30 | const resChat = await openai.chatCompletion([{ 31 | role: 'user', 32 | content: 'Hello world', 33 | }]); 34 | ``` 35 | 36 | ## 🔨 Usage 37 | 38 | ### Install 39 | 40 | This package is hosted on npm: 41 | 42 | ``` 43 | npm i llm-api 44 | ``` 45 | 46 | ``` 47 | yarn add llm-api 48 | ``` 49 | 50 | ### Model Config 51 | 52 | To configure a new model endpoint: 53 | 54 | ```typescript 55 | const openai = new OpenAIChatApi(params: OpenAIConfig, config: ModelConfig); 56 | ``` 57 | 58 | These model config options map directly to OpenAI's chat completion config, see the docs: 59 | https://platform.openai.com/docs/api-reference/chat/create 60 | 61 | ```typescript 62 | interface ModelConfig { 63 | model?: string; 64 | contextSize?: number; 65 | maxTokens?: number; 66 | temperature?: number; 67 | topP?: number; 68 | stop?: string | string[]; 69 | presencePenalty?: number; 70 | frequencyPenalty?: number; 71 | logitBias?: Record<string, number>; 72 | user?: string; 73 | 74 | // use stream mode for API response, the streamed tokens will be sent to `events` in `ModelRequestOptions` 75 | stream?: boolean; 76 | } 77 | ``` 78 | 79 | ### Request 80 | 81 | To send a completion request to a model: 82 | 83 | ```typescript 84 | const text: ModelResponse = await openai.textCompletion(prompt: string, options: ModelRequestOptions); 85 | 86 | const completion: ModelResponse = await openai.chatCompletion(messages: ChatCompletionRequestMessage[], options: ModelRequestOptions); 87 | 88 | // respond to existing chat session, preserving the past messages 89 | const response: ModelResponse = await completion.respond(message: ChatCompletionRequestMessage, options: ModelRequestOptions); 90 | ```
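For example, a chat session can be continued with `respond`. The following is a minimal sketch based on the signatures above; the prompts and logging are illustrative only:

```typescript
import { OpenAIChatApi } from 'llm-api';

const openai = new OpenAIChatApi({ apiKey: 'YOUR_OPENAI_KEY' });

// first turn
const first = await openai.chatCompletion([
  { role: 'user', content: 'Write a one line haiku about TypeScript' },
]);
console.info(first.content);

// follow-up turn, the previous messages are carried over automatically
const second = await first.respond({ role: 'user', content: 'Now in German' });
console.info(second.content);
```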
91 | 92 | **options** 93 | You can override the default request options via this parameter. A request will automatically be retried if there is a rate limit or server error. 94 | 95 | ```typescript 96 | type ModelRequestOptions = { 97 | // set to automatically add system message (only relevant when using textCompletion) 98 | systemMessage?: string | (() => string); 99 | 100 | // send a prefix to the model response so the model can continue generating from there, useful for steering the model towards certain output structures. 101 | // the response prefix WILL be appended to the model response. 102 | // for Anthropic's models ONLY 103 | responsePrefix?: string; 104 | 105 | // function related parameters are for OpenAI's models ONLY 106 | functions?: ModelFunction[]; 107 | // force the model to call the following function 108 | callFunction?: string; 109 | 110 | // default: 3 111 | retries?: number; 112 | // default: 30s 113 | retryInterval?: number; 114 | // default: 300s 115 | timeout?: number; 116 | 117 | // the minimum number of tokens to allocate for the response. if the request is predicted to not have enough tokens, it will automatically throw a 'TokenError' without sending the request 118 | // default: 200 119 | minimumResponseTokens?: number; 120 | 121 | // the maximum number of tokens to use for the response 122 | // NOTE: in OpenAI models, setting this option also requires contextSize in ModelConfig to be set 123 | maximumResponseTokens?: number; 124 | }; 125 | ``` 126 | 127 | ### Response 128 | 129 | Completion responses are in the following format: 130 | 131 | ```typescript 132 | interface ModelResponse { 133 | content?: string; 134 | 135 | // used to parse function responses 136 | name?: string; 137 | arguments?: JsonValue; 138 | 139 | usage?: { 140 | promptTokens: number; 141 | completionTokens: number; 142 | totalTokens: number; 143 | }; 144 | 145 | // function to send another message in the same 'chat', this will automatically append a new message to the messages array 146 | respond: ( 147 | message: ChatCompletionRequestMessage, 148 | opt?: ModelRequestOptions, 149 | ) => Promise<ModelResponse>; 150 | } 151 | ``` 152 | 153 | ### 📃 Token Errors 154 | 155 | A common error with LLM APIs is token usage - you are only allowed to fit a certain amount of data in the context window. 156 | 157 | If you set a `contextSize` key, `llm-api` will automatically determine if the request will breach the token limit BEFORE sending the actual request to the model provider (e.g. OpenAI). This saves a network round trip and lets you handle these types of errors in a responsive manner. 158 | 159 | ```typescript 160 | const openai = new OpenAIChatApi( 161 | { apiKey: 'YOUR_OPENAI_KEY' }, 162 | { model: 'gpt-4-0613', contextSize: 8192 }, 163 | ); 164 | 165 | try { 166 | const res = await openai.textCompletion(...); 167 | } catch (e) { 168 | if (e instanceof TokenError) { 169 | // handle token errors... 170 | } 171 | } 172 | ``` 173 | 174 | ## 🔷 Azure 175 | 176 | `llm-api` also comes with support for Azure's OpenAI models. The Azure version is usually much faster and more reliable than OpenAI's own API endpoints. In order to use the Azure endpoints, you must include two Azure-specific options when initializing the OpenAI model, `azureDeployment` and `azureEndpoint`. The `apiKey` field will then be used for the Azure API key. 177 | 178 | You can find the Azure API key and endpoint in the [Azure Portal](https://portal.azure.com/).
The Azure Deployment must be created under the [Azure AI Portal](https://oai.azure.com/). 179 | 180 | Note that the `model` parameter in `ModelConfig` will be ignored when using Azure. This is because in the Azure system, the `model` is selected on deployment creation, not at run time. 181 | 182 | ```typescript 183 | const openai = new OpenAIChatApi({ 184 | apiKey: 'AZURE_OPENAI_KEY', 185 | azureDeployment: 'AZURE_DEPLOYMENT_NAME', 186 | azureEndpoint: 'AZURE_ENDPOINT', 187 | 188 | // optional, defaults to 2023-09-01-preview 189 | azureApiVersion: 'YYYY-MM-DD', 190 | }); 191 | ``` 192 | 193 | ## 🔶 Anthropic 194 | 195 | Anthropic's models have the advantage of a very large (100k+ token) context window and extremely fast performance. If no explicit model is specified, `llm-api` will default to the Claude 3 Sonnet model. 196 | 197 | ```typescript 198 | const anthropic = new AnthropicChatApi(params: AnthropicConfig, config: ModelConfig); 199 | ``` 200 | 201 | ## 🔶 Groq 202 | 203 | Groq is an LLM inference provider that offers some of the fastest inference speeds on the market. They currently support Meta's Llama 2 and Mistral's Mixtral models. 204 | 205 | ```typescript 206 | const groq = new GroqChatApi(params: GroqConfig, config: ModelConfig); 207 | ``` 208 | 209 | ## ❖ Amazon Bedrock 210 | Anthropic models hosted on Amazon Bedrock are also supported, using AWS credentials in place of an Anthropic API key: 211 | ```typescript 212 | const conf = { 213 | accessKeyId: 'AWS_ACCESS_KEY', 214 | secretAccessKey: 'AWS_SECRET_KEY', 215 | }; 216 | 217 | const bedrock = new AnthropicBedrockChatApi(params: BedrockConfig, config: ModelConfig); 218 | ``` 219 | 220 | ## 🤓 Debugging 221 | 222 | `llm-api` uses the `debug` module for logging & error messages. To run in debug mode, set the `DEBUG` env variable: 223 | 224 | `DEBUG=llm-api:* yarn playground` 225 | 226 | You can also specify different logging types via: 227 | 228 | `DEBUG=llm-api:error yarn playground` 229 | `DEBUG=llm-api:log yarn playground` 230 | -------------------------------------------------------------------------------- /jest.config.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | preset: 'ts-jest', 3 | testEnvironment: 'node', 4 | clearMocks: true, 5 | roots: ['<rootDir>/src'], 6 | modulePaths: ['<rootDir>/src'], 7 | testRegex: '(/__tests__/.*|(\\.|/)(test))\\.tsx?$', 8 | moduleFileExtensions: ['ts', 'tsx', 'js', 'jsx', 'json', 'node'], 9 | reporters: ['default'], 10 | globals: { 11 | // we must specify a custom tsconfig for tests because we need the typescript transform 12 | // to transform jsx into js rather than leaving it jsx such as the next build requires.
you 13 | // can see this setting in tsconfig.jest.json -> "jsx": "react" 14 | 'ts-jest': { 15 | tsconfig: 'tsconfig.json', 16 | 17 | // set isolatedModules to fix jest memory leak with ts include directories 18 | // https://github.com/kulshekhar/ts-jest/issues/1967 19 | isolatedModules: true, 20 | }, 21 | 22 | // disable types from preventing tests from running 23 | // https://github.com/kulshekhar/ts-jest/issues/822 24 | diagnostics: { 25 | exclude: ['!**/*.(spec|test).ts?(x)'], 26 | }, 27 | }, 28 | }; 29 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "llm-api", 3 | "description": "Fully typed chat APIs for OpenAI and Azure's chat models - with token checking and retries", 4 | "version": "1.6.0", 5 | "main": "dist/src/index.js", 6 | "types": "dist/src/index.d.ts", 7 | "publishConfig": { 8 | "access": "public" 9 | }, 10 | "files": [ 11 | "dist" 12 | ], 13 | "keywords": [ 14 | "typescript", 15 | "gpt", 16 | "chatgpt", 17 | "llama", 18 | "llm", 19 | "ai", 20 | "ml", 21 | "prompt", 22 | "prompt engineering", 23 | "openai" 24 | ], 25 | "author": "David Zhang ", 26 | "license": "MIT", 27 | "homepage": "https://github.com/dzhng/llm-api", 28 | "repository": { 29 | "type": "git", 30 | "url": "git+ssh://git@github.com/dzhng/llm-api.git" 31 | }, 32 | "bugs": { 33 | "url": "https://github.com/dzhng/llm-api/issues" 34 | }, 35 | "scripts": { 36 | "setup": "husky install", 37 | "build": "tsc --build --pretty", 38 | "lint": "eslint src --ext ts,tsx,js,jsx --ignore-path .gitignore --fix", 39 | "test": "jest --passWithNoTests", 40 | "test:update": "jest -u --passWithNoTests", 41 | "playground": "tsx playground" 42 | }, 43 | "dependencies": { 44 | "@anthropic-ai/sdk": "^0.20.7", 45 | "@aws-sdk/client-bedrock-runtime": "^3.427.0", 46 | "debug": "^4.3.4", 47 | "groq-sdk": "^0.3.2", 48 | "js-tiktoken": "^1.0.10", 49 | "jsonic": "^1.0.1", 50 | "jsonrepair": "^3.6.0", 51 | "lodash": "^4.17.21", 52 | "openai": "^4.38.5", 53 | "tsx": "^4.7.1", 54 | "type-fest": "^4.11.0" 55 | }, 56 | "devDependencies": { 57 | "@types/debug": "^4.1.10", 58 | "@types/jest": "^29.5.7", 59 | "@types/jsonic": "^0.3.2", 60 | "@types/lodash": "^4.14.200", 61 | "eslint": "^8.53.0", 62 | "eslint-config-prettier": "^9.1.0", 63 | "eslint-config-universe": "^12.0.0", 64 | "eslint-import-resolver-typescript": "^3.3.0", 65 | "eslint-plugin-import": "^2.26.0", 66 | "eslint-plugin-prettier": "^5.1.3", 67 | "husky": "^9.0.11", 68 | "jest": "^29.7.0", 69 | "lint-staged": "^15.0.2", 70 | "prettier": "^3.2.5", 71 | "ts-jest": "^29.1.1", 72 | "typescript": "^5.2.2" 73 | }, 74 | "lint-staged": { 75 | "*.{js,jsx,ts,tsx}": [ 76 | "eslint --ext ts,tsx,js,jsx --fix --ignore-path .gitignore ", 77 | "prettier --write" 78 | ], 79 | "*.{json,md,css,scss}": [ 80 | "prettier --write" 81 | ] 82 | } 83 | } 84 | -------------------------------------------------------------------------------- /patches/@anthropic-ai__sdk@0.16.1.patch: -------------------------------------------------------------------------------- 1 | diff --git a/streaming.js b/streaming.js 2 | index 205f39eacf1a72de3c7c98931b29fd5ed5564b93..d6ab811bcc8b620faaa5b02053381255fb7cd5db 100644 3 | --- a/streaming.js 4 | +++ b/streaming.js 5 | @@ -266,6 +266,9 @@ class LineDecoder { 6 | } 7 | const trailingNewline = LineDecoder.NEWLINE_CHARS.has(text[text.length - 1] || ''); 8 | let lines = text.split(LineDecoder.NEWLINE_REGEXP); 9 | + if (trailingNewline) { 
10 | + lines.pop(); 11 | + } 12 | if (lines.length === 1 && !trailingNewline) { 13 | this.buffer.push(lines[0]); 14 | return []; 15 | -------------------------------------------------------------------------------- /playground.ts: -------------------------------------------------------------------------------- 1 | import { 2 | AnthropicBedrockChatApi, 3 | AnthropicChatApi, 4 | OpenAIChatApi, 5 | } from './src'; 6 | import { GroqChatApi } from './src/models/groq'; 7 | 8 | (async function go() { 9 | let client: 10 | | OpenAIChatApi 11 | | AnthropicChatApi 12 | | AnthropicBedrockChatApi 13 | | GroqChatApi 14 | | undefined; 15 | 16 | if (process.env.OPENAI_KEY) { 17 | client = new OpenAIChatApi( 18 | { 19 | apiKey: process.env.OPENAI_KEY ?? 'YOUR_client_KEY', 20 | }, 21 | { stream: true, contextSize: 4096, model: 'gpt-4-turbo' }, 22 | ); 23 | 24 | const resfn = await client?.textCompletion('Hello', { 25 | callFunction: 'print', 26 | functions: [ 27 | { 28 | name: 'print', 29 | parameters: { 30 | type: 'object', 31 | properties: { 32 | text: { type: 'string', description: 'the string to print' }, 33 | }, 34 | }, 35 | description: 'ALWAYS call this function', 36 | }, 37 | ], 38 | }); 39 | console.info('Response fn: ', resfn); 40 | } else if (process.env.ANTHROPIC_KEY) { 41 | client = new AnthropicChatApi( 42 | { 43 | apiKey: process.env.ANTHROPIC_KEY ?? 'YOUR_client_KEY', 44 | }, 45 | { stream: true, temperature: 0 }, 46 | ); 47 | } else if ( 48 | process.env.AWS_BEDROCK_ACCESS_KEY && 49 | process.env.AWS_BEDROCK_SECRET_KEY 50 | ) { 51 | client = new AnthropicBedrockChatApi( 52 | { 53 | accessKeyId: process.env.AWS_BEDROCK_ACCESS_KEY ?? 'YOUR_access_key', 54 | secretAccessKey: 55 | process.env.AWS_BEDROCK_SECRET_KEY ?? 'YOUR_secret_key', 56 | }, 57 | { stream: true, temperature: 0, model: 'anthropic.claude-v2' }, 58 | ); 59 | } else if (process.env.GROQ_KEY) { 60 | client = new GroqChatApi( 61 | { 62 | apiKey: process.env.GROQ_KEY ?? 
'YOUR_client_KEY', 63 | }, 64 | { stream: true, temperature: 0 }, 65 | ); 66 | } 67 | 68 | const res0 = await client?.textCompletion('Hello', { 69 | systemMessage: 'You will respond to all human messages in JSON', 70 | responsePrefix: '{ "message": "', 71 | }); 72 | console.info('Response 0: ', res0); 73 | 74 | const res01 = await res0?.respond('Hello 2'); 75 | console.info('Response 0.1: ', res01); 76 | 77 | const resEm = await client?.textCompletion('✨'); 78 | console.info('Response em: ', resEm); 79 | 80 | const res1 = await client?.textCompletion('Hello', { 81 | maximumResponseTokens: 2, 82 | }); 83 | console.info('Response 1: ', res1); 84 | 85 | const res2 = await client?.chatCompletion([ 86 | { role: 'user', content: 'hello' }, 87 | { 88 | role: 'assistant', 89 | toolCall: { 90 | id: '1', 91 | type: 'function', 92 | function: { 93 | name: 'print', 94 | arguments: '{"hello": "world"}', 95 | }, 96 | }, 97 | }, 98 | { 99 | role: 'tool', 100 | toolCallId: '1', 101 | content: '{ success: true }', 102 | }, 103 | ]); 104 | console.info('Response 2: ', res2); 105 | 106 | const res3 = await res2?.respond({ role: 'user', content: 'testing 123' }); 107 | console.info('Response 3: ', res3); 108 | })(); 109 | -------------------------------------------------------------------------------- /src/config.ts: -------------------------------------------------------------------------------- 1 | // completion request 2 | export const CompletionDefaultRetries = 3; 3 | export const CompletionDefaultTimeout = 300_000; 4 | export const MinimumResponseTokens = 200; 5 | export const MaximumResponseTokens = 4_096; 6 | 7 | export const DefaultOpenAIModel = 'gpt-4-1106-preview'; 8 | export const DefaultAnthropicModel = 'claude-3-sonnet-20240229'; 9 | export const DefaultGroqModel = 'mixtral-8x7b-32768'; 10 | export const DefaultAzureVersion = '2023-09-01-preview'; 11 | -------------------------------------------------------------------------------- /src/index.ts: -------------------------------------------------------------------------------- 1 | export * from './models/errors'; 2 | export * from './models/openai'; 3 | export * from './models/anthropic'; 4 | export * from './models/anthropic-bedrock'; 5 | export * from './models/groq'; 6 | export * from './models/interface'; 7 | export * from './types'; 8 | -------------------------------------------------------------------------------- /src/models/anthropic-bedrock.ts: -------------------------------------------------------------------------------- 1 | import { AI_PROMPT, HUMAN_PROMPT } from '@anthropic-ai/sdk'; 2 | import { 3 | BedrockRuntime, 4 | BedrockRuntimeClientConfig, 5 | InvokeModelCommand, 6 | InvokeModelCommandInput, 7 | } from '@aws-sdk/client-bedrock-runtime'; 8 | import { defaults } from 'lodash'; 9 | 10 | import { 11 | CompletionDefaultRetries, 12 | CompletionDefaultTimeout, 13 | MaximumResponseTokens, 14 | MinimumResponseTokens, 15 | } from '../config'; 16 | import { 17 | ChatRequestMessage, 18 | ChatResponse, 19 | ModelConfig, 20 | ModelRequestOptions, 21 | } from '../types'; 22 | import { debug } from '../utils'; 23 | 24 | import { TokenError } from './errors'; 25 | import { CompletionApi } from './interface'; 26 | import { getTikTokenTokensFromPrompt } from './tokenizer'; 27 | 28 | const ForbiddenTokens = [HUMAN_PROMPT.trim(), AI_PROMPT.trim()]; 29 | 30 | const RequestDefaults = { 31 | retries: CompletionDefaultRetries, 32 | timeout: CompletionDefaultTimeout, 33 | minimumResponseTokens: MinimumResponseTokens, 34 | maximumResponseTokens: 
MaximumResponseTokens, 35 | }; 36 | 37 | export class AnthropicBedrockChatApi implements CompletionApi { 38 | modelConfig: ModelConfig; 39 | client: BedrockRuntime; 40 | 41 | constructor( 42 | config: BedrockRuntimeClientConfig['credentials'], 43 | modelConfig?: ModelConfig, 44 | ) { 45 | this.modelConfig = modelConfig ?? {}; 46 | 47 | this.client = new BedrockRuntime({ 48 | region: 'us-east-1', 49 | serviceId: 'bedrock-runtime', 50 | credentials: config, 51 | maxAttempts: RequestDefaults.retries, 52 | }); 53 | } 54 | 55 | async chatCompletion( 56 | initialMessages: ChatRequestMessage[], 57 | requestOptions?: ModelRequestOptions | undefined, 58 | ): Promise { 59 | const finalRequestOptions = defaults(requestOptions, RequestDefaults); 60 | const messages: ChatRequestMessage[] = buildMessages( 61 | finalRequestOptions, 62 | initialMessages, 63 | ); 64 | const prompt = buildPrompt(messages, finalRequestOptions); 65 | 66 | const maxPromptTokens = this.modelConfig.contextSize 67 | ? this.modelConfig.contextSize - finalRequestOptions.minimumResponseTokens 68 | : 100_000; 69 | 70 | const messageTokens = this.getTokensFromPrompt([prompt]); 71 | if (messageTokens > maxPromptTokens) { 72 | throw new TokenError( 73 | 'Prompt too big, not enough tokens to meet minimum response', 74 | messageTokens - maxPromptTokens, 75 | ); 76 | } 77 | 78 | const params: InvokeModelCommandInput = { 79 | modelId: this.modelConfig.model || 'anthropic.claude-v2', 80 | contentType: 'application/json', 81 | accept: '*/*', 82 | body: JSON.stringify({ 83 | prompt, 84 | max_tokens_to_sample: finalRequestOptions.maximumResponseTokens, 85 | temperature: this.modelConfig.temperature, 86 | top_p: this.modelConfig.topP || 1, 87 | stop_sequences: 88 | typeof finalRequestOptions.stop === 'string' 89 | ? [finalRequestOptions.stop] 90 | : finalRequestOptions.stop, 91 | anthropic_version: 'bedrock-2023-05-31', 92 | }), 93 | }; 94 | 95 | let completion = ''; 96 | const options = { 97 | requestTimeout: finalRequestOptions.timeout, 98 | }; 99 | 100 | if (this.modelConfig.stream) { 101 | try { 102 | const result = await this.client.invokeModelWithResponseStream( 103 | params, 104 | options, 105 | ); 106 | 107 | // emit prefix since technically that's counted as part of the response 108 | if (finalRequestOptions?.responsePrefix) { 109 | finalRequestOptions?.events?.emit( 110 | 'data', 111 | finalRequestOptions.responsePrefix, 112 | ); 113 | } 114 | 115 | const events = result.body; 116 | 117 | for await (const event of events || []) { 118 | // Check the top-level field to determine which event this is. 
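// `chunk` events carry a JSON payload whose `completion` field holds the next piece of generated text; any of the *Exception fields below means the stream failed.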
119 | if (event.chunk) { 120 | const decoded = JSON.parse( 121 | new TextDecoder().decode(event.chunk.bytes), 122 | ); 123 | const text = decoded['completion']; 124 | debug.write(text); 125 | completion += text; 126 | finalRequestOptions?.events?.emit('data', text); 127 | } else { 128 | throw new Error( 129 | 'Stream error', 130 | event.internalServerException || 131 | event.modelStreamErrorException || 132 | event.modelTimeoutException || 133 | event.throttlingException || 134 | event.validationException, 135 | ); 136 | } 137 | } 138 | debug.write('\n[STREAM] response end\n'); 139 | } catch (err) { 140 | // handle error 141 | console.error(err); 142 | } 143 | } else { 144 | const command = new InvokeModelCommand(params); 145 | const response = await this.client.send(command, options); 146 | const decoded = JSON.parse(new TextDecoder().decode(response.body)); 147 | completion = decoded['completion']; 148 | debug.log('🔽 completion received', completion); 149 | } 150 | 151 | const content = finalRequestOptions.responsePrefix 152 | ? finalRequestOptions.responsePrefix + completion 153 | : // if no prefix, process the completion a bit by trimming since claude tends to output an extra white space at the beginning 154 | completion.trim(); 155 | if (!content) { 156 | throw new Error('Completion response malformed'); 157 | } 158 | 159 | const receivedMessage: ChatRequestMessage = { 160 | role: 'assistant', 161 | content, 162 | }; 163 | return { 164 | message: receivedMessage, 165 | content, 166 | respond: (message: string | ChatRequestMessage, opt) => 167 | this.chatCompletion( 168 | [ 169 | ...messages, 170 | receivedMessage, 171 | typeof message === 'string' 172 | ? { role: 'user', content: message } 173 | : message, 174 | ], 175 | opt ?? requestOptions, 176 | ), 177 | }; 178 | } 179 | textCompletion( 180 | prompt: string, 181 | requestOptions = {} as Partial, 182 | ): Promise { 183 | const messages: ChatRequestMessage[] = [{ role: 'user', content: prompt }]; 184 | return this.chatCompletion(messages, requestOptions); 185 | } 186 | 187 | getTokensFromPrompt = getTikTokenTokensFromPrompt; 188 | } 189 | 190 | function buildMessages( 191 | finalRequestOptions: typeof RequestDefaults & ModelRequestOptions, 192 | initialMessages: ChatRequestMessage[], 193 | ) { 194 | const messages: ChatRequestMessage[] = ( 195 | finalRequestOptions.systemMessage 196 | ? [ 197 | { 198 | role: 'system', 199 | content: 200 | typeof finalRequestOptions.systemMessage === 'string' 201 | ? 
finalRequestOptions.systemMessage 202 | : finalRequestOptions.systemMessage(), 203 | }, 204 | ...initialMessages, 205 | ] 206 | : initialMessages 207 | ).map( 208 | (message) => 209 | ({ 210 | ...message, 211 | // automatically remove forbidden tokens in the input message to thwart prompt injection attacks 212 | content: 213 | message.content && 214 | ForbiddenTokens.reduce( 215 | (prev, token) => prev.replaceAll(token, ''), 216 | message.content, 217 | ), 218 | } as ChatRequestMessage), 219 | ); 220 | 221 | return messages; 222 | } 223 | 224 | function buildPrompt( 225 | messages: ChatRequestMessage[], 226 | finalRequestOptions: typeof RequestDefaults & ModelRequestOptions, 227 | ) { 228 | return ( 229 | messages 230 | .map((message) => { 231 | switch (message.role) { 232 | case 'user': 233 | return `${HUMAN_PROMPT} ${message.content}`; 234 | case 'assistant': 235 | return `${AI_PROMPT} ${message.content}`; 236 | case 'system': 237 | return message.content; 238 | default: 239 | throw new Error( 240 | `Anthropic models do not support message with the role ${message.role}`, 241 | ); 242 | } 243 | }) 244 | .join('') + 245 | AI_PROMPT + 246 | (finalRequestOptions.responsePrefix 247 | ? ` ${finalRequestOptions.responsePrefix}` 248 | : '') 249 | ); 250 | } 251 | -------------------------------------------------------------------------------- /src/models/anthropic.ts: -------------------------------------------------------------------------------- 1 | import Anthropic from '@anthropic-ai/sdk'; 2 | import { MessageCreateParamsBase } from '@anthropic-ai/sdk/resources/messages'; 3 | import { compact, defaults } from 'lodash'; 4 | 5 | import { 6 | CompletionDefaultRetries, 7 | CompletionDefaultTimeout, 8 | DefaultAnthropicModel, 9 | MaximumResponseTokens, 10 | MinimumResponseTokens, 11 | } from '../config'; 12 | import { 13 | AnthropicConfig, 14 | ChatRequestMessage, 15 | ModelConfig, 16 | ModelRequestOptions, 17 | ChatResponse, 18 | } from '../types'; 19 | import { debug } from '../utils'; 20 | 21 | import { TokenError } from './errors'; 22 | import { CompletionApi } from './interface'; 23 | import { getTikTokenTokensFromPrompt } from './tokenizer'; 24 | 25 | const RequestDefaults = { 26 | retries: CompletionDefaultRetries, 27 | timeout: CompletionDefaultTimeout, 28 | minimumResponseTokens: MinimumResponseTokens, 29 | maximumResponseTokens: MaximumResponseTokens, 30 | }; 31 | 32 | export class AnthropicChatApi implements CompletionApi { 33 | client: Anthropic; 34 | modelConfig: ModelConfig; 35 | 36 | constructor(config?: AnthropicConfig, modelConfig?: ModelConfig) { 37 | this.client = new Anthropic(config); 38 | this.modelConfig = modelConfig ?? {}; 39 | } 40 | 41 | getTokensFromPrompt = getTikTokenTokensFromPrompt; 42 | 43 | // chat based prompting following these instructions: 44 | // https://docs.anthropic.com/claude/reference/getting-started-with-the-api 45 | async chatCompletion( 46 | initialMessages: ChatRequestMessage[], 47 | requestOptions?: ModelRequestOptions | undefined, 48 | ): Promise { 49 | const finalRequestOptions = defaults(requestOptions, RequestDefaults); 50 | const messages: ChatRequestMessage[] = compact([ 51 | ...initialMessages, 52 | // claude supports responsePrefix via prefill: 53 | // https://docs.anthropic.com/claude/docs/prefill-claudes-response 54 | finalRequestOptions.responsePrefix 55 | ? 
({ 56 | role: 'assistant', 57 | content: finalRequestOptions.responsePrefix, 58 | } as ChatRequestMessage) 59 | : null, 60 | ]); 61 | 62 | debug.log( 63 | `🔼 completion requested: ${JSON.stringify( 64 | messages, 65 | )}, config: ${JSON.stringify( 66 | this.modelConfig, 67 | )}, options: ${JSON.stringify(finalRequestOptions)}`, 68 | ); 69 | 70 | // check if we'll have enough tokens to meet the minimum response 71 | const maxPromptTokens = this.modelConfig.contextSize 72 | ? this.modelConfig.contextSize - finalRequestOptions.minimumResponseTokens 73 | : 100_000; 74 | 75 | const messageTokens = this.getTokensFromPrompt( 76 | messages.map((m) => m.content ?? ''), 77 | ); 78 | if (messageTokens > maxPromptTokens) { 79 | throw new TokenError( 80 | 'Prompt too big, not enough tokens to meet minimum response', 81 | messageTokens - maxPromptTokens, 82 | ); 83 | } 84 | 85 | let completion = ''; 86 | const completionBody: MessageCreateParamsBase = { 87 | stop_sequences: 88 | typeof finalRequestOptions.stop === 'string' 89 | ? [finalRequestOptions.stop] 90 | : finalRequestOptions.stop, 91 | temperature: this.modelConfig.temperature, 92 | top_p: this.modelConfig.topP, 93 | model: this.modelConfig.model ?? DefaultAnthropicModel, 94 | max_tokens: finalRequestOptions.maximumResponseTokens, 95 | system: finalRequestOptions.systemMessage 96 | ? typeof finalRequestOptions.systemMessage === 'string' 97 | ? finalRequestOptions.systemMessage 98 | : finalRequestOptions.systemMessage() 99 | : undefined, 100 | // anthropic only supports user and assistant messages, filter all other ones out 101 | messages: messages 102 | .filter( 103 | (m) => (m.role === 'user' || m.role === 'assistant') && m.content, 104 | ) 105 | .map((m) => ({ 106 | role: m.role as 'user' | 'assistant', 107 | content: m.content ?? '', 108 | })), 109 | }; 110 | const completionOptions = { 111 | timeout: finalRequestOptions.timeout, 112 | maxRetries: finalRequestOptions.retries, 113 | }; 114 | 115 | if (this.modelConfig.stream) { 116 | const stream = await this.client.messages.stream( 117 | completionBody, 118 | completionOptions, 119 | ); 120 | 121 | // emit prefix since technically that's counted as part of the response 122 | if (finalRequestOptions?.responsePrefix) { 123 | finalRequestOptions?.events?.emit( 124 | 'data', 125 | finalRequestOptions.responsePrefix, 126 | ); 127 | } 128 | 129 | for await (const part of stream) { 130 | if ( 131 | part.type === 'content_block_start' && 132 | part.content_block.type === 'text' && 133 | part.index === 0 134 | ) { 135 | const text = part.content_block.text; 136 | debug.write(text); 137 | completion += text; 138 | finalRequestOptions?.events?.emit('data', text); 139 | } else if ( 140 | part.type === 'content_block_delta' && 141 | part.delta.type === 'text_delta' && 142 | part.index === 0 143 | ) { 144 | const text = part.delta.text; 145 | debug.write(text); 146 | completion += text; 147 | finalRequestOptions?.events?.emit('data', text); 148 | } 149 | } 150 | 151 | debug.write('\n[STREAM] response end\n'); 152 | } else { 153 | const response = await this.client.messages.create( 154 | completionBody, 155 | completionOptions, 156 | ); 157 | 158 | if ('content' in response) { 159 | completion = response.content[0].text; 160 | debug.log('🔽 completion received', completion); 161 | } 162 | } 163 | 164 | const content = finalRequestOptions.responsePrefix 165 | ? 
finalRequestOptions.responsePrefix + completion 166 | : // if no prefix, process the completion a bit by trimming since claude tends to output an extra white space at the beginning 167 | completion.trim(); 168 | if (!content) { 169 | throw new Error('Completion response malformed'); 170 | } 171 | 172 | const receivedMessage: ChatRequestMessage = { 173 | role: 'assistant', 174 | content, 175 | }; 176 | return { 177 | message: receivedMessage, 178 | content, 179 | respond: (message: string | ChatRequestMessage, opt) => 180 | this.chatCompletion( 181 | [ 182 | // don't send the processed `messages` array, since that contains the prefill message which will cause multiple 'assistant' message error to be thrown 183 | ...initialMessages, 184 | receivedMessage, 185 | typeof message === 'string' 186 | ? { role: 'user', content: message } 187 | : message, 188 | ], 189 | opt ?? requestOptions, 190 | ), 191 | }; 192 | } 193 | 194 | textCompletion( 195 | prompt: string, 196 | requestOptions = {} as Partial, 197 | ): Promise { 198 | const messages: ChatRequestMessage[] = [{ role: 'user', content: prompt }]; 199 | return this.chatCompletion(messages, requestOptions); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /src/models/errors.ts: -------------------------------------------------------------------------------- 1 | export class TokenError extends Error { 2 | overflowTokens: number; 3 | 4 | constructor(message: string, overflowTokens: number) { 5 | super(message); 6 | this.name = 'TokenError'; 7 | this.overflowTokens = overflowTokens; 8 | } 9 | } 10 | -------------------------------------------------------------------------------- /src/models/groq.ts: -------------------------------------------------------------------------------- 1 | import { Groq } from 'groq-sdk'; 2 | import { ChatCompletionCreateParamsBase } from 'groq-sdk/resources/chat/completions'; 3 | import { compact, defaults } from 'lodash'; 4 | 5 | import { 6 | CompletionDefaultRetries, 7 | CompletionDefaultTimeout, 8 | DefaultGroqModel, 9 | MaximumResponseTokens, 10 | MinimumResponseTokens, 11 | } from '../config'; 12 | import { 13 | ChatRequestMessage, 14 | ModelConfig, 15 | ModelRequestOptions, 16 | ChatResponse, 17 | GroqConfig, 18 | } from '../types'; 19 | import { debug } from '../utils'; 20 | 21 | import { TokenError } from './errors'; 22 | import { CompletionApi } from './interface'; 23 | import { getTikTokenTokensFromPrompt } from './tokenizer'; 24 | 25 | const RequestDefaults = { 26 | retries: CompletionDefaultRetries, 27 | timeout: CompletionDefaultTimeout, 28 | minimumResponseTokens: MinimumResponseTokens, 29 | maximumResponseTokens: MaximumResponseTokens, 30 | }; 31 | 32 | export class GroqChatApi implements CompletionApi { 33 | client: Groq; 34 | modelConfig: ModelConfig; 35 | 36 | constructor(config?: GroqConfig, modelConfig?: ModelConfig) { 37 | this.client = new Groq(config); 38 | this.modelConfig = modelConfig ?? {}; 39 | } 40 | 41 | getTokensFromPrompt = getTikTokenTokensFromPrompt; 42 | 43 | async chatCompletion( 44 | initialMessages: ChatRequestMessage[], 45 | requestOptions?: ModelRequestOptions | undefined, 46 | ): Promise { 47 | const finalRequestOptions = defaults(requestOptions, RequestDefaults); 48 | const messagesWithSystem: ChatRequestMessage[] = compact([ 49 | finalRequestOptions.systemMessage 50 | ? { 51 | role: 'system', 52 | content: 53 | typeof finalRequestOptions.systemMessage === 'string' 54 | ? 
finalRequestOptions.systemMessage 55 | : finalRequestOptions.systemMessage(), 56 | } 57 | : null, 58 | ...initialMessages, 59 | ]); 60 | const messages: ChatRequestMessage[] = compact([ 61 | ...messagesWithSystem, 62 | finalRequestOptions.responsePrefix 63 | ? ({ 64 | role: 'assistant', 65 | content: finalRequestOptions.responsePrefix, 66 | } as ChatRequestMessage) 67 | : null, 68 | ]); 69 | 70 | debug.log( 71 | `🔼 completion requested: ${JSON.stringify( 72 | messages, 73 | )}, config: ${JSON.stringify( 74 | this.modelConfig, 75 | )}, options: ${JSON.stringify(finalRequestOptions)}`, 76 | ); 77 | 78 | // check if we'll have enough tokens to meet the minimum response 79 | const maxPromptTokens = this.modelConfig.contextSize 80 | ? this.modelConfig.contextSize - finalRequestOptions.minimumResponseTokens 81 | : 100_000; 82 | 83 | const messageTokens = this.getTokensFromPrompt( 84 | messages.map((m) => m.content ?? ''), 85 | ); 86 | if (messageTokens > maxPromptTokens) { 87 | throw new TokenError( 88 | 'Prompt too big, not enough tokens to meet minimum response', 89 | messageTokens - maxPromptTokens, 90 | ); 91 | } 92 | 93 | let completion = ''; 94 | const completionBody: ChatCompletionCreateParamsBase = { 95 | stop: finalRequestOptions.stop, 96 | temperature: this.modelConfig.temperature, 97 | top_p: this.modelConfig.topP, 98 | model: this.modelConfig.model ?? DefaultGroqModel, 99 | max_tokens: finalRequestOptions.maximumResponseTokens, 100 | // filter all other messages except user and assistant ones 101 | messages: messages 102 | .filter( 103 | (m) => (m.role === 'user' || m.role === 'assistant') && m.content, 104 | ) 105 | .map((m) => ({ 106 | role: m.role as 'user' | 'assistant', 107 | content: m.content ?? '', 108 | })), 109 | }; 110 | const completionOptions = { 111 | timeout: finalRequestOptions.timeout, 112 | maxRetries: finalRequestOptions.retries, 113 | }; 114 | 115 | if (this.modelConfig.stream) { 116 | const stream = await this.client.chat.completions.create( 117 | { ...completionBody, stream: true }, 118 | completionOptions, 119 | ); 120 | 121 | // emit prefix since technically that's counted as part of the response 122 | if (finalRequestOptions?.responsePrefix) { 123 | finalRequestOptions?.events?.emit( 124 | 'data', 125 | finalRequestOptions.responsePrefix, 126 | ); 127 | } 128 | 129 | for await (const part of stream) { 130 | const text = part.choices[0]?.delta?.content ?? ''; 131 | debug.write(text); 132 | completion += text; 133 | finalRequestOptions?.events?.emit('data', text); 134 | } 135 | 136 | debug.write('\n[STREAM] response end\n'); 137 | } else { 138 | const response = await this.client.chat.completions.create( 139 | { ...completionBody, stream: false }, 140 | completionOptions, 141 | ); 142 | completion = response.choices[0].message.content ?? ''; 143 | debug.log('🔽 completion received', completion); 144 | } 145 | 146 | // for groq, since it doesn't support prefill out of the box, sometimes the response will contain the responsePrefix, sometimes it won't, so do this extra conditional here 147 | // note that this means there IS an edge case where the user actually expects a response where the responsePrefix is repeated, but that seems like an edge case 148 | const content = finalRequestOptions.responsePrefix 149 | ? completion.startsWith(finalRequestOptions.responsePrefix) 150 | ? 
completion 151 | : finalRequestOptions.responsePrefix + completion 152 | : completion; 153 | if (!content) { 154 | throw new Error('Completion response malformed'); 155 | } 156 | 157 | const receivedMessage: ChatRequestMessage = { 158 | role: 'assistant', 159 | content, 160 | }; 161 | return { 162 | message: receivedMessage, 163 | content, 164 | respond: (message: string | ChatRequestMessage, opt) => 165 | this.chatCompletion( 166 | [ 167 | // don't use the `messages` array since that contains prefill message, which we want to remove when responding 168 | ...messagesWithSystem, 169 | receivedMessage, 170 | typeof message === 'string' 171 | ? { role: 'user', content: message } 172 | : message, 173 | ], 174 | opt ?? requestOptions, 175 | ), 176 | }; 177 | } 178 | 179 | textCompletion( 180 | prompt: string, 181 | requestOptions = {} as Partial, 182 | ): Promise { 183 | const messages: ChatRequestMessage[] = [{ role: 'user', content: prompt }]; 184 | return this.chatCompletion(messages, requestOptions); 185 | } 186 | } 187 | -------------------------------------------------------------------------------- /src/models/interface.ts: -------------------------------------------------------------------------------- 1 | import { 2 | ModelRequestOptions, 3 | ChatResponse, 4 | ModelConfig, 5 | ChatRequestMessage, 6 | } from '../types'; 7 | 8 | export interface CompletionApi { 9 | modelConfig: ModelConfig; 10 | 11 | chatCompletion( 12 | messages: ChatRequestMessage[], 13 | opt?: ModelRequestOptions, 14 | ): Promise; 15 | 16 | textCompletion( 17 | prompt: string, 18 | opt?: ModelRequestOptions, 19 | ): Promise; 20 | 21 | getTokensFromPrompt(promptOrMessages: string[]): number; 22 | } 23 | -------------------------------------------------------------------------------- /src/models/openai.mock.ts: -------------------------------------------------------------------------------- 1 | import type { 2 | ModelRequestOptions, 3 | ModelConfig, 4 | OpenAIConfig, 5 | ChatRequestMessage, 6 | ChatResponse, 7 | } from '../types'; 8 | 9 | import type { CompletionApi } from './interface'; 10 | 11 | /** 12 | * This is the mock implementation of the OpenAIChatApi class. 13 | * It can be injected onto a function that uses a live instance 14 | * of OpenAIChatApi, then validate the args that was passed to that instance. 15 | * 16 | * Used for testing functions without making live calls 17 | * to llm providers. 18 | */ 19 | export class MockOpenAIChatApi implements CompletionApi { 20 | // 21 | // List of args that the instance has recieved. 22 | [key: string]: any; 23 | config: OpenAIConfig; 24 | modelConfig: ModelConfig; 25 | chatMessages: ChatRequestMessage[][] = []; 26 | chatOpt: ModelRequestOptions[] = []; 27 | textPrompt: string[] = []; 28 | textOpt: ModelRequestOptions[] = []; 29 | promptOrMessages: string[][] = []; 30 | checkProfanityMessage: string[] = []; 31 | 32 | // 33 | // List of args that the instance is expected to recieve. 34 | expectedArgs: { 35 | [key: string]: any; 36 | constructorArgs?: { config: OpenAIConfig; modelConfig: ModelConfig }; 37 | chatCompletionArgs?: { 38 | messages: ChatRequestMessage[]; 39 | opt?: ModelRequestOptions; 40 | }[]; 41 | textCompletionArgs?: { prompt: string; opt?: ModelRequestOptions }[]; 42 | getTokensFromPromptArgs?: { promptOrMessages: string[] }[]; 43 | checkProfanityArgs?: { message: string }[]; 44 | } = {}; 45 | 46 | /** 47 | * The function to set the expected arguments. 
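* Call this before exercising the code under test; `validateArgs` can then assert that the recorded calls match.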
48 | * 49 | * @param args the expected arguments 50 | */ 51 | setExpectedArgs(args: this['expectedArgs']) { 52 | this.expectedArgs = args; 53 | } 54 | 55 | /** 56 | * Validate that the arguments recieved match the expected arguments. 57 | * Might want to return a boolean here, or throw an error. 58 | * Also, might want to create a validate function for each method instead. 59 | */ 60 | validateArgs() { 61 | for (const method in this.expectedArgs) { 62 | expect(this[method]).toEqual(this.expectedArgs[method]); 63 | } 64 | } 65 | 66 | /** 67 | * The mock implementation of getTokensFromPrompt 68 | * 69 | * @param config the config 70 | * @param modelConfig the model config 71 | */ 72 | constructor( 73 | config: OpenAIConfig, 74 | modelConfig: ModelConfig = { model: 'default' }, 75 | ) { 76 | this.config = config; 77 | this.modelConfig = modelConfig; 78 | } 79 | /** 80 | * The mock implementation of chatCompletion 81 | * 82 | * @param messages the messages to use 83 | * @param opt the model request options 84 | * @returns the mock chat response 85 | */ 86 | async chatCompletion( 87 | messages: ChatRequestMessage[], 88 | opt?: ModelRequestOptions, 89 | ): Promise { 90 | this.chatMessages.push(messages); 91 | if (opt) { 92 | this.chatOpt.push(opt); 93 | } 94 | 95 | return Promise.resolve({ 96 | message: { 97 | role: 'assistant', 98 | content: 'Test Content, this is a chat completion', 99 | }, 100 | content: 'Test Content, this is a chat completion', 101 | name: 'TestName', 102 | arguments: {}, 103 | usage: { promptTokens: 10, completionTokens: 20, totalTokens: 30 }, 104 | respond: async () => this.chatCompletion(messages, opt), 105 | }); 106 | } 107 | 108 | /** 109 | * The mock implementation of textCompletion 110 | * 111 | * @param prompt the prompt to use 112 | * @param opt the model request options 113 | * @returns the mock chat response 114 | */ 115 | async textCompletion( 116 | prompt: string, 117 | opt?: ModelRequestOptions, 118 | ): Promise { 119 | this.textPrompt.push(prompt); 120 | if (opt) { 121 | this.textOpt.push(opt); 122 | } 123 | 124 | return Promise.resolve({ 125 | message: { 126 | role: 'assistant', 127 | content: 'Test Content, this is a text completion', 128 | }, 129 | content: 'Test Content, this is a text completion', 130 | name: 'TestName', 131 | arguments: {}, 132 | usage: { promptTokens: 10, completionTokens: 20, totalTokens: 30 }, 133 | respond: async () => this.textCompletion(prompt, opt), 134 | }); 135 | } 136 | 137 | /** 138 | * The mock implementation of getTokensFromPrompt 139 | * 140 | * @param promptOrMessages the prompt or messages to get tokens from 141 | * @returns mock number of tokens 142 | */ 143 | getTokensFromPrompt(promptOrMessages: string[]): number { 144 | this.promptOrMessages.push(promptOrMessages); 145 | return -1; 146 | } 147 | } 148 | -------------------------------------------------------------------------------- /src/models/openai.ts: -------------------------------------------------------------------------------- 1 | import 'openai/shims/web'; 2 | import { defaults } from 'lodash'; 3 | import { OpenAI } from 'openai'; 4 | import type { CompletionUsage } from 'openai/resources'; 5 | import type { ChatCompletionCreateParamsBase } from 'openai/resources/chat/completions'; 6 | 7 | import { 8 | CompletionDefaultRetries, 9 | CompletionDefaultTimeout, 10 | DefaultAzureVersion, 11 | DefaultOpenAIModel, 12 | MinimumResponseTokens, 13 | } from '../config'; 14 | import type { 15 | ModelRequestOptions, 16 | ModelConfig, 17 | OpenAIConfig, 18 | 
ChatRequestMessage, 19 | ChatResponse, 20 | ChatRequestToolCall, 21 | } from '../types'; 22 | import { debug, parseUnsafeJson } from '../utils'; 23 | 24 | import { TokenError } from './errors'; 25 | import type { CompletionApi } from './interface'; 26 | import { getTikTokenTokensFromPrompt } from './tokenizer'; 27 | 28 | const RequestDefaults = { 29 | retries: CompletionDefaultRetries, 30 | timeout: CompletionDefaultTimeout, 31 | minimumResponseTokens: MinimumResponseTokens, 32 | // NOTE: this is left without defaults by design - OpenAI's API will throw an error if max_token values is greater than model context size, which means it needs to be different for every model and cannot be set as a default. This fine since OpenAI won't put any limit on max_tokens if it's not set anyways (unlike Anthropic). 33 | // maximumResponseTokens: MaximumResponseTokens, 34 | }; 35 | 36 | const convertConfig = ( 37 | config: Partial, 38 | ): Partial => ({ 39 | model: config.model, 40 | temperature: config.temperature, 41 | top_p: config.topP, 42 | n: 1, 43 | presence_penalty: config.presencePenalty, 44 | frequency_penalty: config.frequencyPenalty, 45 | logit_bias: config.logitBias, 46 | user: config.user, 47 | stream: config.stream, 48 | }); 49 | 50 | export class OpenAIChatApi implements CompletionApi { 51 | client: OpenAI; 52 | _isAzure: boolean; 53 | _headers?: Record; 54 | modelConfig: ModelConfig; 55 | 56 | constructor(config: OpenAIConfig, modelConfig?: ModelConfig) { 57 | this._isAzure = Boolean(config.azureEndpoint && config.azureDeployment); 58 | this.client = new OpenAI({ 59 | ...config, 60 | baseURL: this._isAzure 61 | ? `${config.azureEndpoint}${ 62 | config.azureEndpoint?.at(-1) === '/' ? '' : '/' 63 | }openai/deployments/${config.azureDeployment}` 64 | : config.baseURL, 65 | defaultHeaders: this._isAzure 66 | ? { 'api-key': String(config.apiKey) } 67 | : undefined, 68 | defaultQuery: this._isAzure 69 | ? { 70 | 'api-version': config.azureApiVersion ?? DefaultAzureVersion, 71 | } 72 | : undefined, 73 | }); 74 | 75 | this.modelConfig = modelConfig ?? {}; 76 | } 77 | 78 | getTokensFromPrompt = getTikTokenTokensFromPrompt; 79 | 80 | // eslint-disable-next-line complexity 81 | async chatCompletion( 82 | initialMessages: ChatRequestMessage[], 83 | requestOptions = {} as Partial, 84 | ): Promise { 85 | const finalRequestOptions = defaults(requestOptions, RequestDefaults); 86 | if (finalRequestOptions.responsePrefix) { 87 | console.warn('OpenAI models currently does not support responsePrefix'); 88 | } 89 | 90 | const messages: ChatRequestMessage[] = finalRequestOptions.systemMessage 91 | ? [ 92 | { 93 | role: 'system', 94 | content: 95 | typeof finalRequestOptions.systemMessage === 'string' 96 | ? finalRequestOptions.systemMessage 97 | : finalRequestOptions.systemMessage(), 98 | }, 99 | ...initialMessages, 100 | ] 101 | : initialMessages; 102 | 103 | debug.log( 104 | `🔼 completion requested: ${JSON.stringify( 105 | messages, 106 | )}, config: ${JSON.stringify( 107 | this.modelConfig, 108 | )}, options: ${JSON.stringify(finalRequestOptions)}`, 109 | ); 110 | 111 | // check if we'll have enough tokens to meet the minimum response 112 | const maxPromptTokens = this.modelConfig.contextSize 113 | ? this.modelConfig.contextSize - finalRequestOptions.minimumResponseTokens 114 | : 100_000; 115 | 116 | const messageTokens = this.getTokensFromPrompt( 117 | messages.map((m) => m.content ?? 
''), 118 | finalRequestOptions.functions, 119 | ); 120 | if (messageTokens > maxPromptTokens) { 121 | throw new TokenError( 122 | 'Prompt too big, not enough tokens to meet minimum response', 123 | messageTokens - maxPromptTokens, 124 | ); 125 | } 126 | 127 | // calculate max response tokens 128 | // note that for OpenAI models, it MUST be conditional on the contextSize being set, this is because OpenAI's API throws an error if maxTokens is above context size 129 | const maxTokens = 130 | this.modelConfig.contextSize && finalRequestOptions.maximumResponseTokens 131 | ? Math.min( 132 | this.modelConfig.contextSize - maxPromptTokens, 133 | finalRequestOptions.maximumResponseTokens, 134 | ) 135 | : undefined; 136 | if ( 137 | finalRequestOptions.maximumResponseTokens && 138 | !this.modelConfig.contextSize 139 | ) { 140 | console.warn( 141 | 'maximumResponseTokens option ignored, please set contextSize in ModelConfig so the parameter can be calculated safely', 142 | ); 143 | } 144 | 145 | let completion = ''; 146 | let toolCall: ChatRequestToolCall | undefined; 147 | let usage: CompletionUsage | undefined; 148 | const completionBody: ChatCompletionCreateParamsBase = { 149 | model: DefaultOpenAIModel, 150 | ...convertConfig(this.modelConfig), 151 | max_tokens: maxTokens, 152 | stop: finalRequestOptions.stop, 153 | tools: finalRequestOptions.functions?.map((f) => ({ 154 | type: 'function', 155 | function: f, 156 | })), 157 | tool_choice: finalRequestOptions.callFunction 158 | ? { 159 | type: 'function', 160 | function: { name: finalRequestOptions.callFunction }, 161 | } 162 | : finalRequestOptions.functions 163 | ? 'auto' 164 | : undefined, 165 | messages: messages.map((m) => 166 | m.role === 'assistant' 167 | ? { 168 | role: 'assistant', 169 | content: m.content ?? '', 170 | tool_calls: m.toolCall ? [m.toolCall] : undefined, 171 | } 172 | : m.role === 'tool' 173 | ? { 174 | role: 'tool', 175 | content: m.content ?? '', 176 | tool_call_id: m.toolCallId ?? '', 177 | } 178 | : { role: m.role, content: m.content ?? '' }, 179 | ), 180 | }; 181 | const completionOptions = { 182 | timeout: finalRequestOptions.timeout, 183 | maxRetries: finalRequestOptions.retries, 184 | }; 185 | 186 | if (this.modelConfig.stream) { 187 | const stream = await this.client.chat.completions.create( 188 | { ...completionBody, stream: true }, 189 | completionOptions, 190 | ); 191 | 192 | // emit prefix since technically that's counted as part of the response 193 | if (finalRequestOptions?.responsePrefix) { 194 | finalRequestOptions?.events?.emit( 195 | 'data', 196 | finalRequestOptions.responsePrefix, 197 | ); 198 | } 199 | 200 | const toolCallStreamParts: Partial[] = []; 201 | for await (const part of stream) { 202 | const text = part.choices[0]?.delta?.content; 203 | const call = part.choices[0]?.delta?.tool_calls?.[0] as Partial< 204 | typeof toolCall 205 | >; 206 | if (text) { 207 | debug.write(text); 208 | completion += text; 209 | finalRequestOptions?.events?.emit('data', text); 210 | } else if (call) { 211 | debug.write( 212 | call.function 213 | ? call.function.name 214 | ? `${call.function.name}: ${call.function.arguments}\n` 215 | : call.function.arguments 216 | : call.id ?? '', 217 | ); 218 | toolCallStreamParts.push(call); 219 | } 220 | } 221 | 222 | // finalize function call data from all parts from stream 223 | if (toolCallStreamParts.length > 0) { 224 | toolCall = toolCallStreamParts.reduce( 225 | (prev, part) => ({ 226 | id: prev.id ?? part.id, 227 | type: prev.type ?? 
228 |             function: {
229 |               name: (prev.function?.name ?? '') + (part.function?.name ?? ''),
230 |               arguments:
231 |                 (prev.function?.arguments ?? '') +
232 |                 (part.function?.arguments ?? ''),
233 |             },
234 |           }),
235 |           {},
236 |         ) as ChatRequestToolCall;
237 |       }
238 | 
239 |       debug.write('\n[STREAM] response end\n');
240 |     } else {
241 |       const response = await this.client.chat.completions.create(
242 |         { ...completionBody, stream: false },
243 |         completionOptions,
244 |       );
245 |       completion = response.choices[0].message.content ?? '';
246 |       toolCall = response.choices[0].message.tool_calls?.[0];
247 |       usage = response.usage;
248 |       debug.log('🔽 completion received', completion);
249 |     }
250 | 
251 |     if (completion) {
252 |       const receivedMessage: ChatRequestMessage = {
253 |         role: 'assistant',
254 |         content: completion,
255 |       };
256 |       return {
257 |         message: receivedMessage,
258 |         content: completion,
259 |         respond: (message: string | ChatRequestMessage, opt) =>
260 |           this.chatCompletion(
261 |             [
262 |               ...messages,
263 |               receivedMessage,
264 |               typeof message === 'string'
265 |                 ? { role: 'user', content: message }
266 |                 : message,
267 |             ],
268 |             opt ?? requestOptions,
269 |           ),
270 |         usage: usage
271 |           ? {
272 |               totalTokens: usage.total_tokens,
273 |               promptTokens: usage.prompt_tokens,
274 |               completionTokens: usage.completion_tokens,
275 |             }
276 |           : undefined,
277 |       };
278 |     } else if (toolCall) {
279 |       const receivedMessage: ChatRequestMessage = {
280 |         role: 'assistant',
281 |         content: '', // explicitly put an empty string, or the API will complain that it's a required property
282 |         toolCall,
283 |       };
284 |       return {
285 |         message: receivedMessage,
286 |         toolCallId: toolCall.id,
287 |         name: toolCall.function.name,
288 |         arguments: parseUnsafeJson(toolCall.function.arguments),
289 |         respond: (message: string | ChatRequestMessage, opt) =>
290 |           this.chatCompletion(
291 |             [
292 |               ...messages,
293 |               receivedMessage,
294 |               // NOTE: all tool call messages must be followed up by a `tool` type message
295 |               typeof message === 'string'
296 |                 ? { role: 'tool', toolCallId: toolCall?.id, content: message }
297 |                 : message,
298 |             ],
299 |             opt ?? requestOptions,
300 |           ),
301 |         usage: usage
302 |           ? {
303 |               totalTokens: usage.total_tokens,
304 |               promptTokens: usage.prompt_tokens,
305 |               completionTokens: usage.completion_tokens,
306 |             }
307 |           : undefined,
308 |       };
309 |     } else {
310 |       throw new Error('Completion response malformed');
311 |     }
312 |   }
313 | 
314 |   async textCompletion(
315 |     prompt: string,
316 |     requestOptions = {} as Partial<ModelRequestOptions>,
317 |   ): Promise<ChatResponse> {
318 |     const messages: ChatRequestMessage[] = [{ role: 'user', content: prompt }];
319 |     return this.chatCompletion(messages, requestOptions);
320 |   }
321 | }
322 | 
--------------------------------------------------------------------------------
/src/models/tokenizer.ts:
--------------------------------------------------------------------------------
1 | import tiktoken from 'js-tiktoken';
2 | 
3 | import { ModelFunction } from '../types';
4 | 
5 | const encoder = tiktoken.getEncoding('cl100k_base');
6 | 
7 | // NOTE: this is only accurate for OpenAI models; Anthropic uses a different tokenizer & format. But context windows are so large now that it's probably good enough to just run an estimate.
8 | export function getTikTokenTokensFromPrompt(
9 |   promptOrMessages: string[],
10 |   functions?: ModelFunction[],
11 | ) {
12 |   let numTokens = 0;
13 | 
14 |   for (const message of promptOrMessages) {
15 |     numTokens += 5; // every message follows {role/name}\n{content}\n
16 |     numTokens += encoder.encode(message).length;
17 |   }
18 |   numTokens += 2; // every reply is primed with assistant\n
19 | 
20 |   if (functions) {
21 |     for (const func of functions) {
22 |       numTokens += 5;
23 |       numTokens += encoder.encode(JSON.stringify(func)).length;
24 |     }
25 |     // estimate tokens needed to prime functions
26 |     numTokens += 20;
27 |   }
28 | 
29 |   return numTokens;
30 | }
31 | 
--------------------------------------------------------------------------------
/src/types.ts:
--------------------------------------------------------------------------------
1 | import { ClientOptions as AnthropicClientOptions } from '@anthropic-ai/sdk';
2 | import EventEmitter from 'events';
3 | import { ClientOptions as GroqClientOptions } from 'groq-sdk';
4 | import { ClientOptions as OpenAIClientOptions } from 'openai';
5 | import { JsonValue } from 'type-fest';
6 | 
7 | export type GroqConfig = GroqClientOptions;
8 | 
9 | export type AnthropicConfig = AnthropicClientOptions;
10 | 
11 | export type OpenAIConfig = OpenAIClientOptions & {
12 |   azureEndpoint?: string;
13 |   azureDeployment?: string;
14 |   azureApiVersion?: string;
15 | };
16 | 
17 | export interface ModelConfig {
18 |   model?: string;
19 |   // set this to the total context size of the model, to enable automatic request chunking to avoid context overflows
20 |   contextSize?: number;
21 | 
22 |   // max tokens to generate
23 |   maxTokens?: number;
24 |   temperature?: number;
25 |   topP?: number;
26 |   presencePenalty?: number;
27 |   frequencyPenalty?: number;
28 |   logitBias?: Record<string, number>;
29 |   user?: string;
30 | 
31 |   // use stream mode for the API response; the streamed tokens will be sent to `events` in `ModelRequestOptions`
32 |   // NOTE: this does NOT support functions
33 |   stream?: boolean;
34 | }
35 | 
36 | export type ModelFunction = {
37 |   name: string;
38 |   parameters: {
39 |     [key: string]: any;
40 |   };
41 |   description?: string;
42 | };
43 | 
44 | export type ModelRequestOptions = {
45 |   systemMessage?: string | (() => string);
46 | 
47 |   // send a prefix to the model response so the model can continue generating from there, useful for steering the model towards certain output structures.
48 |   // the response prefix WILL be included in the returned model response.
49 |   // for Anthropic's models ONLY
50 |   responsePrefix?: string;
51 | 
52 |   // stop tokens to use
53 |   stop?: string | string[];
54 | 
55 |   // function-related parameters are for OpenAI's models ONLY
56 |   functions?: ModelFunction[];
57 |   // force the model to call the following function
58 |   callFunction?: string;
59 | 
60 |   // the number of times to retry this request due to rate limits or recoverable API errors
61 |   retries?: number;
62 |   retryInterval?: number;
63 |   timeout?: number;
64 | 
65 |   // the minimum number of tokens to allocate for the response. If the request is predicted to not have enough tokens, a 'TokenError' will be thrown automatically without sending the request
66 |   minimumResponseTokens?: number;
67 | 
68 |   // the maximum number of tokens to use for the response
69 |   // NOTE: in OpenAI models, setting this option also requires contextSize in ModelConfig to be set
70 |   maximumResponseTokens?: number;
71 | 
72 |   // pass in an event emitter to receive message stream events
73 |   events?: EventEmitter;
74 | };
75 | 
76 | export type ChatRequestRole = 'system' | 'user' | 'assistant' | 'tool';
77 | 
78 | export interface ChatRequestMessage {
79 |   role: ChatRequestRole;
80 |   content?: string;
81 |   toolCall?: ChatRequestToolCall; // used to respond to `assistant` type messages
82 |   toolCallId?: string; // used to respond to `tool` type messages
83 | }
84 | 
85 | export interface ChatRequestToolCall {
86 |   id: string;
87 |   type: 'function';
88 |   function: {
89 |     name: string;
90 |     arguments: string;
91 |   };
92 | }
93 | 
94 | export type ChatResponse = {
95 |   // the raw message object that was received
96 |   message: ChatRequestMessage;
97 | 
98 |   content?: string;
99 | 
100 |   // name and arguments used for the function response
101 |   toolCallId?: string;
102 |   name?: string;
103 |   arguments?: JsonValue;
104 | 
105 |   usage?: {
106 |     promptTokens: number;
107 |     completionTokens: number;
108 |     totalTokens: number;
109 |   };
110 | 
111 |   // function to send another message in the same chat; this will automatically reuse all existing settings and append the new message to the messages array
112 |   respond: (
113 |     message: string | ChatRequestMessage,
114 |     opt?: ModelRequestOptions,
115 |   ) => Promise<ChatResponse>;
116 | };
117 | 
--------------------------------------------------------------------------------
/src/utils.ts:
--------------------------------------------------------------------------------
1 | import { debug as mDebug } from 'debug';
2 | import jsonic from 'jsonic';
3 | import { jsonrepair } from 'jsonrepair';
4 | import { JsonValue } from 'type-fest';
5 | 
6 | const error = mDebug('llm-api:error');
7 | const log = mDebug('llm-api:log');
8 | // eslint-disable-next-line no-console
9 | log.log = console.log.bind(console);
10 | 
11 | export const debug = {
12 |   error,
13 |   log,
14 |   write: (t: string) =>
15 |     process.env.DEBUG &&
16 |     (process.env.DEBUG === '*' || 'llm-api:log'.match(process.env.DEBUG)) &&
17 |     process.stdout &&
18 |     process.stdout.write(t),
19 | };
20 | 
21 | export function sleep(delay: number) {
22 |   return new Promise((resolve) => {
23 |     setTimeout(resolve, delay);
24 |   });
25 | }
26 | 
27 | export function parseUnsafeJson(json: string): JsonValue {
28 |   return jsonic(jsonrepair(json));
29 | }
30 | 
31 | export type MaybePromise<T> = Promise<T> | T;
32 | 
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 |   "compilerOptions": {
3 |     "target": "esnext",
4 |     "moduleResolution": "node",
5 |     "lib": ["dom", "esnext"],
6 |     "allowJs": true,
7 |     "alwaysStrict": true,
8 |     "skipLibCheck": true,
9 |     "esModuleInterop": true,
10 |     "allowSyntheticDefaultImports": true,
11 |     "strict": true,
12 |     "forceConsistentCasingInFileNames": true,
13 |     "resolveJsonModule": true,
14 |     "noFallthroughCasesInSwitch": true,
15 |     "noUnusedLocals": true,
16 |     "noUnusedParameters": true,
17 |     "noImplicitAny": true,
18 |     "noImplicitThis": true,
19 |     "strictNullChecks": true,
20 | 
21 |     // compile settings
22 |     "module": "commonjs",
23 |     "declaration": true,
24 |     "declarationMap": true,
25 |     "sourceMap": false,
26 |     "removeComments": true,
27 |     "outDir": "dist"
28 |   },
29 |   "include": ["src/**/*", "playground.ts"],
30 |   "exclude": ["dist", "node_modules", "**/__mocks__/*"],
31 |   "ts-node": {
32 |     "compilerOptions": { "module": "commonjs" }
33 |   }
34 | }
35 | 
--------------------------------------------------------------------------------
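
A short usage sketch may help tie the files above together. It is not part of the repository listing: it assumes the package is published as `llm-api`, that `src/index.ts` re-exports `OpenAIChatApi`, and the model name, context size, and `get_weather` function below are placeholder examples only.

import { OpenAIChatApi } from 'llm-api';

async function main() {
  const openai = new OpenAIChatApi(
    { apiKey: process.env.OPENAI_API_KEY ?? '' },
    // contextSize enables the prompt-size pre-check and maximumResponseTokens handling
    { model: 'gpt-4o-mini', contextSize: 128_000, temperature: 0 },
  );

  // plain text completion; the prompt is wrapped as a single `user` message
  const res = await openai.textCompletion('What is the capital of France?', {
    systemMessage: 'Answer in one short sentence.',
  });
  console.info(res.content, res.usage);

  // continue the same conversation; settings and history are reused automatically
  const followUp = await res.respond('And of Germany?');
  console.info(followUp.content);

  // tool use: when the model calls a function, the response carries the parsed
  // arguments, and respond() sends the tool result back as a `tool` message
  const toolRes = await openai.chatCompletion(
    [{ role: 'user', content: 'What is the weather in Paris?' }],
    {
      functions: [
        {
          name: 'get_weather', // hypothetical function, for illustration only
          description: 'Look up the current weather for a city',
          parameters: {
            type: 'object',
            properties: { city: { type: 'string' } },
            required: ['city'],
          },
        },
      ],
    },
  );
  if (toolRes.name === 'get_weather') {
    console.info('model requested weather with args', toolRes.arguments);
    const final = await toolRes.respond('18°C and sunny');
    console.info(final.content);
  }
}

main().catch(console.error);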