├── .eslintrc.json
├── .github
│   ├── dependabot.yml
│   └── workflows
│       ├── test-and-release.yml
│       └── upgrade-dependencies.yml
├── .gitignore
├── .ncurc.json
├── .npmrc
├── .prettierignore
├── LICENSE
├── README.md
├── api.oas.yml
├── bin
│   ├── llmatic
│   └── llmatic.js
├── examples
│   ├── custom-adapter
│   │   ├── custom-llm-adapter.ts
│   │   ├── llmatic.config.json
│   │   ├── package-lock.json
│   │   └── package.json
│   ├── node-langchain
│   │   ├── index.mjs
│   │   ├── package-lock.json
│   │   └── package.json
│   └── python-langchain
│       ├── main.py
│       └── requirements.txt
├── media
│   ├── chatbot-ui.gif
│   └── logo.png
├── package-lock.json
├── package.json
├── public
│   └── index.html
├── scripts
│   ├── fetch-openai-oas.mjs
│   └── generate-types.mjs
├── src
│   ├── cli
│   │   ├── cli-utils.test.ts
│   │   ├── cli-utils.ts
│   │   ├── common-options.ts
│   │   ├── llmatic-config.js
│   │   ├── llmatic-start.js
│   │   └── llmatic.ts
│   ├── container.ts
│   ├── default-llm-adapter.ts
│   ├── fastify-server-factory.ts
│   ├── handlers
│   │   ├── create-chat-completion.handler.test.ts
│   │   ├── create-chat-completion.handler.ts
│   │   ├── create-completion.handler.test.ts
│   │   ├── create-completion.handler.ts
│   │   ├── create-embedding.handler.test.ts
│   │   ├── create-embedding.handler.ts
│   │   ├── list-models.handler.test.ts
│   │   ├── list-models.handler.ts
│   │   ├── retrieve-model.handler.test.ts
│   │   └── retrieve-model.handler.ts
│   ├── llama-node-core-llm-adapter.ts
│   ├── llm-adapter.ts
│   ├── operation-handler.ts
│   ├── sse-helper.ts
│   ├── test-support
│   │   └── test-container.ts
│   └── types
│       ├── create-chat-completion.ts
│       ├── create-completion.ts
│       ├── create-embedding.ts
│       ├── list-models.ts
│       └── retrieve-model.ts
└── tsconfig.json
/.eslintrc.json:
--------------------------------------------------------------------------------
1 | {
2 | "extends": ["plugin:unicorn/recommended", "xo", "xo-typescript", "prettier"],
3 | "rules": {
4 | "eqeqeq": ["error", "smart"],
5 | "no-eq-null": "off",
6 | "unicorn/no-null": "off",
7 | "unicorn/prefer-event-target": "off"
8 | },
9 | "ignorePatterns": [
10 | "bin/llmatic",
11 | "scripts/*.mjs",
12 | "src/cli/*.js",
13 | "src/types/*.ts",
14 | "examples/**",
15 | "dist/**"
16 | ]
17 | }
18 |
--------------------------------------------------------------------------------
/.github/dependabot.yml:
--------------------------------------------------------------------------------
1 | version: 2
2 | updates:
3 | - package-ecosystem: "github-actions"
4 | directory: "/"
5 | schedule:
6 | interval: "weekly"
7 | open-pull-requests-limit: 10
8 |
9 | - package-ecosystem: "npm"
10 | directory: "/"
11 | schedule:
12 | interval: "daily"
13 | open-pull-requests-limit: 10
14 |
--------------------------------------------------------------------------------
/.github/workflows/test-and-release.yml:
--------------------------------------------------------------------------------
1 | name: test-and-release
2 |
3 | on:
4 | push:
5 | branches:
6 | - master
7 | paths-ignore:
8 | - "examples/**"
9 | - "media/**"
10 | - "scripts/**"
11 | - "LICENSE"
12 | pull_request:
13 | paths-ignore:
14 | - "examples/**"
15 | - "media/**"
16 | - "scripts/**"
17 | - "LICENSE"
18 |
19 | jobs:
20 | test:
21 | runs-on: ubuntu-latest
22 | strategy:
23 | matrix:
24 | node: [18, 20, 21]
25 | fail-fast: true
26 | steps:
27 | - uses: actions/checkout@main
28 | - uses: actions/setup-node@main
29 | with:
30 | node-version: ${{ matrix.node }}
31 | - name: Cache node modules
32 | uses: actions/cache@main
33 | env:
34 | cache-name: cache-node-${{ matrix.node }}-modules
35 | with:
36 | path: ~/.npm
37 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }}
38 | restore-keys: |
39 | ${{ runner.os }}-build-${{ env.cache-name }}-
40 | ${{ runner.os }}-build-
41 | ${{ runner.os }}-
42 | - name: Install
43 | run: npm ci
44 | - name: Tests
45 | run: npm test
46 | version-check:
47 | runs-on: ubuntu-latest
48 | needs: test
49 | if: github.ref == 'refs/heads/master'
50 | outputs:
51 | changed: ${{ steps.check.outputs.changed }}
52 | version: ${{ steps.check.outputs.version }}
53 | steps:
54 | - uses: actions/checkout@main
55 | - id: check
56 | uses: EndBug/version-check@v2
57 | with:
58 | diff-search: true
59 | token: ${{ secrets.GITHUB_TOKEN }}
60 | release:
61 | runs-on: ubuntu-latest
62 | needs: version-check
63 | if: needs.version-check.outputs.changed == 'true'
64 | steps:
65 | - uses: actions/checkout@main
66 | # https://github.com/rickstaa/action-create-tag/issues/10
67 | - name: "Change perms on GITHUB_WORKSPACE"
68 | run: |
69 | sudo chown -R root:root $GITHUB_WORKSPACE
70 | sudo chmod -R 0777 $GITHUB_WORKSPACE
71 | - uses: rickstaa/action-create-tag@main
72 | with:
73 | tag: v${{ needs.version-check.outputs.version }}
74 | message: v${{ needs.version-check.outputs.version }}
75 | - uses: ncipollo/release-action@main
76 | with:
77 | name: v${{ needs.version-check.outputs.version }}
78 | tag: v${{ needs.version-check.outputs.version }}
79 | token: ${{ secrets.GITHUB_TOKEN }}
80 | npm-publish:
81 | runs-on: ubuntu-latest
82 | needs: release
83 | if: needs.version-check.outputs.changed == 'true'
84 | steps:
85 | - uses: actions/checkout@main
86 | - uses: actions/setup-node@main
87 | with:
88 | node-version: 18
89 | registry-url: "https://registry.npmjs.org"
90 | - uses: actions/cache@main
91 | env:
92 | cache-name: cache-node-${{ matrix.node }}-modules
93 | with:
94 | path: ~/.npm
95 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }}
96 | restore-keys: |
97 | ${{ runner.os }}-build-${{ env.cache-name }}-
98 | ${{ runner.os }}-build-
99 | ${{ runner.os }}-
100 | - name: Install
101 | run: |
102 | set -e
103 | npm ci
104 | - name: Publish
105 | run: npm publish
106 | env:
107 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }}
108 |
--------------------------------------------------------------------------------
/.github/workflows/upgrade-dependencies.yml:
--------------------------------------------------------------------------------
1 | name: upgrade-dependencies
2 |
3 | on:
4 | workflow_dispatch: {}
5 | schedule:
6 | - cron: "0 0 * * *"
7 |
8 | jobs:
9 | upgrade-deps:
10 | runs-on: ubuntu-latest
11 | steps:
12 | - uses: actions/checkout@main
13 | - uses: actions/setup-node@main
14 | with:
15 | node-version: 20
16 | - name: Cache node modules
17 | uses: actions/cache@main
18 | env:
19 | cache-name: cache-node-${{ matrix.node }}-modules
20 | with:
21 | path: ~/.npm
22 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }}
23 | restore-keys: |
24 | ${{ runner.os }}-build-${{ env.cache-name }}-
25 | ${{ runner.os }}-build-
26 | ${{ runner.os }}-
27 | - name: Setup Git
28 | run: |
29 | git config --local user.name "github-actions[bot]"
30 | git config --local user.email "github-actions[bot]@users.noreply.github.com"
31 | git config --local pull.rebase true
32 |
33 | # work around "insufficient permission for adding an object to repository database .git/object" issue
34 | sudo chmod -R ugo+rwX .git
35 | - name: Check for updates
36 | id: check-updates
37 | run: |
38 | set -ex
39 | npm ci &> /dev/null
40 | npx ncu
41 | npm i &> /dev/null
42 | npm audit fix --quiet --no-progress --no-fund || true
43 | npm run fix &> /dev/null || true
44 |
45 | git add -u
46 | git update-index --refresh
47 | if ! git diff-index --quiet HEAD --; then
48 | echo "is-changed=1" >> $GITHUB_OUTPUT
49 | fi
50 | - name: Create a PR
51 | if: steps.check-updates.outputs.is-changed
52 | id: create-pr
53 | run: |
54 | npm version patch
55 | PKG_VERSION="$(node -e 'process.stdout.write(require("./package.json").version)')"
56 |
57 | REMOTE_REPO="https://${GITHUB_ACTOR}:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git"
58 | CURRENT_BRANCH="$(git branch --show-current)"
59 | NEW_BRANCH="v${PKG_VERSION}"
60 |
61 | if git ls-remote --exit-code --heads "${REMOTE_REPO}" "${NEW_BRANCH}" > /dev/null; then
62 | # PR already exists
63 | exit 0
64 | fi
65 |
66 | git commit -a -m "${PKG_VERSION}" --no-verify
67 | git pull "${REMOTE_REPO}" "${CURRENT_BRANCH}"
68 | git checkout -b "${NEW_BRANCH}"
69 | git push "${REMOTE_REPO}" "HEAD:${NEW_BRANCH}"
70 |
71 | PR_URL=$(gh pr create -B "${CURRENT_BRANCH}" -H "${NEW_BRANCH}" -f)
72 | echo "pr-url=${PR_URL}" >> $GITHUB_OUTPUT
73 | env:
74 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
75 | - name: Approve and merge the PR
76 | if: steps.create-pr.outputs.pr-url
77 | run: |
78 | gh pr review --approve "${PR_URL}"
79 | gh pr merge --auto --delete-branch --rebase "${PR_URL}"
80 | env:
81 | GITHUB_TOKEN: ${{ secrets.GH_PAT }}
82 | PR_URL: ${{ steps.create-pr.outputs.pr-url }}
83 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Created by https://www.toptal.com/developers/gitignore/api/macos,windows,linux,visualstudiocode,vim,node
2 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos,windows,linux,visualstudiocode,vim,node
3 |
4 | ### Linux ###
5 | *~
6 |
7 | # temporary files which can be created if a process still has a handle open of a deleted file
8 | .fuse_hidden*
9 |
10 | # KDE directory preferences
11 | .directory
12 |
13 | # Linux trash folder which might appear on any partition or disk
14 | .Trash-*
15 |
16 | # .nfs files are created when an open file is removed but is still being accessed
17 | .nfs*
18 |
19 | ### macOS ###
20 | # General
21 | .DS_Store
22 | .AppleDouble
23 | .LSOverride
24 |
25 | # Icon must end with two \r
26 | Icon
27 |
28 |
29 | # Thumbnails
30 | ._*
31 |
32 | # Files that might appear in the root of a volume
33 | .DocumentRevisions-V100
34 | .fseventsd
35 | .Spotlight-V100
36 | .TemporaryItems
37 | .Trashes
38 | .VolumeIcon.icns
39 | .com.apple.timemachine.donotpresent
40 |
41 | # Directories potentially created on remote AFP share
42 | .AppleDB
43 | .AppleDesktop
44 | Network Trash Folder
45 | Temporary Items
46 | .apdisk
47 |
48 | ### macOS Patch ###
49 | # iCloud generated files
50 | *.icloud
51 |
52 | ### Node ###
53 | # Logs
54 | logs
55 | *.log
56 | npm-debug.log*
57 | yarn-debug.log*
58 | yarn-error.log*
59 | lerna-debug.log*
60 | .pnpm-debug.log*
61 |
62 | # Diagnostic reports (https://nodejs.org/api/report.html)
63 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json
64 |
65 | # Runtime data
66 | pids
67 | *.pid
68 | *.seed
69 | *.pid.lock
70 |
71 | # Directory for instrumented libs generated by jscoverage/JSCover
72 | lib-cov
73 |
74 | # Coverage directory used by tools like istanbul
75 | coverage
76 | *.lcov
77 |
78 | # nyc test coverage
79 | .nyc_output
80 |
81 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files)
82 | .grunt
83 |
84 | # Bower dependency directory (https://bower.io/)
85 | bower_components
86 |
87 | # node-waf configuration
88 | .lock-wscript
89 |
90 | # Compiled binary addons (https://nodejs.org/api/addons.html)
91 | build/Release
92 |
93 | # Dependency directories
94 | node_modules/
95 | jspm_packages/
96 |
97 | # Snowpack dependency directory (https://snowpack.dev/)
98 | web_modules/
99 |
100 | # TypeScript cache
101 | *.tsbuildinfo
102 |
103 | # Optional npm cache directory
104 | .npm
105 |
106 | # Optional eslint cache
107 | .eslintcache
108 |
109 | # Optional stylelint cache
110 | .stylelintcache
111 |
112 | # Microbundle cache
113 | .rpt2_cache/
114 | .rts2_cache_cjs/
115 | .rts2_cache_es/
116 | .rts2_cache_umd/
117 |
118 | # Optional REPL history
119 | .node_repl_history
120 |
121 | # Output of 'npm pack'
122 | *.tgz
123 |
124 | # Yarn Integrity file
125 | .yarn-integrity
126 |
127 | # dotenv environment variable files
128 | .env
129 | .env.development.local
130 | .env.test.local
131 | .env.production.local
132 | .env.local
133 |
134 | # parcel-bundler cache (https://parceljs.org/)
135 | .cache
136 | .parcel-cache
137 |
138 | # Next.js build output
139 | .next
140 | out
141 |
142 | # Nuxt.js build / generate output
143 | .nuxt
144 | dist
145 |
146 | # Gatsby files
147 | .cache/
149 | # Comment in the public line if your project uses Gatsby and not Next.js
149 | # https://nextjs.org/blog/next-9-1#public-directory-support
150 | # public
151 |
152 | # vuepress build output
153 | .vuepress/dist
154 |
155 | # vuepress v2.x temp and cache directory
156 | .temp
157 |
158 | # Docusaurus cache and generated files
159 | .docusaurus
160 |
161 | # Serverless directories
162 | .serverless/
163 |
164 | # FuseBox cache
165 | .fusebox/
166 |
167 | # DynamoDB Local files
168 | .dynamodb/
169 |
170 | # TernJS port file
171 | .tern-port
172 |
173 | # Stores VSCode versions used for testing VSCode extensions
174 | .vscode-test
175 |
176 | # yarn v2
177 | .yarn/cache
178 | .yarn/unplugged
179 | .yarn/build-state.yml
180 | .yarn/install-state.gz
181 | .pnp.*
182 |
183 | ### Node Patch ###
184 | # Serverless Webpack directories
185 | .webpack/
186 |
187 | # Optional stylelint cache
188 |
189 | # SvelteKit build / generate output
190 | .svelte-kit
191 |
192 | ### Vim ###
193 | # Swap
194 | [._]*.s[a-v][a-z]
195 | !*.svg # comment out if you don't need vector files
196 | [._]*.sw[a-p]
197 | [._]s[a-rt-v][a-z]
198 | [._]ss[a-gi-z]
199 | [._]sw[a-p]
200 |
201 | # Session
202 | Session.vim
203 | Sessionx.vim
204 |
205 | # Temporary
206 | .netrwhist
207 | # Auto-generated tag files
208 | tags
209 | # Persistent undo
210 | [._]*.un~
211 |
212 | ### VisualStudioCode ###
213 | .vscode/*
214 | !.vscode/settings.json
215 | !.vscode/tasks.json
216 | !.vscode/launch.json
217 | !.vscode/extensions.json
218 | !.vscode/*.code-snippets
219 |
220 | # Local History for Visual Studio Code
221 | .history/
222 |
223 | # Built Visual Studio Code Extensions
224 | *.vsix
225 |
226 | ### VisualStudioCode Patch ###
227 | # Ignore all local history of files
228 | .history
229 | .ionide
230 |
231 | ### Windows ###
232 | # Windows thumbnail cache files
233 | Thumbs.db
234 | Thumbs.db:encryptable
235 | ehthumbs.db
236 | ehthumbs_vista.db
237 |
238 | # Dump file
239 | *.stackdump
240 |
241 | # Folder config file
242 | [Dd]esktop.ini
243 |
244 | # Recycle Bin used on file shares
245 | $RECYCLE.BIN/
246 |
247 | # Windows Installer files
248 | *.cab
249 | *.msi
250 | *.msix
251 | *.msm
252 | *.msp
253 |
254 | # Windows shortcuts
255 | *.lnk
256 |
257 | # End of https://www.toptal.com/developers/gitignore/api/macos,windows,linux,visualstudiocode,vim,node
258 | /models/
259 | .python-version
260 | /llmatic.config.json
--------------------------------------------------------------------------------
/.ncurc.json:
--------------------------------------------------------------------------------
1 | {
2 | "upgrade": true
3 | }
4 |
--------------------------------------------------------------------------------
/.npmrc:
--------------------------------------------------------------------------------
1 | git-tag-version=false
2 |
--------------------------------------------------------------------------------
/.prettierignore:
--------------------------------------------------------------------------------
1 | dist/
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) Fardjad Davari
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 |
2 |
3 | # LLMatic
4 |
5 |

6 |
7 | Use self-hosted LLMs with an OpenAI compatible API
8 |
9 |
14 |
15 |
16 |
17 |
18 |
19 | ## Project status
20 |
21 | This project was the result of my curiosity and experimentation with OpenAI's API, and I enjoyed building it. It is certainly neither the first nor the last project of its kind. Given my limited time and resources, I'd like to pause the development of this project for now. Below are some similar projects that can be used as alternatives:
22 |
23 | 1. [Ollama](https://github.com/ollama/ollama/blob/main/docs/openai.md)
24 | 2. [LLaMA.cpp HTTP Server](https://github.com/ggerganov/llama.cpp/tree/master/examples/server)
25 | 3. [GPT4All Chat Server Mode](https://docs.gpt4all.io/gpt4all_chat.html#gpt4all-chat-server-mode)
26 | 4. [FastChat](https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md)
27 |
28 |
29 | ## Synopsis
30 |
31 | LLMatic can be used as a drop-in replacement for OpenAI's API [v1.2.0](https://github.com/openai/openai-openapi/blob/88f221442879061d9970ed453a65b973d226f15d/openapi.yaml) (see the
32 | supported endpoints). By default, it uses [llama-node](https://github.com/Atome-FE/llama-node)
33 | with the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend to run the models locally. However, you can easily create [your own adapter](#custom-adapters) to use any other model or service.
34 |
35 | Supported endpoints (a sample request is sketched right after this list):
36 |
37 | - [x] /completions (stream and non-stream)
38 | - [x] /chat/completions (stream and non-stream)
39 | - [x] /embeddings
40 | - [x] /models
41 |
42 | ## How to use
43 |
44 | If you prefer a video tutorial, you can watch the following video for step-by-step instructions on how to use this project:
45 |
46 |
47 |
48 |
49 |
50 | ### Requirements
51 |
52 | - Node.js >=18.16
53 | - Unix-based OS (Linux, macOS, WSL, etc.)
54 |
55 | ### Installation
56 |
57 | Create an empty directory and run `npm init`:
58 |
59 | ```bash
60 | export LLMATIC_PROJECT_DIR=my-llmatic-project
61 | mkdir $LLMATIC_PROJECT_DIR
62 | cd $LLMATIC_PROJECT_DIR
63 | npm init -y
64 | ```
65 |
66 | Install and configure LLMatic:
67 |
68 | ```bash
69 | npm add llmatic
70 | # Download a model and generate a config file
71 | npx llmatic config
72 | ```
73 |
74 | Adjust the config file to your needs and start the server:
75 |
76 | ```bash
77 | npx llmatic start
78 | ```
79 |
80 | You can run `llmatic --help` to see all available commands.
81 |
82 | ### Usage with [chatbot-ui](https://github.com/mckaywrigley/chatbot-ui)
83 |
84 | Clone the repo and install the dependencies:
85 |
86 | ```bash
87 | git clone https://github.com/mckaywrigley/chatbot-ui.git
88 | cd chatbot-ui
89 | npm install
90 | ```
91 |
92 | Create a `.env.local` file:
93 |
94 | ```bash
95 | cat << EOF > .env.local
96 | # For now, this is ignored by LLMatic
97 | DEFAULT_MODEL=Ignored
98 |
99 | NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=A chat between a curious human (user) and an artificial intelligence assistant (assistant). The assistant gives helpful, detailed, and polite answers to the human's questions.
100 |
101 | user: Hello!
102 | assistant: Hello! How may I help you today?
103 | user: Please tell me the largest city in Europe.
104 | assistant: Sure. The largest city in Europe is Moscow, the capital of Russia.
105 |
106 | OPENAI_API_KEY=ANYTHING_WILL_DO
107 | OPENAI_API_HOST=http://localhost:3000
108 |
109 | GOOGLE_API_KEY=YOUR_API_KEY
110 | GOOGLE_CSE_ID=YOUR_ENGINE_ID
111 | EOF
112 | ```
113 |
114 | Run the chatbot-ui dev server:
115 |
116 | ```bash
117 | npm run dev -- --port 3001
118 | ```
119 |
120 | Demo:
121 |
122 | 
123 |
124 | ### Usage with [LangChain](https://langchain.com)
125 |
126 | There are two examples of using LLMatic with LangChain in the
127 | [`examples`](/examples) directory.
128 |
129 | To run the Node.js example, first install the dependencies:
130 |
131 | ```bash
132 | cd examples/node-langchain
133 | npm install
134 | ```
135 |
136 | Then run the main script:
137 |
138 | ```bash
139 | npm start
140 | ```
141 |
142 | <details>
143 | <summary>Expand this to see the sample output</summary>
144 |
145 | ```
146 | [chain/start] [1:chain:llm_chain] Entering Chain run with input: {
147 | "humanInput": "Remember that this is a demo of LLMatic with LangChain.",
148 | "history": ""
149 | }
150 | [llm/start] [1:chain:llm_chain > 2:llm:openai] Entering LLM run with input: {
151 | "prompts": [
152 | "A chat between a curious user and an artificial intelligence assistant.\nThe assistant gives helpful, detailed, and polite answers to the user's questions.\n\n\nHuman: Remember that this is a demo of LLMatic with LangChain.\nAI:"
153 | ]
154 | }
155 | [llm/end] [1:chain:llm_chain > 2:llm:openai] [5.92s] Exiting LLM run with output: {
156 | "generations": [
157 | [
158 | {
159 | "text": " Yes, I understand. I am ready to assist you with your queries.",
160 | "generationInfo": {
161 | "finishReason": "stop",
162 | "logprobs": null
163 | }
164 | }
165 | ]
166 | ],
167 | "llmOutput": {
168 | "tokenUsage": {}
169 | }
170 | }
171 | [chain/end] [1:chain:llm_chain] [5.92s] Exiting Chain run with output: {
172 | "text": " Yes, I understand. I am ready to assist you with your queries."
173 | }
174 | [chain/start] [1:chain:llm_chain] Entering Chain run with input: {
175 | "humanInput": "What did I ask you to remember?",
176 | "history": "Human: Remember that this is a demo of LLMatic with LangChain.\nAI: Yes, I understand. I am ready to assist you with your queries."
177 | }
178 | [llm/start] [1:chain:llm_chain > 2:llm:openai] Entering LLM run with input: {
179 | "prompts": [
180 | "A chat between a curious user and an artificial intelligence assistant.\nThe assistant gives helpful, detailed, and polite answers to the user's questions.\n\nHuman: Remember that this is a demo of LLMatic with LangChain.\nAI: Yes, I understand. I am ready to assist you with your queries.\nHuman: What did I ask you to remember?\nAI:"
181 | ]
182 | }
183 | [llm/end] [1:chain:llm_chain > 2:llm:openai] [6.51s] Exiting LLM run with output: {
184 | "generations": [
185 | [
186 | {
187 | "text": " You asked me to remember that this is a demo of LLMatic with LangChain.",
188 | "generationInfo": {
189 | "finishReason": "stop",
190 | "logprobs": null
191 | }
192 | }
193 | ]
194 | ],
195 | "llmOutput": {
196 | "tokenUsage": {}
197 | }
198 | }
199 | [chain/end] [1:chain:llm_chain] [6.51s] Exiting Chain run with output: {
200 | "text": " You asked me to remember that this is a demo of LLMatic with LangChain."
201 | }
202 | ```
203 |
204 | </details>
205 |
206 |
207 |
208 | To run the Python example, first install the dependencies:
209 |
210 | ```bash
211 | cd examples/python-langchain
212 | pip3 install -r requirements.txt
213 | ```
214 |
215 | Then run the main script:
216 |
217 | ```bash
218 | python3 main.py
219 | ```
220 |
221 | <details>
222 | <summary>Expand this to see the sample output</summary>
223 |
224 | ```
225 | > Entering new LLMChain chain...
226 | Prompt after formatting:
227 | A chat between a curious user and an artificial intelligence assistant.
228 | The assistant gives helpful, detailed, and polite answers to the user's questions.
229 |
230 |
231 | Human: Remember that this is a demo of LLMatic with LangChain.
232 | AI:
233 |
234 | > Finished chain.
235 | Yes, I understand. I am ready to assist you with your queries.
236 |
237 |
238 | > Entering new LLMChain chain...
239 | Prompt after formatting:
240 | A chat between a curious user and an artificial intelligence assistant.
241 | The assistant gives helpful, detailed, and polite answers to the user's questions.
242 |
243 | Human: Remember that this is a demo of LLMatic with LangChain.
244 | AI: Yes, I understand. I am ready to assist you with your queries.
245 | Human: What did I ask you to remember?
246 | AI:
247 |
248 | > Finished chain.
249 | You asked me to remember that this is a demo of LLMatic with LangChain.
250 | ```
251 |
252 | </details>
253 |
254 | ## Custom Adapters
255 |
256 | LLMatic is designed to be easily extensible. You can create your own adapters by extending the [`LlmAdapter`](/src/llm-adapter.ts) class. See [`examples/custom-adapter`](/examples/custom-adapter) for an example.
257 |
258 | To start llmatic with a custom adapter, use the `--llm-adapter` flag:
259 |
260 | ```bash
261 | llmatic start --llm-adapter ./custom-llm-adapter.ts
262 | ```
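
For reference, the sketch below shows the general shape of such an adapter (the complete, runnable version lives in [`examples/custom-adapter`](/examples/custom-adapter)); the class name and model id here are made up for illustration:

```ts
import {
  LlmAdapter,
  type LlmAdapterCreateChatCompletionRequest,
  type LlmAdapterCreateChatCompletionResponse,
  type LlmAdapterCreateCompletionRequest,
  type LlmAdapterCreateCompletionResponse,
  type LlmAdapterCreateEmbeddingRequest,
  type LlmAdapterCreateEmbeddingResponse,
  type LlmAdapterModel,
  Role,
} from "llmatic/llm-adapter";

export default class MyLlmAdapter extends LlmAdapter {
  // Models advertised through the /models endpoint.
  async listModels(): Promise<LlmAdapterModel[]> {
    return [{ id: "my-model", created: 0, ownedBy: "me" }];
  }

  // Completion results are reported incrementally through the onData callback,
  // which is what makes streaming responses possible.
  async createCompletion(
    request: LlmAdapterCreateCompletionRequest,
    abortSignal: AbortSignal,
    onData: (data: LlmAdapterCreateCompletionResponse) => void,
  ): Promise<void> {
    onData({ finishReason: "stop", index: 0, text: "Hello from my adapter!" });
  }

  async createChatCompletion(
    request: LlmAdapterCreateChatCompletionRequest,
    abortSignal: AbortSignal,
    onData: (data: LlmAdapterCreateChatCompletionResponse) => void,
  ): Promise<void> {
    // The first delta carries the assistant role; later deltas carry content.
    onData({ finishReason: "stop", index: 0, delta: { role: Role.Assistant } });
    onData({ finishReason: "stop", index: 0, delta: { content: "Hello!" } });
  }

  async createEmbedding(
    request: LlmAdapterCreateEmbeddingRequest,
  ): Promise<LlmAdapterCreateEmbeddingResponse> {
    // A placeholder embedding; a real adapter would embed the input here.
    return [0];
  }
}
```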
263 |
--------------------------------------------------------------------------------
/bin/llmatic:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env bash
2 |
3 | PREVIOUS_DIRECTORY="$(pwd)"
4 | BIN_DIRECTORY="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )"
5 | cd "$PREVIOUS_DIRECTORY"
6 |
7 | exec "$BIN_DIRECTORY/llmatic.js" "$@"
--------------------------------------------------------------------------------
/bin/llmatic.js:
--------------------------------------------------------------------------------
1 | #!/usr/bin/env node
2 | import childProcess from "node:child_process";
3 | import { createRequire } from "node:module";
4 | import { fileURLToPath } from "node:url";
5 |
6 | const tsx =
7 | import.meta.resolve &&
8 | // This can be removed once node 18 is EOL
9 | import.meta.resolve.constructor.name !== "AsyncFunction"
10 | ? fileURLToPath(import.meta.resolve("tsx/cli"))
11 | : createRequire(import.meta.url).resolve("tsx/cli");
12 |
13 | childProcess.fork(tsx, [
14 | "--no-warnings",
15 | fileURLToPath(new URL("../src/cli/llmatic.ts", import.meta.url)),
16 | ...process.argv.slice(2),
17 | ]);
18 |
--------------------------------------------------------------------------------
/examples/custom-adapter/custom-llm-adapter.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable no-await-in-loop */
2 | import {
3 | LlmAdapter,
4 | type LlmAdapterCreateChatCompletionRequest,
5 | type LlmAdapterCreateChatCompletionResponse,
6 | type LlmAdapterCreateCompletionRequest,
7 | type LlmAdapterCreateCompletionResponse,
8 | type LlmAdapterCreateEmbeddingRequest,
9 | type LlmAdapterCreateEmbeddingResponse,
10 | type LlmAdapterModel,
11 | Role,
12 | } from "llmatic/llm-adapter";
13 |
14 | type AdapterConfig = Record<string, unknown>;
15 |
16 | export default class CustomLlmAdapter extends LlmAdapter {
17 | #llmConfig: Record<string, unknown>;
18 |
19 | constructor(llmConfig: AdapterConfig) {
20 | super();
21 |
22 | this.#llmConfig = { ...CustomLlmAdapter.defaultConfig, ...llmConfig };
23 | }
24 |
25 | async createChatCompletion(
26 | createChatCompletionRequest: LlmAdapterCreateChatCompletionRequest,
27 | abortSignal: AbortSignal,
28 | onData: (data: LlmAdapterCreateChatCompletionResponse) => void,
29 | ): Promise<void> {
30 | const { messages, n } = createChatCompletionRequest;
31 |
32 | const count = messages.length * (n ?? 1);
33 | for (let tokenIndex = 0; tokenIndex < count; tokenIndex++) {
34 | onData({
35 | finishReason: "stop",
36 | index: 0,
37 | delta:
38 | tokenIndex === 0
39 | ? { role: Role.Assistant }
40 | : { content: `token ${tokenIndex}\n` },
41 | });
42 | }
43 | }
44 |
45 | async listModels(): Promise<LlmAdapterModel[]> {
46 | return [
47 | {
48 | id: "fake-model",
49 | created: 0,
50 | ownedBy: "unknown",
51 | },
52 | ];
53 | }
54 |
55 | async createEmbedding({
56 | model,
57 | input,
58 | }: LlmAdapterCreateEmbeddingRequest): Promise<LlmAdapterCreateEmbeddingResponse> {
59 | return [0];
60 | }
61 |
62 | async createCompletion(
63 | createCompletionRequest: LlmAdapterCreateCompletionRequest,
64 | abortSignal: AbortSignal,
65 | onData: (data: LlmAdapterCreateCompletionResponse) => void,
66 | ): Promise<void> {
67 | const { prompt, n } = createCompletionRequest;
68 |
69 | const count = prompt.length * (n ?? 1);
70 | for (let index = 0; index < count; index++) {
71 | onData({
72 | finishReason: "stop",
73 | index,
74 | text: `token ${index}`,
75 | });
76 | }
77 | }
78 |
79 | static get defaultConfig() {
80 | return {};
81 | }
82 | }
83 |
--------------------------------------------------------------------------------
/examples/custom-adapter/llmatic.config.json:
--------------------------------------------------------------------------------
1 | {}
2 |
--------------------------------------------------------------------------------
/examples/custom-adapter/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "llmatic-custom-adapter-example",
3 | "version": "0.0.0",
4 | "type": "module",
5 | "description": "LLMatic custom adapter example",
6 | "scripts": {
7 | "start": "llmatic start --llm-adapter ./custom-llm-adapter.ts"
8 | },
9 | "keywords": [],
10 | "author": "Fardjad Davari ",
11 | "license": "MIT",
12 | "dependencies": {
13 | "llmatic": "^0.4.4"
14 | }
15 | }
16 |
--------------------------------------------------------------------------------
/examples/node-langchain/index.mjs:
--------------------------------------------------------------------------------
1 | import { LLMChain } from "langchain/chains";
2 | import { OpenAI } from "langchain/llms/openai";
3 | import { BufferMemory } from "langchain/memory";
4 | import { PromptTemplate } from "langchain/prompts";
5 |
6 | const model = new OpenAI(
7 | {
8 | temperature: 0,
9 | openAIApiKey: "ANYTHING_WILL_DO",
10 | modelName: "Ignored",
11 | },
12 | {
13 | basePath: "http://localhost:3000/v1",
14 | },
15 | );
16 |
17 | const template = `A chat between a curious user and an artificial intelligence assistant.
18 | The assistant gives helpful, detailed, and polite answers to the user's questions.
19 |
20 | {history}
21 | Human: {humanInput}
22 | AI:`;
23 |
24 | const prompt = new PromptTemplate({
25 | inputVariables: ["history", "humanInput"],
26 | template,
27 | });
28 |
29 | const chatgptChain = new LLMChain({
30 | llm: model,
31 | prompt,
32 | verbose: true,
33 | memory: new BufferMemory(),
34 | });
35 |
36 | await chatgptChain.predict({
37 | humanInput: "Remember that this is a demo of LLMatic with LangChain.",
38 | });
39 | await chatgptChain.predict({
40 | humanInput: "What did I ask you to remember?",
41 | });
42 |
--------------------------------------------------------------------------------
/examples/node-langchain/package-lock.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "langchain-llmatic-example",
3 | "version": "0.0.0",
4 | "lockfileVersion": 3,
5 | "requires": true,
6 | "packages": {
7 | "": {
8 | "name": "langchain-llmatic-example",
9 | "version": "0.0.0",
10 | "license": "UNLICENSED",
11 | "dependencies": {
12 | "langchain": "^0.0.78"
13 | }
14 | },
15 | "node_modules/@anthropic-ai/sdk": {
16 | "version": "0.4.3",
17 | "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.4.3.tgz",
18 | "integrity": "sha512-SZrlXvjUUYT9rPmSzlTtmVk1OjVNpkCzILRluhiYwNcxXfQyvPJDi0CI6PyymygcgtqEF5EVqhKmC/PtPsNEIw==",
19 | "dependencies": {
20 | "@fortaine/fetch-event-source": "^3.0.6",
21 | "cross-fetch": "^3.1.5"
22 | }
23 | },
24 | "node_modules/@fortaine/fetch-event-source": {
25 | "version": "3.0.6",
26 | "resolved": "https://registry.npmjs.org/@fortaine/fetch-event-source/-/fetch-event-source-3.0.6.tgz",
27 | "integrity": "sha512-621GAuLMvKtyZQ3IA6nlDWhV1V/7PGOTNIGLUifxt0KzM+dZIweJ6F3XvQF3QnqeNfS1N7WQ0Kil1Di/lhChEw==",
28 | "engines": {
29 | "node": ">=16.15"
30 | }
31 | },
32 | "node_modules/@types/retry": {
33 | "version": "0.12.0",
34 | "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz",
35 | "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA=="
36 | },
37 | "node_modules/ansi-styles": {
38 | "version": "5.2.0",
39 | "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz",
40 | "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==",
41 | "engines": {
42 | "node": ">=10"
43 | },
44 | "funding": {
45 | "url": "https://github.com/chalk/ansi-styles?sponsor=1"
46 | }
47 | },
48 | "node_modules/asynckit": {
49 | "version": "0.4.0",
50 | "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz",
51 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q=="
52 | },
53 | "node_modules/axios": {
54 | "version": "0.26.1",
55 | "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz",
56 | "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==",
57 | "dependencies": {
58 | "follow-redirects": "^1.14.8"
59 | }
60 | },
61 | "node_modules/base64-js": {
62 | "version": "1.5.1",
63 | "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz",
64 | "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==",
65 | "funding": [
66 | {
67 | "type": "github",
68 | "url": "https://github.com/sponsors/feross"
69 | },
70 | {
71 | "type": "patreon",
72 | "url": "https://www.patreon.com/feross"
73 | },
74 | {
75 | "type": "consulting",
76 | "url": "https://feross.org/support"
77 | }
78 | ]
79 | },
80 | "node_modules/binary-extensions": {
81 | "version": "2.2.0",
82 | "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz",
83 | "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==",
84 | "engines": {
85 | "node": ">=8"
86 | }
87 | },
88 | "node_modules/binary-search": {
89 | "version": "1.3.6",
90 | "resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz",
91 | "integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA=="
92 | },
93 | "node_modules/combined-stream": {
94 | "version": "1.0.8",
95 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz",
96 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==",
97 | "dependencies": {
98 | "delayed-stream": "~1.0.0"
99 | },
100 | "engines": {
101 | "node": ">= 0.8"
102 | }
103 | },
104 | "node_modules/cross-fetch": {
105 | "version": "3.1.6",
106 | "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.6.tgz",
107 | "integrity": "sha512-riRvo06crlE8HiqOwIpQhxwdOk4fOeR7FVM/wXoxchFEqMNUjvbs3bfo4OTgMEMHzppd4DxFBDbyySj8Cv781g==",
108 | "dependencies": {
109 | "node-fetch": "^2.6.11"
110 | }
111 | },
112 | "node_modules/delayed-stream": {
113 | "version": "1.0.0",
114 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz",
115 | "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==",
116 | "engines": {
117 | "node": ">=0.4.0"
118 | }
119 | },
120 | "node_modules/eventemitter3": {
121 | "version": "4.0.7",
122 | "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz",
123 | "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw=="
124 | },
125 | "node_modules/expr-eval": {
126 | "version": "2.0.2",
127 | "resolved": "https://registry.npmjs.org/expr-eval/-/expr-eval-2.0.2.tgz",
128 | "integrity": "sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg=="
129 | },
130 | "node_modules/flat": {
131 | "version": "5.0.2",
132 | "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz",
133 | "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==",
134 | "bin": {
135 | "flat": "cli.js"
136 | }
137 | },
138 | "node_modules/follow-redirects": {
139 | "version": "1.15.2",
140 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz",
141 | "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==",
142 | "funding": [
143 | {
144 | "type": "individual",
145 | "url": "https://github.com/sponsors/RubenVerborgh"
146 | }
147 | ],
148 | "engines": {
149 | "node": ">=4.0"
150 | },
151 | "peerDependenciesMeta": {
152 | "debug": {
153 | "optional": true
154 | }
155 | }
156 | },
157 | "node_modules/form-data": {
158 | "version": "4.0.0",
159 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz",
160 | "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==",
161 | "dependencies": {
162 | "asynckit": "^0.4.0",
163 | "combined-stream": "^1.0.8",
164 | "mime-types": "^2.1.12"
165 | },
166 | "engines": {
167 | "node": ">= 6"
168 | }
169 | },
170 | "node_modules/is-any-array": {
171 | "version": "2.0.1",
172 | "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz",
173 | "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ=="
174 | },
175 | "node_modules/js-tiktoken": {
176 | "version": "1.0.6",
177 | "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.6.tgz",
178 | "integrity": "sha512-lxHntEupgjWvSh37WxpAW4XN6UBXBtFJOpZZq5HN5oNjDfN7L/iJhHOKjyL/DFtuYXUwn5jfTciLtOWpgQmHjQ==",
179 | "dependencies": {
180 | "base64-js": "^1.5.1"
181 | }
182 | },
183 | "node_modules/jsonpointer": {
184 | "version": "5.0.1",
185 | "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz",
186 | "integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==",
187 | "engines": {
188 | "node": ">=0.10.0"
189 | }
190 | },
191 | "node_modules/langchain": {
192 | "version": "0.0.78",
193 | "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.78.tgz",
194 | "integrity": "sha512-AXoai3V1fJyQ2vDSS3KqRJr1VxRoAxX0L1sFeuXGvwyEzfzv6/dDKPJ7K1Onew3Jmfzu23t1qqhwsSMZOmwo7g==",
195 | "dependencies": {
196 | "@anthropic-ai/sdk": "^0.4.3",
197 | "ansi-styles": "^5.0.0",
198 | "binary-extensions": "^2.2.0",
199 | "expr-eval": "^2.0.2",
200 | "flat": "^5.0.2",
201 | "js-tiktoken": "^1.0.6",
202 | "jsonpointer": "^5.0.1",
203 | "ml-distance": "^4.0.0",
204 | "object-hash": "^3.0.0",
205 | "openai": "^3.2.0",
206 | "p-queue": "^6.6.2",
207 | "p-retry": "4",
208 | "uuid": "^9.0.0",
209 | "yaml": "^2.2.1",
210 | "zod": "^3.21.4",
211 | "zod-to-json-schema": "^3.20.4"
212 | },
213 | "engines": {
214 | "node": ">=18"
215 | },
216 | "peerDependencies": {
217 | "@aws-sdk/client-dynamodb": "^3.310.0",
218 | "@aws-sdk/client-lambda": "^3.310.0",
219 | "@aws-sdk/client-s3": "^3.310.0",
220 | "@aws-sdk/client-sagemaker-runtime": "^3.310.0",
221 | "@clickhouse/client": "^0.0.14",
222 | "@getmetal/metal-sdk": "*",
223 | "@huggingface/inference": "^1.5.1",
224 | "@opensearch-project/opensearch": "*",
225 | "@pinecone-database/pinecone": "*",
226 | "@supabase/supabase-js": "^2.10.0",
227 | "@tensorflow-models/universal-sentence-encoder": "*",
228 | "@tensorflow/tfjs-converter": "*",
229 | "@tensorflow/tfjs-core": "*",
230 | "@zilliz/milvus2-sdk-node": "^2.2.0",
231 | "apify-client": "^2.7.1",
232 | "axios": "*",
233 | "cheerio": "^1.0.0-rc.12",
234 | "chromadb": "^1.4.0",
235 | "cohere-ai": "^5.0.2",
236 | "d3-dsv": "^2.0.0",
237 | "epub2": "^3.0.1",
238 | "faiss-node": "^0.1.1",
239 | "hnswlib-node": "^1.4.2",
240 | "html-to-text": "^9.0.5",
241 | "mammoth": "*",
242 | "meriyah": "*",
243 | "mongodb": "^5.2.0",
244 | "pdf-parse": "1.1.1",
245 | "pickleparser": "^0.1.0",
246 | "playwright": "^1.32.1",
247 | "puppeteer": "^19.7.2",
248 | "redis": "^4.6.4",
249 | "replicate": "^0.9.0",
250 | "srt-parser-2": "^1.2.2",
251 | "typeorm": "^0.3.12",
252 | "weaviate-ts-client": "^1.0.0"
253 | },
254 | "peerDependenciesMeta": {
255 | "@aws-sdk/client-dynamodb": {
256 | "optional": true
257 | },
258 | "@aws-sdk/client-lambda": {
259 | "optional": true
260 | },
261 | "@aws-sdk/client-s3": {
262 | "optional": true
263 | },
264 | "@aws-sdk/client-sagemaker-runtime": {
265 | "optional": true
266 | },
267 | "@clickhouse/client": {
268 | "optional": true
269 | },
270 | "@getmetal/metal-sdk": {
271 | "optional": true
272 | },
273 | "@huggingface/inference": {
274 | "optional": true
275 | },
276 | "@opensearch-project/opensearch": {
277 | "optional": true
278 | },
279 | "@pinecone-database/pinecone": {
280 | "optional": true
281 | },
282 | "@supabase/supabase-js": {
283 | "optional": true
284 | },
285 | "@tensorflow-models/universal-sentence-encoder": {
286 | "optional": true
287 | },
288 | "@tensorflow/tfjs-converter": {
289 | "optional": true
290 | },
291 | "@tensorflow/tfjs-core": {
292 | "optional": true
293 | },
294 | "@zilliz/milvus2-sdk-node": {
295 | "optional": true
296 | },
297 | "apify-client": {
298 | "optional": true
299 | },
300 | "axios": {
301 | "optional": true
302 | },
303 | "cheerio": {
304 | "optional": true
305 | },
306 | "chromadb": {
307 | "optional": true
308 | },
309 | "cohere-ai": {
310 | "optional": true
311 | },
312 | "d3-dsv": {
313 | "optional": true
314 | },
315 | "epub2": {
316 | "optional": true
317 | },
318 | "faiss-node": {
319 | "optional": true
320 | },
321 | "hnswlib-node": {
322 | "optional": true
323 | },
324 | "html-to-text": {
325 | "optional": true
326 | },
327 | "mammoth": {
328 | "optional": true
329 | },
330 | "meriyah": {
331 | "optional": true
332 | },
333 | "mongodb": {
334 | "optional": true
335 | },
336 | "pdf-parse": {
337 | "optional": true
338 | },
339 | "pickleparser": {
340 | "optional": true
341 | },
342 | "playwright": {
343 | "optional": true
344 | },
345 | "puppeteer": {
346 | "optional": true
347 | },
348 | "redis": {
349 | "optional": true
350 | },
351 | "replicate": {
352 | "optional": true
353 | },
354 | "srt-parser-2": {
355 | "optional": true
356 | },
357 | "typeorm": {
358 | "optional": true
359 | },
360 | "weaviate-ts-client": {
361 | "optional": true
362 | }
363 | }
364 | },
365 | "node_modules/mime-db": {
366 | "version": "1.52.0",
367 | "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz",
368 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==",
369 | "engines": {
370 | "node": ">= 0.6"
371 | }
372 | },
373 | "node_modules/mime-types": {
374 | "version": "2.1.35",
375 | "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz",
376 | "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==",
377 | "dependencies": {
378 | "mime-db": "1.52.0"
379 | },
380 | "engines": {
381 | "node": ">= 0.6"
382 | }
383 | },
384 | "node_modules/ml-array-mean": {
385 | "version": "1.1.6",
386 | "resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz",
387 | "integrity": "sha512-MIdf7Zc8HznwIisyiJGRH9tRigg3Yf4FldW8DxKxpCCv/g5CafTw0RRu51nojVEOXuCQC7DRVVu5c7XXO/5joQ==",
388 | "dependencies": {
389 | "ml-array-sum": "^1.1.6"
390 | }
391 | },
392 | "node_modules/ml-array-sum": {
393 | "version": "1.1.6",
394 | "resolved": "https://registry.npmjs.org/ml-array-sum/-/ml-array-sum-1.1.6.tgz",
395 | "integrity": "sha512-29mAh2GwH7ZmiRnup4UyibQZB9+ZLyMShvt4cH4eTK+cL2oEMIZFnSyB3SS8MlsTh6q/w/yh48KmqLxmovN4Dw==",
396 | "dependencies": {
397 | "is-any-array": "^2.0.0"
398 | }
399 | },
400 | "node_modules/ml-distance": {
401 | "version": "4.0.0",
402 | "resolved": "https://registry.npmjs.org/ml-distance/-/ml-distance-4.0.0.tgz",
403 | "integrity": "sha512-zj7+UGZpHk3uL7n79XTfGNUjIGnhLn8xVvrxYvBHvXFxo3jq1q+/UjP311hZxnLVhbxbXCjUniThX8gozjacYA==",
404 | "dependencies": {
405 | "ml-array-mean": "^1.1.6",
406 | "ml-distance-euclidean": "^2.0.0",
407 | "ml-tree-similarity": "^1.0.0"
408 | }
409 | },
410 | "node_modules/ml-distance-euclidean": {
411 | "version": "2.0.0",
412 | "resolved": "https://registry.npmjs.org/ml-distance-euclidean/-/ml-distance-euclidean-2.0.0.tgz",
413 | "integrity": "sha512-yC9/2o8QF0A3m/0IXqCTXCzz2pNEzvmcE/9HFKOZGnTjatvBbsn4lWYJkxENkA4Ug2fnYl7PXQxnPi21sgMy/Q=="
414 | },
415 | "node_modules/ml-tree-similarity": {
416 | "version": "1.0.0",
417 | "resolved": "https://registry.npmjs.org/ml-tree-similarity/-/ml-tree-similarity-1.0.0.tgz",
418 | "integrity": "sha512-XJUyYqjSuUQkNQHMscr6tcjldsOoAekxADTplt40QKfwW6nd++1wHWV9AArl0Zvw/TIHgNaZZNvr8QGvE8wLRg==",
419 | "dependencies": {
420 | "binary-search": "^1.3.5",
421 | "num-sort": "^2.0.0"
422 | }
423 | },
424 | "node_modules/node-fetch": {
425 | "version": "2.6.11",
426 | "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.11.tgz",
427 | "integrity": "sha512-4I6pdBY1EthSqDmJkiNk3JIT8cswwR9nfeW/cPdUagJYEQG7R95WRH74wpz7ma8Gh/9dI9FP+OU+0E4FvtA55w==",
428 | "dependencies": {
429 | "whatwg-url": "^5.0.0"
430 | },
431 | "engines": {
432 | "node": "4.x || >=6.0.0"
433 | },
434 | "peerDependencies": {
435 | "encoding": "^0.1.0"
436 | },
437 | "peerDependenciesMeta": {
438 | "encoding": {
439 | "optional": true
440 | }
441 | }
442 | },
443 | "node_modules/num-sort": {
444 | "version": "2.1.0",
445 | "resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz",
446 | "integrity": "sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==",
447 | "engines": {
448 | "node": ">=8"
449 | },
450 | "funding": {
451 | "url": "https://github.com/sponsors/sindresorhus"
452 | }
453 | },
454 | "node_modules/object-hash": {
455 | "version": "3.0.0",
456 | "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz",
457 | "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==",
458 | "engines": {
459 | "node": ">= 6"
460 | }
461 | },
462 | "node_modules/openai": {
463 | "version": "3.2.1",
464 | "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz",
465 | "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==",
466 | "dependencies": {
467 | "axios": "^0.26.0",
468 | "form-data": "^4.0.0"
469 | }
470 | },
471 | "node_modules/p-finally": {
472 | "version": "1.0.0",
473 | "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz",
474 | "integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==",
475 | "engines": {
476 | "node": ">=4"
477 | }
478 | },
479 | "node_modules/p-queue": {
480 | "version": "6.6.2",
481 | "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz",
482 | "integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==",
483 | "dependencies": {
484 | "eventemitter3": "^4.0.4",
485 | "p-timeout": "^3.2.0"
486 | },
487 | "engines": {
488 | "node": ">=8"
489 | },
490 | "funding": {
491 | "url": "https://github.com/sponsors/sindresorhus"
492 | }
493 | },
494 | "node_modules/p-retry": {
495 | "version": "4.6.2",
496 | "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz",
497 | "integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==",
498 | "dependencies": {
499 | "@types/retry": "0.12.0",
500 | "retry": "^0.13.1"
501 | },
502 | "engines": {
503 | "node": ">=8"
504 | }
505 | },
506 | "node_modules/p-timeout": {
507 | "version": "3.2.0",
508 | "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz",
509 | "integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==",
510 | "dependencies": {
511 | "p-finally": "^1.0.0"
512 | },
513 | "engines": {
514 | "node": ">=8"
515 | }
516 | },
517 | "node_modules/retry": {
518 | "version": "0.13.1",
519 | "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz",
520 | "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==",
521 | "engines": {
522 | "node": ">= 4"
523 | }
524 | },
525 | "node_modules/tr46": {
526 | "version": "0.0.3",
527 | "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz",
528 | "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw=="
529 | },
530 | "node_modules/uuid": {
531 | "version": "9.0.0",
532 | "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz",
533 | "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==",
534 | "bin": {
535 | "uuid": "dist/bin/uuid"
536 | }
537 | },
538 | "node_modules/webidl-conversions": {
539 | "version": "3.0.1",
540 | "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz",
541 | "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ=="
542 | },
543 | "node_modules/whatwg-url": {
544 | "version": "5.0.0",
545 | "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz",
546 | "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==",
547 | "dependencies": {
548 | "tr46": "~0.0.3",
549 | "webidl-conversions": "^3.0.0"
550 | }
551 | },
552 | "node_modules/yaml": {
553 | "version": "2.2.2",
554 | "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.2.2.tgz",
555 | "integrity": "sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==",
556 | "engines": {
557 | "node": ">= 14"
558 | }
559 | },
560 | "node_modules/zod": {
561 | "version": "3.21.4",
562 | "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz",
563 | "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==",
564 | "funding": {
565 | "url": "https://github.com/sponsors/colinhacks"
566 | }
567 | },
568 | "node_modules/zod-to-json-schema": {
569 | "version": "3.21.1",
570 | "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.1.tgz",
571 | "integrity": "sha512-y5g0MPxDq+YG/T+cHGPYH4PcBpyCqwK6wxeJ76MR563y0gk/14HKfebq8xHiItY7lkc9GDFygCnkvNDTvAhYAg==",
572 | "peerDependencies": {
573 | "zod": "^3.21.4"
574 | }
575 | }
576 | }
577 | }
578 |
--------------------------------------------------------------------------------
/examples/node-langchain/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "langchain-llmatic-example",
3 | "private": true,
4 | "version": "0.0.0",
5 | "description": "LangChain.js example with LLMatic",
6 | "scripts": {
7 | "start": "node index.mjs"
8 | },
9 | "author": "Fardjad Davari ",
10 | "license": "UNLICENSED",
11 | "dependencies": {
12 | "langchain": "^0.0.78"
13 | }
14 | }
15 |
--------------------------------------------------------------------------------
/examples/python-langchain/main.py:
--------------------------------------------------------------------------------
1 | import os
2 |
3 | os.environ["OPENAI_API_KEY"] = "ANYTHING_WILL_DO"
4 | os.environ["OPENAI_API_BASE"] = "http://127.0.0.1:3000/v1"
5 |
6 | from langchain import OpenAI, LLMChain, PromptTemplate
7 | from langchain.memory import ConversationBufferMemory
8 |
9 | model = OpenAI(temperature=0, model_name="Ignored")
10 |
11 | template = """A chat between a curious user and an artificial intelligence assistant.
12 | The assistant gives helpful, detailed, and polite answers to the user's questions.
13 |
14 | {history}
15 | Human: {human_input}
16 | AI:"""
17 |
18 | prompt = PromptTemplate(
19 | input_variables=["history", "human_input"],
20 | template=template
21 | )
22 |
23 | chatgpt_chain = LLMChain(
24 | llm=model,
25 | prompt=prompt,
26 | verbose=True,
27 | memory=ConversationBufferMemory(),
28 | )
29 |
30 | print(chatgpt_chain.predict(human_input="Remember that this is a demo of LLMatic with LangChain."))
31 | print(chatgpt_chain.predict(human_input="What did I ask you to remember?"))
--------------------------------------------------------------------------------
/examples/python-langchain/requirements.txt:
--------------------------------------------------------------------------------
1 | openai==0.27.7
2 | langchain==0.0.169
--------------------------------------------------------------------------------
/media/chatbot-ui.gif:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fardjad/node-llmatic/ff59eb04acced04224b5ae615c9a9578c6422a88/media/chatbot-ui.gif
--------------------------------------------------------------------------------
/media/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/fardjad/node-llmatic/ff59eb04acced04224b5ae615c9a9578c6422a88/media/logo.png
--------------------------------------------------------------------------------
/package.json:
--------------------------------------------------------------------------------
1 | {
2 | "name": "llmatic",
3 | "version": "0.4.242",
4 | "description": "Use self-hosted LLMs with an OpenAI compatible API",
5 | "exports": {
6 | "./llm-adapter": {
7 | "types": "./dist/llm-adapter.d.ts",
8 | "import": "./dist/llm-adapter.js",
9 | "require": "./dist/llm-adapter.cjs"
10 | }
11 | },
12 | "typesVersions": {
13 | "*": {
14 | "llm-adapter": [
15 | "./dist/llm-adapter.d.ts"
16 | ]
17 | }
18 | },
19 | "type": "module",
20 | "keywords": [
21 | "openai",
22 | "api",
23 | "llm",
24 | "llama"
25 | ],
26 | "repository": "git@github.com:fardjad/node-llmatic.git",
27 | "license": "MIT",
28 | "author": "Fardjad Davari ",
29 | "bin": {
30 | "llmatic": "./bin/llmatic",
31 | "llmatic.js": "./bin/llmatic.js"
32 | },
33 | "files": [
34 | "README.md",
35 | "dist/llm-adapter.cjs",
36 | "dist/llm-adapter.js",
37 | "dist/llm-adapter.d.ts",
38 | "bin/llmatic",
39 | "bin/llmatic.js",
40 | "api.oas.yml",
41 | "public/index.html",
42 | "src/",
43 | "!src/test-support",
44 | "!**/*.test.*"
45 | ],
46 | "scripts": {
47 | "fetch-openai-oas": "node ./scripts/fetch-openai-oas.mjs",
48 | "fix": "prettier --write . && eslint --ext .mjs --ext .ts --fix .",
49 | "lint": "eslint --ext .mjs --ext .ts .",
50 | "pretest": "tsc",
51 | "posttest": "npm run lint",
52 | "test:base": "tsx --test --test-reporter spec src/**/*.test.ts",
53 | "test": "npm run test:base",
54 | "test:watch": "nodemon -q -e js,cjs,mjs,ts,cts,mts,tsx,yml,json --exec \"npm run test:base\"",
55 | "prepare": "tsup src/llm-adapter.ts --format esm,cjs --dts --minify --clean --silent"
56 | },
57 | "dependencies": {
58 | "@fastify/static": "^7.0.2",
59 | "@inquirer/prompts": "^4.3.1",
60 | "@inquirer/select": "^2.2.1",
61 | "@llama-node/llama-cpp": "^0.1.6",
62 | "@stoplight/json-ref-resolver": "^3.1.6",
63 | "ajv": "^8.12.0",
64 | "awilix": "^10.0.1",
65 | "commander": "^12.0.0",
66 | "fastify": "^4.26.2",
67 | "fastify-openapi-glue": "^4.5.0",
68 | "glob": "^10.3.12",
69 | "llama-node": "^0.1.6",
70 | "nodejs-file-downloader": "^4.12.1",
71 | "short-uuid": "^4.2.2",
72 | "swagger-ui-dist": "^5.13.0",
73 | "traverse": "^0.6.8",
74 | "tsx": "^4.7.1",
75 | "valid-filename": "^4.0.0"
76 | },
77 | "devDependencies": {
78 | "@trivago/prettier-plugin-sort-imports": "^4.3.0",
79 | "@types/js-yaml": "^4.0.9",
80 | "@types/node": "^20.12.2",
81 | "@types/swagger-ui-dist": "^3.30.4",
82 | "@types/traverse": "^0.6.36",
83 | "@typescript-eslint/eslint-plugin": "^7.4.0",
84 | "@typescript-eslint/parser": "^7.4.0",
85 | "change-case": "^5.4.3",
86 | "earl": "^1.1.0",
87 | "eslint": "^8.57.0",
88 | "eslint-config-prettier": "^9.1.0",
89 | "eslint-config-xo": "^0.44.0",
90 | "eslint-config-xo-typescript": "^4.0.0",
91 | "eslint-plugin-unicorn": "^51.0.1",
92 | "http-status-codes": "^2.3.0",
93 | "jsonpath-plus": "^8.1.0",
94 | "nodemon": "^3.1.0",
95 | "npm-check-updates": "^16.14.18",
96 | "prettier": "^3.2.5",
97 | "quicktype-core": "^23.0.115",
98 | "tsup": "^8.0.2",
99 | "typescript": "^5.4.3"
100 | },
101 | "overrides": {
102 | "@trivago/prettier-plugin-sort-imports": {
103 | "prettier": ">=3"
104 | }
105 | }
106 | }
107 |
--------------------------------------------------------------------------------
/public/index.html:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 | Swagger UI
6 |
7 |
8 |
14 |
20 |
21 |
22 |
23 |
24 |
25 |
29 |
46 |
47 |
48 |
--------------------------------------------------------------------------------
/scripts/fetch-openai-oas.mjs:
--------------------------------------------------------------------------------
1 | import fs from "node:fs";
2 | import prettier from "prettier";
3 |
4 | const OPENAI_OAS_URL =
5 | "https://raw.githubusercontent.com/openai/openai-openapi/master/openapi.yaml";
6 |
7 | const response = await fetch(OPENAI_OAS_URL, {
8 | redirect: "follow",
9 | });
10 |
11 | const text = await response.text();
12 | const formattedText = await prettier.format(text, { parser: "yaml" });
13 |
14 | fs.writeFileSync(new URL("../api.oas.yml", import.meta.url), formattedText, {
15 | encoding: "utf8",
16 | });
17 |
--------------------------------------------------------------------------------
/scripts/generate-types.mjs:
--------------------------------------------------------------------------------
1 | /* eslint-disable no-await-in-loop */
2 | import { Resolver } from "@stoplight/json-ref-resolver";
3 | import { kebabCase, pascalCase } from "change-case";
4 | import "http-status-codes";
5 | import { getReasonPhrase } from "http-status-codes";
6 | import yaml from "js-yaml";
7 | import { JSONPath as jsonPath } from "jsonpath-plus";
8 | import fs from "node:fs";
9 | import {
10 | quicktype,
11 | InputData,
12 | JSONSchemaInput,
13 | FetchingJSONSchemaStore,
14 | } from "quicktype-core";
15 |
16 | const typesDirectory = new URL("../src/types", import.meta.url);
17 | fs.mkdirSync(typesDirectory, { recursive: true });
18 |
19 | const oas = yaml.load(
20 | fs.readFileSync(new URL("../api.oas.yml", import.meta.url), "utf8"),
21 | );
22 | const resolver = new Resolver();
23 | const { result: resolvedOas } = await resolver.resolve(oas);
24 |
25 | const quicktypeJSONSchema = async (sources) => {
26 | const schemaInput = new JSONSchemaInput(new FetchingJSONSchemaStore());
27 | for (const { name, schema } of sources) {
28 | await schemaInput.addSource({ name, schema });
29 | }
30 |
31 | const inputData = new InputData();
32 | inputData.addInput(schemaInput);
33 |
34 | return quicktype({
35 | inputData,
36 | lang: "typescript",
37 | rendererOptions: {
38 | "just-types": true,
39 | "runtime-typecheck": false,
40 | "prefer-types": true,
41 | },
42 | });
43 | };
44 |
45 | const operationIds = [
46 | "createChatCompletion",
47 | "createCompletion",
48 | "createEmbedding",
49 | "listModels",
50 | "retrieveModel",
51 | ];
52 |
53 | for (const operationId of operationIds) {
54 | const operation = jsonPath({
55 | path: `$.paths.*[?(@.operationId === '${operationId}')]`,
56 | json: resolvedOas,
57 | })[0];
58 |
59 | const requestBodySchema = jsonPath({
60 | path: "$.requestBody.content['application/json'].schema",
61 | json: operation,
62 | })[0];
63 |
64 | const statusCodes = jsonPath({
65 | path: "$.responses.*.content['application/json'].schema^^^~",
66 | json: operation,
67 | });
68 |
69 | const responseBodySchemas = jsonPath({
70 | path: "$.responses.*.content['application/json'].schema",
71 | json: operation,
72 | });
73 |
74 | const responseBodySchemaPairs = statusCodes.map((statusCode, index) => ({
75 | statusCode,
76 | schema: responseBodySchemas[index],
77 | }));
78 |
79 | const requestSource = {
80 | name: `${operationId}Request`,
81 | schema: JSON.stringify(requestBodySchema),
82 | };
83 |
84 | const responseSources = responseBodySchemaPairs.map(
85 | ({ statusCode, schema }) => ({
86 | name: pascalCase(
87 | `${operationId} ${getReasonPhrase(statusCode)} Response`,
88 | ),
89 | schema: JSON.stringify(schema),
90 | }),
91 | );
92 |
93 | const sources = [requestSource, ...responseSources].filter(
94 | (source) => source.schema !== undefined,
95 | );
96 |
97 | const { lines } = await quicktypeJSONSchema(sources);
98 | const fileName = new URL(`${typesDirectory}/${kebabCase(operationId)}.ts`);
99 | fs.writeFileSync(fileName, lines.join("\n"), "utf8");
100 | }
101 |
--------------------------------------------------------------------------------
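The least obvious part of the script is the jsonpath-plus expression ending in schema^^^~: each ^ moves from the matched schema node up to its parent, and ~ returns that ancestor's property name, so three ^ land on the status-code entry and ~ yields the code that getReasonPhrase() then turns into a response type name. A standalone sketch of that behaviour on a made-up document:

import { JSONPath as jsonPath } from "jsonpath-plus";

// Shape mirrors a single operation's "responses" object in the OAS.
const operation = {
  responses: {
    200: { content: { "application/json": { schema: { type: "object" } } } },
  },
};

// schema -> "application/json" -> content -> "200"; "~" returns that key.
const statusCodes = jsonPath({
  path: "$.responses.*.content['application/json'].schema^^^~",
  json: operation,
});

console.log(statusCodes); // ["200"] is the expected output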
/src/cli/cli-utils.test.ts:
--------------------------------------------------------------------------------
1 | import * as cliUtils from "./cli-utils.ts";
2 | import assert from "node:assert";
3 | import path from "node:path";
4 | import { test } from "node:test";
5 | import { fileURLToPath } from "node:url";
6 |
7 | await test("readPackageJson", async () => {
8 | const { version } = await cliUtils.readPackageJson();
9 | assert.strictEqual(typeof version, "string");
10 | });
11 |
12 | await test("fileExists", async (t) => {
13 | await t.test("file exists", async () => {
14 | const exists = await cliUtils.fileExists(new URL(import.meta.url));
15 | assert.strictEqual(exists, true);
16 | });
17 |
18 | await t.test("file does not exist", async () => {
19 | const exists = await cliUtils.fileExists("/does/not/exist");
20 | assert.strictEqual(exists, false);
21 | });
22 | });
23 |
24 | await test("invokeInDirectory", async (t) => {
25 | const newPath = path.resolve(fileURLToPath(new URL("../", import.meta.url)));
26 |
27 | await t.test(
28 | "should invoke a function in the specified directory",
29 | async () => {
30 | const cwd = process.cwd();
31 | const result = await cliUtils.invokeInDirectory(
32 | newPath,
33 | (previousWorkingDirectory, currentWorkingDirectory) => {
34 | assert.strictEqual(previousWorkingDirectory, cwd);
35 | assert.strictEqual(currentWorkingDirectory, process.cwd());
36 |
37 | return process.cwd();
38 | },
39 | );
40 | assert.strictEqual(process.cwd(), cwd);
41 | assert.strictEqual(result, newPath);
42 | },
43 | );
44 |
45 | await t.test("should await promises before returning a result", async () => {
46 | let count = 0;
47 |
48 | const result = await cliUtils.invokeInDirectory(newPath, async () => {
49 | await new Promise((resolve) => {
50 | setTimeout(resolve, 0);
51 | });
52 |
53 | count += 1;
54 |
55 | return count;
56 | });
57 |
58 | assert.strictEqual(count, 1);
59 | assert.strictEqual(result, count);
60 | });
61 | });
62 |
63 | await test("importFile", async () => {
64 | const importedModule = await cliUtils.importFile(
65 | fileURLToPath(new URL("cli-utils.ts", import.meta.url)),
66 | );
67 | assert.strictEqual(importedModule, cliUtils);
68 | });
69 |
--------------------------------------------------------------------------------
/src/cli/cli-utils.ts:
--------------------------------------------------------------------------------
1 | import fs from "node:fs";
2 | import path from "node:path";
3 |
4 | export const readPackageJson = async () => {
5 | const packageJsonPath = new URL("../../package.json", import.meta.url);
6 |
7 | return JSON.parse(
8 | await fs.promises.readFile(packageJsonPath, { encoding: "utf8" }),
9 | ) as {
10 | [key: string]: unknown;
11 |
12 | version: string;
13 | description: string;
14 | };
15 | };
16 |
17 | export const fileExists = async (path: URL | string) =>
18 | Boolean(await fs.promises.stat(path).catch(() => false));
19 |
20 | export const invokeInDirectory = async <T>(
21 | directory: string,
22 | callback: (
23 | previousWorkingDirectory: string,
24 | currentWorkingDirectory: string,
25 | ) => T,
26 | ) => {
27 | const cwd = process.cwd();
28 | process.chdir(directory);
29 |
30 | return Promise.resolve(callback(cwd, directory)).finally(() => {
31 | process.chdir(cwd);
32 | });
33 | };
34 |
35 | export const importFile = async <T>(filePath: string): Promise<T> => {
36 | const resolvedPath = path.resolve(filePath);
37 | const fileDirectory = path.dirname(resolvedPath);
38 | return invokeInDirectory(fileDirectory, async () =>
39 | import(resolvedPath).then((module) => (module.default ?? module) as T),
40 | );
41 | };
42 |
--------------------------------------------------------------------------------
/src/cli/common-options.ts:
--------------------------------------------------------------------------------
1 | import { Option } from "commander";
2 | import { fileURLToPath } from "node:url";
3 |
4 | export const llmAdapterOption = new Option(
5 | "-a, --llm-adapter <path>",
6 | "llm adapter path",
7 | ).default(fileURLToPath(new URL("../default-llm-adapter.ts", import.meta.url)));
8 |
--------------------------------------------------------------------------------
/src/cli/llmatic-config.js:
--------------------------------------------------------------------------------
1 | import { importFile, readPackageJson } from "./cli-utils.ts";
2 | import { llmAdapterOption } from "./common-options.ts";
3 | import { input } from "@inquirer/prompts";
4 | import select, { Separator } from "@inquirer/select";
5 | import { program } from "commander";
6 | import fs from "node:fs";
7 | import path from "node:path";
8 | import Downloader from "nodejs-file-downloader";
9 | import isValidFilename from "valid-filename";
10 |
11 | const downloadFile = (url, fileName) => {
12 | const downloader = new Downloader({
13 | url,
14 | directory: "./models",
15 | fileName,
16 | skipExistingFileName: true,
17 | maxAttempts: 3,
18 | shouldStop(error) {
19 | if (error.statusCode && error.statusCode === 404) {
20 | return true;
21 | }
22 | },
23 | onProgress(percentage) {
24 | process.stdout.write(
25 | `\r${String(Number(percentage).toFixed(2)).padStart(6, "0")}%`,
26 | );
27 | },
28 | });
29 |
30 | return downloader.download();
31 | };
32 |
33 | const menu = async (llmDefaultConfig) => {
34 | const answer = await select({
35 | message: "What do you want to do?",
36 | choices: [
37 | {
38 | name: "Download a model",
39 | value: "download",
40 | },
41 | {
42 | name: "Generate a config file",
43 | value: "generateConfig",
44 | },
45 | new Separator(),
46 | {
47 | name: "Exit",
48 | value: "exit",
49 | },
50 | ],
51 | });
52 |
53 | if (answer === "download") {
54 | return downloadModel();
55 | }
56 |
57 | if (answer === "generateConfig") {
58 | return generateConfig(llmDefaultConfig);
59 | }
60 | };
61 |
62 | const generateConfig = async (llmDefaultConfig) => {
63 | const files = await fs.promises.readdir("./models");
64 | const binFiles = files.filter((file) => path.extname(file) === ".bin");
65 |
66 | if (binFiles.length === 0) {
67 | console.log("\n\n❌ No models found in ./models\n\n");
68 | return menu(llmDefaultConfig);
69 | }
70 |
71 | const choices = binFiles.map((file) => ({
72 | value: file,
73 | }));
74 |
75 | const answer = await select({
76 | message: "Select a model:",
77 | choices,
78 | });
79 |
80 | const modelPath = `./models/${answer}`;
81 | const llmConfig = {
82 | ...llmDefaultConfig,
83 | modelPath,
84 | };
85 |
86 | await fs.promises.writeFile(
87 | "./llmatic.config.json",
88 | JSON.stringify(llmConfig, null, 2),
89 | );
90 |
91 | console.log("\n\n📝 Generated config file: llmatic.config.json\n\n");
92 | };
93 |
94 | const downloadModel = async () => {
95 | const url = await input({
96 | message: "Enter the model URL (the full address to a GGML .bin file):",
97 | validate(value) {
98 | try {
99 | // eslint-disable-next-line no-new
100 | new URL(value);
101 | return true;
102 | } catch {
103 | return "Please enter a valid URL";
104 | }
105 | },
106 | });
107 |
108 | const suggestedFileName = new URL(url).pathname.split("/").pop();
109 | const fileName = await input({
110 | message: "Enter the file name (will skip download if file exists):",
111 | default: suggestedFileName,
112 | validate(value) {
113 | if (!isValidFilename(value)) {
114 | return "Please enter a valid file name";
115 | }
116 |
117 | if (path.extname(value) !== ".bin") {
118 | return "File name must end with .bin";
119 | }
120 |
121 | return true;
122 | },
123 | });
124 |
125 | await downloadFile(url, fileName);
126 | };
127 |
128 | const { version } = await readPackageJson();
129 |
130 | program
131 | .version(version)
132 | .description("Configure LLMatic")
133 | .addOption(llmAdapterOption)
134 | .action(async ({ llmAdapter: llmAdapterPath }) => {
135 | const llmAdapter = await importFile(llmAdapterPath);
136 | return menu(llmAdapter.defaultConfig);
137 | });
138 |
139 | await program.parseAsync(process.argv);
140 |
--------------------------------------------------------------------------------
/src/cli/llmatic-start.js:
--------------------------------------------------------------------------------
1 | /* eslint-disable unicorn/no-process-exit */
2 | import { createContainer, diTokens } from "../container.ts";
3 | import { fileExists, importFile, readPackageJson } from "./cli-utils.ts";
4 | import { llmAdapterOption } from "./common-options.ts";
5 | import awilix from "awilix";
6 | import { Option, program } from "commander";
7 | import fs from "node:fs";
8 |
9 | const { version } = await readPackageJson();
10 |
11 | program
12 | .version(version)
13 | .description("Start LLMatic server")
14 | .addOption(
15 | new Option("-c, --config [path]", "config file path").default(
16 | "llmatic.config.json",
17 | ),
18 | )
19 | .addOption(llmAdapterOption)
20 | .addOption(
21 | new Option("-p --port [port]", "port to listen on").default("3000"),
22 | )
23 | .addOption(
24 | new Option("-h --host [host]", "host to listen on").default("localhost"),
25 | )
26 | .action(
27 | async ({
28 | llmAdapter: llmAdapterPath,
29 | config: configFilePath,
30 | port,
31 | host,
32 | }) => {
33 | if (!(await fileExists(configFilePath))) {
34 | console.error(`File ${configFilePath} not found.`);
35 | process.exit(1);
36 | }
37 |
38 | const llmConfig = JSON.parse(
39 | await fs.promises.readFile(configFilePath, "utf8"),
40 | );
41 |
42 | const container = await createContainer([
43 | {
44 | token: diTokens.llmConfig,
45 | resolver() {
46 | return awilix.asValue(llmConfig);
47 | },
48 | },
49 | {
50 | token: diTokens.llmAdapter,
51 | async resolver() {
52 | const LLMAdapterConstructor = await importFile(llmAdapterPath);
53 | const llmAdapter = new LLMAdapterConstructor(llmConfig);
54 | return awilix.asValue(llmAdapter);
55 | },
56 | },
57 | ]);
58 | const fastifyServer = container.resolve(diTokens.fastifyServer);
59 | await fastifyServer.listen({ port: Number(port), host });
60 | },
61 | );
62 |
63 | await program.parseAsync(process.argv);
64 |
--------------------------------------------------------------------------------
/src/cli/llmatic.ts:
--------------------------------------------------------------------------------
1 | import { readPackageJson } from "./cli-utils.ts";
2 | import { program } from "commander";
3 |
4 | const { version, description } = await readPackageJson();
5 |
6 | program
7 | .version(version)
8 | .description(description)
9 | .command("config", "configure LLMatic")
10 | .command("start", "start LLMatic server");
11 |
12 | await program.parseAsync(process.argv);
13 |
--------------------------------------------------------------------------------
/src/container.ts:
--------------------------------------------------------------------------------
1 | import { createFastifyServer } from "./fastify-server-factory.ts";
2 | import type { LlmAdapter } from "./llm-adapter.ts";
3 | import { SseHelper } from "./sse-helper.ts";
4 | import awilix from "awilix";
5 |
6 | export type Cradle = {
7 | container: awilix.AwilixContainer;
8 | llmConfig: unknown;
9 | llmAdapter: LlmAdapter;
10 | sseHelper: SseHelper;
11 | fastifyServer: Awaited<ReturnType<typeof createFastifyServer>>;
12 | };
13 |
14 | /**
15 | * Use these tokens for registrations and resolutions to avoid the problems of
16 | * hardcoded strings.
17 | */
18 | export const diTokens: { [k in keyof Cradle]: k } = {
19 | container: "container",
20 | llmConfig: "llmConfig",
21 | llmAdapter: "llmAdapter",
22 | sseHelper: "sseHelper",
23 | fastifyServer: "fastifyServer",
24 | };
25 |
26 | export type ContainerRegistration = {
27 | token: keyof Cradle;
28 | resolver: () => Promise<awilix.Resolver<unknown>> | awilix.Resolver<unknown>;
29 | };
30 |
31 | export const applyOverrides = (
32 | registrations: ContainerRegistration[],
33 | registrationOverrides: ContainerRegistration[],
34 | ) => {
35 | const registrationOverridesCopy = [...registrationOverrides];
36 |
37 | const result: ContainerRegistration[] = [];
38 |
39 | for (const { token, resolver } of registrations) {
40 | const overrideIndex = registrationOverridesCopy.findIndex(
41 | (override) => override.token === token,
42 | );
43 | if (overrideIndex === -1) {
44 | result.push({ token, resolver });
45 | } else {
46 | const override = registrationOverridesCopy.splice(overrideIndex, 1)[0];
47 | result.push({ token, resolver: override.resolver });
48 | }
49 | }
50 |
51 | for (const override of registrationOverridesCopy) {
52 | result.push(override);
53 | }
54 |
55 | return result;
56 | };
57 |
58 | /**
59 | * Create and configure the Awilix container. Async resolvers and overrides
60 | * are supported (can be useful for testing).
61 | */
62 | export const createContainer = async (
63 | registrationOverrides: ContainerRegistration[] = [],
64 | ) => {
65 | const container = awilix.createContainer({
66 | injectionMode: awilix.InjectionMode.PROXY,
67 | });
68 |
69 | const orderedRegistrations: ContainerRegistration[] = [
70 | {
71 | token: diTokens.container,
72 | resolver: () => awilix.asValue(container),
73 | },
74 | {
75 | token: diTokens.sseHelper,
76 | resolver: () =>
77 | awilix.asClass(SseHelper, { lifetime: awilix.Lifetime.SINGLETON }),
78 | },
79 | {
80 | token: diTokens.llmConfig,
81 | resolver() {
82 | throw new Error("llmConfig must be overridden");
83 | },
84 | },
85 | {
86 | token: diTokens.llmAdapter,
87 | resolver() {
88 | throw new Error("llmAdapter must be overridden");
89 | },
90 | },
91 | {
92 | token: diTokens.fastifyServer,
93 | resolver: async () =>
94 | awilix.asValue(await createFastifyServer(container.cradle)),
95 | },
96 | ];
97 |
98 | const newRegistrations = applyOverrides(
99 | orderedRegistrations,
100 | registrationOverrides,
101 | );
102 |
103 | for (const { token, resolver } of newRegistrations) {
104 | // eslint-disable-next-line no-await-in-loop
105 | container.register({ [token]: await Promise.resolve(resolver()) });
106 | }
107 |
108 | return container;
109 | };
110 |
--------------------------------------------------------------------------------
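applyOverrides keeps the base registration order, swaps resolvers in place when an override shares a token, and appends any override whose token is not in the base list; that is what lets llmatic-start (above) and the test container inject an llmConfig/llmAdapter without re-declaring everything else. A small sketch of the merge (the tokens are real DI tokens, the values are made up):

import awilix from "awilix";
import { applyOverrides, type ContainerRegistration } from "./container.ts";

const base: ContainerRegistration[] = [
  { token: "llmConfig", resolver: () => awilix.asValue({ modelPath: "model.bin" }) },
  { token: "sseHelper", resolver: () => awilix.asValue("real helper") },
];

const overrides: ContainerRegistration[] = [
  { token: "sseHelper", resolver: () => awilix.asValue("fake helper") }, // replaces in place
  { token: "llmAdapter", resolver: () => awilix.asValue("fake adapter") }, // appended at the end
];

console.log(applyOverrides(base, overrides).map(({ token }) => token));
// ["llmConfig", "sseHelper", "llmAdapter"] is the expected order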
/src/default-llm-adapter.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable no-await-in-loop */
2 | import {
3 | type FinishReason,
4 | LlmAdapter,
5 | type LlmAdapterCreateChatCompletionRequest,
6 | type LlmAdapterCreateChatCompletionResponse,
7 | type LlmAdapterCreateCompletionRequest,
8 | type LlmAdapterCreateCompletionResponse,
9 | type LlmAdapterCreateEmbeddingRequest,
10 | type LlmAdapterCreateEmbeddingResponse,
11 | type LlmAdapterModel,
12 | Role,
13 | } from "./llm-adapter.ts";
14 | import type { Generate } from "@llama-node/llama-cpp";
15 | import { type LLMError, LLM as LlamaNode } from "llama-node";
16 | import { LLamaCpp, type LoadConfig } from "llama-node/dist/llm/llama-cpp.js";
17 | import { cpus } from "node:os";
18 | import path from "node:path";
19 |
20 | type DefaultLlmAdapterConfig = Generate & LoadConfig;
21 |
22 | export default class DefaultLlmAdapter extends LlmAdapter {
23 | readonly #llmConfig: DefaultLlmAdapterConfig;
24 | #loaded = false;
25 | readonly #llamaNode = new LlamaNode(LLamaCpp);
26 |
27 | constructor(llmConfig: DefaultLlmAdapterConfig) {
28 | super();
29 |
30 | this.#llmConfig = { ...DefaultLlmAdapter.defaultConfig, ...llmConfig };
31 | }
32 |
33 | async createChatCompletion(
34 | createChatCompletionRequest: LlmAdapterCreateChatCompletionRequest,
35 | abortSignal: AbortSignal,
36 | onData: (data: LlmAdapterCreateChatCompletionResponse) => void,
37 | ): Promise<void> {
38 | await this.#load();
39 |
40 | const prompt = createChatCompletionRequest.messages
41 | .map(({ content, role }) => {
42 | if (role === Role.System) return `${content}\n`;
43 | return `${role ?? Role.User}: ${content}`;
44 | })
45 | .join("\n")
46 | .concat(`\n${Role.Assistant}: `);
47 |
48 | const bufferedTokens: string[] = [];
49 | const flushBuffer = (index: number) => {
50 | while (bufferedTokens.length > 0) {
51 | onData({
52 | index,
53 | delta: { content: bufferedTokens.shift() },
54 | });
55 | }
56 | };
57 |
58 | for (
59 | let index = 0;
60 | index < (createChatCompletionRequest.n ?? 1);
61 | index += 1
62 | ) {
63 | let isFirstToken = true;
64 |
65 | await this.#invokeLlamaNode(
66 | {
67 | ...this.#openAiCompletionRequestToLlamaNodeInvocation(
68 | createChatCompletionRequest,
69 | ),
70 | prompt,
71 | },
72 | abortSignal,
73 | ({ token, finishReason, stop }) => {
74 | if (isFirstToken) {
75 | onData({
76 | index,
77 | delta: { role: Role.Assistant },
78 | finishReason,
79 | });
80 |
81 | isFirstToken = false;
82 | }
83 |
84 | if (["\n", Role.User, ":"].includes(token.trim())) {
85 | bufferedTokens.push(token);
86 | if (bufferedTokens.join("").trim() === `${Role.User}:`) {
87 | stop();
88 | bufferedTokens.length = 0;
89 | }
90 | } else {
91 | flushBuffer(index);
92 | onData({
93 | index,
94 | delta: { content: token },
95 | finishReason,
96 | });
97 | }
98 | },
99 | () => {
100 | flushBuffer(index);
101 | onData({
102 | index,
103 | delta: {},
104 | finishReason: "stop",
105 | });
106 | },
107 | );
108 | }
109 | }
110 |
111 | async listModels(): Promise<LlmAdapterModel[]> {
112 | return [
113 | {
114 | id: path.basename(this.#llmConfig.modelPath),
115 | created: 0,
116 | ownedBy: "unknown",
117 | },
118 | ];
119 | }
120 |
121 | async createEmbedding({
122 | model,
123 | input,
124 | }: LlmAdapterCreateEmbeddingRequest): Promise<LlmAdapterCreateEmbeddingResponse> {
125 | await this.#load();
126 |
127 | return this.#llamaNode.getEmbedding({
128 | ...this.#llmConfig,
129 | prompt: input,
130 | });
131 | }
132 |
133 | async createCompletion(
134 | createCompletionRequest: LlmAdapterCreateCompletionRequest,
135 | abortSignal: AbortSignal,
136 | onData: (data: LlmAdapterCreateCompletionResponse) => void,
137 | ): Promise<void> {
138 | await this.#load();
139 |
140 | for (
141 | let promptIndex = 0, index = 0;
142 | index <
143 | createCompletionRequest.prompt.length * (createCompletionRequest.n ?? 1);
144 | index += 1,
145 | promptIndex = (promptIndex + 1) % createCompletionRequest.prompt.length
146 | ) {
147 | const prompt = createCompletionRequest.prompt[promptIndex];
148 | await this.#invokeLlamaNode(
149 | {
150 | ...this.#openAiCompletionRequestToLlamaNodeInvocation(
151 | createCompletionRequest,
152 | ),
153 | prompt,
154 | },
155 | abortSignal,
156 | ({ token, finishReason }) => {
157 | onData({
158 | index,
159 | text: token,
160 | finishReason,
161 | });
162 | },
163 | );
164 | }
165 | }
166 |
167 | #openAiCompletionRequestToLlamaNodeInvocation(
168 | request:
169 | | LlmAdapterCreateCompletionRequest
170 | | LlmAdapterCreateChatCompletionRequest,
171 | ) {
172 | return {
173 | nTokPredict: request.maxTokens ?? this.#llmConfig.nTokPredict,
174 | temp: request.temperature ?? this.#llmConfig.temp,
175 | topP: request.topP ?? this.#llmConfig.topP,
176 | presencePenalty:
177 | request.presencePenalty ?? this.#llmConfig.presencePenalty,
178 | frequencyPenalty:
179 | request.frequencyPenalty ?? this.#llmConfig.frequencyPenalty,
180 | } satisfies Partial<Generate>;
181 | }
182 |
183 | static get defaultConfig() {
184 | return {
185 | // Load config
186 | enableLogging: false,
187 | nParts: 1,
188 | nGpuLayers: 0,
189 | f16Kv: false,
190 | logitsAll: false,
191 | vocabOnly: false,
192 | seed: 0,
193 | useMlock: true,
194 | embedding: true,
195 | useMmap: true,
196 | nCtx: 4096,
197 |
198 | // Invocation config
199 | nThreads: cpus().length,
200 | nTokPredict: 32_768,
201 | topK: 40,
202 | topP: 0.95,
203 | temp: 0,
204 | repeatPenalty: 1.1,
205 | };
206 | }
207 |
208 | async #load() {
209 | if (this.#loaded) return;
210 |
211 | await this.#llamaNode.load({
212 | ...DefaultLlmAdapter.defaultConfig,
213 | ...this.#llmConfig,
214 | });
215 |
216 | this.#loaded = true;
217 | }
218 |
219 | async #invokeLlamaNode(
220 | invocationConfig: Partial<Generate>,
221 | callerAbortSignal: AbortSignal,
222 | onToken: ({
223 | token,
224 | finishReason,
225 | stop,
226 | }: {
227 | token: string;
228 | finishReason: FinishReason;
229 | stop: () => void;
230 | }) => void,
231 | onComplete?: () => void,
232 | ) {
233 | let tokensGenerated = 0;
234 | const abortController = new AbortController();
235 |
236 | const handleAbort = () => {
237 | callerAbortSignal.removeEventListener("abort", handleAbort);
238 | abortController.abort();
239 | };
240 |
241 | const stop = () => {
242 | abortController.abort();
243 | };
244 |
245 | callerAbortSignal.addEventListener("abort", handleAbort);
246 | return this.#llamaNode
247 | .createCompletion(
248 | {
249 | ...this.#llmConfig,
250 | ...invocationConfig,
251 | },
252 | ({ token, completed }) => {
253 | // "llama-node" always emits "\n\n\n" at the end of inference
254 | if (completed) {
255 | if (onComplete) onComplete();
256 | return;
257 | }
258 |
259 | tokensGenerated += 1;
260 |
261 | let finishReason: FinishReason;
262 | if (tokensGenerated >= invocationConfig.nTokPredict!) {
263 | finishReason = "length";
264 | abortController.abort();
265 | }
266 |
267 | onToken({ token, finishReason, stop });
268 | },
269 | abortController.signal,
270 | )
271 | .catch((error: unknown) => {
272 | // Looks like LLMError is not exported as a Class
273 | if (Object.getPrototypeOf(error).constructor.name !== "LLMError") {
274 | throw error;
275 | }
276 |
277 | const llmError = error as LLMError;
278 | if (llmError.type !== ("Aborted" as LLMError["type"])) {
279 | throw llmError;
280 | }
281 | })
282 | .finally(() => {
283 | callerAbortSignal.removeEventListener("abort", handleAbort);
284 | });
285 | }
286 | }
287 |
--------------------------------------------------------------------------------
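There is no model-specific chat template here: createChatCompletion flattens the messages into a plain-text prompt (system content on its own line, every other message as "role: content", then a trailing assistant cue), and the token callback buffers newline/"user"/":" tokens so it can stop as soon as the model starts writing the next "user:" turn. A worked example of the prompt construction, assuming the Role enum values are the lowercase role names from the OpenAI schema:

// Input in the shape of LlmAdapterCreateChatCompletionRequest.messages
const messages = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "Hello!" },
];

// Same map/join/concat as createChatCompletion:
const prompt = messages
  .map(({ content, role }) => (role === "system" ? `${content}\n` : `${role}: ${content}`))
  .join("\n")
  .concat("\nassistant: ");

// prompt is now:
// You are a helpful assistant.
//
// user: Hello!
// assistant: 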
/src/fastify-server-factory.ts:
--------------------------------------------------------------------------------
1 | import type { Cradle } from "./container.ts";
2 | import type { OperationHandler } from "./operation-handler.ts";
3 | import fastifyStatic from "@fastify/static";
4 | import ajvModule from "ajv";
5 | import fastify from "fastify";
6 | import openapiGlue from "fastify-openapi-glue";
7 | import { glob } from "glob";
8 | import yaml from "js-yaml";
9 | import fs from "node:fs";
10 | import { fileURLToPath } from "node:url";
11 | import swaggerUiDist from "swagger-ui-dist";
12 | import traverse from "traverse";
13 |
14 | // https://github.com/ajv-validator/ajv/issues/2132
15 | // eslint-disable-next-line @typescript-eslint/naming-convention
16 | const Ajv = ajvModule.default;
17 |
18 | // FIXME: fix the types
19 | const createOpenapiGlueService = async ({ container }: Partial<Cradle>) => {
20 | const routeHandlerFiles = await glob("**/*.handler.[tj]s", {
21 | cwd: new URL("handlers", import.meta.url),
22 | absolute: true,
23 | });
24 |
25 | const handlers = await Promise.all(
26 | routeHandlerFiles.map(async (file) => {
27 | const { default: handlerConstructor } = (await import(file)) as {
28 | default: (...arguments_: any[]) => OperationHandler;
29 | };
30 | return container!.build(handlerConstructor);
31 | }),
32 | );
33 |
34 | return Object.fromEntries(
35 | handlers.map((handler) => [
36 | handler.operationId,
37 | handler.handle.bind(handler),
38 | ]),
39 | );
40 | };
41 |
42 | // FIXME: fix the types
43 | const configureOpenapiGlue = async ({
44 | container,
45 | fastifyServer,
46 | openapiDocument,
47 | }: Partial<Cradle> & { openapiDocument: any }) => {
48 | const schemaCompilers = {
49 | body: new Ajv(),
50 | params: new Ajv(),
51 | querystring: new Ajv(),
52 | headers: new Ajv(),
53 | };
54 |
55 | fastifyServer!.setValidatorCompiler((request) => {
56 | if (!request.httpPart) {
57 | throw new Error("Missing httpPart");
58 | }
59 |
60 | const compiler = schemaCompilers[request.httpPart] as
61 | | ajvModule.default
62 | | undefined;
63 | if (!compiler) {
64 | throw new Error(`Missing compiler for ${request.httpPart}`);
65 | }
66 |
67 | // OpenAI OAS is not entirely valid/compatible, so we need to remove some properties
68 | // eslint-disable-next-line unicorn/no-array-for-each
69 | traverse(request.schema).forEach(function (value) {
70 | if (!this.key) return;
71 |
72 | if (this.isLeaf && ["nullable", "x-oaiTypeLabel"].includes(this.key)) {
73 | this.remove();
74 | }
75 |
76 | if (this.key === "example") {
77 | this.remove();
78 | }
79 |
80 | if (this.isLeaf && this.key === "format" && value === "binary") {
81 | this.remove();
82 | }
83 | });
84 |
85 | return compiler.compile(request.schema);
86 | });
87 |
88 | const service = await createOpenapiGlueService({ container });
89 |
90 | await fastifyServer!.register(openapiGlue, {
91 | specification: openapiDocument as Record<string, unknown>,
92 | prefix: "/v1",
93 | service,
94 | securityHandlers: {},
95 | });
96 | };
97 |
98 | // FIXME: fix the types
99 | const configureSwaggerUi = async ({
100 | fastifyServer,
101 | openapiDocument,
102 | }: Partial<Cradle> & { openapiDocument: any }) => {
103 | await fastifyServer!.register(fastifyStatic, {
104 | root: swaggerUiDist.getAbsoluteFSPath(),
105 | prefix: "/swagger-ui/",
106 | });
107 |
108 | fastifyServer!.get("/", (request, reply) =>
109 | reply.sendFile(
110 | "index.html",
111 | fileURLToPath(new URL("../public", import.meta.url)),
112 | ),
113 | );
114 |
115 | fastifyServer!.get("/api.oas.yml", (request, reply) => {
116 | const newOas = {
117 | ...(openapiDocument as Record<string, unknown>),
118 | servers: [
119 | {
120 | url: `${request.protocol}://${request.hostname}/v1`,
121 | },
122 | ],
123 | };
124 |
125 | return reply.type("text/yaml").send(yaml.dump(newOas));
126 | });
127 | };
128 |
129 | export const createFastifyServer = async ({ container }: Cradle) => {
130 | const fastifyServer = fastify({
131 | logger: true,
132 | });
133 |
134 | const openapiDocument = yaml.load(
135 | await fs.promises.readFile(new URL("../api.oas.yml", import.meta.url), {
136 | encoding: "utf8",
137 | }),
138 | );
139 |
140 | await configureSwaggerUi({ fastifyServer, openapiDocument });
141 | await configureOpenapiGlue({ container, fastifyServer, openapiDocument });
142 |
143 | return fastifyServer;
144 | };
145 |
--------------------------------------------------------------------------------
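Handlers are discovered by the glob over the handlers directory, built through the container so they receive the cradle, and keyed by their operationId for fastify-openapi-glue. The other moving part is the validator compiler, which strips OpenAI-specific keywords that stock Ajv would reject; a standalone sketch of that cleanup pass (the schema values are made up):

import traverse from "traverse";

const schema = {
  type: "object",
  properties: {
    stop: { type: "string", nullable: true, example: "\n" },
    image: { type: "string", format: "binary" },
  },
};

// Same traversal as in the validator compiler above.
traverse(schema).forEach(function (value) {
  if (!this.key) return;
  if (this.isLeaf && ["nullable", "x-oaiTypeLabel"].includes(this.key)) this.remove();
  if (this.key === "example") this.remove();
  if (this.isLeaf && this.key === "format" && value === "binary") this.remove();
});

console.log(JSON.stringify(schema));
// {"type":"object","properties":{"stop":{"type":"string"},"image":{"type":"string"}}} is expected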
/src/handlers/create-chat-completion.handler.test.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/naming-convention */
2 | import type { Cradle } from "../container.ts";
3 | import { type LlmAdapter } from "../llm-adapter.ts";
4 | import { createTestContainer } from "../test-support/test-container.ts";
5 | import {
6 | Role,
7 | type CreateChatCompletionRequest,
8 | } from "../types/create-chat-completion.ts";
9 | import type { AwilixContainer } from "awilix";
10 | import { type MockObject, expect, mockObject } from "earl";
11 | import { test } from "node:test";
12 |
13 | const testModelId = "test-model-id";
14 | let testContainer: AwilixContainer<Cradle> | undefined;
15 | let llmAdapter: MockObject<LlmAdapter> | undefined;
16 |
17 | const createChatCompletionRequest: CreateChatCompletionRequest = {
18 | messages: [
19 | { content: "prompt1", role: Role.System },
20 | { content: "prompt2", role: Role.User },
21 | { content: "prompt3", role: Role.Assistant },
22 | ],
23 | model: testModelId,
24 | frequency_penalty: 0.5,
25 | logit_bias: { "50256": -100 },
26 | max_tokens: 100,
27 | n: 2,
28 | presence_penalty: 1.5,
29 | stop: "stop",
30 | stream: false,
31 | temperature: 0,
32 | top_p: 0.1,
33 | };
34 |
35 | await test("createChatCompletionHandler", async (t) => {
36 | t.beforeEach(async () => {
37 | llmAdapter = mockObject<LlmAdapter>({
38 | async createChatCompletion(
39 | createChatCompletionRequest,
40 | abortSignal,
41 | callback,
42 | ) {
43 | const { messages, n } = createChatCompletionRequest;
44 |
45 | const count = messages.length * (n ?? 1);
46 | for (let tokenIndex = 0; tokenIndex < count; tokenIndex++) {
47 | callback({
48 | finishReason: "stop",
49 | index: 0,
50 | delta:
51 | tokenIndex === 0
52 | ? { role: Role.Assistant }
53 | : { content: `token ${tokenIndex}\n` },
54 | });
55 | }
56 | },
57 | });
58 | });
59 |
60 | t.afterEach(async () => {
61 | await testContainer?.dispose();
62 | testContainer = undefined;
63 | });
64 |
65 | await t.test("valid request with no errors", async () => {
66 | testContainer = await createTestContainer(llmAdapter!);
67 | const fastifyServer = testContainer.resolve("fastifyServer");
68 |
69 | const response = await fastifyServer.inject({
70 | url: "/v1/chat/completions",
71 | method: "POST",
72 | headers: {
73 | "Content-Type": "application/json",
74 | },
75 | payload: JSON.stringify(createChatCompletionRequest),
76 | });
77 |
78 | expect(response.statusCode).toEqual(200);
79 |
80 | expect(llmAdapter!.createChatCompletion).toHaveBeenCalledWith(
81 | {
82 | messages: createChatCompletionRequest.messages,
83 | model: testModelId,
84 | frequencyPenalty: createChatCompletionRequest.frequency_penalty,
85 | logitBias: createChatCompletionRequest.logit_bias,
86 | maxTokens: createChatCompletionRequest.max_tokens,
87 | n: createChatCompletionRequest.n,
88 | presencePenalty: createChatCompletionRequest.presence_penalty,
89 | stop: (Array.isArray(createChatCompletionRequest.stop)
90 | ? createChatCompletionRequest.stop
91 | : [createChatCompletionRequest.stop]) as string[],
92 | temperature: createChatCompletionRequest.temperature,
93 | topP: createChatCompletionRequest.top_p,
94 | },
95 | expect.anything(),
96 | expect.anything(),
97 | );
98 | });
99 | });
100 |
--------------------------------------------------------------------------------
/src/handlers/create-chat-completion.handler.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/naming-convention */
2 | import type { Cradle } from "../container.ts";
3 | import type { LlmAdapter } from "../llm-adapter.ts";
4 | import type { OperationHandler } from "../operation-handler.ts";
5 | import type { SseHelper } from "../sse-helper.ts";
6 | import {
7 | type ChoiceMessage,
8 | type CreateChatCompletionOkResponse,
9 | type CreateChatCompletionRequest,
10 | Role,
11 | } from "../types/create-chat-completion.ts";
12 | import type { Choice } from "../types/create-chat-completion.ts";
13 | import type { RouteHandlerMethod } from "fastify";
14 | import shortUUID from "short-uuid";
15 |
16 | type Chunk = Choice & {
17 | delta: Partial;
18 | };
19 | export default class CreateChatCompletionHandler implements OperationHandler {
20 | operationId = "createChatCompletion";
21 |
22 | readonly #llmAdapter: LlmAdapter;
23 | readonly #sseHelper: SseHelper;
24 |
25 | constructor({ llmAdapter, sseHelper }: Cradle) {
26 | this.#llmAdapter = llmAdapter;
27 | this.#sseHelper = sseHelper;
28 | }
29 |
30 | handle: RouteHandlerMethod = async (request, reply) => {
31 | const body = request.body as CreateChatCompletionRequest;
32 |
33 | const {
34 | frequency_penalty,
35 | logit_bias,
36 | max_tokens,
37 | messages,
38 | model,
39 | n,
40 | presence_penalty,
41 | stop,
42 | stream,
43 | temperature,
44 | top_p,
45 | } = body;
46 |
47 | const abortController = new AbortController();
48 | request.raw.once("close", () => {
49 | if (request.raw.destroyed) {
50 | abortController.abort();
51 | }
52 | });
53 |
54 | const id = `chatcmpl-${shortUUID.generate()}`;
55 | const choiceTokens: string[][] = [];
56 | const choices: Choice[] = [];
57 |
58 | await this.#llmAdapter.createChatCompletion(
59 | {
60 | messages,
61 | model,
62 | frequencyPenalty: frequency_penalty,
63 | logitBias: logit_bias,
64 | maxTokens: max_tokens,
65 | n,
66 | presencePenalty: presence_penalty,
67 | stop: (Array.isArray(stop) ? stop : [stop].filter(Boolean)) as string[],
68 | temperature,
69 | topP: top_p,
70 | },
71 | abortController.signal,
72 | ({ index, delta, finishReason }) => {
73 | if (stream) {
74 | this.#sseHelper.sse(
75 | reply,
76 | this.#createResponseChunk(id, model, {
77 | delta,
78 | index,
79 | }),
80 | );
81 |
82 | return;
83 | }
84 |
85 | choices[index] ??= {
86 | index,
87 | message: { role: Role.Assistant, content: "" },
88 | };
89 |
90 | choices[index].finish_reason = finishReason;
91 |
92 | if (delta.role) {
93 | choices[index].message!.role = delta.role;
94 | }
95 |
96 | choiceTokens[index] ??= [];
97 |
98 | if (delta.content) {
99 | choiceTokens[index].push(delta.content);
100 | }
101 | },
102 | );
103 |
104 | if (stream) {
105 | this.#sseHelper.sse(reply, "[DONE]");
106 | reply.raw.end();
107 | return;
108 | }
109 |
110 | for (const [index, choice] of choices.entries()) {
111 | if (!choice) {
112 | continue;
113 | }
114 |
115 | choice.message!.role = Role.Assistant;
116 | choice.message!.content = choiceTokens[index].join("");
117 | }
118 |
119 | const response: CreateChatCompletionOkResponse = {
120 | ...this.#createResponse(id, model, choices),
121 | usage: {
122 | completion_tokens: 0,
123 | prompt_tokens: 0,
124 | total_tokens: 0,
125 | },
126 | };
127 |
128 | return response;
129 | };
130 |
131 | #createResponse(
132 | id: string,
133 | model: string,
134 | choices: Choice[],
135 | ): CreateChatCompletionOkResponse {
136 | return {
137 | id,
138 | model,
139 | choices: choices.filter(Boolean),
140 | created: Math.floor(Date.now() / 1000),
141 | object: "chat.completion",
142 | };
143 | }
144 |
145 | #createResponseChunk(
146 | id: string,
147 | model: string,
148 | deltaChoice: Chunk,
149 | ): CreateChatCompletionOkResponse {
150 | return {
151 | id,
152 | model,
153 | choices: [deltaChoice],
154 | created: Math.floor(Date.now() / 1000),
155 | object: "chat.completion.chunk",
156 | };
157 | }
158 | }
159 |
--------------------------------------------------------------------------------
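End to end, the handler accepts OpenAI-style snake_case JSON, forwards a camelCase request to the adapter, and either streams chunks over SSE (stream: true) or accumulates per-index tokens into a single chat.completion object with zeroed usage counts. A minimal client sketch against a locally running server (default host/port from llmatic-start assumed; the model id is a placeholder):

const response = await fetch("http://localhost:3000/v1/chat/completions", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "ggml-model.bin", // whatever GET /v1/models reports for your config
    messages: [{ role: "user", content: "Say hello" }],
    stream: false,
  }),
});

const completion = await response.json();
console.log(completion.choices[0].message.content);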
/src/handlers/create-completion.handler.test.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/naming-convention */
2 | import type { Cradle } from "../container.ts";
3 | import { type LlmAdapter } from "../llm-adapter.ts";
4 | import { createTestContainer } from "../test-support/test-container.ts";
5 | import type { CreateCompletionRequest } from "../types/create-completion.ts";
6 | import type { AwilixContainer } from "awilix";
7 | import { type MockObject, expect, mockObject } from "earl";
8 | import { test } from "node:test";
9 |
10 | const testModelId = "test-model-id";
11 | let testContainer: AwilixContainer<Cradle> | undefined;
12 | let llmAdapter: MockObject<LlmAdapter> | undefined;
13 |
14 | const createCompletionRequest: CreateCompletionRequest = {
15 | stream: false,
16 |
17 | best_of: 3,
18 | echo: true,
19 | frequency_penalty: 1,
20 | logit_bias: { "50256": -100 },
21 | logprobs: 5,
22 | max_tokens: 100,
23 | model: testModelId,
24 | n: 2,
25 | presence_penalty: 1.5,
26 | prompt: ["prompt1", "prompt2"],
27 | stop: "stop",
28 | suffix: "suffix",
29 | temperature: 0,
30 | top_p: 0.1,
31 | };
32 |
33 | await test("createCompletionHandler", async (t) => {
34 | t.beforeEach(async () => {
35 | llmAdapter = mockObject<LlmAdapter>({
36 | async createCompletion(createCompletionRequest, abortSignal, callback) {
37 | const { prompt, n } = createCompletionRequest;
38 |
39 | const count = prompt.length * (n ?? 1);
40 | for (let index = 0; index < count; index++) {
41 | callback({
42 | finishReason: "stop",
43 | index,
44 | text: `token ${index}`,
45 | });
46 | }
47 | },
48 | });
49 | });
50 |
51 | t.afterEach(async () => {
52 | await testContainer?.dispose();
53 | testContainer = undefined;
54 | });
55 |
56 | await t.test("stream cannot be set if best_of > 1", async (t) => {
57 | testContainer = await createTestContainer(llmAdapter!);
58 | const fastifyServer = testContainer.resolve("fastifyServer");
59 |
60 | const payload = JSON.stringify({
61 | model: testModelId,
62 | prompt: "test-prompt",
63 |
64 | best_of: 2,
65 | stream: true,
66 | } as CreateCompletionRequest);
67 | const response = await fastifyServer.inject({
68 | url: "/v1/completions",
69 | method: "POST",
70 | headers: {
71 | "Content-Type": "application/json",
72 | },
73 | payload,
74 | });
75 |
76 | expect(response.statusCode).toEqual(400);
77 | });
78 |
79 | await t.test("valid request with no errors", async () => {
80 | testContainer = await createTestContainer(llmAdapter!);
81 | const fastifyServer = testContainer.resolve("fastifyServer");
82 |
83 | const response = await fastifyServer.inject({
84 | url: "/v1/completions",
85 | method: "POST",
86 | headers: {
87 | "Content-Type": "application/json",
88 | },
89 | payload: JSON.stringify(createCompletionRequest),
90 | });
91 |
92 | expect(response.statusCode).toEqual(200);
93 | expect(llmAdapter!.createCompletion).toHaveBeenCalledWith(
94 | {
95 | model: testModelId,
96 | bestOf: createCompletionRequest.best_of,
97 | echo: createCompletionRequest.echo,
98 | frequencyPenalty: createCompletionRequest.frequency_penalty,
99 | logitBias: createCompletionRequest.logit_bias,
100 | logprobs: createCompletionRequest.logprobs,
101 | maxTokens: createCompletionRequest.max_tokens,
102 | n: createCompletionRequest.n,
103 | presencePenalty: createCompletionRequest.presence_penalty,
104 | // TODO: make this more specific
105 | prompt: expect.satisfies((prompt) => Array.isArray(prompt)),
106 | stop: (Array.isArray(createCompletionRequest.stop)
107 | ? createCompletionRequest.stop
108 | : [createCompletionRequest.stop]) as string[],
109 | suffix: createCompletionRequest.suffix,
110 | temperature: createCompletionRequest.temperature,
111 | topP: createCompletionRequest.top_p,
112 | },
113 | expect.anything(),
114 | expect.anything(),
115 | );
116 | });
117 | });
118 |
--------------------------------------------------------------------------------
/src/handlers/create-completion.handler.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/naming-convention */
2 | import type { Cradle } from "../container.ts";
3 | import type { LlmAdapter } from "../llm-adapter.ts";
4 | import type { OperationHandler } from "../operation-handler.ts";
5 | import type { SseHelper } from "../sse-helper.ts";
6 | import type {
7 | Choice,
8 | CreateCompletionOkResponse,
9 | CreateCompletionRequest,
10 | } from "../types/create-completion.ts";
11 | import type { RouteHandlerMethod } from "fastify";
12 | import shortUUID from "short-uuid";
13 |
14 | export default class CreateCompletionHandler implements OperationHandler {
15 | operationId = "createCompletion";
16 | readonly #llmAdapter: LlmAdapter;
17 | readonly #sseHelper: SseHelper;
18 |
19 | constructor({ llmAdapter, sseHelper }: Cradle) {
20 | this.#llmAdapter = llmAdapter;
21 | this.#sseHelper = sseHelper;
22 | }
23 |
24 | handle: RouteHandlerMethod = async (request, reply) => {
25 | const body = request.body as CreateCompletionRequest;
26 |
27 | const {
28 | model,
29 | best_of,
30 | stream,
31 | prompt,
32 | echo,
33 | frequency_penalty,
34 | logit_bias,
35 | max_tokens,
36 | logprobs,
37 | presence_penalty,
38 | stop,
39 | suffix,
40 | temperature,
41 | top_p,
42 | n,
43 | } = body;
44 |
45 | if (best_of != null && stream) {
46 | void reply.status(400);
47 | throw new Error("stream cannot be set if best_of is set");
48 | }
49 |
50 | const promptValidationError = () => {
51 | void reply.status(400);
52 | throw new Error("prompt must be a string or an array of strings");
53 | };
54 |
55 | if (typeof prompt !== "string") {
56 | if (!Array.isArray(prompt)) {
57 | promptValidationError();
58 | }
59 |
60 | if (prompt!.some((x) => typeof x !== "string")) {
61 | promptValidationError();
62 | }
63 | }
64 |
65 | const abortController = new AbortController();
66 | request.raw.once("close", () => {
67 | if (request.raw.destroyed) {
68 | abortController.abort();
69 | }
70 | });
71 |
72 | const id = `cmpl-${shortUUID.generate()}`;
73 | const choiceTokens: string[][] = [];
74 | const choices: Choice[] = [];
75 |
76 | await this.#llmAdapter.createCompletion(
77 | {
78 | model,
79 | bestOf: best_of,
80 | echo,
81 | frequencyPenalty: frequency_penalty,
82 | logitBias: logit_bias,
83 | maxTokens: max_tokens,
84 | logprobs,
85 | n,
86 | presencePenalty: presence_penalty,
87 | prompt: (Array.isArray(prompt)
88 | ? prompt
89 | : [prompt].filter(Boolean)) as string[],
90 | stop: (Array.isArray(stop) ? stop : [stop].filter(Boolean)) as string[],
91 | suffix,
92 | temperature,
93 | topP: top_p,
94 | },
95 | abortController.signal,
96 | ({
97 | finishReason,
98 | index,
99 | text,
100 | // TODO: Figure out how to handle logprobs
101 | logprobs,
102 | }) => {
103 | if (stream) {
104 | this.#sseHelper.sse(
105 | reply,
106 | this.#createResponseObject(id, model, [
107 | { finish_reason: finishReason, index, text },
108 | ]),
109 | );
110 |
111 | return;
112 | }
113 |
114 | choices[index] ??= {
115 | index,
116 | };
117 |
118 | choices[index].finish_reason = finishReason;
119 | choiceTokens[index] ??= [];
120 |
121 | choiceTokens[index].push(text);
122 | },
123 | );
124 |
125 | if (stream) {
126 | this.#sseHelper.sse(reply, "[DONE]");
127 | reply.raw.end();
128 | return;
129 | }
130 |
131 | for (const [index, choice] of choices.entries()) {
132 | if (!choice) {
133 | continue;
134 | }
135 |
136 | choice.text = choiceTokens[index].join("");
137 | choice.finish_reason ??= "stop";
138 | }
139 |
140 | const response: CreateCompletionOkResponse = {
141 | ...this.#createResponseObject(id, model, choices),
142 | usage: {
143 | completion_tokens: 0,
144 | prompt_tokens: 0,
145 | total_tokens: 0,
146 | },
147 | };
148 |
149 | return response;
150 | };
151 |
152 | #createResponseObject(
153 | id: string,
154 | model: string,
155 | choices: Choice[],
156 | ): CreateCompletionOkResponse {
157 | return {
158 | id,
159 | choices: choices.filter(Boolean),
160 | created: Math.floor(Date.now() / 1000),
161 | model,
162 | object: "text_completion",
163 | };
164 | }
165 | }
166 |
--------------------------------------------------------------------------------
/src/handlers/create-embedding.handler.test.ts:
--------------------------------------------------------------------------------
1 | import type { Cradle } from "../container.ts";
2 | import type { LlmAdapter } from "../llm-adapter.ts";
3 | import { createTestContainer } from "../test-support/test-container.ts";
4 | import type { CreateEmbeddingRequest } from "../types/create-embedding.ts";
5 | import type { AwilixContainer } from "awilix";
6 | import { type MockObject, expect, mockObject } from "earl";
7 | import { test } from "node:test";
8 |
9 | const testModelId = "test-model-id";
10 | let testContainer: AwilixContainer<Cradle> | undefined;
11 | let llmAdapter: MockObject<LlmAdapter> | undefined;
12 |
13 | await test("createEmbeddingHandler", async (t) => {
14 | t.beforeEach(async () => {
15 | llmAdapter = mockObject<LlmAdapter>({
16 | async createEmbedding({ model, input }) {
17 | return [0];
18 | },
19 | });
20 | });
21 |
22 | t.afterEach(async () => {
23 | await testContainer!.dispose();
24 | testContainer = undefined;
25 | });
26 |
27 | await t.test("single string input", async () => {
28 | testContainer = await createTestContainer(llmAdapter!);
29 | const fastifyServer = testContainer.resolve("fastifyServer");
30 | const testModelInput = "test-model-input";
31 |
32 | const payload = JSON.stringify({
33 | model: testModelId,
34 | input: testModelInput,
35 | } as CreateEmbeddingRequest);
36 |
37 | const response = await fastifyServer.inject({
38 | url: "/v1/embeddings",
39 | method: "POST",
40 | headers: {
41 | "Content-Type": "application/json",
42 | },
43 | payload,
44 | });
45 |
46 | expect(response.statusCode).toEqual(200);
47 | expect(llmAdapter!.createEmbedding).toHaveBeenCalledWith({
48 | input: testModelInput,
49 | model: testModelId,
50 | });
51 | });
52 |
53 | await t.test("multiple strings input", async () => {
54 | testContainer = await createTestContainer(llmAdapter!);
55 | const fastifyServer = testContainer.resolve("fastifyServer");
56 | const testModelInput = ["input1", "input2"];
57 |
58 | const payload = JSON.stringify({
59 | model: testModelId,
60 | input: testModelInput,
61 | } as CreateEmbeddingRequest);
62 |
63 | const response = await fastifyServer.inject({
64 | url: "/v1/embeddings",
65 | method: "POST",
66 | headers: {
67 | "Content-Type": "application/json",
68 | },
69 | payload,
70 | });
71 |
72 | expect(response.statusCode).toEqual(200);
73 | expect(llmAdapter!.createEmbedding).toHaveBeenNthCalledWith(1, {
74 | input: testModelInput[0],
75 | model: testModelId,
76 | });
77 | expect(llmAdapter!.createEmbedding).toHaveBeenNthCalledWith(2, {
78 | input: testModelInput[1],
79 | model: testModelId,
80 | });
81 | });
82 | });
83 |
--------------------------------------------------------------------------------
/src/handlers/create-embedding.handler.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/naming-convention */
2 | import type { Cradle } from "../container.ts";
3 | import type { LlmAdapter } from "../llm-adapter.ts";
4 | import type { OperationHandler } from "../operation-handler.ts";
5 | import type {
6 | CreateEmbeddingOkResponse,
7 | CreateEmbeddingRequest,
8 | Datum,
9 | } from "../types/create-embedding.ts";
10 | import type { RouteHandlerMethod } from "fastify";
11 |
12 | export default class CreateEmbeddingHandler implements OperationHandler {
13 | operationId = "createEmbedding";
14 | readonly #llmAdapter: LlmAdapter;
15 |
16 | constructor({ llmAdapter }: Cradle) {
17 | this.#llmAdapter = llmAdapter;
18 | }
19 |
20 | handle: RouteHandlerMethod = async (request, reply) => {
21 | const body = request.body as CreateEmbeddingRequest;
22 | const { input, model } = body;
23 |
24 | if (Array.isArray(input) && typeof input[0] !== "string") {
25 | // FIXME: figure out how to handle numeric inputs
26 | throw new TypeError("Only string inputs are supported");
27 | }
28 |
29 | const inputStrings = Array.isArray(input) ? input : [input];
30 |
31 | const embeddings = await Promise.all(
32 | inputStrings.map(async (input: string) =>
33 | this.#llmAdapter.createEmbedding({
34 | input,
35 | model,
36 | }),
37 | ),
38 | );
39 |
40 | const data: Datum[] = embeddings.map((embedding, index) => ({
41 | index,
42 | embedding,
43 | object: "embedding",
44 | }));
45 |
46 | const response: CreateEmbeddingOkResponse = {
47 | data,
48 | object: "list",
49 | model,
50 | usage: {
51 | prompt_tokens: 0,
52 | total_tokens: 0,
53 | },
54 | };
55 |
56 | return response;
57 | };
58 | }
59 |
--------------------------------------------------------------------------------
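String-array inputs are fanned out to one adapter call per element, and each result comes back as a data entry whose index matches its position in the input. A small client sketch (same local-server assumptions as above):

const response = await fetch("http://localhost:3000/v1/embeddings", {
  method: "POST",
  headers: { "Content-Type": "application/json" },
  body: JSON.stringify({
    model: "ggml-model.bin", // placeholder model id
    input: ["first sentence", "second sentence"],
  }),
});

const { data } = await response.json();
// data[0].embedding and data[1].embedding, with data[i].index === i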
/src/handlers/list-models.handler.test.ts:
--------------------------------------------------------------------------------
1 | import type { Cradle } from "../container.ts";
2 | import type { LlmAdapter } from "../llm-adapter.ts";
3 | import { createTestContainer } from "../test-support/test-container.ts";
4 | import type { AwilixContainer } from "awilix";
5 | import { type MockObject, expect, mockObject } from "earl";
6 | import { test } from "node:test";
7 |
8 | const testModelId = "test-model-id";
9 | let testContainer: AwilixContainer<Cradle> | undefined;
10 | let llmAdapter: MockObject<LlmAdapter> | undefined;
11 |
12 | test.beforeEach(() => {
13 | llmAdapter = mockObject<LlmAdapter>({
14 | async listModels() {
15 | return [{ created: 0, id: testModelId, ownedBy: "ownedBy" }];
16 | },
17 | });
18 | });
19 |
20 | test.afterEach(async () => {
21 | await testContainer!.dispose();
22 | testContainer = undefined;
23 | });
24 |
25 | await test("listModelsHandler", async () => {
26 | testContainer = await createTestContainer(llmAdapter!);
27 | const fastifyServer = testContainer.resolve("fastifyServer");
28 | const response = await fastifyServer.inject({
29 | url: "/v1/models",
30 | method: "GET",
31 | headers: {},
32 | });
33 |
34 | expect(response.statusCode).toEqual(200);
35 | expect(llmAdapter!.listModels).toHaveBeenCalled();
36 | });
37 |
--------------------------------------------------------------------------------
/src/handlers/list-models.handler.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/naming-convention */
2 | import type { Cradle } from "../container.ts";
3 | import type { LlmAdapter } from "../llm-adapter.ts";
4 | import type { OperationHandler } from "../operation-handler.ts";
5 | import type {
6 | ListModelsOkResponse,
7 | ModelObject,
8 | } from "../types/list-models.ts";
9 | import type { RouteHandlerMethod } from "fastify";
10 |
11 | export default class ListModelsHandler implements OperationHandler {
12 | operationId = "listModels";
13 | readonly #llmAdapter: LlmAdapter;
14 |
15 | constructor({ llmAdapter }: Cradle) {
16 | this.#llmAdapter = llmAdapter;
17 | }
18 |
19 | handle: RouteHandlerMethod = async (request, reply) => {
20 | const adapterModels = await this.#llmAdapter.listModels();
21 |
22 | const data: ModelObject[] = adapterModels.map((model) => ({
23 | id: model.id,
24 | created: model.created,
25 | owned_by: model.ownedBy,
26 | object: "model",
27 |
28 | // Not part of the spec
29 | permission: [],
30 | }));
31 |
32 | const response: ListModelsOkResponse = {
33 | data,
34 | object: "list",
35 | };
36 |
37 | // To preserve model.permissions
38 | void reply.header("Content-Type", "application/json; charset=utf-8");
39 | void reply.serializer(JSON.stringify);
40 |
41 | return response;
42 | };
43 | }
44 |
--------------------------------------------------------------------------------
/src/handlers/retrieve-model.handler.test.ts:
--------------------------------------------------------------------------------
1 | import type { Cradle } from "../container.ts";
2 | import type { LlmAdapter } from "../llm-adapter.ts";
3 | import { createTestContainer } from "../test-support/test-container.ts";
4 | import type { AwilixContainer } from "awilix";
5 | import { type MockObject, expect, mockObject } from "earl";
6 | import { test } from "node:test";
7 |
8 | const testModelId = "test-model-id";
9 | let testContainer: AwilixContainer<Cradle> | undefined;
10 | let llmAdapter: MockObject<LlmAdapter> | undefined;
11 |
12 | await test("retrieveModelHandler", async (t) => {
13 | t.beforeEach(() => {
14 | llmAdapter = mockObject<LlmAdapter>({
15 | async listModels() {
16 | return [{ created: 0, id: testModelId, ownedBy: "ownedBy" }];
17 | },
18 | });
19 | });
20 |
21 | t.afterEach(async () => {
22 | await testContainer!.dispose();
23 | testContainer = undefined;
24 | });
25 |
26 | await t.test("should return a model when there is one", async (t) => {
27 | testContainer = await createTestContainer(llmAdapter!);
28 | const fastifyServer = testContainer.resolve("fastifyServer");
29 |
30 | const response = await fastifyServer.inject({
31 | url: `v1/models/${testModelId}`,
32 | method: "GET",
33 | headers: {},
34 | });
35 |
36 | expect(response.statusCode).toEqual(200);
37 | expect(llmAdapter!.listModels).toHaveBeenCalled();
38 | });
39 |
40 | await t.test("should return 404 when the model doesn't exist", async (t) => {
41 | testContainer = await createTestContainer(llmAdapter!);
42 | const fastifyServer = testContainer.resolve("fastifyServer");
43 |
44 | const response = await fastifyServer.inject({
45 | url: `v1/models/non-existent-model`,
46 | method: "GET",
47 | headers: {},
48 | });
49 |
50 | expect(response.statusCode).toEqual(404);
51 | expect(llmAdapter!.listModels).toHaveBeenCalled();
52 | });
53 | });
54 |
--------------------------------------------------------------------------------
/src/handlers/retrieve-model.handler.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/naming-convention */
2 | import type { Cradle } from "../container.ts";
3 | import type { LlmAdapter } from "../llm-adapter.ts";
4 | import type { OperationHandler } from "../operation-handler.ts";
5 | import type { RetrieveModelOkResponseObject } from "../types/retrieve-model.ts";
6 | import type { RouteHandlerMethod } from "fastify";
7 |
8 | type RequestParameters = {
9 | model: string;
10 | };
11 |
12 | export default class RetrieveModelHandler implements OperationHandler {
13 | operationId = "retrieveModel";
14 | readonly #llmAdapter: LlmAdapter;
15 |
16 | constructor({ llmAdapter }: Cradle) {
17 | this.#llmAdapter = llmAdapter;
18 | }
19 |
20 | handle: RouteHandlerMethod = async (request, reply) => {
21 | const parameters: RequestParameters = request.params as RequestParameters;
22 |
23 | const { model } = parameters;
24 | const adapterModels = await this.#llmAdapter.listModels();
25 | const adapterModel = adapterModels.find(
26 | (adapterModel) => adapterModel.id === model,
27 | );
28 |
29 | if (!adapterModel) {
30 | void reply.status(404);
31 | return;
32 | }
33 |
34 | const response: RetrieveModelOkResponseObject = {
35 | created: 0,
36 | id: adapterModel.id,
37 | object: "model",
38 | owned_by: adapterModel.ownedBy,
39 |
40 | // Not part of the spec
41 | permission: [],
42 | };
43 |
44 | // To preserve model.permissions
45 | void reply.header("Content-Type", "application/json; charset=utf-8");
46 | void reply.serializer(JSON.stringify);
47 |
48 | return response;
49 | };
50 | }
51 |
--------------------------------------------------------------------------------
/src/llama-node-core-llm-adapter.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable no-await-in-loop */
2 | import {
3 | type FinishReason,
4 | LlmAdapter,
5 | type LlmAdapterCreateChatCompletionRequest,
6 | type LlmAdapterCreateChatCompletionResponse,
7 | type LlmAdapterCreateCompletionRequest,
8 | type LlmAdapterCreateCompletionResponse,
9 | type LlmAdapterCreateEmbeddingRequest,
10 | type LlmAdapterCreateEmbeddingResponse,
11 | type LlmAdapterModel,
12 | Role,
13 | } from "./llm-adapter.ts";
14 | import { type Generate, ModelType } from "@llama-node/core";
15 | import { type LLMError, LLM as LlamaNode } from "llama-node";
16 | import { LLMRS, type LoadConfig } from "llama-node/dist/llm/llm-rs.js";
17 | import { cpus } from "node:os";
18 | import path from "node:path";
19 |
20 | type LlamaNodeCoreLlmAdapterConfig = LoadConfig & Generate;
21 |
22 | export default class LlamaNodeCoreLlmAdapter extends LlmAdapter {
23 | readonly #llmConfig: LlamaNodeCoreLlmAdapterConfig;
24 | #loaded = false;
25 | readonly #llamaNode = new LlamaNode(LLMRS);
26 |
27 | constructor(llmConfig: LlamaNodeCoreLlmAdapterConfig) {
28 | super();
29 |
30 | this.#llmConfig = {
31 | ...LlamaNodeCoreLlmAdapter.defaultConfig,
32 | ...llmConfig,
33 | };
34 | }
35 |
36 | async createChatCompletion(
37 | createChatCompletionRequest: LlmAdapterCreateChatCompletionRequest,
38 | abortSignal: AbortSignal,
39 | onData: (data: LlmAdapterCreateChatCompletionResponse) => void,
40 |   ): Promise<void> {
41 | await this.#load();
42 |
43 | const prompt = createChatCompletionRequest.messages
44 | .map(({ content, role }) => {
45 | if (role === Role.System) return `${content}\n`;
46 | return `${role ?? Role.User}: ${content}`;
47 | })
48 | .join("\n")
49 | .concat(`\n${Role.Assistant}: `);
50 |
51 | const bufferedTokens: string[] = [];
52 | const flushBuffer = (index: number) => {
53 | while (bufferedTokens.length > 0) {
54 | onData({
55 | index,
56 | delta: { content: bufferedTokens.shift() },
57 | });
58 | }
59 | };
60 |
61 | for (
62 | let index = 0;
63 | index < (createChatCompletionRequest.n ?? 1);
64 | index += 1
65 | ) {
66 | let isFirstToken = true;
67 |
68 | await this.#invokeLlamaNode(
69 | {
70 | ...this.#openAiCompletionRequestToLlamaNodeInvocation(
71 | createChatCompletionRequest,
72 | ),
73 | prompt,
74 | },
75 | abortSignal,
76 | ({ token, finishReason, stop }) => {
77 | if (isFirstToken) {
78 | onData({
79 | index,
80 | delta: { role: Role.Assistant },
81 | finishReason,
82 | });
83 |
84 | isFirstToken = false;
85 | }
86 |
87 |           if (["\n", Role.User, ":"].includes(token.trim())) {
88 |             bufferedTokens.push(token); // Possibly part of a generated "user:" turn marker; hold it back
89 |             if (bufferedTokens.join("").trim() === `${Role.User}:`) {
90 |               stop(); // The model started a new user turn; stop inference and discard the marker
91 |               bufferedTokens.length = 0;
92 |             }
93 | } else {
94 | flushBuffer(index);
95 | onData({
96 | index,
97 | delta: { content: token },
98 | finishReason,
99 | });
100 | }
101 | },
102 | () => {
103 | flushBuffer(index);
104 | onData({
105 | index,
106 | delta: {},
107 | finishReason: "stop",
108 | });
109 | },
110 | );
111 | }
112 | }
113 |
114 |   async listModels(): Promise<LlmAdapterModel[]> {
115 | return [
116 | {
117 | id: path.basename(this.#llmConfig.modelPath),
118 | created: 0,
119 | ownedBy: "unknown",
120 | },
121 | ];
122 | }
123 |
124 | async createEmbedding({
125 | model,
126 | input,
127 |   }: LlmAdapterCreateEmbeddingRequest): Promise<LlmAdapterCreateEmbeddingResponse> {
128 | await this.#load();
129 |
130 | return this.#llamaNode.getEmbedding({
131 | ...this.#llmConfig,
132 | prompt: input,
133 | });
134 | }
135 |
136 | async createCompletion(
137 | createCompletionRequest: LlmAdapterCreateCompletionRequest,
138 | abortSignal: AbortSignal,
139 | onData: (data: LlmAdapterCreateCompletionResponse) => void,
140 |   ): Promise<void> {
141 | await this.#load();
142 |
143 | for (
144 | let promptIndex = 0, index = 0;
145 | index <
146 | createCompletionRequest.prompt.length * (createCompletionRequest.n ?? 1);
147 | index += 1,
148 | promptIndex = (promptIndex + 1) % createCompletionRequest.prompt.length
149 | ) {
150 | const prompt = createCompletionRequest.prompt[promptIndex];
151 | await this.#invokeLlamaNode(
152 | {
153 | ...this.#openAiCompletionRequestToLlamaNodeInvocation(
154 | createCompletionRequest,
155 | ),
156 | prompt,
157 | },
158 | abortSignal,
159 | ({ token, finishReason }) => {
160 | onData({
161 | index,
162 | text: token,
163 | finishReason,
164 | });
165 | },
166 | );
167 | }
168 | }
169 |
170 | #openAiCompletionRequestToLlamaNodeInvocation(
171 | request:
172 | | LlmAdapterCreateCompletionRequest
173 | | LlmAdapterCreateChatCompletionRequest,
174 | ) {
175 |     let temperature = request.temperature ?? this.#llmConfig.temperature;
176 |     // A temperature of exactly 0 crashes llama-node, so clamp it to a tiny positive value
177 |     if (temperature === 0) {
178 |       temperature = 1e-5;
179 |     }
180 |
181 | return {
182 | numPredict: request.maxTokens ?? this.#llmConfig.numPredict ?? undefined,
183 | temperature,
184 | topP: request.topP ?? this.#llmConfig.topP,
185 |     } satisfies Partial<Generate>;
186 | }
187 |
188 | static get defaultConfig() {
189 | return {
190 | // Load config
191 | enableLogging: false,
192 | modelType: ModelType.Mpt,
193 | numCtxTokens: 4096,
194 | useMmap: true,
195 |
196 | // Generate config
197 | numThreads: cpus().length,
198 | numPredict: 32_768,
199 | batchSize: 128,
200 | repeatLastN: 64,
201 | repeatPenalty: 1.1,
202 | temperature: 0,
203 | topK: 40,
204 | topP: 0.95,
205 | seed: 0,
206 | float16: false,
207 | feedPrompt: true,
208 |     } satisfies Partial<LlamaNodeCoreLlmAdapterConfig>;
209 | }
210 |
211 | async #load() {
212 | if (this.#loaded) return;
213 |
214 | await this.#llamaNode.load({
215 | ...LlamaNodeCoreLlmAdapter.defaultConfig,
216 | ...this.#llmConfig,
217 | });
218 |
219 | this.#loaded = true;
220 | }
221 |
222 | async #invokeLlamaNode(
223 |     generateConfig: Partial<Generate>,
224 | callerAbortSignal: AbortSignal,
225 | onToken: ({
226 | token,
227 | finishReason,
228 | stop,
229 | }: {
230 | token: string;
231 | finishReason: FinishReason;
232 | stop: () => void;
233 | }) => void,
234 | onComplete?: () => void,
235 | ) {
236 | let tokensGenerated = 0;
237 | const abortController = new AbortController();
238 |
239 | const handleAbort = () => {
240 | callerAbortSignal.removeEventListener("abort", handleAbort);
241 | abortController.abort();
242 | };
243 |
244 | const stop = () => {
245 | abortController.abort();
246 | };
247 |
248 | callerAbortSignal.addEventListener("abort", handleAbort);
249 |
250 | return this.#llamaNode
251 | .createCompletion(
252 | {
253 | ...this.#llmConfig,
254 | ...generateConfig,
255 | },
256 | ({ token, completed }) => {
257 | // "llama-node" always emits "\n\n\n" at the end of inference
258 | if (completed) {
259 | if (onComplete) onComplete();
260 | return;
261 | }
262 |
263 | tokensGenerated += 1;
264 |
265 | let finishReason: FinishReason;
266 | if (tokensGenerated >= generateConfig.numPredict!) {
267 | finishReason = "length";
268 | abortController.abort();
269 | }
270 |
271 | onToken({ token, finishReason, stop });
272 | },
273 | abortController.signal,
274 | )
275 | .catch((error: unknown) => {
276 | // Looks like LLMError is not exported as a Class
277 | if (Object.getPrototypeOf(error).constructor.name !== "LLMError") {
278 | throw error;
279 | }
280 |
281 | const llmError = error as LLMError;
282 | if (llmError.type !== ("Aborted" as LLMError["type"])) {
283 | throw llmError;
284 | }
285 | })
286 | .finally(() => {
287 | callerAbortSignal.removeEventListener("abort", handleAbort);
288 | });
289 | }
290 | }
291 |
--------------------------------------------------------------------------------
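For chat completions, the adapter above flattens the message history into one text prompt: system messages are passed through verbatim, every other message is prefixed with its role, and a trailing "assistant: " cue is appended so the model continues in the assistant's voice. An illustrative reconstruction of that transformation (the sample messages are invented):

// Mirrors the prompt-building logic in createChatCompletion above.
const messages = [
  { role: "system", content: "You are a helpful assistant." },
  { role: "user", content: "What is the capital of France?" },
];

const prompt = messages
  .map(({ content, role }) => (role === "system" ? `${content}\n` : `${role}: ${content}`))
  .join("\n")
  .concat("\nassistant: ");

// prompt === "You are a helpful assistant.\n\nuser: What is the capital of France?\nassistant: "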
/src/llm-adapter.ts:
--------------------------------------------------------------------------------
1 | export type FinishReason = undefined | "length" | "stop";
2 | export enum Role {
3 | Assistant = "assistant",
4 | System = "system",
5 | User = "user",
6 | }
7 |
8 | export type LlmAdapterModel = { id: string; created: number; ownedBy: string };
9 |
10 | export type LlmAdapterCreateEmbeddingRequest = {
11 | model: string;
12 | input: string;
13 | };
14 |
15 | export type LlmAdapterCreateEmbeddingResponse = number[];
16 |
17 | export type LlmAdapterCreateCompletionRequest = {
18 | bestOf?: number;
19 | echo?: boolean;
20 | frequencyPenalty?: number;
21 |   logitBias?: Record<string, number>;
22 | logprobs?: number;
23 | maxTokens?: number;
24 | model: string;
25 | n?: number;
26 | presencePenalty?: number;
27 | // TODO: Support other types
28 | prompt: string[];
29 | stop?: string[];
30 | suffix?: string;
31 | temperature?: number;
32 | topP?: number;
33 | };
34 |
35 | export type LlmAdapterCreateChatCompletionRequest = {
36 | frequencyPenalty?: number;
37 |   logitBias?: Record<string, number>;
38 | maxTokens?: number;
39 | messages: Array<{
40 | content: string;
41 | name?: string;
42 | role: Role;
43 | }>;
44 | model: string;
45 | n?: number;
46 | presencePenalty?: number;
47 | stop?: string[];
48 | temperature?: number;
49 | topP?: number;
50 | };
51 |
52 | export type LlmAdapterCreateCompletionResponse = {
53 | index: number;
54 | // TODO: Figure out the type
55 | logprobs?: unknown;
56 | text: string;
57 | finishReason: FinishReason;
58 | };
59 |
60 | export type ChatCompletionDelta = {
61 | role?: Role;
62 | content?: string;
63 | };
64 |
65 | export type LlmAdapterCreateChatCompletionResponse = {
66 | index: number;
67 | delta: ChatCompletionDelta;
68 | finishReason?: string;
69 | };
70 |
71 | export abstract class LlmAdapter {
72 |   static get defaultConfig(): Record<string, unknown> {
73 | throw new Error("Not implemented");
74 | }
75 |
76 |   abstract listModels(): Promise<LlmAdapterModel[]>;
77 |
78 | abstract createEmbedding({
79 | model,
80 | input,
81 |   }: LlmAdapterCreateEmbeddingRequest): Promise<LlmAdapterCreateEmbeddingResponse>;
82 |
83 | abstract createCompletion(
84 | createCompletionRequest: LlmAdapterCreateCompletionRequest,
85 | abortSignal: AbortSignal,
86 | onData: (data: LlmAdapterCreateCompletionResponse) => void,
87 |   ): Promise<void>;
88 |
89 | abstract createChatCompletion(
90 | createChatCompletionRequest: LlmAdapterCreateChatCompletionRequest,
91 | abortSignal: AbortSignal,
92 | onData: (data: LlmAdapterCreateChatCompletionResponse) => void,
93 |   ): Promise<void>;
94 | }
95 |
--------------------------------------------------------------------------------
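A concrete adapter only has to subclass LlmAdapter and implement the four abstract methods; the examples/custom-adapter directory shows how one is wired up through llmatic.config.json. Below is a minimal sketch — an invented echo adapter with no real model and with the configuration plumbing omitted — just to show the shapes involved:

import {
  LlmAdapter,
  type LlmAdapterCreateChatCompletionRequest,
  type LlmAdapterCreateChatCompletionResponse,
  type LlmAdapterCreateCompletionRequest,
  type LlmAdapterCreateCompletionResponse,
  type LlmAdapterCreateEmbeddingRequest,
  type LlmAdapterCreateEmbeddingResponse,
  type LlmAdapterModel,
  Role,
} from "./llm-adapter.ts";

// Hypothetical adapter that echoes its input instead of running a model.
export default class EchoLlmAdapter extends LlmAdapter {
  async listModels(): Promise<LlmAdapterModel[]> {
    return [{ id: "echo", created: 0, ownedBy: "nobody" }];
  }

  async createEmbedding({
    input,
  }: LlmAdapterCreateEmbeddingRequest): Promise<LlmAdapterCreateEmbeddingResponse> {
    return [input.length]; // Stand-in "embedding"
  }

  async createCompletion(
    request: LlmAdapterCreateCompletionRequest,
    _abortSignal: AbortSignal,
    onData: (data: LlmAdapterCreateCompletionResponse) => void,
  ): Promise<void> {
    for (const [index, text] of request.prompt.entries()) {
      onData({ index, text, finishReason: "stop" });
    }
  }

  async createChatCompletion(
    request: LlmAdapterCreateChatCompletionRequest,
    _abortSignal: AbortSignal,
    onData: (data: LlmAdapterCreateChatCompletionResponse) => void,
  ): Promise<void> {
    const lastMessage = request.messages.at(-1)?.content ?? "";
    onData({ index: 0, delta: { role: Role.Assistant } });
    onData({ index: 0, delta: { content: lastMessage }, finishReason: "stop" });
  }
}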
/src/operation-handler.ts:
--------------------------------------------------------------------------------
1 | import type { RouteHandlerMethod } from "fastify";
2 |
3 | export type OperationHandler = {
4 | handle: RouteHandlerMethod;
5 | get operationId(): string;
6 | };
7 |
--------------------------------------------------------------------------------
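Every request handler in src/handlers implements this interface: operationId must match an operation declared in api.oas.yml, and handle is the Fastify route handler that the server factory presumably registers for that operation. A minimal sketch with an invented operation (not one that exists in the OAS document):

import type { OperationHandler } from "./operation-handler.ts";
import type { RouteHandlerMethod } from "fastify";

// Hypothetical handler; "ping" is made up for illustration.
export default class PingHandler implements OperationHandler {
  operationId = "ping";

  handle: RouteHandlerMethod = async (_request, _reply) => ({ pong: true });
}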
/src/sse-helper.ts:
--------------------------------------------------------------------------------
1 | /* eslint-disable @typescript-eslint/naming-convention */
2 | import type { FastifyReply } from "fastify";
3 |
4 | export class SseHelper {
5 | sse(reply: FastifyReply, data: unknown) {
6 | if (!reply.raw.headersSent) {
7 | const headers = {
8 | "Content-Type": "text/event-stream",
9 | Connection: "keep-alive",
10 | "Cache-Control": "no-cache",
11 | };
12 | reply.raw.writeHead(200, headers);
13 | }
14 |
15 | const normalizedData =
16 | typeof data === "string" ? data : JSON.stringify(data);
17 | const payload = `data: ${normalizedData}\n\n`;
18 | reply.raw.write(payload);
19 | }
20 | }
21 |
--------------------------------------------------------------------------------
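SseHelper writes raw server-sent events, bypassing Fastify's normal reply serialization; the streaming completion handlers use it to push token deltas as they arrive. A standalone usage sketch follows — the route and payloads are invented, and the trailing [DONE] event simply reflects the usual OpenAI-style stream terminator:

import { SseHelper } from "./sse-helper.ts";
import fastify from "fastify";

const server = fastify();
const sseHelper = new SseHelper();

// Invented demo route that streams three chunks and then terminates the stream.
server.get("/stream-demo", async (_request, reply) => {
  for (const text of ["first", "second", "third"]) {
    sseHelper.sse(reply, { text });
  }

  sseHelper.sse(reply, "[DONE]");
  reply.raw.end();
});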
/src/test-support/test-container.ts:
--------------------------------------------------------------------------------
1 | import { createContainer } from "../container.ts";
2 | import type { LlmAdapter } from "../llm-adapter.ts";
3 | import awilix from "awilix";
4 |
5 | // TODO: Allow overriding sseHelper and add separate tests for stream=true and stream=false
6 | export const createTestContainer = async (llmAdapter: LlmAdapter) => {
7 | const container = createContainer([
8 | {
9 | token: "llmConfig",
10 | resolver: () => awilix.asValue({}),
11 | },
12 | {
13 | token: "llmAdapter",
14 |       resolver: () => awilix.asValue(llmAdapter),
15 | },
16 | ]);
17 |
18 | return container;
19 | };
20 |
--------------------------------------------------------------------------------
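createTestContainer swaps the real adapter for a test double while keeping the rest of the container graph (including the Fastify server) intact, which is how the handler tests above drive requests through fastifyServer.inject. A condensed usage sketch with a hand-rolled fake in place of mockObject:

import { createTestContainer } from "./test-container.ts";
import type { LlmAdapter } from "../llm-adapter.ts";

// Hand-rolled fake; the real tests use mockObject() so calls can be asserted on.
const fakeAdapter = {
  async listModels() {
    return [{ id: "test-model", created: 0, ownedBy: "tester" }];
  },
} as unknown as LlmAdapter;

const testContainer = await createTestContainer(fakeAdapter);
const fastifyServer = testContainer.resolve("fastifyServer");

const response = await fastifyServer.inject({
  url: "v1/models/test-model",
  method: "GET",
  headers: {},
});
console.assert(response.statusCode === 200);

await testContainer.dispose();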
/src/types/create-chat-completion.ts:
--------------------------------------------------------------------------------
1 | export type CreateChatCompletionRequest = {
2 | /**
3 | * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
4 | * frequency in the text so far, decreasing the model's likelihood to repeat the same line
5 | * verbatim.
6 | *
7 | * [See more information about frequency and presence
8 | * penalties.](/docs/api-reference/parameter-details)
9 | */
10 | frequency_penalty?: number;
11 | /**
12 | * Modify the likelihood of specified tokens appearing in the completion.
13 | *
14 | * Accepts a json object that maps tokens (specified by their token ID in the tokenizer) to
15 | * an associated bias value from -100 to 100. Mathematically, the bias is added to the
16 | * logits generated by the model prior to sampling. The exact effect will vary per model,
17 | * but values between -1 and 1 should decrease or increase likelihood of selection; values
18 | * like -100 or 100 should result in a ban or exclusive selection of the relevant token.
19 | */
20 | logit_bias?: { [key: string]: any };
21 | /**
22 | * The maximum number of tokens allowed for the generated answer. By default, the number of
23 | * tokens the model can return will be (4096 - prompt tokens).
24 | */
25 | max_tokens?: number;
26 | /**
27 | * The messages to generate chat completions for, in the [chat
28 | * format](/docs/guides/chat/introduction).
29 | */
30 | messages: MessageElement[];
31 | /**
32 | * ID of the model to use. Currently, only `gpt-3.5-turbo` and `gpt-3.5-turbo-0301` are
33 | * supported.
34 | */
35 | model: string;
36 | /**
37 | * How many chat completion choices to generate for each input message.
38 | */
39 | n?: number;
40 | /**
41 | * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
42 | * appear in the text so far, increasing the model's likelihood to talk about new topics.
43 | *
44 | * [See more information about frequency and presence
45 | * penalties.](/docs/api-reference/parameter-details)
46 | */
47 | presence_penalty?: number;
48 | /**
49 | * Up to 4 sequences where the API will stop generating further tokens.
50 | */
51 | stop?: string[] | string;
52 | /**
53 | * If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as
54 | * data-only [server-sent
55 | * events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
56 | * as they become available, with the stream terminated by a `data: [DONE]` message.
57 | */
58 | stream?: boolean;
59 | /**
60 | * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
61 | * output more random, while lower values like 0.2 will make it more focused and
62 | * deterministic.
63 | *
64 | * We generally recommend altering this or `top_p` but not both.
65 | */
66 | temperature?: number;
67 | /**
68 | * An alternative to sampling with temperature, called nucleus sampling, where the model
69 | * considers the results of the tokens with top_p probability mass. So 0.1 means only the
70 | * tokens comprising the top 10% probability mass are considered.
71 | *
72 | * We generally recommend altering this or `temperature` but not both.
73 | */
74 | top_p?: number;
75 | /**
76 | * A unique identifier representing your end-user, which can help OpenAI to monitor and
77 | * detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).
78 | */
79 | user?: string;
80 | [property: string]: any;
81 | };
82 |
83 | export type MessageElement = {
84 | /**
85 | * The contents of the message
86 | */
87 | content: string;
88 | /**
89 | * The name of the user in a multi-user chat
90 | */
91 | name?: string;
92 | /**
93 | * The role of the author of this message.
94 | */
95 | role: Role;
96 | [property: string]: any;
97 | };
98 |
99 | /**
100 | * The role of the author of this message.
101 | */
102 | export enum Role {
103 | Assistant = "assistant",
104 | System = "system",
105 | User = "user",
106 | }
107 |
108 | export type CreateChatCompletionOkResponse = {
109 | choices: Choice[];
110 | created: number;
111 | id: string;
112 | model: string;
113 | object: string;
114 | usage?: Usage;
115 | [property: string]: any;
116 | };
117 |
118 | export type Choice = {
119 | finish_reason?: string;
120 | index?: number;
121 | message?: ChoiceMessage;
122 | [property: string]: any;
123 | };
124 |
125 | export type ChoiceMessage = {
126 | /**
127 | * The contents of the message
128 | */
129 | content: string;
130 | /**
131 | * The role of the author of this message.
132 | */
133 | role: Role;
134 | [property: string]: any;
135 | };
136 |
137 | export type Usage = {
138 | completion_tokens: number;
139 | prompt_tokens: number;
140 | total_tokens: number;
141 | [property: string]: any;
142 | };
143 |
--------------------------------------------------------------------------------
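The types above mirror the OpenAI chat-completions schema (they appear to be generated from the fetched OAS by scripts/generate-types.mjs). A sample request body typed against them, with placeholder values:

import { type CreateChatCompletionRequest, Role } from "./create-chat-completion.ts";

const request: CreateChatCompletionRequest = {
  model: "my-local-model", // placeholder; use an id returned by GET /v1/models
  messages: [
    { role: Role.System, content: "You are a helpful assistant." },
    { role: Role.User, content: "Say hello." },
  ],
  temperature: 0.7,
  stream: true,
};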
/src/types/create-completion.ts:
--------------------------------------------------------------------------------
1 | export type CreateCompletionRequest = {
2 | /**
3 | * Generates `best_of` completions server-side and returns the "best" (the one with the
4 | * highest log probability per token). Results cannot be streamed.
5 | *
6 | * When used with `n`, `best_of` controls the number of candidate completions and `n`
7 | * specifies how many to return – `best_of` must be greater than `n`.
8 | *
9 | * **Note:** Because this parameter generates many completions, it can quickly consume your
10 | * token quota. Use carefully and ensure that you have reasonable settings for `max_tokens`
11 | * and `stop`.
12 | */
13 | best_of?: number;
14 | /**
15 | * Echo back the prompt in addition to the completion
16 | */
17 | echo?: boolean;
18 | /**
19 | * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing
20 | * frequency in the text so far, decreasing the model's likelihood to repeat the same line
21 | * verbatim.
22 | *
23 | * [See more information about frequency and presence
24 | * penalties.](/docs/api-reference/parameter-details)
25 | */
26 | frequency_penalty?: number;
27 | /**
28 | * Modify the likelihood of specified tokens appearing in the completion.
29 | *
30 | * Accepts a json object that maps tokens (specified by their token ID in the GPT tokenizer)
31 | * to an associated bias value from -100 to 100. You can use this [tokenizer
32 | * tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to
33 | * token IDs. Mathematically, the bias is added to the logits generated by the model prior
34 | * to sampling. The exact effect will vary per model, but values between -1 and 1 should
35 | * decrease or increase likelihood of selection; values like -100 or 100 should result in a
36 | * ban or exclusive selection of the relevant token.
37 | *
38 | * As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from
39 | * being generated.
40 | */
41 | logit_bias?: { [key: string]: any };
42 | /**
43 | * Include the log probabilities on the `logprobs` most likely tokens, as well the chosen
44 | * tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely
45 | * tokens. The API will always return the `logprob` of the sampled token, so there may be up
46 | * to `logprobs+1` elements in the response.
47 | *
48 | * The maximum value for `logprobs` is 5. If you need more than this, please contact us
49 | * through our [Help center](https://help.openai.com) and describe your use case.
50 | */
51 | logprobs?: number;
52 | /**
53 | * The maximum number of [tokens](/tokenizer) to generate in the completion.
54 | *
55 | * The token count of your prompt plus `max_tokens` cannot exceed the model's context
56 | * length. Most models have a context length of 2048 tokens (except for the newest models,
57 | * which support 4096).
58 | */
59 | max_tokens?: number;
60 | /**
61 | * ID of the model to use. You can use the [List models](/docs/api-reference/models/list)
62 | * API to see all of your available models, or see our [Model
63 | * overview](/docs/models/overview) for descriptions of them.
64 | */
65 | model: string;
66 | /**
67 | * How many completions to generate for each prompt.
68 | *
69 | * **Note:** Because this parameter generates many completions, it can quickly consume your
70 | * token quota. Use carefully and ensure that you have reasonable settings for `max_tokens`
71 | * and `stop`.
72 | */
73 | n?: number;
74 | /**
75 | * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they
76 | * appear in the text so far, increasing the model's likelihood to talk about new topics.
77 | *
78 | * [See more information about frequency and presence
79 | * penalties.](/docs/api-reference/parameter-details)
80 | */
81 | presence_penalty?: number;
82 | /**
83 | * The prompt(s) to generate completions for, encoded as a string, array of strings, array
84 | * of tokens, or array of token arrays.
85 | *
86 | * Note that <|endoftext|> is the document separator that the model sees during training, so
87 | * if a prompt is not specified the model will generate as if from the beginning of a new
88 | * document.
89 | */
90 |   prompt?: Array<any> | string;
91 | /**
92 | * Up to 4 sequences where the API will stop generating further tokens. The returned text
93 | * will not contain the stop sequence.
94 | */
95 | stop?: string[] | string;
96 | /**
97 | * Whether to stream back partial progress. If set, tokens will be sent as data-only
98 | * [server-sent
99 | * events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format)
100 | * as they become available, with the stream terminated by a `data: [DONE]` message.
101 | */
102 | stream?: boolean;
103 | /**
104 | * The suffix that comes after a completion of inserted text.
105 | */
106 | suffix?: string;
107 | /**
108 | * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the
109 | * output more random, while lower values like 0.2 will make it more focused and
110 | * deterministic.
111 | *
112 | * We generally recommend altering this or `top_p` but not both.
113 | */
114 | temperature?: number;
115 | /**
116 | * An alternative to sampling with temperature, called nucleus sampling, where the model
117 | * considers the results of the tokens with top_p probability mass. So 0.1 means only the
118 | * tokens comprising the top 10% probability mass are considered.
119 | *
120 | * We generally recommend altering this or `temperature` but not both.
121 | */
122 | top_p?: number;
123 | /**
124 | * A unique identifier representing your end-user, which can help OpenAI to monitor and
125 | * detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).
126 | */
127 | user?: string;
128 | [property: string]: any;
129 | };
130 |
131 | export type CreateCompletionOkResponse = {
132 | choices: Choice[];
133 | created: number;
134 | id: string;
135 | model: string;
136 | object: string;
137 | usage?: Usage;
138 | [property: string]: any;
139 | };
140 |
141 | export type Choice = {
142 | finish_reason?: string;
143 | index?: number;
144 | logprobs?: Logprobs;
145 | text?: string;
146 | [property: string]: any;
147 | };
148 |
149 | export type Logprobs = {
150 | text_offset?: number[];
151 | token_logprobs?: number[];
152 | tokens?: string[];
153 | top_logprobs?: { [key: string]: any }[];
154 | [property: string]: any;
155 | };
156 |
157 | export type Usage = {
158 | completion_tokens: number;
159 | prompt_tokens: number;
160 | total_tokens: number;
161 | [property: string]: any;
162 | };
163 |
--------------------------------------------------------------------------------
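The plain-completion request above is analogous to the chat variant; note that at the OAS level `prompt` accepts a string or an array, while the adapter-facing LlmAdapterCreateCompletionRequest currently narrows it to string[] (see the TODO in llm-adapter.ts). A placeholder example:

import type { CreateCompletionRequest } from "./create-completion.ts";

const request: CreateCompletionRequest = {
  model: "my-local-model", // placeholder
  prompt: ["Once upon a time", "In a galaxy far away"], // each prompt is completed independently
  max_tokens: 64,
  n: 1,
  stream: false,
};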
/src/types/create-embedding.ts:
--------------------------------------------------------------------------------
1 | export type CreateEmbeddingRequest = {
2 | /**
3 | * Input text to get embeddings for, encoded as a string or array of tokens. To get
4 | * embeddings for multiple inputs in a single request, pass an array of strings or array of
5 | * token arrays. Each input must not exceed 8192 tokens in length.
6 | */
7 |   input: Array<any> | string;
8 | /**
9 | * ID of the model to use. You can use the [List models](/docs/api-reference/models/list)
10 | * API to see all of your available models, or see our [Model
11 | * overview](/docs/models/overview) for descriptions of them.
12 | */
13 | model: string;
14 | /**
15 | * A unique identifier representing your end-user, which can help OpenAI to monitor and
16 | * detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids).
17 | */
18 | user?: string;
19 | };
20 |
21 | export type CreateEmbeddingOkResponse = {
22 | data: Datum[];
23 | model: string;
24 | object: string;
25 | usage: Usage;
26 | [property: string]: any;
27 | };
28 |
29 | export type Datum = {
30 | embedding: number[];
31 | index: number;
32 | object: string;
33 | [property: string]: any;
34 | };
35 |
36 | export type Usage = {
37 | prompt_tokens: number;
38 | total_tokens: number;
39 | [property: string]: any;
40 | };
41 |
--------------------------------------------------------------------------------
/src/types/list-models.ts:
--------------------------------------------------------------------------------
1 | export type ListModelsOkResponse = {
2 |   data: Array<ModelObject>;
3 | object: string;
4 | [property: string]: any;
5 | };
6 |
7 | export type ModelObject = {
8 | created: number;
9 | id: string;
10 | object: string;
11 | owned_by: string;
12 | [property: string]: any;
13 | };
14 |
--------------------------------------------------------------------------------
/src/types/retrieve-model.ts:
--------------------------------------------------------------------------------
1 | export type RetrieveModelOkResponseObject = {
2 | created: number;
3 | id: string;
4 | object: string;
5 | owned_by: string;
6 | [property: string]: any;
7 | };
8 |
--------------------------------------------------------------------------------
/tsconfig.json:
--------------------------------------------------------------------------------
1 | {
2 | "compilerOptions": {
3 | "target": "esnext",
4 | "module": "nodenext",
5 | "moduleResolution": "nodenext",
6 | "allowSyntheticDefaultImports": true,
7 | "allowImportingTsExtensions": true,
8 | "strictNullChecks": true,
9 | "noEmit": true,
10 | "skipLibCheck": true
11 | },
12 | "exclude": ["examples/**/*"]
13 | }
14 |
--------------------------------------------------------------------------------