├── .eslintrc.json ├── .github ├── dependabot.yml └── workflows │ ├── test-and-release.yml │ └── upgrade-dependencies.yml ├── .gitignore ├── .ncurc.json ├── .npmrc ├── .prettierignore ├── LICENSE ├── README.md ├── api.oas.yml ├── bin ├── llmatic └── llmatic.js ├── examples ├── custom-adapter │ ├── custom-llm-adapter.ts │ ├── llmatic.config.json │ ├── package-lock.json │ └── package.json ├── node-langchain │ ├── index.mjs │ ├── package-lock.json │ └── package.json └── python-langchain │ ├── main.py │ └── requirements.txt ├── media ├── chatbot-ui.gif └── logo.png ├── package-lock.json ├── package.json ├── public └── index.html ├── scripts ├── fetch-openai-oas.mjs └── generate-types.mjs ├── src ├── cli │ ├── cli-utils.test.ts │ ├── cli-utils.ts │ ├── common-options.ts │ ├── llmatic-config.js │ ├── llmatic-start.js │ └── llmatic.ts ├── container.ts ├── default-llm-adapter.ts ├── fastify-server-factory.ts ├── handlers │ ├── create-chat-completion.handler.test.ts │ ├── create-chat-completion.handler.ts │ ├── create-completion.handler.test.ts │ ├── create-completion.handler.ts │ ├── create-embedding.handler.test.ts │ ├── create-embedding.handler.ts │ ├── list-models.handler.test.ts │ ├── list-models.handler.ts │ ├── retrieve-model.handler.test.ts │ └── retrieve-model.handler.ts ├── llama-node-core-llm-adapter.ts ├── llm-adapter.ts ├── operation-handler.ts ├── sse-helper.ts ├── test-support │ └── test-container.ts └── types │ ├── create-chat-completion.ts │ ├── create-completion.ts │ ├── create-embedding.ts │ ├── list-models.ts │ └── retrieve-model.ts └── tsconfig.json /.eslintrc.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": ["plugin:unicorn/recommended", "xo", "xo-typescript", "prettier"], 3 | "rules": { 4 | "eqeqeq": ["error", "smart"], 5 | "no-eq-null": "off", 6 | "unicorn/no-null": "off", 7 | "unicorn/prefer-event-target": "off" 8 | }, 9 | "ignorePatterns": [ 10 | "bin/llmatic", 11 | "scripts/*.mjs", 12 | "src/cli/*.js", 13 | "src/types/*.ts", 14 | "examples/**", 15 | "dist/**" 16 | ] 17 | } 18 | -------------------------------------------------------------------------------- /.github/dependabot.yml: -------------------------------------------------------------------------------- 1 | version: 2 2 | updates: 3 | - package-ecosystem: "github-actions" 4 | directory: "/" 5 | schedule: 6 | interval: "weekly" 7 | open-pull-requests-limit: 10 8 | 9 | - package-ecosystem: "npm" 10 | directory: "/" 11 | schedule: 12 | interval: "daily" 13 | open-pull-requests-limit: 10 14 | -------------------------------------------------------------------------------- /.github/workflows/test-and-release.yml: -------------------------------------------------------------------------------- 1 | name: test-and-release 2 | 3 | on: 4 | push: 5 | branches: 6 | - master 7 | paths-ignore: 8 | - "examples/**" 9 | - "media/**" 10 | - "scripts/**" 11 | - "LICENSE" 12 | pull_request: 13 | paths-ignore: 14 | - "examples/**" 15 | - "media/**" 16 | - "scripts/**" 17 | - "LICENSE" 18 | 19 | jobs: 20 | test: 21 | runs-on: ubuntu-latest 22 | strategy: 23 | matrix: 24 | node: [18, 20, 21] 25 | fail-fast: true 26 | steps: 27 | - uses: actions/checkout@main 28 | - uses: actions/setup-node@main 29 | with: 30 | node-version: ${{ matrix.node }} 31 | - name: Cache node modules 32 | uses: actions/cache@main 33 | env: 34 | cache-name: cache-node-${{ matrix.node }}-modules 35 | with: 36 | path: ~/.npm 37 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ 
hashFiles('**/package-lock.json') }} 38 | restore-keys: | 39 | ${{ runner.os }}-build-${{ env.cache-name }}- 40 | ${{ runner.os }}-build- 41 | ${{ runner.os }}- 42 | - name: Install 43 | run: npm ci 44 | - name: Tests 45 | run: npm test 46 | version-check: 47 | runs-on: ubuntu-latest 48 | needs: test 49 | if: github.ref == 'refs/heads/master' 50 | outputs: 51 | changed: ${{ steps.check.outputs.changed }} 52 | version: ${{ steps.check.outputs.version }} 53 | steps: 54 | - uses: actions/checkout@main 55 | - id: check 56 | uses: EndBug/version-check@v2 57 | with: 58 | diff-search: true 59 | token: ${{ secrets.GITHUB_TOKEN }} 60 | release: 61 | runs-on: ubuntu-latest 62 | needs: version-check 63 | if: needs.version-check.outputs.changed == 'true' 64 | steps: 65 | - uses: actions/checkout@main 66 | # https://github.com/rickstaa/action-create-tag/issues/10 67 | - name: "Change perms on GITHUB_WORKSPACE" 68 | run: | 69 | sudo chown -R root:root $GITHUB_WORKSPACE 70 | sudo chmod -R 0777 $GITHUB_WORKSPACE 71 | - uses: rickstaa/action-create-tag@main 72 | with: 73 | tag: v${{ needs.version-check.outputs.version }} 74 | message: v${{ needs.version-check.outputs.version }} 75 | - uses: ncipollo/release-action@main 76 | with: 77 | name: v${{ needs.version-check.outputs.version }} 78 | tag: v${{ needs.version-check.outputs.version }} 79 | token: ${{ secrets.GITHUB_TOKEN }} 80 | npm-publish: 81 | runs-on: ubuntu-latest 82 | needs: release 83 | if: needs.version-check.outputs.changed == 'true' 84 | steps: 85 | - uses: actions/checkout@main 86 | - uses: actions/setup-node@main 87 | with: 88 | node-version: 18 89 | registry-url: "https://registry.npmjs.org" 90 | - uses: actions/cache@main 91 | env: 92 | cache-name: cache-node-${{ matrix.node }}-modules 93 | with: 94 | path: ~/.npm 95 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }} 96 | restore-keys: | 97 | ${{ runner.os }}-build-${{ env.cache-name }}- 98 | ${{ runner.os }}-build- 99 | ${{ runner.os }}- 100 | - name: Install 101 | run: | 102 | set -e 103 | npm ci 104 | - name: Publish 105 | run: npm publish 106 | env: 107 | NODE_AUTH_TOKEN: ${{ secrets.NPM_TOKEN }} 108 | -------------------------------------------------------------------------------- /.github/workflows/upgrade-dependencies.yml: -------------------------------------------------------------------------------- 1 | name: upgrade-dependencies 2 | 3 | on: 4 | workflow_dispatch: {} 5 | schedule: 6 | - cron: "0 0 * * *" 7 | 8 | jobs: 9 | upgrade-deps: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - uses: actions/checkout@main 13 | - uses: actions/setup-node@main 14 | with: 15 | node-version: 20 16 | - name: Cache node modules 17 | uses: actions/cache@main 18 | env: 19 | cache-name: cache-node-${{ matrix.node }}-modules 20 | with: 21 | path: ~/.npm 22 | key: ${{ runner.os }}-build-${{ env.cache-name }}-${{ hashFiles('**/package-lock.json') }} 23 | restore-keys: | 24 | ${{ runner.os }}-build-${{ env.cache-name }}- 25 | ${{ runner.os }}-build- 26 | ${{ runner.os }}- 27 | - name: Setup Git 28 | run: | 29 | git config --local user.name "github-actions[bot]" 30 | git config --local user.email "github-actions[bot]@users.noreply.github.com" 31 | git config --local pull.rebase true 32 | 33 | # work around "insufficient permission for adding an object to repository database .git/object" issue 34 | sudo chmod -R ugo+rwX .git 35 | - name: Check for updates 36 | id: check-updates 37 | run: | 38 | set -ex 39 | npm ci &> /dev/null 40 | npx ncu 41 | npm i &> /dev/null 42 
| npm audit fix --quiet --no-progress --no-fund || true 43 | npm run fix &> /dev/null || true 44 | 45 | git add -u 46 | git update-index --refresh 47 | if ! git diff-index --quiet HEAD --; then 48 | echo "is-changed=1" >> $GITHUB_OUTPUT 49 | fi 50 | - name: Create a PR 51 | if: steps.check-updates.outputs.is-changed 52 | id: create-pr 53 | run: | 54 | npm version patch 55 | PKG_VERSION="$(node -e 'process.stdout.write(require("./package.json").version)')" 56 | 57 | REMOTE_REPO="https://${GITHUB_ACTOR}:${GITHUB_TOKEN}@github.com/${GITHUB_REPOSITORY}.git" 58 | CURRENT_BRANCH="$(git branch --show-current)" 59 | NEW_BRANCH="v${PKG_VERSION}" 60 | 61 | if git ls-remote --exit-code --heads "${REMOTE_REPO}" "${NEW_BRANCH}" > /dev/null; then 62 | # PR already exists 63 | exit 0 64 | fi 65 | 66 | git commit -a -m "${PKG_VERSION}" --no-verify 67 | git pull "${REMOTE_REPO}" "${CURRENT_BRANCH}" 68 | git checkout -b "${NEW_BRANCH}" 69 | git push "${REMOTE_REPO}" "HEAD:${NEW_BRANCH}" 70 | 71 | PR_URL=$(gh pr create -B "${CURRENT_BRANCH}" -H "${NEW_BRANCH}" -f) 72 | echo "pr-url=${PR_URL}" >> $GITHUB_OUTPUT 73 | env: 74 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 75 | - name: Approve and merge the PR 76 | if: steps.create-pr.outputs.pr-url 77 | run: | 78 | gh pr review --approve "${PR_URL}" 79 | gh pr merge --auto --delete-branch --rebase "${PR_URL}" 80 | env: 81 | GITHUB_TOKEN: ${{ secrets.GH_PAT }} 82 | PR_URL: ${{ steps.create-pr.outputs.pr-url }} 83 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/macos,windows,linux,visualstudiocode,vim,node 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=macos,windows,linux,visualstudiocode,vim,node 3 | 4 | ### Linux ### 5 | *~ 6 | 7 | # temporary files which can be created if a process still has a handle open of a deleted file 8 | .fuse_hidden* 9 | 10 | # KDE directory preferences 11 | .directory 12 | 13 | # Linux trash folder which might appear on any partition or disk 14 | .Trash-* 15 | 16 | # .nfs files are created when an open file is removed but is still being accessed 17 | .nfs* 18 | 19 | ### macOS ### 20 | # General 21 | .DS_Store 22 | .AppleDouble 23 | .LSOverride 24 | 25 | # Icon must end with two \r 26 | Icon 27 | 28 | 29 | # Thumbnails 30 | ._* 31 | 32 | # Files that might appear in the root of a volume 33 | .DocumentRevisions-V100 34 | .fseventsd 35 | .Spotlight-V100 36 | .TemporaryItems 37 | .Trashes 38 | .VolumeIcon.icns 39 | .com.apple.timemachine.donotpresent 40 | 41 | # Directories potentially created on remote AFP share 42 | .AppleDB 43 | .AppleDesktop 44 | Network Trash Folder 45 | Temporary Items 46 | .apdisk 47 | 48 | ### macOS Patch ### 49 | # iCloud generated files 50 | *.icloud 51 | 52 | ### Node ### 53 | # Logs 54 | logs 55 | *.log 56 | npm-debug.log* 57 | yarn-debug.log* 58 | yarn-error.log* 59 | lerna-debug.log* 60 | .pnpm-debug.log* 61 | 62 | # Diagnostic reports (https://nodejs.org/api/report.html) 63 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 64 | 65 | # Runtime data 66 | pids 67 | *.pid 68 | *.seed 69 | *.pid.lock 70 | 71 | # Directory for instrumented libs generated by jscoverage/JSCover 72 | lib-cov 73 | 74 | # Coverage directory used by tools like istanbul 75 | coverage 76 | *.lcov 77 | 78 | # nyc test coverage 79 | .nyc_output 80 | 81 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 
82 | .grunt 83 | 84 | # Bower dependency directory (https://bower.io/) 85 | bower_components 86 | 87 | # node-waf configuration 88 | .lock-wscript 89 | 90 | # Compiled binary addons (https://nodejs.org/api/addons.html) 91 | build/Release 92 | 93 | # Dependency directories 94 | node_modules/ 95 | jspm_packages/ 96 | 97 | # Snowpack dependency directory (https://snowpack.dev/) 98 | web_modules/ 99 | 100 | # TypeScript cache 101 | *.tsbuildinfo 102 | 103 | # Optional npm cache directory 104 | .npm 105 | 106 | # Optional eslint cache 107 | .eslintcache 108 | 109 | # Optional stylelint cache 110 | .stylelintcache 111 | 112 | # Microbundle cache 113 | .rpt2_cache/ 114 | .rts2_cache_cjs/ 115 | .rts2_cache_es/ 116 | .rts2_cache_umd/ 117 | 118 | # Optional REPL history 119 | .node_repl_history 120 | 121 | # Output of 'npm pack' 122 | *.tgz 123 | 124 | # Yarn Integrity file 125 | .yarn-integrity 126 | 127 | # dotenv environment variable files 128 | .env 129 | .env.development.local 130 | .env.test.local 131 | .env.production.local 132 | .env.local 133 | 134 | # parcel-bundler cache (https://parceljs.org/) 135 | .cache 136 | .parcel-cache 137 | 138 | # Next.js build output 139 | .next 140 | out 141 | 142 | # Nuxt.js build / generate output 143 | .nuxt 144 | dist 145 | 146 | # Gatsby files 147 | .cache/ 148 | # Comment in the public line in if your project uses Gatsby and not Next.js 149 | # https://nextjs.org/blog/next-9-1#public-directory-support 150 | # public 151 | 152 | # vuepress build output 153 | .vuepress/dist 154 | 155 | # vuepress v2.x temp and cache directory 156 | .temp 157 | 158 | # Docusaurus cache and generated files 159 | .docusaurus 160 | 161 | # Serverless directories 162 | .serverless/ 163 | 164 | # FuseBox cache 165 | .fusebox/ 166 | 167 | # DynamoDB Local files 168 | .dynamodb/ 169 | 170 | # TernJS port file 171 | .tern-port 172 | 173 | # Stores VSCode versions used for testing VSCode extensions 174 | .vscode-test 175 | 176 | # yarn v2 177 | .yarn/cache 178 | .yarn/unplugged 179 | .yarn/build-state.yml 180 | .yarn/install-state.gz 181 | .pnp.* 182 | 183 | ### Node Patch ### 184 | # Serverless Webpack directories 185 | .webpack/ 186 | 187 | # Optional stylelint cache 188 | 189 | # SvelteKit build / generate output 190 | .svelte-kit 191 | 192 | ### Vim ### 193 | # Swap 194 | [._]*.s[a-v][a-z] 195 | !*.svg # comment out if you don't need vector files 196 | [._]*.sw[a-p] 197 | [._]s[a-rt-v][a-z] 198 | [._]ss[a-gi-z] 199 | [._]sw[a-p] 200 | 201 | # Session 202 | Session.vim 203 | Sessionx.vim 204 | 205 | # Temporary 206 | .netrwhist 207 | # Auto-generated tag files 208 | tags 209 | # Persistent undo 210 | [._]*.un~ 211 | 212 | ### VisualStudioCode ### 213 | .vscode/* 214 | !.vscode/settings.json 215 | !.vscode/tasks.json 216 | !.vscode/launch.json 217 | !.vscode/extensions.json 218 | !.vscode/*.code-snippets 219 | 220 | # Local History for Visual Studio Code 221 | .history/ 222 | 223 | # Built Visual Studio Code Extensions 224 | *.vsix 225 | 226 | ### VisualStudioCode Patch ### 227 | # Ignore all local history of files 228 | .history 229 | .ionide 230 | 231 | ### Windows ### 232 | # Windows thumbnail cache files 233 | Thumbs.db 234 | Thumbs.db:encryptable 235 | ehthumbs.db 236 | ehthumbs_vista.db 237 | 238 | # Dump file 239 | *.stackdump 240 | 241 | # Folder config file 242 | [Dd]esktop.ini 243 | 244 | # Recycle Bin used on file shares 245 | $RECYCLE.BIN/ 246 | 247 | # Windows Installer files 248 | *.cab 249 | *.msi 250 | *.msix 251 | *.msm 252 | *.msp 253 | 254 | # Windows shortcuts 
255 | *.lnk 256 | 257 | # End of https://www.toptal.com/developers/gitignore/api/macos,windows,linux,visualstudiocode,vim,node 258 | /models/ 259 | .python-version 260 | /llmatic.config.json -------------------------------------------------------------------------------- /.ncurc.json: -------------------------------------------------------------------------------- 1 | { 2 | "upgrade": true 3 | } 4 | -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | git-tag-version=false 2 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | dist/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) Fardjad Davari 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 |
2 | 3 | # LLMatic 4 | 5 | LLMatic Logo 6 | 7 | Use self-hosted LLMs with an OpenAI compatible API 8 | 9 |
10 | 11 | llmatic llmatic test and release 12 | 13 |
14 | 15 |
16 | 17 |
18 | 19 | ## Project status 20 | 21 | This project was the result of my curiosity and experimentation with OpenAI's API, and I enjoyed building it. It is certainly neither the first nor the last project of its kind. Given my limited time and resources, I'd like to pause the development of this project for now. I'll list some similar projects below that can be used as alternatives: 22 | 23 | 1. [Ollama](https://github.com/ollama/ollama/blob/main/docs/openai.md) 24 | 2. [LLaMA.cpp HTTP Server](https://github.com/ggerganov/llama.cpp/tree/master/examples/server) 25 | 3. [GPT4All Chat Server Mode](https://docs.gpt4all.io/gpt4all_chat.html#gpt4all-chat-server-mode) 26 | 4. [FastChat](https://github.com/lm-sys/FastChat/blob/main/docs/openai_api.md) 27 | 28 | 29 | ## Synopsis 30 | 31 | LLMatic can be used as a drop-in replacement for OpenAI's API [v1.2.0](https://github.com/openai/openai-openapi/blob/88f221442879061d9970ed453a65b973d226f15d/openapi.yaml) (see the 32 | supported endpoints). By default, it uses [llama-node](https://github.com/Atome-FE/llama-node) 33 | with the [llama.cpp](https://github.com/ggerganov/llama.cpp) backend to run the models locally. However, you can easily create [your own adapter](#custom-adapters) to use any other model or service. 34 | 35 | Supported endpoints: 36 | 37 | - [x] /completions (stream and non-stream) 38 | - [x] /chat/completions (stream and non-stream) 39 | - [x] /embeddings 40 | - [x] /models 41 | 42 | ## How to use 43 | 44 | If you prefer a video tutorial, you can watch the following video for step-by-step instructions on how to use this project: 45 | 46 | 47 | LLMatic 48 | 49 | 50 | ### Requirements 51 | 52 | - Node.js >=18.16 53 | - Unix-based OS (Linux, macOS, WSL, etc.) 54 | 55 | ### Installation 56 | 57 | Create an empty directory and run `npm init`: 58 | 59 | ```bash 60 | export LLMATIC_PROJECT_DIR=my-llmatic-project 61 | mkdir $LLMATIC_PROJECT_DIR 62 | cd $LLMATIC_PROJECT_DIR 63 | npm init -y 64 | ``` 65 | 66 | Install and configure LLMatic: 67 | 68 | ```bash 69 | npm add llmatic 70 | # Download a model and generate a config file 71 | npx llmatic config 72 | ``` 73 | 74 | Adjust the config file to your needs and start the server: 75 | 76 | ```bash 77 | npx llmatic start 78 | ``` 79 | 80 | You can run `llmatic --help` to see all available commands. 81 | 82 | ### Usage with [chatbot-ui](https://github.com/mckaywrigley/chatbot-ui) 83 | 84 | Clone the repo and install the dependencies: 85 | 86 | ```bash 87 | git clone https://github.com/mckaywrigley/chatbot-ui.git 88 | cd chatbot-ui 89 | npm install 90 | ``` 91 | 92 | Create a `.env.local` file: 93 | 94 | ```bash 95 | cat <<EOF > .env.local 96 | # For now, this is ignored by LLMatic 97 | DEFAULT_MODEL=Ignored 98 | 99 | NEXT_PUBLIC_DEFAULT_SYSTEM_PROMPT=A chat between a curious human (user) and an artificial intelligence assistant (assistant). The assistant gives helpful, detailed, and polite answers to the human's questions. 100 | 101 | user: Hello! 102 | assistant: Hello! How may I help you today? 103 | user: Please tell me the largest city in Europe. 104 | assistant: Sure. The largest city in Europe is Moscow, the capital of Russia.
105 | 106 | OPENAI_API_KEY=ANYTHING_WILL_DO 107 | OPENAI_API_HOST=http://localhost:3000 108 | 109 | GOOGLE_API_KEY=YOUR_API_KEY 110 | GOOGLE_CSE_ID=YOUR_ENGINE_ID 111 | EOF 112 | ``` 113 | 114 | Run the server: 115 | 116 | ```bash 117 | npm run dev -- --port 3001 118 | ``` 119 | 120 | Demo: 121 | 122 | ![chatbot-ui Demo](/media/chatbot-ui.gif) 123 | 124 | ### Usage with [LangChain](https://langchain.com) 125 | 126 | There are two examples of using LLMatic with LangChain in the 127 | [`examples`](/examples) directory. 128 | 129 | To run the Node.js example, first install the dependencies: 130 | 131 | ```bash 132 | cd examples/node-langchain 133 | npm install 134 | ``` 135 | 136 | Then run the main script: 137 | 138 | ```bash 139 | npm start 140 | ``` 141 | 142 |
143 | Expand this to see the sample output 144 | 145 | ``` 146 | [chain/start] [1:chain:llm_chain] Entering Chain run with input: { 147 | "humanInput": "Rememeber that this is a demo of LLMatic with LangChain.", 148 | "history": "" 149 | } 150 | [llm/start] [1:chain:llm_chain > 2:llm:openai] Entering LLM run with input: { 151 | "prompts": [ 152 | "A chat between a curious user and an artificial intelligence assistant.\nThe assistant gives helpful, detailed, and polite answers to the user's questions.\n\n\nHuman: Rememeber that this is a demo of LLMatic with LangChain.\nAI:" 153 | ] 154 | } 155 | [llm/end] [1:chain:llm_chain > 2:llm:openai] [5.92s] Exiting LLM run with output: { 156 | "generations": [ 157 | [ 158 | { 159 | "text": " Yes, I understand. I am ready to assist you with your queries.", 160 | "generationInfo": { 161 | "finishReason": "stop", 162 | "logprobs": null 163 | } 164 | } 165 | ] 166 | ], 167 | "llmOutput": { 168 | "tokenUsage": {} 169 | } 170 | } 171 | [chain/end] [1:chain:llm_chain] [5.92s] Exiting Chain run with output: { 172 | "text": " Yes, I understand. I am ready to assist you with your queries." 173 | } 174 | [chain/start] [1:chain:llm_chain] Entering Chain run with input: { 175 | "humanInput": "What did I ask you to remember?", 176 | "history": "Human: Rememeber that this is a demo of LLMatic with LangChain.\nAI: Yes, I understand. I am ready to assist you with your queries." 177 | } 178 | [llm/start] [1:chain:llm_chain > 2:llm:openai] Entering LLM run with input: { 179 | "prompts": [ 180 | "A chat between a curious user and an artificial intelligence assistant.\nThe assistant gives helpful, detailed, and polite answers to the user's questions.\n\nHuman: Rememeber that this is a demo of LLMatic with LangChain.\nAI: Yes, I understand. I am ready to assist you with your queries.\nHuman: What did I ask you to remember?\nAI:" 181 | ] 182 | } 183 | [llm/end] [1:chain:llm_chain > 2:llm:openai] [6.51s] Exiting LLM run with output: { 184 | "generations": [ 185 | [ 186 | { 187 | "text": " You asked me to remember that this is a demo of LLMatic with LangChain.", 188 | "generationInfo": { 189 | "finishReason": "stop", 190 | "logprobs": null 191 | } 192 | } 193 | ] 194 | ], 195 | "llmOutput": { 196 | "tokenUsage": {} 197 | } 198 | } 199 | [chain/end] [1:chain:llm_chain] [6.51s] Exiting Chain run with output: { 200 | "text": " You asked me to remember that this is a demo of LLMatic with LangChain." 201 | } 202 | ``` 203 | 204 |
205 | 206 |
207 | 208 | To run the Python example, first install the dependencies: 209 | 210 | ```bash 211 | cd examples/python-langchain 212 | pip3 install -r requirements.txt 213 | ``` 214 | 215 | Then run the main script: 216 | 217 | ```bash 218 | python3 main.py 219 | ``` 220 | 221 |
222 | Expand this to see the sample output 223 | 224 | ``` 225 | > Entering new LLMChain chain... 226 | Prompt after formatting: 227 | A chat between a curious user and an artificial intelligence assistant. 228 | The assistant gives helpful, detailed, and polite answers to the user's questions. 229 | 230 | 231 | Human: Rememeber that this is a demo of LLMatic with LangChain. 232 | AI: 233 | 234 | > Finished chain. 235 | Yes, I understand. I am ready to assist you with your queries. 236 | 237 | 238 | > Entering new LLMChain chain... 239 | Prompt after formatting: 240 | A chat between a curious user and an artificial intelligence assistant. 241 | The assistant gives helpful, detailed, and polite answers to the user's questions. 242 | 243 | Human: Rememeber that this is a demo of LLMatic with LangChain. 244 | AI: Yes, I understand. I am ready to assist you with your queries. 245 | Human: What did I ask you to remember? 246 | AI: 247 | 248 | > Finished chain. 249 | You asked me to remember that this is a demo of LLMatic with LangChain. 250 | ``` 251 | 252 |
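### Calling the API directly

The chatbot-ui and LangChain examples above talk to LLMatic through OpenAI client libraries. Because the server simply exposes the endpoints listed under "Supported endpoints", it can also be queried with plain HTTP. The sketch below is illustrative and not part of the project: it assumes `llmatic start` is listening on the default `http://localhost:3000` used throughout this README, and it uses whatever model id `/v1/models` reports. The API key is a placeholder, mirroring the examples above.

```typescript
// Illustrative sketch only: query LLMatic's OpenAI-compatible endpoints
// directly with the global fetch available in Node.js >= 18.
const baseUrl = "http://localhost:3000/v1"; // assumed default `llmatic start` address
const headers = {
  "Content-Type": "application/json",
  // Placeholder key, as in the examples above; no real OpenAI key is involved.
  Authorization: "Bearer ANYTHING_WILL_DO",
};

// GET /v1/models - list the models the server currently exposes.
const models = (await (await fetch(`${baseUrl}/models`, { headers })).json()) as {
  data: Array<{ id: string }>;
};

// POST /v1/chat/completions - request a non-streaming chat completion
// from the first available model.
const response = await fetch(`${baseUrl}/chat/completions`, {
  method: "POST",
  headers,
  body: JSON.stringify({
    model: models.data[0].id,
    messages: [{ role: "user", content: "Say hello in one short sentence." }],
    stream: false,
  }),
});

const completion = (await response.json()) as {
  choices: Array<{ message: { content: string } }>;
};

console.log(completion.choices[0].message.content);
```

The LangChain examples achieve the same thing by pointing the official OpenAI clients at this base URL (`basePath` in `index.mjs`, `OPENAI_API_BASE` in `main.py`).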
253 | 254 | ## Custom Adapters 255 | 256 | LLMatic is designed to be easily extensible. You can create your own adapters by extending the [`LlmAdapter`](/src/llm-adapter.ts) class. See [`examples/custom-adapter`](/examples/custom-adapter) for an example. 257 | 258 | To start llmatic with a custom adapter, use the `--llm-adapter` flag: 259 | 260 | ```bash 261 | llmatic start --llm-adapter ./custom-llm-adapter.ts 262 | ``` 263 | -------------------------------------------------------------------------------- /bin/llmatic: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | 3 | PREVIOUS_DIRECTORY="$(pwd)" 4 | BIN_DIRECTORY="$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )" 5 | cd "$PREVIOUS_DIRECTORY" 6 | 7 | exec "$BIN_DIRECTORY/llmatic.js" "$@" -------------------------------------------------------------------------------- /bin/llmatic.js: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env node 2 | import childProcess from "node:child_process"; 3 | import { createRequire } from "node:module"; 4 | import { fileURLToPath } from "node:url"; 5 | 6 | const tsx = 7 | import.meta.resolve && 8 | // This can be removed once node 18 is EOL 9 | import.meta.resolve.constructor.name !== "AsyncFunction" 10 | ? fileURLToPath(import.meta.resolve("tsx/cli")) 11 | : createRequire(import.meta.url).resolve("tsx/cli"); 12 | 13 | childProcess.fork(tsx, [ 14 | "--no-warnings", 15 | fileURLToPath(new URL("../src/cli/llmatic.ts", import.meta.url)), 16 | ...process.argv.slice(2), 17 | ]); 18 | -------------------------------------------------------------------------------- /examples/custom-adapter/custom-llm-adapter.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-await-in-loop */ 2 | import { 3 | LlmAdapter, 4 | type LlmAdapterCreateChatCompletionRequest, 5 | type LlmAdapterCreateChatCompletionResponse, 6 | type LlmAdapterCreateCompletionRequest, 7 | type LlmAdapterCreateCompletionResponse, 8 | type LlmAdapterCreateEmbeddingRequest, 9 | type LlmAdapterCreateEmbeddingResponse, 10 | type LlmAdapterModel, 11 | Role, 12 | } from "llmatic/llm-adapter"; 13 | 14 | type AdapterConfig = Record; 15 | 16 | export default class CustomLlmAdapter extends LlmAdapter { 17 | #llmConfig: Record; 18 | 19 | constructor(llmConfig: AdapterConfig) { 20 | super(); 21 | 22 | this.#llmConfig = { ...CustomLlmAdapter.defaultConfig, ...llmConfig }; 23 | } 24 | 25 | async createChatCompletion( 26 | createChatCompletionRequest: LlmAdapterCreateChatCompletionRequest, 27 | abortSignal: AbortSignal, 28 | onData: (data: LlmAdapterCreateChatCompletionResponse) => void, 29 | ): Promise { 30 | const { messages, n } = createChatCompletionRequest; 31 | 32 | const count = messages.length * (n ?? 1); 33 | for (let tokenIndex = 0; tokenIndex < count; tokenIndex++) { 34 | onData({ 35 | finishReason: "stop", 36 | index: 0, 37 | delta: 38 | tokenIndex === 0 39 | ? 
{ role: Role.Assistant } 40 | : { content: `token ${tokenIndex}\n` }, 41 | }); 42 | } 43 | } 44 | 45 | async listModels(): Promise { 46 | return [ 47 | { 48 | id: "fake-model", 49 | created: 0, 50 | ownedBy: "unknown", 51 | }, 52 | ]; 53 | } 54 | 55 | async createEmbedding({ 56 | model, 57 | input, 58 | }: LlmAdapterCreateEmbeddingRequest): Promise { 59 | return [0]; 60 | } 61 | 62 | async createCompletion( 63 | createCompletionRequest: LlmAdapterCreateCompletionRequest, 64 | abortSignal: AbortSignal, 65 | onData: (data: LlmAdapterCreateCompletionResponse) => void, 66 | ): Promise { 67 | const { prompt, n } = createCompletionRequest; 68 | 69 | const count = prompt.length * (n ?? 1); 70 | for (let index = 0; index < count; index++) { 71 | onData({ 72 | finishReason: "stop", 73 | index, 74 | text: `token ${index}`, 75 | }); 76 | } 77 | } 78 | 79 | static get defaultConfig() { 80 | return {}; 81 | } 82 | } 83 | -------------------------------------------------------------------------------- /examples/custom-adapter/llmatic.config.json: -------------------------------------------------------------------------------- 1 | {} 2 | -------------------------------------------------------------------------------- /examples/custom-adapter/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "llmatic-custom-adapter-example", 3 | "version": "0.0.0", 4 | "type": "module", 5 | "description": "LLMatic custom adapter example", 6 | "scripts": { 7 | "start": "llmatic start --llm-adapter ./custom-llm-adapter.ts" 8 | }, 9 | "keywords": [], 10 | "author": "Fardjad Davari ", 11 | "license": "MIT", 12 | "dependencies": { 13 | "llmatic": "^0.4.4" 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /examples/node-langchain/index.mjs: -------------------------------------------------------------------------------- 1 | import { LLMChain } from "langchain/chains"; 2 | import { OpenAI } from "langchain/llms/openai"; 3 | import { BufferMemory } from "langchain/memory"; 4 | import { PromptTemplate } from "langchain/prompts"; 5 | 6 | const model = new OpenAI( 7 | { 8 | temperature: 0, 9 | openAIApiKey: "ANYTHING_WILL_DO", 10 | modelName: "Ignored", 11 | }, 12 | { 13 | basePath: "http://localhost:3000/v1", 14 | }, 15 | ); 16 | 17 | const template = `A chat between a curious user and an artificial intelligence assistant. 18 | The assistant gives helpful, detailed, and polite answers to the user's questions. 
19 | 20 | {history} 21 | Human: {humanInput} 22 | AI:`; 23 | 24 | const prompt = new PromptTemplate({ 25 | inputVariables: ["history", "humanInput"], 26 | template, 27 | }); 28 | 29 | const chatgptChain = new LLMChain({ 30 | llm: model, 31 | prompt, 32 | verbose: true, 33 | memory: new BufferMemory(), 34 | }); 35 | 36 | await chatgptChain.predict({ 37 | humanInput: "Rememeber that this is a demo of LLMatic with LangChain.", 38 | }); 39 | await chatgptChain.predict({ 40 | humanInput: "What did I ask you to remember?", 41 | }); 42 | -------------------------------------------------------------------------------- /examples/node-langchain/package-lock.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langchain-llmatic-example", 3 | "version": "0.0.0", 4 | "lockfileVersion": 3, 5 | "requires": true, 6 | "packages": { 7 | "": { 8 | "name": "langchain-llmatic-example", 9 | "version": "0.0.0", 10 | "license": "UNLICENSED", 11 | "dependencies": { 12 | "langchain": "^0.0.78" 13 | } 14 | }, 15 | "node_modules/@anthropic-ai/sdk": { 16 | "version": "0.4.3", 17 | "resolved": "https://registry.npmjs.org/@anthropic-ai/sdk/-/sdk-0.4.3.tgz", 18 | "integrity": "sha512-SZrlXvjUUYT9rPmSzlTtmVk1OjVNpkCzILRluhiYwNcxXfQyvPJDi0CI6PyymygcgtqEF5EVqhKmC/PtPsNEIw==", 19 | "dependencies": { 20 | "@fortaine/fetch-event-source": "^3.0.6", 21 | "cross-fetch": "^3.1.5" 22 | } 23 | }, 24 | "node_modules/@fortaine/fetch-event-source": { 25 | "version": "3.0.6", 26 | "resolved": "https://registry.npmjs.org/@fortaine/fetch-event-source/-/fetch-event-source-3.0.6.tgz", 27 | "integrity": "sha512-621GAuLMvKtyZQ3IA6nlDWhV1V/7PGOTNIGLUifxt0KzM+dZIweJ6F3XvQF3QnqeNfS1N7WQ0Kil1Di/lhChEw==", 28 | "engines": { 29 | "node": ">=16.15" 30 | } 31 | }, 32 | "node_modules/@types/retry": { 33 | "version": "0.12.0", 34 | "resolved": "https://registry.npmjs.org/@types/retry/-/retry-0.12.0.tgz", 35 | "integrity": "sha512-wWKOClTTiizcZhXnPY4wikVAwmdYHp8q6DmC+EJUzAMsycb7HB32Kh9RN4+0gExjmPmZSAQjgURXIGATPegAvA==" 36 | }, 37 | "node_modules/ansi-styles": { 38 | "version": "5.2.0", 39 | "resolved": "https://registry.npmjs.org/ansi-styles/-/ansi-styles-5.2.0.tgz", 40 | "integrity": "sha512-Cxwpt2SfTzTtXcfOlzGEee8O+c+MmUgGrNiBcXnuWxuFJHe6a5Hz7qwhwe5OgaSYI0IJvkLqWX1ASG+cJOkEiA==", 41 | "engines": { 42 | "node": ">=10" 43 | }, 44 | "funding": { 45 | "url": "https://github.com/chalk/ansi-styles?sponsor=1" 46 | } 47 | }, 48 | "node_modules/asynckit": { 49 | "version": "0.4.0", 50 | "resolved": "https://registry.npmjs.org/asynckit/-/asynckit-0.4.0.tgz", 51 | "integrity": "sha512-Oei9OH4tRh0YqU3GxhX79dM/mwVgvbZJaSNaRk+bshkj0S5cfHcgYakreBjrHwatXKbz+IoIdYLxrKim2MjW0Q==" 52 | }, 53 | "node_modules/axios": { 54 | "version": "0.26.1", 55 | "resolved": "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz", 56 | "integrity": "sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA==", 57 | "dependencies": { 58 | "follow-redirects": "^1.14.8" 59 | } 60 | }, 61 | "node_modules/base64-js": { 62 | "version": "1.5.1", 63 | "resolved": "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz", 64 | "integrity": "sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA==", 65 | "funding": [ 66 | { 67 | "type": "github", 68 | "url": "https://github.com/sponsors/feross" 69 | }, 70 | { 71 | "type": "patreon", 72 | "url": "https://www.patreon.com/feross" 73 | }, 74 | { 75 | "type": "consulting", 76 | "url": "https://feross.org/support" 77 | } 78 
| ] 79 | }, 80 | "node_modules/binary-extensions": { 81 | "version": "2.2.0", 82 | "resolved": "https://registry.npmjs.org/binary-extensions/-/binary-extensions-2.2.0.tgz", 83 | "integrity": "sha512-jDctJ/IVQbZoJykoeHbhXpOlNBqGNcwXJKJog42E5HDPUwQTSdjCHdihjj0DlnheQ7blbT6dHOafNAiS8ooQKA==", 84 | "engines": { 85 | "node": ">=8" 86 | } 87 | }, 88 | "node_modules/binary-search": { 89 | "version": "1.3.6", 90 | "resolved": "https://registry.npmjs.org/binary-search/-/binary-search-1.3.6.tgz", 91 | "integrity": "sha512-nbE1WxOTTrUWIfsfZ4aHGYu5DOuNkbxGokjV6Z2kxfJK3uaAb8zNK1muzOeipoLHZjInT4Br88BHpzevc681xA==" 92 | }, 93 | "node_modules/combined-stream": { 94 | "version": "1.0.8", 95 | "resolved": "https://registry.npmjs.org/combined-stream/-/combined-stream-1.0.8.tgz", 96 | "integrity": "sha512-FQN4MRfuJeHf7cBbBMJFXhKSDq+2kAArBlmRBvcvFE5BB1HZKXtSFASDhdlz9zOYwxh8lDdnvmMOe/+5cdoEdg==", 97 | "dependencies": { 98 | "delayed-stream": "~1.0.0" 99 | }, 100 | "engines": { 101 | "node": ">= 0.8" 102 | } 103 | }, 104 | "node_modules/cross-fetch": { 105 | "version": "3.1.6", 106 | "resolved": "https://registry.npmjs.org/cross-fetch/-/cross-fetch-3.1.6.tgz", 107 | "integrity": "sha512-riRvo06crlE8HiqOwIpQhxwdOk4fOeR7FVM/wXoxchFEqMNUjvbs3bfo4OTgMEMHzppd4DxFBDbyySj8Cv781g==", 108 | "dependencies": { 109 | "node-fetch": "^2.6.11" 110 | } 111 | }, 112 | "node_modules/delayed-stream": { 113 | "version": "1.0.0", 114 | "resolved": "https://registry.npmjs.org/delayed-stream/-/delayed-stream-1.0.0.tgz", 115 | "integrity": "sha512-ZySD7Nf91aLB0RxL4KGrKHBXl7Eds1DAmEdcoVawXnLD7SDhpNgtuII2aAkg7a7QS41jxPSZ17p4VdGnMHk3MQ==", 116 | "engines": { 117 | "node": ">=0.4.0" 118 | } 119 | }, 120 | "node_modules/eventemitter3": { 121 | "version": "4.0.7", 122 | "resolved": "https://registry.npmjs.org/eventemitter3/-/eventemitter3-4.0.7.tgz", 123 | "integrity": "sha512-8guHBZCwKnFhYdHr2ysuRWErTwhoN2X8XELRlrRwpmfeY2jjuUN4taQMsULKUVo1K4DvZl+0pgfyoysHxvmvEw==" 124 | }, 125 | "node_modules/expr-eval": { 126 | "version": "2.0.2", 127 | "resolved": "https://registry.npmjs.org/expr-eval/-/expr-eval-2.0.2.tgz", 128 | "integrity": "sha512-4EMSHGOPSwAfBiibw3ndnP0AvjDWLsMvGOvWEZ2F96IGk0bIVdjQisOHxReSkE13mHcfbuCiXw+G4y0zv6N8Eg==" 129 | }, 130 | "node_modules/flat": { 131 | "version": "5.0.2", 132 | "resolved": "https://registry.npmjs.org/flat/-/flat-5.0.2.tgz", 133 | "integrity": "sha512-b6suED+5/3rTpUBdG1gupIl8MPFCAMA0QXwmljLhvCUKcUvdE4gWky9zpuGCcXHOsz4J9wPGNWq6OKpmIzz3hQ==", 134 | "bin": { 135 | "flat": "cli.js" 136 | } 137 | }, 138 | "node_modules/follow-redirects": { 139 | "version": "1.15.2", 140 | "resolved": "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz", 141 | "integrity": "sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA==", 142 | "funding": [ 143 | { 144 | "type": "individual", 145 | "url": "https://github.com/sponsors/RubenVerborgh" 146 | } 147 | ], 148 | "engines": { 149 | "node": ">=4.0" 150 | }, 151 | "peerDependenciesMeta": { 152 | "debug": { 153 | "optional": true 154 | } 155 | } 156 | }, 157 | "node_modules/form-data": { 158 | "version": "4.0.0", 159 | "resolved": "https://registry.npmjs.org/form-data/-/form-data-4.0.0.tgz", 160 | "integrity": "sha512-ETEklSGi5t0QMZuiXoA/Q6vcnxcLQP5vdugSpuAyi6SVGi2clPPp+xgEhuMaHC+zGgn31Kd235W35f7Hykkaww==", 161 | "dependencies": { 162 | "asynckit": "^0.4.0", 163 | "combined-stream": "^1.0.8", 164 | "mime-types": "^2.1.12" 165 | }, 166 | "engines": { 167 | "node": ">= 6" 168 | } 169 | }, 170 | "node_modules/is-any-array": { 
171 | "version": "2.0.1", 172 | "resolved": "https://registry.npmjs.org/is-any-array/-/is-any-array-2.0.1.tgz", 173 | "integrity": "sha512-UtilS7hLRu++wb/WBAw9bNuP1Eg04Ivn1vERJck8zJthEvXCBEBpGR/33u/xLKWEQf95803oalHrVDptcAvFdQ==" 174 | }, 175 | "node_modules/js-tiktoken": { 176 | "version": "1.0.6", 177 | "resolved": "https://registry.npmjs.org/js-tiktoken/-/js-tiktoken-1.0.6.tgz", 178 | "integrity": "sha512-lxHntEupgjWvSh37WxpAW4XN6UBXBtFJOpZZq5HN5oNjDfN7L/iJhHOKjyL/DFtuYXUwn5jfTciLtOWpgQmHjQ==", 179 | "dependencies": { 180 | "base64-js": "^1.5.1" 181 | } 182 | }, 183 | "node_modules/jsonpointer": { 184 | "version": "5.0.1", 185 | "resolved": "https://registry.npmjs.org/jsonpointer/-/jsonpointer-5.0.1.tgz", 186 | "integrity": "sha512-p/nXbhSEcu3pZRdkW1OfJhpsVtW1gd4Wa1fnQc9YLiTfAjn0312eMKimbdIQzuZl9aa9xUGaRlP9T/CJE/ditQ==", 187 | "engines": { 188 | "node": ">=0.10.0" 189 | } 190 | }, 191 | "node_modules/langchain": { 192 | "version": "0.0.78", 193 | "resolved": "https://registry.npmjs.org/langchain/-/langchain-0.0.78.tgz", 194 | "integrity": "sha512-AXoai3V1fJyQ2vDSS3KqRJr1VxRoAxX0L1sFeuXGvwyEzfzv6/dDKPJ7K1Onew3Jmfzu23t1qqhwsSMZOmwo7g==", 195 | "dependencies": { 196 | "@anthropic-ai/sdk": "^0.4.3", 197 | "ansi-styles": "^5.0.0", 198 | "binary-extensions": "^2.2.0", 199 | "expr-eval": "^2.0.2", 200 | "flat": "^5.0.2", 201 | "js-tiktoken": "^1.0.6", 202 | "jsonpointer": "^5.0.1", 203 | "ml-distance": "^4.0.0", 204 | "object-hash": "^3.0.0", 205 | "openai": "^3.2.0", 206 | "p-queue": "^6.6.2", 207 | "p-retry": "4", 208 | "uuid": "^9.0.0", 209 | "yaml": "^2.2.1", 210 | "zod": "^3.21.4", 211 | "zod-to-json-schema": "^3.20.4" 212 | }, 213 | "engines": { 214 | "node": ">=18" 215 | }, 216 | "peerDependencies": { 217 | "@aws-sdk/client-dynamodb": "^3.310.0", 218 | "@aws-sdk/client-lambda": "^3.310.0", 219 | "@aws-sdk/client-s3": "^3.310.0", 220 | "@aws-sdk/client-sagemaker-runtime": "^3.310.0", 221 | "@clickhouse/client": "^0.0.14", 222 | "@getmetal/metal-sdk": "*", 223 | "@huggingface/inference": "^1.5.1", 224 | "@opensearch-project/opensearch": "*", 225 | "@pinecone-database/pinecone": "*", 226 | "@supabase/supabase-js": "^2.10.0", 227 | "@tensorflow-models/universal-sentence-encoder": "*", 228 | "@tensorflow/tfjs-converter": "*", 229 | "@tensorflow/tfjs-core": "*", 230 | "@zilliz/milvus2-sdk-node": "^2.2.0", 231 | "apify-client": "^2.7.1", 232 | "axios": "*", 233 | "cheerio": "^1.0.0-rc.12", 234 | "chromadb": "^1.4.0", 235 | "cohere-ai": "^5.0.2", 236 | "d3-dsv": "^2.0.0", 237 | "epub2": "^3.0.1", 238 | "faiss-node": "^0.1.1", 239 | "hnswlib-node": "^1.4.2", 240 | "html-to-text": "^9.0.5", 241 | "mammoth": "*", 242 | "meriyah": "*", 243 | "mongodb": "^5.2.0", 244 | "pdf-parse": "1.1.1", 245 | "pickleparser": "^0.1.0", 246 | "playwright": "^1.32.1", 247 | "puppeteer": "^19.7.2", 248 | "redis": "^4.6.4", 249 | "replicate": "^0.9.0", 250 | "srt-parser-2": "^1.2.2", 251 | "typeorm": "^0.3.12", 252 | "weaviate-ts-client": "^1.0.0" 253 | }, 254 | "peerDependenciesMeta": { 255 | "@aws-sdk/client-dynamodb": { 256 | "optional": true 257 | }, 258 | "@aws-sdk/client-lambda": { 259 | "optional": true 260 | }, 261 | "@aws-sdk/client-s3": { 262 | "optional": true 263 | }, 264 | "@aws-sdk/client-sagemaker-runtime": { 265 | "optional": true 266 | }, 267 | "@clickhouse/client": { 268 | "optional": true 269 | }, 270 | "@getmetal/metal-sdk": { 271 | "optional": true 272 | }, 273 | "@huggingface/inference": { 274 | "optional": true 275 | }, 276 | "@opensearch-project/opensearch": { 277 | "optional": true 278 | }, 
279 | "@pinecone-database/pinecone": { 280 | "optional": true 281 | }, 282 | "@supabase/supabase-js": { 283 | "optional": true 284 | }, 285 | "@tensorflow-models/universal-sentence-encoder": { 286 | "optional": true 287 | }, 288 | "@tensorflow/tfjs-converter": { 289 | "optional": true 290 | }, 291 | "@tensorflow/tfjs-core": { 292 | "optional": true 293 | }, 294 | "@zilliz/milvus2-sdk-node": { 295 | "optional": true 296 | }, 297 | "apify-client": { 298 | "optional": true 299 | }, 300 | "axios": { 301 | "optional": true 302 | }, 303 | "cheerio": { 304 | "optional": true 305 | }, 306 | "chromadb": { 307 | "optional": true 308 | }, 309 | "cohere-ai": { 310 | "optional": true 311 | }, 312 | "d3-dsv": { 313 | "optional": true 314 | }, 315 | "epub2": { 316 | "optional": true 317 | }, 318 | "faiss-node": { 319 | "optional": true 320 | }, 321 | "hnswlib-node": { 322 | "optional": true 323 | }, 324 | "html-to-text": { 325 | "optional": true 326 | }, 327 | "mammoth": { 328 | "optional": true 329 | }, 330 | "meriyah": { 331 | "optional": true 332 | }, 333 | "mongodb": { 334 | "optional": true 335 | }, 336 | "pdf-parse": { 337 | "optional": true 338 | }, 339 | "pickleparser": { 340 | "optional": true 341 | }, 342 | "playwright": { 343 | "optional": true 344 | }, 345 | "puppeteer": { 346 | "optional": true 347 | }, 348 | "redis": { 349 | "optional": true 350 | }, 351 | "replicate": { 352 | "optional": true 353 | }, 354 | "srt-parser-2": { 355 | "optional": true 356 | }, 357 | "typeorm": { 358 | "optional": true 359 | }, 360 | "weaviate-ts-client": { 361 | "optional": true 362 | } 363 | } 364 | }, 365 | "node_modules/mime-db": { 366 | "version": "1.52.0", 367 | "resolved": "https://registry.npmjs.org/mime-db/-/mime-db-1.52.0.tgz", 368 | "integrity": "sha512-sPU4uV7dYlvtWJxwwxHD0PuihVNiE7TyAbQ5SWxDCB9mUYvOgroQOwYQQOKPJ8CIbE+1ETVlOoK1UC2nU3gYvg==", 369 | "engines": { 370 | "node": ">= 0.6" 371 | } 372 | }, 373 | "node_modules/mime-types": { 374 | "version": "2.1.35", 375 | "resolved": "https://registry.npmjs.org/mime-types/-/mime-types-2.1.35.tgz", 376 | "integrity": "sha512-ZDY+bPm5zTTF+YpCrAU9nK0UgICYPT0QtT1NZWFv4s++TNkcgVaT0g6+4R2uI4MjQjzysHB1zxuWL50hzaeXiw==", 377 | "dependencies": { 378 | "mime-db": "1.52.0" 379 | }, 380 | "engines": { 381 | "node": ">= 0.6" 382 | } 383 | }, 384 | "node_modules/ml-array-mean": { 385 | "version": "1.1.6", 386 | "resolved": "https://registry.npmjs.org/ml-array-mean/-/ml-array-mean-1.1.6.tgz", 387 | "integrity": "sha512-MIdf7Zc8HznwIisyiJGRH9tRigg3Yf4FldW8DxKxpCCv/g5CafTw0RRu51nojVEOXuCQC7DRVVu5c7XXO/5joQ==", 388 | "dependencies": { 389 | "ml-array-sum": "^1.1.6" 390 | } 391 | }, 392 | "node_modules/ml-array-sum": { 393 | "version": "1.1.6", 394 | "resolved": "https://registry.npmjs.org/ml-array-sum/-/ml-array-sum-1.1.6.tgz", 395 | "integrity": "sha512-29mAh2GwH7ZmiRnup4UyibQZB9+ZLyMShvt4cH4eTK+cL2oEMIZFnSyB3SS8MlsTh6q/w/yh48KmqLxmovN4Dw==", 396 | "dependencies": { 397 | "is-any-array": "^2.0.0" 398 | } 399 | }, 400 | "node_modules/ml-distance": { 401 | "version": "4.0.0", 402 | "resolved": "https://registry.npmjs.org/ml-distance/-/ml-distance-4.0.0.tgz", 403 | "integrity": "sha512-zj7+UGZpHk3uL7n79XTfGNUjIGnhLn8xVvrxYvBHvXFxo3jq1q+/UjP311hZxnLVhbxbXCjUniThX8gozjacYA==", 404 | "dependencies": { 405 | "ml-array-mean": "^1.1.6", 406 | "ml-distance-euclidean": "^2.0.0", 407 | "ml-tree-similarity": "^1.0.0" 408 | } 409 | }, 410 | "node_modules/ml-distance-euclidean": { 411 | "version": "2.0.0", 412 | "resolved": 
"https://registry.npmjs.org/ml-distance-euclidean/-/ml-distance-euclidean-2.0.0.tgz", 413 | "integrity": "sha512-yC9/2o8QF0A3m/0IXqCTXCzz2pNEzvmcE/9HFKOZGnTjatvBbsn4lWYJkxENkA4Ug2fnYl7PXQxnPi21sgMy/Q==" 414 | }, 415 | "node_modules/ml-tree-similarity": { 416 | "version": "1.0.0", 417 | "resolved": "https://registry.npmjs.org/ml-tree-similarity/-/ml-tree-similarity-1.0.0.tgz", 418 | "integrity": "sha512-XJUyYqjSuUQkNQHMscr6tcjldsOoAekxADTplt40QKfwW6nd++1wHWV9AArl0Zvw/TIHgNaZZNvr8QGvE8wLRg==", 419 | "dependencies": { 420 | "binary-search": "^1.3.5", 421 | "num-sort": "^2.0.0" 422 | } 423 | }, 424 | "node_modules/node-fetch": { 425 | "version": "2.6.11", 426 | "resolved": "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.11.tgz", 427 | "integrity": "sha512-4I6pdBY1EthSqDmJkiNk3JIT8cswwR9nfeW/cPdUagJYEQG7R95WRH74wpz7ma8Gh/9dI9FP+OU+0E4FvtA55w==", 428 | "dependencies": { 429 | "whatwg-url": "^5.0.0" 430 | }, 431 | "engines": { 432 | "node": "4.x || >=6.0.0" 433 | }, 434 | "peerDependencies": { 435 | "encoding": "^0.1.0" 436 | }, 437 | "peerDependenciesMeta": { 438 | "encoding": { 439 | "optional": true 440 | } 441 | } 442 | }, 443 | "node_modules/num-sort": { 444 | "version": "2.1.0", 445 | "resolved": "https://registry.npmjs.org/num-sort/-/num-sort-2.1.0.tgz", 446 | "integrity": "sha512-1MQz1Ed8z2yckoBeSfkQHHO9K1yDRxxtotKSJ9yvcTUUxSvfvzEq5GwBrjjHEpMlq/k5gvXdmJ1SbYxWtpNoVg==", 447 | "engines": { 448 | "node": ">=8" 449 | }, 450 | "funding": { 451 | "url": "https://github.com/sponsors/sindresorhus" 452 | } 453 | }, 454 | "node_modules/object-hash": { 455 | "version": "3.0.0", 456 | "resolved": "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz", 457 | "integrity": "sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw==", 458 | "engines": { 459 | "node": ">= 6" 460 | } 461 | }, 462 | "node_modules/openai": { 463 | "version": "3.2.1", 464 | "resolved": "https://registry.npmjs.org/openai/-/openai-3.2.1.tgz", 465 | "integrity": "sha512-762C9BNlJPbjjlWZi4WYK9iM2tAVAv0uUp1UmI34vb0CN5T2mjB/qM6RYBmNKMh/dN9fC+bxqPwWJZUTWW052A==", 466 | "dependencies": { 467 | "axios": "^0.26.0", 468 | "form-data": "^4.0.0" 469 | } 470 | }, 471 | "node_modules/p-finally": { 472 | "version": "1.0.0", 473 | "resolved": "https://registry.npmjs.org/p-finally/-/p-finally-1.0.0.tgz", 474 | "integrity": "sha512-LICb2p9CB7FS+0eR1oqWnHhp0FljGLZCWBE9aix0Uye9W8LTQPwMTYVGWQWIw9RdQiDg4+epXQODwIYJtSJaow==", 475 | "engines": { 476 | "node": ">=4" 477 | } 478 | }, 479 | "node_modules/p-queue": { 480 | "version": "6.6.2", 481 | "resolved": "https://registry.npmjs.org/p-queue/-/p-queue-6.6.2.tgz", 482 | "integrity": "sha512-RwFpb72c/BhQLEXIZ5K2e+AhgNVmIejGlTgiB9MzZ0e93GRvqZ7uSi0dvRF7/XIXDeNkra2fNHBxTyPDGySpjQ==", 483 | "dependencies": { 484 | "eventemitter3": "^4.0.4", 485 | "p-timeout": "^3.2.0" 486 | }, 487 | "engines": { 488 | "node": ">=8" 489 | }, 490 | "funding": { 491 | "url": "https://github.com/sponsors/sindresorhus" 492 | } 493 | }, 494 | "node_modules/p-retry": { 495 | "version": "4.6.2", 496 | "resolved": "https://registry.npmjs.org/p-retry/-/p-retry-4.6.2.tgz", 497 | "integrity": "sha512-312Id396EbJdvRONlngUx0NydfrIQ5lsYu0znKVUzVvArzEIt08V1qhtyESbGVd1FGX7UKtiFp5uwKZdM8wIuQ==", 498 | "dependencies": { 499 | "@types/retry": "0.12.0", 500 | "retry": "^0.13.1" 501 | }, 502 | "engines": { 503 | "node": ">=8" 504 | } 505 | }, 506 | "node_modules/p-timeout": { 507 | "version": "3.2.0", 508 | "resolved": "https://registry.npmjs.org/p-timeout/-/p-timeout-3.2.0.tgz", 509 | 
"integrity": "sha512-rhIwUycgwwKcP9yTOOFK/AKsAopjjCakVqLHePO3CC6Mir1Z99xT+R63jZxAT5lFZLa2inS5h+ZS2GvR99/FBg==", 510 | "dependencies": { 511 | "p-finally": "^1.0.0" 512 | }, 513 | "engines": { 514 | "node": ">=8" 515 | } 516 | }, 517 | "node_modules/retry": { 518 | "version": "0.13.1", 519 | "resolved": "https://registry.npmjs.org/retry/-/retry-0.13.1.tgz", 520 | "integrity": "sha512-XQBQ3I8W1Cge0Seh+6gjj03LbmRFWuoszgK9ooCpwYIrhhoO80pfq4cUkU5DkknwfOfFteRwlZ56PYOGYyFWdg==", 521 | "engines": { 522 | "node": ">= 4" 523 | } 524 | }, 525 | "node_modules/tr46": { 526 | "version": "0.0.3", 527 | "resolved": "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz", 528 | "integrity": "sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw==" 529 | }, 530 | "node_modules/uuid": { 531 | "version": "9.0.0", 532 | "resolved": "https://registry.npmjs.org/uuid/-/uuid-9.0.0.tgz", 533 | "integrity": "sha512-MXcSTerfPa4uqyzStbRoTgt5XIe3x5+42+q1sDuy3R5MDk66URdLMOZe5aPX/SQd+kuYAh0FdP/pO28IkQyTeg==", 534 | "bin": { 535 | "uuid": "dist/bin/uuid" 536 | } 537 | }, 538 | "node_modules/webidl-conversions": { 539 | "version": "3.0.1", 540 | "resolved": "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz", 541 | "integrity": "sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ==" 542 | }, 543 | "node_modules/whatwg-url": { 544 | "version": "5.0.0", 545 | "resolved": "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz", 546 | "integrity": "sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw==", 547 | "dependencies": { 548 | "tr46": "~0.0.3", 549 | "webidl-conversions": "^3.0.0" 550 | } 551 | }, 552 | "node_modules/yaml": { 553 | "version": "2.2.2", 554 | "resolved": "https://registry.npmjs.org/yaml/-/yaml-2.2.2.tgz", 555 | "integrity": "sha512-CBKFWExMn46Foo4cldiChEzn7S7SRV+wqiluAb6xmueD/fGyRHIhX8m14vVGgeFWjN540nKCNVj6P21eQjgTuA==", 556 | "engines": { 557 | "node": ">= 14" 558 | } 559 | }, 560 | "node_modules/zod": { 561 | "version": "3.21.4", 562 | "resolved": "https://registry.npmjs.org/zod/-/zod-3.21.4.tgz", 563 | "integrity": "sha512-m46AKbrzKVzOzs/DZgVnG5H55N1sv1M8qZU3A8RIKbs3mrACDNeIOeilDymVb2HdmP8uwshOCF4uJ8uM9rCqJw==", 564 | "funding": { 565 | "url": "https://github.com/sponsors/colinhacks" 566 | } 567 | }, 568 | "node_modules/zod-to-json-schema": { 569 | "version": "3.21.1", 570 | "resolved": "https://registry.npmjs.org/zod-to-json-schema/-/zod-to-json-schema-3.21.1.tgz", 571 | "integrity": "sha512-y5g0MPxDq+YG/T+cHGPYH4PcBpyCqwK6wxeJ76MR563y0gk/14HKfebq8xHiItY7lkc9GDFygCnkvNDTvAhYAg==", 572 | "peerDependencies": { 573 | "zod": "^3.21.4" 574 | } 575 | } 576 | } 577 | } 578 | -------------------------------------------------------------------------------- /examples/node-langchain/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "langchain-llmatic-example", 3 | "private": true, 4 | "version": "0.0.0", 5 | "description": "LangChain.js example with LLMatic", 6 | "scripts": { 7 | "start": "node index.mjs" 8 | }, 9 | "author": "Fardjad Davari ", 10 | "license": "UNLICENSED", 11 | "dependencies": { 12 | "langchain": "^0.0.78" 13 | } 14 | } 15 | -------------------------------------------------------------------------------- /examples/python-langchain/main.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | os.environ["OPENAI_API_KEY"] = 
"ANYTHING_WILL_DO" 4 | os.environ["OPENAI_API_BASE"] = "http://127.0.0.1:3000/v1" 5 | 6 | from langchain import OpenAI, LLMChain, PromptTemplate 7 | from langchain.memory import ConversationBufferMemory 8 | 9 | model = OpenAI(temperature=0, model_name="Ignored") 10 | 11 | template = """A chat between a curious user and an artificial intelligence assistant. 12 | The assistant gives helpful, detailed, and polite answers to the user's questions. 13 | 14 | {history} 15 | Human: {human_input} 16 | AI:""" 17 | 18 | prompt = PromptTemplate( 19 | input_variables=["history", "human_input"], 20 | template=template 21 | ) 22 | 23 | chatgpt_chain = LLMChain( 24 | llm=model, 25 | prompt=prompt, 26 | verbose=True, 27 | memory=ConversationBufferMemory(), 28 | ) 29 | 30 | print(chatgpt_chain.predict(human_input="Rememeber that this is a demo of LLMatic with LangChain.")) 31 | print(chatgpt_chain.predict(human_input="What did I ask you to remember?")) -------------------------------------------------------------------------------- /examples/python-langchain/requirements.txt: -------------------------------------------------------------------------------- 1 | openai==0.27.7 2 | langchain==0.0.169 -------------------------------------------------------------------------------- /media/chatbot-ui.gif: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fardjad/node-llmatic/ff59eb04acced04224b5ae615c9a9578c6422a88/media/chatbot-ui.gif -------------------------------------------------------------------------------- /media/logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/fardjad/node-llmatic/ff59eb04acced04224b5ae615c9a9578c6422a88/media/logo.png -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "llmatic", 3 | "version": "0.4.242", 4 | "description": "Use self-hosted LLMs with an OpenAI compatible API", 5 | "exports": { 6 | "./llm-adapter": { 7 | "types": "./dist/llm-adapter.d.ts", 8 | "import": "./dist/llm-adapter.js", 9 | "require": "./dist/llm-adapter.cjs" 10 | } 11 | }, 12 | "typesVersions": { 13 | "*": { 14 | "llm-adapter": [ 15 | "./dist/llm-adapter.d.ts" 16 | ] 17 | } 18 | }, 19 | "type": "module", 20 | "keywords": [ 21 | "openai", 22 | "api", 23 | "llm", 24 | "llama" 25 | ], 26 | "repository": "git@github.com:fardjad/node-llmatic.git", 27 | "license": "MIT", 28 | "author": "Fardjad Davari ", 29 | "bin": { 30 | "llmatic": "./bin/llmatic", 31 | "llmatic.js": "./bin/llmatic.js" 32 | }, 33 | "files": [ 34 | "README.md", 35 | "dist/llm-adapter.cjs", 36 | "dist/llm-adapter.js", 37 | "dist/llm-adapter.d.ts", 38 | "bin/llmatic", 39 | "bin/llmatic.js", 40 | "api.oas.yml", 41 | "public/index.html", 42 | "src/", 43 | "!src/test-support", 44 | "!**/*.test.*" 45 | ], 46 | "scripts": { 47 | "fetch-openai-oas": "node ./scripts/fetch-openai-oas.mjs", 48 | "fix": "prettier --write . 
&& eslint --ext .mjs --ext .ts --fix .", 49 | "lint": "eslint --ext .mjs --ext .ts .", 50 | "pretest": "tsc", 51 | "posttest": "npm run lint", 52 | "test:base": "tsx --test --test-reporter spec src/**/*.test.ts", 53 | "test": "npm run test:base", 54 | "test:watch": "nodemon -q -e js,cjs,mjs,ts,cts,mts,tsx,yml,json --exec \"npm run test:base\"", 55 | "prepare": "tsup src/llm-adapter.ts --format esm,cjs --dts --minify --clean --silent" 56 | }, 57 | "dependencies": { 58 | "@fastify/static": "^7.0.2", 59 | "@inquirer/prompts": "^4.3.1", 60 | "@inquirer/select": "^2.2.1", 61 | "@llama-node/llama-cpp": "^0.1.6", 62 | "@stoplight/json-ref-resolver": "^3.1.6", 63 | "ajv": "^8.12.0", 64 | "awilix": "^10.0.1", 65 | "commander": "^12.0.0", 66 | "fastify": "^4.26.2", 67 | "fastify-openapi-glue": "^4.5.0", 68 | "glob": "^10.3.12", 69 | "llama-node": "^0.1.6", 70 | "nodejs-file-downloader": "^4.12.1", 71 | "short-uuid": "^4.2.2", 72 | "swagger-ui-dist": "^5.13.0", 73 | "traverse": "^0.6.8", 74 | "tsx": "^4.7.1", 75 | "valid-filename": "^4.0.0" 76 | }, 77 | "devDependencies": { 78 | "@trivago/prettier-plugin-sort-imports": "^4.3.0", 79 | "@types/js-yaml": "^4.0.9", 80 | "@types/node": "^20.12.2", 81 | "@types/swagger-ui-dist": "^3.30.4", 82 | "@types/traverse": "^0.6.36", 83 | "@typescript-eslint/eslint-plugin": "^7.4.0", 84 | "@typescript-eslint/parser": "^7.4.0", 85 | "change-case": "^5.4.3", 86 | "earl": "^1.1.0", 87 | "eslint": "^8.57.0", 88 | "eslint-config-prettier": "^9.1.0", 89 | "eslint-config-xo": "^0.44.0", 90 | "eslint-config-xo-typescript": "^4.0.0", 91 | "eslint-plugin-unicorn": "^51.0.1", 92 | "http-status-codes": "^2.3.0", 93 | "jsonpath-plus": "^8.1.0", 94 | "nodemon": "^3.1.0", 95 | "npm-check-updates": "^16.14.18", 96 | "prettier": "^3.2.5", 97 | "quicktype-core": "^23.0.115", 98 | "tsup": "^8.0.2", 99 | "typescript": "^5.4.3" 100 | }, 101 | "overrides": { 102 | "@trivago/prettier-plugin-sort-imports": { 103 | "prettier": ">=3" 104 | } 105 | } 106 | } 107 | -------------------------------------------------------------------------------- /public/index.html: -------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | Swagger UI 6 | 7 | 8 | 14 | 20 | 21 | 22 | 23 |
24 | 25 | 29 | 46 | 47 | 48 | -------------------------------------------------------------------------------- /scripts/fetch-openai-oas.mjs: -------------------------------------------------------------------------------- 1 | import fs from "node:fs"; 2 | import prettier from "prettier"; 3 | 4 | const OPENAI_OAS_URL = 5 | "https://raw.githubusercontent.com/openai/openai-openapi/master/openapi.yaml"; 6 | 7 | const response = await fetch(OPENAI_OAS_URL, { 8 | redirect: "follow", 9 | }); 10 | 11 | const text = await response.text(); 12 | const formattedText = prettier.format(text, { parser: "yaml" }); 13 | 14 | fs.writeFileSync(new URL("../api.oas.yml", import.meta.url), formattedText, { 15 | encoding: "utf8", 16 | }); 17 | -------------------------------------------------------------------------------- /scripts/generate-types.mjs: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-await-in-loop */ 2 | import { Resolver } from "@stoplight/json-ref-resolver"; 3 | import { paramCase, pascalCase } from "change-case"; 4 | import "http-status-codes"; 5 | import { getReasonPhrase } from "http-status-codes"; 6 | import yaml from "js-yaml"; 7 | import { JSONPath as jsonPath } from "jsonpath-plus"; 8 | import fs from "node:fs"; 9 | import { 10 | quicktype, 11 | InputData, 12 | JSONSchemaInput, 13 | FetchingJSONSchemaStore, 14 | } from "quicktype-core"; 15 | 16 | const typesDirectory = new URL("../src/types", import.meta.url); 17 | fs.mkdirSync(typesDirectory, { recursive: true }); 18 | 19 | const oas = yaml.load( 20 | fs.readFileSync(new URL("../api.oas.yml", import.meta.url), "utf8"), 21 | ); 22 | const resolver = new Resolver(); 23 | const { result: resolvedOas } = await resolver.resolve(oas); 24 | 25 | const quicktypeJSONSchema = async (sources) => { 26 | const schemaInput = new JSONSchemaInput(new FetchingJSONSchemaStore()); 27 | for (const { name, schema } of sources) { 28 | await schemaInput.addSource({ name, schema }); 29 | } 30 | 31 | const inputData = new InputData(); 32 | inputData.addInput(schemaInput); 33 | 34 | return quicktype({ 35 | inputData, 36 | lang: "typescript", 37 | rendererOptions: { 38 | "just-types": true, 39 | "runtime-typecheck": false, 40 | "prefer-types": true, 41 | }, 42 | }); 43 | }; 44 | 45 | const operationIds = [ 46 | "createChatCompletion", 47 | "createCompletion", 48 | "createEmbedding", 49 | "listModels", 50 | "retrieveModel", 51 | ]; 52 | 53 | for (const operationId of operationIds) { 54 | const operation = jsonPath({ 55 | path: `$.paths.*[?(@.operationId === '${operationId}')]`, 56 | json: resolvedOas, 57 | })[0]; 58 | 59 | const requestBodySchema = jsonPath({ 60 | path: "$.requestBody.content['application/json'].schema", 61 | json: operation, 62 | })[0]; 63 | 64 | const statusCodes = jsonPath({ 65 | path: "$.responses.*.content['application/json'].schema^^^~", 66 | json: operation, 67 | }); 68 | 69 | const responseBodySchemas = jsonPath({ 70 | path: "$.responses.*.content['application/json'].schema", 71 | json: operation, 72 | }); 73 | 74 | const responseBodySchemaPairs = statusCodes.map((statusCode, index) => ({ 75 | statusCode, 76 | schema: responseBodySchemas[index], 77 | })); 78 | 79 | const requestSource = { 80 | name: `${operationId}Request`, 81 | schema: JSON.stringify(requestBodySchema), 82 | }; 83 | 84 | const responseSources = responseBodySchemaPairs.map( 85 | ({ statusCode, schema }) => ({ 86 | name: pascalCase( 87 | `${operationId} ${getReasonPhrase(statusCode)} Response`, 88 | ), 89 | schema: 
JSON.stringify(schema), 90 | }), 91 | ); 92 | 93 | const sources = [requestSource, ...responseSources].filter( 94 | (source) => source.schema !== undefined, 95 | ); 96 | 97 | const { lines } = await quicktypeJSONSchema(sources); 98 | const fileName = new URL(`${typesDirectory}/${paramCase(operationId)}.ts`); 99 | fs.writeFileSync(fileName, lines.join("\n"), "utf8"); 100 | } 101 | -------------------------------------------------------------------------------- /src/cli/cli-utils.test.ts: -------------------------------------------------------------------------------- 1 | import * as cliUtils from "./cli-utils.ts"; 2 | import assert from "node:assert"; 3 | import path from "node:path"; 4 | import { test } from "node:test"; 5 | import { fileURLToPath } from "node:url"; 6 | 7 | await test("readPackageJson", async () => { 8 | const { version } = await cliUtils.readPackageJson(); 9 | assert.strictEqual(typeof version, "string"); 10 | }); 11 | 12 | await test("fileExists", async (t) => { 13 | await t.test("file exists", async () => { 14 | const exists = await cliUtils.fileExists(new URL(import.meta.url)); 15 | assert.strictEqual(exists, true); 16 | }); 17 | 18 | await t.test("file does not exist", async () => { 19 | const exists = await cliUtils.fileExists("/does/not/exist"); 20 | assert.strictEqual(exists, false); 21 | }); 22 | }); 23 | 24 | await test("invokeInDirectory", async (t) => { 25 | const newPath = path.resolve(fileURLToPath(new URL("../", import.meta.url))); 26 | 27 | await t.test( 28 | "should invoke a function in the specific directory", 29 | async () => { 30 | const cwd = process.cwd(); 31 | const result = await cliUtils.invokeInDirectory( 32 | newPath, 33 | (previousWorkingDirectory, currentWorkingDirectory) => { 34 | assert.strictEqual(previousWorkingDirectory, cwd); 35 | assert.strictEqual(currentWorkingDirectory, process.cwd()); 36 | 37 | return process.cwd(); 38 | }, 39 | ); 40 | assert.strictEqual(process.cwd(), cwd); 41 | assert.strictEqual(result, newPath); 42 | }, 43 | ); 44 | 45 | await t.test("should await promises before returning a result", async () => { 46 | let count = 0; 47 | 48 | const result = await cliUtils.invokeInDirectory(newPath, async () => { 49 | await new Promise((resolve) => { 50 | setTimeout(resolve, 0); 51 | }); 52 | 53 | count += 1; 54 | 55 | return count; 56 | }); 57 | 58 | assert.strictEqual(count, 1); 59 | assert.strictEqual(result, count); 60 | }); 61 | }); 62 | 63 | await test("importFile", async () => { 64 | const importedModule = await cliUtils.importFile( 65 | fileURLToPath(new URL("cli-utils.ts", import.meta.url)), 66 | ); 67 | assert.strictEqual(importedModule, cliUtils); 68 | }); 69 | -------------------------------------------------------------------------------- /src/cli/cli-utils.ts: -------------------------------------------------------------------------------- 1 | import fs from "node:fs"; 2 | import path from "node:path"; 3 | 4 | export const readPackageJson = async () => { 5 | const packageJsonPath = new URL("../../package.json", import.meta.url); 6 | 7 | return JSON.parse( 8 | await fs.promises.readFile(packageJsonPath, { encoding: "utf8" }), 9 | ) as { 10 | [key: string]: unknown; 11 | 12 | version: string; 13 | description: string; 14 | }; 15 | }; 16 | 17 | export const fileExists = async (path: URL | string) => 18 | Boolean(await fs.promises.stat(path).catch(() => false)); 19 | 20 | export const invokeInDirectory = async ( 21 | directory: string, 22 | callback: ( 23 | previousWorkingDirectory: string, 24 | 
currentWorkingDirectory: string, 25 | ) => T, 26 | ) => { 27 | const cwd = process.cwd(); 28 | process.chdir(directory); 29 | 30 | return Promise.resolve(callback(cwd, directory)).finally(() => { 31 | process.chdir(cwd); 32 | }); 33 | }; 34 | 35 | export const importFile = async (filePath: string): Promise => { 36 | const resolvedPath = path.resolve(filePath); 37 | const fileDirectory = path.dirname(resolvedPath); 38 | return invokeInDirectory(fileDirectory, async () => 39 | import(resolvedPath).then((module) => (module.default ?? module) as T), 40 | ); 41 | }; 42 | -------------------------------------------------------------------------------- /src/cli/common-options.ts: -------------------------------------------------------------------------------- 1 | import { Option } from "commander"; 2 | import { fileURLToPath } from "node:url"; 3 | 4 | export const llmAdapterOption = new Option( 5 | "-a, --llm-adapter ", 6 | "llm adapter path", 7 | ).default(fileURLToPath(new URL("../default-llm-adapter.ts", import.meta.url))); 8 | -------------------------------------------------------------------------------- /src/cli/llmatic-config.js: -------------------------------------------------------------------------------- 1 | import { importFile, readPackageJson } from "./cli-utils.ts"; 2 | import { llmAdapterOption } from "./common-options.ts"; 3 | import { input } from "@inquirer/prompts"; 4 | import select, { Separator } from "@inquirer/select"; 5 | import { program } from "commander"; 6 | import fs from "node:fs"; 7 | import path from "node:path"; 8 | import Downloader from "nodejs-file-downloader"; 9 | import isValidFilename from "valid-filename"; 10 | 11 | const downloadFile = (url, fileName) => { 12 | const downloader = new Downloader({ 13 | url, 14 | directory: "./models", 15 | fileName, 16 | skipExistingFileName: true, 17 | maxAttempts: 3, 18 | shouldStop(error) { 19 | if (error.statusCode && error.statusCode === 404) { 20 | return true; 21 | } 22 | }, 23 | onProgress(percentage) { 24 | process.stdout.write( 25 | `\r${String(Number(percentage).toFixed(2)).padStart(6, "0")}%`, 26 | ); 27 | }, 28 | }); 29 | 30 | return downloader.download(); 31 | }; 32 | 33 | const menu = async (llmDefaultConfig) => { 34 | const answer = await select({ 35 | message: "What do you want to do?", 36 | choices: [ 37 | { 38 | name: "Download a model", 39 | value: "download", 40 | }, 41 | { 42 | name: "Generate a config file", 43 | value: "generateConfig", 44 | }, 45 | new Separator(), 46 | { 47 | name: "Exit", 48 | value: "exit", 49 | }, 50 | ], 51 | }); 52 | 53 | if (answer === "download") { 54 | return downloadModel(); 55 | } 56 | 57 | if (answer === "generateConfig") { 58 | return generateConfig(llmDefaultConfig); 59 | } 60 | }; 61 | 62 | const generateConfig = async (llmDefaultConfig) => { 63 | const files = await fs.promises.readdir("./models"); 64 | const binFiles = files.filter((file) => path.extname(file) === ".bin"); 65 | 66 | if (binFiles.length === 0) { 67 | console.log("\n\n❌ No models found in ./models\n\n"); 68 | return menu(); 69 | } 70 | 71 | const choices = binFiles.map((file) => ({ 72 | value: file, 73 | })); 74 | 75 | const answer = await select({ 76 | message: "Select a model:", 77 | choices, 78 | }); 79 | 80 | const modelPath = `./models/${answer}`; 81 | const llmConfig = { 82 | ...llmDefaultConfig, 83 | modelPath, 84 | }; 85 | 86 | await fs.promises.writeFile( 87 | "./llmatic.config.json", 88 | JSON.stringify(llmConfig, null, 2), 89 | ); 90 | 91 | console.log("\n\n📝 Generated config file: 
llmatic.config.json\n\n"); 92 | }; 93 | 94 | const downloadModel = async () => { 95 | const url = await input({ 96 | message: "Enter the model URL (the full address to a GGML .bin file):", 97 | validate(value) { 98 | try { 99 | // eslint-disable-next-line no-new 100 | new URL(value); 101 | return true; 102 | } catch { 103 | return "Please enter a valid URL"; 104 | } 105 | }, 106 | }); 107 | 108 | const suggestedFileName = new URL(url).pathname.split("/").pop(); 109 | const fileName = await input({ 110 | message: "Enter the file name (will skip download if file exists):", 111 | default: suggestedFileName, 112 | validate(value) { 113 | if (!isValidFilename(value)) { 114 | return "Please enter a valid file name"; 115 | } 116 | 117 | if (path.extname(value) !== ".bin") { 118 | return "File name must end with .bin"; 119 | } 120 | 121 | return true; 122 | }, 123 | }); 124 | 125 | await downloadFile(url, fileName); 126 | }; 127 | 128 | const { version } = await readPackageJson(); 129 | 130 | program 131 | .version(version) 132 | .description("Configure LLMatic") 133 | .addOption(llmAdapterOption) 134 | .action(async ({ llmAdapter: llmAdapterPath }) => { 135 | const llmAdapter = await importFile(llmAdapterPath); 136 | return menu(llmAdapter.defaultConfig); 137 | }); 138 | 139 | await program.parseAsync(process.argv); 140 | -------------------------------------------------------------------------------- /src/cli/llmatic-start.js: -------------------------------------------------------------------------------- 1 | /* eslint-disable unicorn/no-process-exit */ 2 | import { createContainer, diTokens } from "../container.ts"; 3 | import { fileExists, importFile, readPackageJson } from "./cli-utils.ts"; 4 | import { llmAdapterOption } from "./common-options.ts"; 5 | import awilix from "awilix"; 6 | import { Option, program } from "commander"; 7 | import fs from "node:fs"; 8 | 9 | const { version } = await readPackageJson(); 10 | 11 | program 12 | .version(version) 13 | .description("Start LLMatic server") 14 | .addOption( 15 | new Option("-c, --config [path]", "config file path").default( 16 | "llmatic.config.json", 17 | ), 18 | ) 19 | .addOption(llmAdapterOption) 20 | .addOption( 21 | new Option("-p --port [port]", "port to listen on").default("3000"), 22 | ) 23 | .addOption( 24 | new Option("-h --host [port]", "host to listen on").default("localhost"), 25 | ) 26 | .action( 27 | async ({ 28 | llmAdapter: llmAdapterPath, 29 | config: configFilePath, 30 | port, 31 | host, 32 | }) => { 33 | if (!(await fileExists(configFilePath))) { 34 | console.error(`File ${configFilePath} not found.`); 35 | process.exit(1); 36 | } 37 | 38 | const llmConfig = JSON.parse( 39 | await fs.promises.readFile(configFilePath, "utf8"), 40 | ); 41 | 42 | const container = await createContainer([ 43 | { 44 | token: diTokens.llmConfig, 45 | resolver() { 46 | return awilix.asValue(llmConfig); 47 | }, 48 | }, 49 | { 50 | token: diTokens.llmAdapter, 51 | async resolver() { 52 | const LLMAdapterConstructor = await importFile(llmAdapterPath); 53 | const llmAdapter = new LLMAdapterConstructor(llmConfig); 54 | return awilix.asValue(llmAdapter); 55 | }, 56 | }, 57 | ]); 58 | const fastifyServer = container.resolve(diTokens.fastifyServer); 59 | await fastifyServer.listen({ port: Number(port), host }); 60 | }, 61 | ); 62 | 63 | await program.parseAsync(process.argv); 64 | -------------------------------------------------------------------------------- /src/cli/llmatic.ts: 
-------------------------------------------------------------------------------- 1 | import { readPackageJson } from "./cli-utils.ts"; 2 | import { program } from "commander"; 3 | 4 | const { version, description } = await readPackageJson(); 5 | 6 | program 7 | .version(version) 8 | .description(description) 9 | .command("config", "configure LLMatic") 10 | .command("start", "start LLMatic server"); 11 | 12 | await program.parseAsync(process.argv); 13 | -------------------------------------------------------------------------------- /src/container.ts: -------------------------------------------------------------------------------- 1 | import { createFastifyServer } from "./fastify-server-factory.ts"; 2 | import type { LlmAdapter } from "./llm-adapter.ts"; 3 | import { SseHelper } from "./sse-helper.ts"; 4 | import awilix from "awilix"; 5 | 6 | export type Cradle = { 7 | container: awilix.AwilixContainer; 8 | llmConfig: unknown; 9 | llmAdapter: LlmAdapter; 10 | sseHelper: SseHelper; 11 | fastifyServer: Awaited>; 12 | }; 13 | 14 | /** 15 | * Use these tokens for registrations and resolutions to avoid the problems of 16 | * hardcoded strings. 17 | */ 18 | export const diTokens: { [k in keyof Cradle]: k } = { 19 | container: "container", 20 | llmConfig: "llmConfig", 21 | llmAdapter: "llmAdapter", 22 | sseHelper: "sseHelper", 23 | fastifyServer: "fastifyServer", 24 | }; 25 | 26 | export type ContainerRegistration = { 27 | token: keyof Cradle; 28 | resolver: () => Promise> | awilix.Resolver; 29 | }; 30 | 31 | export const applyOverrides = ( 32 | registrations: ContainerRegistration[], 33 | registrationOverrides: ContainerRegistration[], 34 | ) => { 35 | const registrationOverridesCopy = [...registrationOverrides]; 36 | 37 | const result: ContainerRegistration[] = []; 38 | 39 | for (const { token, resolver } of registrations) { 40 | const overrideIndex = registrationOverridesCopy.findIndex( 41 | (override) => override.token === token, 42 | ); 43 | if (overrideIndex === -1) { 44 | result.push({ token, resolver }); 45 | } else { 46 | const override = registrationOverridesCopy.splice(overrideIndex, 1)[0]; 47 | result.push({ token, resolver: override.resolver }); 48 | } 49 | } 50 | 51 | for (const override of registrationOverridesCopy) { 52 | result.push(override); 53 | } 54 | 55 | return result; 56 | }; 57 | 58 | /** 59 | * Create and configure the Awilix container. Async resolvers and overrides 60 | * are supported (can be useful for testing). 
61 | */ 62 | export const createContainer = async ( 63 | registerationOverrides: ContainerRegistration[] = [], 64 | ) => { 65 | const container = awilix.createContainer({ 66 | injectionMode: awilix.InjectionMode.PROXY, 67 | }); 68 | 69 | const orderedRegistrations: ContainerRegistration[] = [ 70 | { 71 | token: diTokens.container, 72 | resolver: () => awilix.asValue(container), 73 | }, 74 | { 75 | token: diTokens.sseHelper, 76 | resolver: () => 77 | awilix.asClass(SseHelper, { lifetime: awilix.Lifetime.SINGLETON }), 78 | }, 79 | { 80 | token: diTokens.llmConfig, 81 | resolver() { 82 | throw new Error("llmConfig must be overridden"); 83 | }, 84 | }, 85 | { 86 | token: diTokens.llmAdapter, 87 | resolver() { 88 | throw new Error("llmAdapter must be overridden"); 89 | }, 90 | }, 91 | { 92 | token: diTokens.fastifyServer, 93 | resolver: async () => 94 | awilix.asValue(await createFastifyServer(container.cradle)), 95 | }, 96 | ]; 97 | 98 | const newRegistrations = applyOverrides( 99 | orderedRegistrations, 100 | registerationOverrides, 101 | ); 102 | 103 | for (const { token, resolver } of newRegistrations) { 104 | // eslint-disable-next-line no-await-in-loop 105 | container.register({ [token]: await Promise.resolve(resolver()) }); 106 | } 107 | 108 | return container; 109 | }; 110 | -------------------------------------------------------------------------------- /src/default-llm-adapter.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-await-in-loop */ 2 | import { 3 | type FinishReason, 4 | LlmAdapter, 5 | type LlmAdapterCreateChatCompletionRequest, 6 | type LlmAdapterCreateChatCompletionResponse, 7 | type LlmAdapterCreateCompletionRequest, 8 | type LlmAdapterCreateCompletionResponse, 9 | type LlmAdapterCreateEmbeddingRequest, 10 | type LlmAdapterCreateEmbeddingResponse, 11 | type LlmAdapterModel, 12 | Role, 13 | } from "./llm-adapter.ts"; 14 | import type { Generate } from "@llama-node/llama-cpp"; 15 | import { type LLMError, LLM as LlamaNode } from "llama-node"; 16 | import { LLamaCpp, type LoadConfig } from "llama-node/dist/llm/llama-cpp.js"; 17 | import { cpus } from "node:os"; 18 | import path from "node:path"; 19 | 20 | type DefaultLlmAdapterConfig = Generate & LoadConfig; 21 | 22 | export default class DefaultLlmAdapter extends LlmAdapter { 23 | readonly #llmConfig: DefaultLlmAdapterConfig; 24 | #loaded = false; 25 | readonly #llamaNode = new LlamaNode(LLamaCpp); 26 | 27 | constructor(llmConfig: DefaultLlmAdapterConfig) { 28 | super(); 29 | 30 | this.#llmConfig = { ...DefaultLlmAdapter.defaultConfig, ...llmConfig }; 31 | } 32 | 33 | async createChatCompletion( 34 | createChatCompletionRequest: LlmAdapterCreateChatCompletionRequest, 35 | abortSignal: AbortSignal, 36 | onData: (data: LlmAdapterCreateChatCompletionResponse) => void, 37 | ): Promise { 38 | await this.#load(); 39 | 40 | const prompt = createChatCompletionRequest.messages 41 | .map(({ content, role }) => { 42 | if (role === Role.System) return `${content}\n`; 43 | return `${role ?? Role.User}: ${content}`; 44 | }) 45 | .join("\n") 46 | .concat(`\n${Role.Assistant}: `); 47 | 48 | const bufferedTokens: string[] = []; 49 | const flushBuffer = (index: number) => { 50 | while (bufferedTokens.length > 0) { 51 | onData({ 52 | index, 53 | delta: { content: bufferedTokens.shift() }, 54 | }); 55 | } 56 | }; 57 | 58 | for ( 59 | let index = 0; 60 | index < (createChatCompletionRequest.n ?? 
1); 61 | index += 1 62 | ) { 63 | let isFirstToken = true; 64 | 65 | await this.#invokeLlamaNode( 66 | { 67 | ...this.#openAiCompletionRequestToLlamaNodeInvocation( 68 | createChatCompletionRequest, 69 | ), 70 | prompt, 71 | }, 72 | abortSignal, 73 | ({ token, finishReason, stop }) => { 74 | if (isFirstToken) { 75 | onData({ 76 | index, 77 | delta: { role: Role.Assistant }, 78 | finishReason, 79 | }); 80 | 81 | isFirstToken = false; 82 | } 83 | 84 | if (["\n", Role.User, ":"].includes(token.trim())) { 85 | bufferedTokens.push(token); 86 | if (bufferedTokens.join("").trim() === `${Role.User}:`) { 87 | stop(); 88 | bufferedTokens.length = 0; 89 | } 90 | } else { 91 | flushBuffer(index); 92 | onData({ 93 | index, 94 | delta: { content: token }, 95 | finishReason, 96 | }); 97 | } 98 | }, 99 | () => { 100 | flushBuffer(index); 101 | onData({ 102 | index, 103 | delta: {}, 104 | finishReason: "stop", 105 | }); 106 | }, 107 | ); 108 | } 109 | } 110 | 111 | async listModels(): Promise { 112 | return [ 113 | { 114 | id: path.basename(this.#llmConfig.modelPath), 115 | created: 0, 116 | ownedBy: "unknown", 117 | }, 118 | ]; 119 | } 120 | 121 | async createEmbedding({ 122 | model, 123 | input, 124 | }: LlmAdapterCreateEmbeddingRequest): Promise { 125 | await this.#load(); 126 | 127 | return this.#llamaNode.getEmbedding({ 128 | ...this.#llmConfig, 129 | prompt: input, 130 | }); 131 | } 132 | 133 | async createCompletion( 134 | createCompletionRequest: LlmAdapterCreateCompletionRequest, 135 | abortSignal: AbortSignal, 136 | onData: (data: LlmAdapterCreateCompletionResponse) => void, 137 | ): Promise { 138 | await this.#load(); 139 | 140 | for ( 141 | let promptIndex = 0, index = 0; 142 | index < 143 | createCompletionRequest.prompt.length * (createCompletionRequest.n ?? 1); 144 | index += 1, 145 | promptIndex = (promptIndex + 1) % createCompletionRequest.prompt.length 146 | ) { 147 | const prompt = createCompletionRequest.prompt[promptIndex]; 148 | await this.#invokeLlamaNode( 149 | { 150 | ...this.#openAiCompletionRequestToLlamaNodeInvocation( 151 | createCompletionRequest, 152 | ), 153 | prompt, 154 | }, 155 | abortSignal, 156 | ({ token, finishReason }) => { 157 | onData({ 158 | index, 159 | text: token, 160 | finishReason, 161 | }); 162 | }, 163 | ); 164 | } 165 | } 166 | 167 | #openAiCompletionRequestToLlamaNodeInvocation( 168 | request: 169 | | LlmAdapterCreateCompletionRequest 170 | | LlmAdapterCreateChatCompletionRequest, 171 | ) { 172 | return { 173 | nTokPredict: request.maxTokens ?? this.#llmConfig.nTokPredict, 174 | temp: request.temperature ?? this.#llmConfig.temp, 175 | topP: request.topP ?? this.#llmConfig.topP, 176 | presencePenalty: 177 | request.presencePenalty ?? this.#llmConfig.presencePenalty, 178 | frequencyPenalty: 179 | request.frequencyPenalty ?? 
this.#llmConfig.frequencyPenalty, 180 | } satisfies Partial; 181 | } 182 | 183 | static get defaultConfig() { 184 | return { 185 | // Load config 186 | enableLogging: false, 187 | nParts: 1, 188 | nGpuLayers: 0, 189 | f16Kv: false, 190 | logitsAll: false, 191 | vocabOnly: false, 192 | seed: 0, 193 | useMlock: true, 194 | embedding: true, 195 | useMmap: true, 196 | nCtx: 4096, 197 | 198 | // Invocation config 199 | nThreads: cpus().length, 200 | nTokPredict: 32_768, 201 | topK: 40, 202 | topP: 0.95, 203 | temp: 0, 204 | repeatPenalty: 1.1, 205 | }; 206 | } 207 | 208 | async #load() { 209 | if (this.#loaded) return; 210 | 211 | await this.#llamaNode.load({ 212 | ...DefaultLlmAdapter.defaultConfig, 213 | ...this.#llmConfig, 214 | }); 215 | 216 | this.#loaded = true; 217 | } 218 | 219 | async #invokeLlamaNode( 220 | invocationConfig: Partial, 221 | callerAbortSignal: AbortSignal, 222 | onToken: ({ 223 | token, 224 | finishReason, 225 | stop, 226 | }: { 227 | token: string; 228 | finishReason: FinishReason; 229 | stop: () => void; 230 | }) => void, 231 | onComplete?: () => void, 232 | ) { 233 | let tokensGenerated = 0; 234 | const abortController = new AbortController(); 235 | 236 | const handleAbort = () => { 237 | callerAbortSignal.removeEventListener("abort", handleAbort); 238 | abortController.abort(); 239 | }; 240 | 241 | const stop = () => { 242 | abortController.abort(); 243 | }; 244 | 245 | callerAbortSignal.addEventListener("abort", handleAbort); 246 | return this.#llamaNode 247 | .createCompletion( 248 | { 249 | ...this.#llmConfig, 250 | ...invocationConfig, 251 | }, 252 | ({ token, completed }) => { 253 | // "llama-node" always emits "\n\n\n" at the end of inference 254 | if (completed) { 255 | if (onComplete) onComplete(); 256 | return; 257 | } 258 | 259 | tokensGenerated += 1; 260 | 261 | let finishReason: FinishReason; 262 | if (tokensGenerated >= invocationConfig.nTokPredict!) 
{ 263 | finishReason = "length"; 264 | abortController.abort(); 265 | } 266 | 267 | onToken({ token, finishReason, stop }); 268 | }, 269 | abortController.signal, 270 | ) 271 | .catch((error: unknown) => { 272 | // Looks like LLMError is not exported as a Class 273 | if (Object.getPrototypeOf(error).constructor.name !== "LLMError") { 274 | throw error; 275 | } 276 | 277 | const llmError = error as LLMError; 278 | if (llmError.type !== ("Aborted" as LLMError["type"])) { 279 | throw llmError; 280 | } 281 | }) 282 | .finally(() => { 283 | callerAbortSignal.removeEventListener("abort", handleAbort); 284 | }); 285 | } 286 | } 287 | -------------------------------------------------------------------------------- /src/fastify-server-factory.ts: -------------------------------------------------------------------------------- 1 | import type { Cradle } from "./container.ts"; 2 | import type { OperationHandler } from "./operation-handler.ts"; 3 | import fastifyStatic from "@fastify/static"; 4 | import ajvModule from "ajv"; 5 | import fastify from "fastify"; 6 | import openapiGlue from "fastify-openapi-glue"; 7 | import { glob } from "glob"; 8 | import yaml from "js-yaml"; 9 | import fs from "node:fs"; 10 | import { fileURLToPath } from "node:url"; 11 | import swaggerUiDist from "swagger-ui-dist"; 12 | import traverse from "traverse"; 13 | 14 | // https://github.com/ajv-validator/ajv/issues/2132 15 | // eslint-disable-next-line @typescript-eslint/naming-convention 16 | const Ajv = ajvModule.default; 17 | 18 | // FIXME: fix the types 19 | const createOpenapiGlueService = async ({ container }: Partial) => { 20 | const routeHandlerFiles = await glob("**/*.handler.[tj]s", { 21 | cwd: new URL("handlers", import.meta.url), 22 | absolute: true, 23 | }); 24 | 25 | const handlers = await Promise.all( 26 | routeHandlerFiles.map(async (file) => { 27 | const { default: handlerConstructor } = (await import(file)) as { 28 | default: (...arguments_: any[]) => OperationHandler; 29 | }; 30 | return container!.build(handlerConstructor); 31 | }), 32 | ); 33 | 34 | return Object.fromEntries( 35 | handlers.map((handler) => [ 36 | handler.operationId, 37 | handler.handle.bind(handler), 38 | ]), 39 | ); 40 | }; 41 | 42 | // FIXME: fix the types 43 | const configureOpenapiGlue = async ({ 44 | container, 45 | fastifyServer, 46 | openapiDocument, 47 | }: Partial & { openapiDocument: any }) => { 48 | const schemaCompilers = { 49 | body: new Ajv(), 50 | params: new Ajv(), 51 | querystring: new Ajv(), 52 | headers: new Ajv(), 53 | }; 54 | 55 | fastifyServer!.setValidatorCompiler((request) => { 56 | if (!request.httpPart) { 57 | throw new Error("Missing httpPart"); 58 | } 59 | 60 | const compiler = schemaCompilers[request.httpPart] as 61 | | ajvModule.default 62 | | undefined; 63 | if (!compiler) { 64 | throw new Error(`Missing compiler for ${request.httpPart}`); 65 | } 66 | 67 | // OpenAI OAS is not entirely valid/compatible, so we need to remove some properties 68 | // eslint-disable-next-line unicorn/no-array-for-each 69 | traverse(request.schema).forEach(function (value) { 70 | if (!this.key) return; 71 | 72 | if (this.isLeaf && ["nullable", "x-oaiTypeLabel"].includes(this.key)) { 73 | this.remove(); 74 | } 75 | 76 | if (this.key === "example") { 77 | this.remove(); 78 | } 79 | 80 | if (this.isLeaf && this.key === "format" && value === "binary") { 81 | this.remove(); 82 | } 83 | }); 84 | 85 | return compiler.compile(request.schema); 86 | }); 87 | 88 | const service = await createOpenapiGlueService({ container }); 89 | 90 | 
await fastifyServer!.register(openapiGlue, { 91 | specification: openapiDocument as Record, 92 | prefix: "/v1", 93 | service, 94 | securityHandlers: {}, 95 | }); 96 | }; 97 | 98 | // FIXME: fix the types 99 | const configureSwaggerUi = async ({ 100 | fastifyServer, 101 | openapiDocument, 102 | }: Partial & { openapiDocument: any }) => { 103 | await fastifyServer!.register(fastifyStatic, { 104 | root: swaggerUiDist.getAbsoluteFSPath(), 105 | prefix: "/swagger-ui/", 106 | }); 107 | 108 | fastifyServer!.get("/", (request, reply) => 109 | reply.sendFile( 110 | "index.html", 111 | fileURLToPath(new URL("../public", import.meta.url)), 112 | ), 113 | ); 114 | 115 | fastifyServer!.get("/api.oas.yml", (request, reply) => { 116 | const newOas = { 117 | ...(openapiDocument as Record), 118 | servers: [ 119 | { 120 | url: `${request.protocol}://${request.hostname}/v1`, 121 | }, 122 | ], 123 | }; 124 | 125 | return reply.type("text/yaml").send(yaml.dump(newOas)); 126 | }); 127 | }; 128 | 129 | export const createFastifyServer = async ({ container }: Cradle) => { 130 | const fastifyServer = fastify({ 131 | logger: true, 132 | }); 133 | 134 | const openapiDocument = yaml.load( 135 | await fs.promises.readFile(new URL("../api.oas.yml", import.meta.url), { 136 | encoding: "utf8", 137 | }), 138 | ); 139 | 140 | await configureSwaggerUi({ fastifyServer, openapiDocument }); 141 | await configureOpenapiGlue({ container, fastifyServer, openapiDocument }); 142 | 143 | return fastifyServer; 144 | }; 145 | -------------------------------------------------------------------------------- /src/handlers/create-chat-completion.handler.test.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { Cradle } from "../container.ts"; 3 | import { type LlmAdapter } from "../llm-adapter.ts"; 4 | import { createTestContainer } from "../test-support/test-container.ts"; 5 | import { 6 | Role, 7 | type CreateChatCompletionRequest, 8 | } from "../types/create-chat-completion.ts"; 9 | import type { AwilixContainer } from "awilix"; 10 | import { type MockObject, expect, mockObject } from "earl"; 11 | import { test } from "node:test"; 12 | 13 | const testModelId = "test-model-id"; 14 | let testContainer: AwilixContainer | undefined; 15 | let llmAdapter: MockObject | undefined; 16 | 17 | const createChatCompletionRequest: CreateChatCompletionRequest = { 18 | messages: [ 19 | { content: "prompt1", role: Role.System }, 20 | { content: "prompt2", role: Role.User }, 21 | { content: "prompt3", role: Role.Assistant }, 22 | ], 23 | model: testModelId, 24 | frequency_penalty: 0.5, 25 | logit_bias: { "50256": -100 }, 26 | max_tokens: 100, 27 | n: 2, 28 | presence_penalty: 1.5, 29 | stop: "stop", 30 | stream: false, 31 | temperature: 0, 32 | top_p: 0.1, 33 | }; 34 | 35 | await test("createChatCompletionHandler", async (t) => { 36 | t.beforeEach(async () => { 37 | llmAdapter = mockObject({ 38 | async createChatCompletion( 39 | createChatCompletionRequest, 40 | abortSignal, 41 | callback, 42 | ) { 43 | const { messages, n } = createChatCompletionRequest; 44 | 45 | const count = messages.length * (n ?? 1); 46 | for (let tokenIndex = 0; tokenIndex < count; tokenIndex++) { 47 | callback({ 48 | finishReason: "stop", 49 | index: 0, 50 | delta: 51 | tokenIndex === 0 52 | ? 
{ role: Role.Assistant } 53 | : { content: `token ${tokenIndex}\n` }, 54 | }); 55 | } 56 | }, 57 | }); 58 | }); 59 | 60 | t.afterEach(async () => { 61 | await testContainer?.dispose(); 62 | testContainer = undefined; 63 | }); 64 | 65 | await t.test("valid request with no errors", async () => { 66 | testContainer = await createTestContainer(llmAdapter!); 67 | const fastifyServer = testContainer.resolve("fastifyServer"); 68 | 69 | const response = await fastifyServer.inject({ 70 | url: "/v1/chat/completions", 71 | method: "POST", 72 | headers: { 73 | "Content-Type": "application/json", 74 | }, 75 | payload: JSON.stringify(createChatCompletionRequest), 76 | }); 77 | 78 | expect(response.statusCode).toEqual(200); 79 | 80 | expect(llmAdapter!.createChatCompletion).toHaveBeenCalledWith( 81 | { 82 | messages: createChatCompletionRequest.messages, 83 | model: testModelId, 84 | frequencyPenalty: createChatCompletionRequest.frequency_penalty, 85 | logitBias: createChatCompletionRequest.logit_bias, 86 | maxTokens: createChatCompletionRequest.max_tokens, 87 | n: createChatCompletionRequest.n, 88 | presencePenalty: createChatCompletionRequest.presence_penalty, 89 | stop: (Array.isArray(createChatCompletionRequest.stop) 90 | ? createChatCompletionRequest.stop 91 | : [createChatCompletionRequest.stop]) as string[], 92 | temperature: createChatCompletionRequest.temperature, 93 | topP: createChatCompletionRequest.top_p, 94 | }, 95 | expect.anything(), 96 | expect.anything(), 97 | ); 98 | }); 99 | }); 100 | -------------------------------------------------------------------------------- /src/handlers/create-chat-completion.handler.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { Cradle } from "../container.ts"; 3 | import type { LlmAdapter } from "../llm-adapter.ts"; 4 | import type { OperationHandler } from "../operation-handler.ts"; 5 | import type { SseHelper } from "../sse-helper.ts"; 6 | import { 7 | type ChoiceMessage, 8 | type CreateChatCompletionOkResponse, 9 | type CreateChatCompletionRequest, 10 | Role, 11 | } from "../types/create-chat-completion.ts"; 12 | import type { Choice } from "../types/create-chat-completion.ts"; 13 | import type { RouteHandlerMethod } from "fastify"; 14 | import shortUUID from "short-uuid"; 15 | 16 | type Chunk = Choice & { 17 | delta: Partial; 18 | }; 19 | export default class CreateChatCompletionHandler implements OperationHandler { 20 | operationId = "createChatCompletion"; 21 | 22 | readonly #llmAdapter: LlmAdapter; 23 | readonly #sseHelper: SseHelper; 24 | 25 | constructor({ llmAdapter, sseHelper }: Cradle) { 26 | this.#llmAdapter = llmAdapter; 27 | this.#sseHelper = sseHelper; 28 | } 29 | 30 | handle: RouteHandlerMethod = async (request, reply) => { 31 | const body = request.body as CreateChatCompletionRequest; 32 | 33 | const { 34 | frequency_penalty, 35 | logit_bias, 36 | max_tokens, 37 | messages, 38 | model, 39 | n, 40 | presence_penalty, 41 | stop, 42 | stream, 43 | temperature, 44 | top_p, 45 | } = body; 46 | 47 | const abortController = new AbortController(); 48 | request.raw.once("close", () => { 49 | if (request.raw.destroyed) { 50 | abortController.abort(); 51 | } 52 | }); 53 | 54 | const id = `chatcmpl-${shortUUID.generate()}`; 55 | const choiceTokens: string[][] = []; 56 | const choices: Choice[] = []; 57 | 58 | await this.#llmAdapter.createChatCompletion( 59 | { 60 | messages, 61 | model, 62 | frequencyPenalty: frequency_penalty, 63 | 
logitBias: logit_bias, 64 | maxTokens: max_tokens, 65 | n, 66 | presencePenalty: presence_penalty, 67 | stop: (Array.isArray(stop) ? stop : [stop].filter(Boolean)) as string[], 68 | temperature, 69 | topP: top_p, 70 | }, 71 | abortController.signal, 72 | ({ index, delta, finishReason }) => { 73 | if (stream) { 74 | this.#sseHelper.sse( 75 | reply, 76 | this.#createResponseChunk(id, model, { 77 | delta, 78 | index, 79 | }), 80 | ); 81 | 82 | return; 83 | } 84 | 85 | choices[index] ??= { 86 | index, 87 | message: { role: Role.Assistant, content: "" }, 88 | }; 89 | 90 | choices[index].finish_reason = finishReason; 91 | 92 | if (delta.role) { 93 | choices[index].message!.role = delta.role; 94 | } 95 | 96 | choiceTokens[index] ??= []; 97 | 98 | if (delta.content) { 99 | choiceTokens[index].push(delta.content); 100 | } 101 | }, 102 | ); 103 | 104 | if (stream) { 105 | this.#sseHelper.sse(reply, "[DONE]"); 106 | reply.raw.end(); 107 | return; 108 | } 109 | 110 | for (const [index, choice] of choices.entries()) { 111 | if (!choice) { 112 | continue; 113 | } 114 | 115 | choice.message!.role = Role.Assistant; 116 | choice.message!.content = choiceTokens[index].join(""); 117 | } 118 | 119 | const response: CreateChatCompletionOkResponse = { 120 | ...this.#createResponse(id, model, choices), 121 | usage: { 122 | completion_tokens: 0, 123 | prompt_tokens: 0, 124 | total_tokens: 0, 125 | }, 126 | }; 127 | 128 | return response; 129 | }; 130 | 131 | #createResponse( 132 | id: string, 133 | model: string, 134 | choices: Choice[], 135 | ): CreateChatCompletionOkResponse { 136 | return { 137 | id, 138 | model, 139 | choices: choices.filter(Boolean), 140 | created: Math.floor(Date.now() / 1000), 141 | object: "chat.completion", 142 | }; 143 | } 144 | 145 | #createResponseChunk( 146 | id: string, 147 | model: string, 148 | deltaChoice: Chunk, 149 | ): CreateChatCompletionOkResponse { 150 | return { 151 | id, 152 | model, 153 | choices: [deltaChoice], 154 | created: Date.now(), 155 | object: "chat.completion.chunk", 156 | }; 157 | } 158 | } 159 | -------------------------------------------------------------------------------- /src/handlers/create-completion.handler.test.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { Cradle } from "../container.ts"; 3 | import { type LlmAdapter } from "../llm-adapter.ts"; 4 | import { createTestContainer } from "../test-support/test-container.ts"; 5 | import type { CreateCompletionRequest } from "../types/create-completion.ts"; 6 | import type { AwilixContainer } from "awilix"; 7 | import { type MockObject, expect, mockObject } from "earl"; 8 | import { test } from "node:test"; 9 | 10 | const testModelId = "test-model-id"; 11 | let testContainer: AwilixContainer | undefined; 12 | let llmAdapter: MockObject | undefined; 13 | 14 | const createCompletionRequest: CreateCompletionRequest = { 15 | stream: false, 16 | 17 | best_of: 3, 18 | echo: true, 19 | frequency_penalty: 1, 20 | logit_bias: { "50256": -100 }, 21 | logprobs: 5, 22 | max_tokens: 100, 23 | model: testModelId, 24 | n: 2, 25 | presence_penalty: 1.5, 26 | prompt: ["prompt1", "prompt2"], 27 | stop: "stop", 28 | suffix: "suffix", 29 | temperature: 0, 30 | top_p: 0.1, 31 | }; 32 | 33 | await test("createCompletionHandler", async (t) => { 34 | t.beforeEach(async () => { 35 | llmAdapter = mockObject({ 36 | async createCompletion(createCompletionRequest, abortSignal, callback) { 37 | const { prompt, n } = 
createCompletionRequest; 38 | 39 | const count = prompt.length * (n ?? 1); 40 | for (let index = 0; index < count; index++) { 41 | callback({ 42 | finishReason: "stop", 43 | index, 44 | text: `token ${index}`, 45 | }); 46 | } 47 | }, 48 | }); 49 | }); 50 | 51 | t.afterEach(async () => { 52 | await testContainer?.dispose(); 53 | testContainer = undefined; 54 | }); 55 | 56 | await t.test("stream cannot be set if best_of > 1", async (t) => { 57 | testContainer = await createTestContainer(llmAdapter!); 58 | const fastifyServer = testContainer.resolve("fastifyServer"); 59 | 60 | const payload = JSON.stringify({ 61 | model: testModelId, 62 | prompt: "test-prompt", 63 | 64 | best_of: 2, 65 | stream: true, 66 | } as CreateCompletionRequest); 67 | const response = await fastifyServer.inject({ 68 | url: "/v1/completions", 69 | method: "POST", 70 | headers: { 71 | "Content-Type": "application/json", 72 | }, 73 | payload, 74 | }); 75 | 76 | expect(response.statusCode).toEqual(400); 77 | }); 78 | 79 | await t.test("valid request with no errors", async () => { 80 | testContainer = await createTestContainer(llmAdapter!); 81 | const fastifyServer = testContainer.resolve("fastifyServer"); 82 | 83 | const response = await fastifyServer.inject({ 84 | url: "/v1/completions", 85 | method: "POST", 86 | headers: { 87 | "Content-Type": "application/json", 88 | }, 89 | payload: JSON.stringify(createCompletionRequest), 90 | }); 91 | 92 | expect(response.statusCode).toEqual(200); 93 | expect(llmAdapter!.createCompletion).toHaveBeenCalledWith( 94 | { 95 | model: testModelId, 96 | bestOf: createCompletionRequest.best_of, 97 | echo: createCompletionRequest.echo, 98 | frequencyPenalty: createCompletionRequest.frequency_penalty, 99 | logitBias: createCompletionRequest.logit_bias, 100 | logprobs: createCompletionRequest.logprobs, 101 | maxTokens: createCompletionRequest.max_tokens, 102 | n: createCompletionRequest.n, 103 | presencePenalty: createCompletionRequest.presence_penalty, 104 | // TODO: make this more specific 105 | prompt: expect.satisfies((prompt) => Array.isArray(prompt)), 106 | stop: (Array.isArray(createCompletionRequest.stop) 107 | ? 
createCompletionRequest.stop 108 | : [createCompletionRequest.stop]) as string[], 109 | suffix: createCompletionRequest.suffix, 110 | temperature: createCompletionRequest.temperature, 111 | topP: createCompletionRequest.top_p, 112 | }, 113 | expect.anything(), 114 | expect.anything(), 115 | ); 116 | }); 117 | }); 118 | -------------------------------------------------------------------------------- /src/handlers/create-completion.handler.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { Cradle } from "../container.ts"; 3 | import type { LlmAdapter } from "../llm-adapter.ts"; 4 | import type { OperationHandler } from "../operation-handler.ts"; 5 | import type { SseHelper } from "../sse-helper.ts"; 6 | import type { 7 | Choice, 8 | CreateCompletionOkResponse, 9 | CreateCompletionRequest, 10 | } from "../types/create-completion.ts"; 11 | import type { RouteHandlerMethod } from "fastify"; 12 | import shortUUID from "short-uuid"; 13 | 14 | export default class CreateCompletionHandler implements OperationHandler { 15 | operationId = "createCompletion"; 16 | readonly #llmAdapter: LlmAdapter; 17 | readonly #sseHelper: SseHelper; 18 | 19 | constructor({ llmAdapter, sseHelper }: Cradle) { 20 | this.#llmAdapter = llmAdapter; 21 | this.#sseHelper = sseHelper; 22 | } 23 | 24 | handle: RouteHandlerMethod = async (request, reply) => { 25 | const body = request.body as CreateCompletionRequest; 26 | 27 | const { 28 | model, 29 | best_of, 30 | stream, 31 | prompt, 32 | echo, 33 | frequency_penalty, 34 | logit_bias, 35 | max_tokens, 36 | logprobs, 37 | presence_penalty, 38 | stop, 39 | suffix, 40 | temperature, 41 | top_p, 42 | n, 43 | } = body; 44 | 45 | if (best_of != null && stream) { 46 | void reply.status(400); 47 | throw new Error("stream cannot be set if best_of is set"); 48 | } 49 | 50 | const promptValidationError = () => { 51 | void reply.status(400); 52 | throw new Error("prompt must be a string or an array of strings"); 53 | }; 54 | 55 | if (typeof prompt !== "string") { 56 | if (!Array.isArray(prompt)) { 57 | promptValidationError(); 58 | } 59 | 60 | if (prompt!.some((x) => typeof x !== "string")) { 61 | promptValidationError(); 62 | } 63 | } 64 | 65 | const abortController = new AbortController(); 66 | request.raw.once("close", () => { 67 | if (request.raw.destroyed) { 68 | abortController.abort(); 69 | } 70 | }); 71 | 72 | const id = `cmpl-${shortUUID.generate()}`; 73 | const choiceTokens: string[][] = []; 74 | const choices: Choice[] = []; 75 | 76 | await this.#llmAdapter.createCompletion( 77 | { 78 | model, 79 | bestOf: best_of, 80 | echo, 81 | frequencyPenalty: frequency_penalty, 82 | logitBias: logit_bias, 83 | maxTokens: max_tokens, 84 | logprobs, 85 | n, 86 | presencePenalty: presence_penalty, 87 | prompt: (Array.isArray(prompt) 88 | ? prompt 89 | : [prompt].filter(Boolean)) as string[], 90 | stop: (Array.isArray(stop) ? 
stop : [stop].filter(Boolean)) as string[], 91 | suffix, 92 | temperature, 93 | topP: top_p, 94 | }, 95 | abortController.signal, 96 | ({ 97 | finishReason, 98 | index, 99 | text, 100 | // TODO: Figure out how to handle logprobs 101 | logprobs, 102 | }) => { 103 | if (stream) { 104 | this.#sseHelper.sse( 105 | reply, 106 | this.#createResponseObject(id, model, [ 107 | { finish_reason: finishReason, index, text }, 108 | ]), 109 | ); 110 | 111 | return; 112 | } 113 | 114 | choices[index] ??= { 115 | index, 116 | }; 117 | 118 | choices[index].finish_reason = finishReason; 119 | choiceTokens[index] ??= []; 120 | 121 | choiceTokens[index].push(text); 122 | }, 123 | ); 124 | 125 | if (stream) { 126 | this.#sseHelper.sse(reply, "[DONE]"); 127 | reply.raw.end(); 128 | return; 129 | } 130 | 131 | for (const [index, choice] of choices.entries()) { 132 | if (!choice) { 133 | continue; 134 | } 135 | 136 | choice.text = choiceTokens[index].join(""); 137 | choice.finish_reason ??= "stop"; 138 | } 139 | 140 | const response: CreateCompletionOkResponse = { 141 | ...this.#createResponseObject(id, model, choices), 142 | usage: { 143 | completion_tokens: 0, 144 | prompt_tokens: 0, 145 | total_tokens: 0, 146 | }, 147 | }; 148 | 149 | return response; 150 | }; 151 | 152 | #createResponseObject( 153 | id: string, 154 | model: string, 155 | choices: Choice[], 156 | ): CreateCompletionOkResponse { 157 | return { 158 | id, 159 | choices: choices.filter(Boolean), 160 | created: Math.floor(Date.now() / 1000), 161 | model, 162 | object: "text_completion", 163 | }; 164 | } 165 | } 166 | -------------------------------------------------------------------------------- /src/handlers/create-embedding.handler.test.ts: -------------------------------------------------------------------------------- 1 | import type { Cradle } from "../container.ts"; 2 | import type { LlmAdapter } from "../llm-adapter.ts"; 3 | import { createTestContainer } from "../test-support/test-container.ts"; 4 | import type { CreateEmbeddingRequest } from "../types/create-embedding.ts"; 5 | import type { AwilixContainer } from "awilix"; 6 | import { type MockObject, expect, mockObject } from "earl"; 7 | import { test } from "node:test"; 8 | 9 | const testModelId = "test-model-id"; 10 | let testContainer: AwilixContainer | undefined; 11 | let llmAdapter: MockObject | undefined; 12 | 13 | await test("createEmbeddingHandler", async (t) => { 14 | t.beforeEach(async () => { 15 | llmAdapter = mockObject({ 16 | async createEmbedding({ model, input }) { 17 | return [0]; 18 | }, 19 | }); 20 | }); 21 | 22 | t.afterEach(async () => { 23 | await testContainer!.dispose(); 24 | testContainer = undefined; 25 | }); 26 | 27 | await t.test("single string input", async () => { 28 | testContainer = await createTestContainer(llmAdapter!); 29 | const fastifyServer = testContainer.resolve("fastifyServer"); 30 | const testModelInput = "test-model-input"; 31 | 32 | const payload = JSON.stringify({ 33 | model: testModelId, 34 | input: testModelInput, 35 | } as CreateEmbeddingRequest); 36 | 37 | const response = await fastifyServer.inject({ 38 | url: "/v1/embeddings", 39 | method: "POST", 40 | headers: { 41 | "Content-Type": "application/json", 42 | }, 43 | payload, 44 | }); 45 | 46 | expect(response.statusCode).toEqual(200); 47 | expect(llmAdapter!.createEmbedding).toHaveBeenCalledWith({ 48 | input: testModelInput, 49 | model: testModelId, 50 | }); 51 | }); 52 | 53 | await t.test("multiple strings input", async () => { 54 | testContainer = await 
createTestContainer(llmAdapter!); 55 | const fastifyServer = testContainer.resolve("fastifyServer"); 56 | const testModelInput = ["input1", "input2"]; 57 | 58 | const payload = JSON.stringify({ 59 | model: testModelId, 60 | input: testModelInput, 61 | } as CreateEmbeddingRequest); 62 | 63 | const response = await fastifyServer.inject({ 64 | url: "/v1/embeddings", 65 | method: "POST", 66 | headers: { 67 | "Content-Type": "application/json", 68 | }, 69 | payload, 70 | }); 71 | 72 | expect(response.statusCode).toEqual(200); 73 | expect(llmAdapter!.createEmbedding).toHaveBeenNthCalledWith(1, { 74 | input: testModelInput[0], 75 | model: testModelId, 76 | }); 77 | expect(llmAdapter!.createEmbedding).toHaveBeenNthCalledWith(2, { 78 | input: testModelInput[1], 79 | model: testModelId, 80 | }); 81 | }); 82 | }); 83 | -------------------------------------------------------------------------------- /src/handlers/create-embedding.handler.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { Cradle } from "../container.ts"; 3 | import type { LlmAdapter } from "../llm-adapter.ts"; 4 | import type { OperationHandler } from "../operation-handler.ts"; 5 | import type { 6 | CreateEmbeddingOkResponse, 7 | CreateEmbeddingRequest, 8 | Datum, 9 | } from "../types/create-embedding.ts"; 10 | import type { RouteHandlerMethod } from "fastify"; 11 | 12 | export default class CreateEmbeddingHandler implements OperationHandler { 13 | operationId = "createEmbedding"; 14 | readonly #llmAdapter: LlmAdapter; 15 | 16 | constructor({ llmAdapter }: Cradle) { 17 | this.#llmAdapter = llmAdapter; 18 | } 19 | 20 | handle: RouteHandlerMethod = async (request, reply) => { 21 | const body = request.body as CreateEmbeddingRequest; 22 | const { input, model } = body; 23 | 24 | if (Array.isArray(input) && typeof input[0] !== "string") { 25 | // FIXME: figure out how to handle numeric inputs 26 | throw new TypeError("Only string inputs are supported"); 27 | } 28 | 29 | const inputStrings = Array.isArray(input) ? 
input : [input]; 30 | 31 | const embeddings = await Promise.all( 32 | inputStrings.map(async (input: string) => 33 | this.#llmAdapter.createEmbedding({ 34 | input, 35 | model, 36 | }), 37 | ), 38 | ); 39 | 40 | const data: Datum[] = embeddings.map((embedding, index) => ({ 41 | index, 42 | embedding, 43 | object: "embedding", 44 | })); 45 | 46 | const response: CreateEmbeddingOkResponse = { 47 | data, 48 | object: "list", 49 | model, 50 | usage: { 51 | prompt_tokens: 0, 52 | total_tokens: 0, 53 | }, 54 | }; 55 | 56 | return response; 57 | }; 58 | } 59 | -------------------------------------------------------------------------------- /src/handlers/list-models.handler.test.ts: -------------------------------------------------------------------------------- 1 | import type { Cradle } from "../container.ts"; 2 | import type { LlmAdapter } from "../llm-adapter.ts"; 3 | import { createTestContainer } from "../test-support/test-container.ts"; 4 | import type { AwilixContainer } from "awilix"; 5 | import { type MockObject, expect, mockObject } from "earl"; 6 | import { test } from "node:test"; 7 | 8 | const testModelId = "test-model-id"; 9 | let testContainer: AwilixContainer | undefined; 10 | let llmAdapter: MockObject | undefined; 11 | 12 | test.beforeEach(() => { 13 | llmAdapter = mockObject({ 14 | async listModels() { 15 | return [{ created: 0, id: testModelId, ownedBy: "ownedBy" }]; 16 | }, 17 | }); 18 | }); 19 | 20 | test.afterEach(async () => { 21 | await testContainer!.dispose(); 22 | testContainer = undefined; 23 | }); 24 | 25 | await test("listModelsHandler", async () => { 26 | testContainer = await createTestContainer(llmAdapter!); 27 | const fastifyServer = testContainer.resolve("fastifyServer"); 28 | const response = await fastifyServer.inject({ 29 | url: "/v1/models", 30 | method: "GET", 31 | headers: {}, 32 | }); 33 | 34 | expect(response.statusCode).toEqual(200); 35 | expect(llmAdapter!.listModels).toHaveBeenCalled(); 36 | }); 37 | -------------------------------------------------------------------------------- /src/handlers/list-models.handler.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { Cradle } from "../container.ts"; 3 | import type { LlmAdapter } from "../llm-adapter.ts"; 4 | import type { OperationHandler } from "../operation-handler.ts"; 5 | import type { 6 | ListModelsOkResponse, 7 | ModelObject, 8 | } from "../types/list-models.ts"; 9 | import type { RouteHandlerMethod } from "fastify"; 10 | 11 | export default class ListModelsHandler implements OperationHandler { 12 | operationId = "listModels"; 13 | readonly #llmAdapter: LlmAdapter; 14 | 15 | constructor({ llmAdapter }: Cradle) { 16 | this.#llmAdapter = llmAdapter; 17 | } 18 | 19 | handle: RouteHandlerMethod = async (request, reply) => { 20 | const adapterModels = await this.#llmAdapter.listModels(); 21 | 22 | const data: ModelObject[] = adapterModels.map((model) => ({ 23 | id: model.id, 24 | created: model.created, 25 | owned_by: model.ownedBy, 26 | object: "model", 27 | 28 | // Not part of the spec 29 | permission: [], 30 | })); 31 | 32 | const response: ListModelsOkResponse = { 33 | data, 34 | object: "list", 35 | }; 36 | 37 | // To preserve model.permissions 38 | void reply.header("Content-Type", "application/json; charset=utf-8"); 39 | void reply.serializer(JSON.stringify); 40 | 41 | return response; 42 | }; 43 | } 44 | -------------------------------------------------------------------------------- 
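The handlers above never construct their collaborators directly; they are built by createFastifyServer from the Awilix container defined in src/container.ts, which is also how src/cli/llmatic-start.js and the handler tests assemble a server. Below is a minimal sketch of that wiring, runnable with tsx from the repository root; the EchoAdapter stub is illustrative only and is not a file in this repo, while createContainer, diTokens and the LlmAdapter contract come from the sources in this dump.

import awilix from "awilix";
import { createContainer, diTokens } from "./src/container.ts";
import {
  LlmAdapter,
  type LlmAdapterCreateChatCompletionRequest,
  type LlmAdapterCreateChatCompletionResponse,
  type LlmAdapterCreateCompletionRequest,
  type LlmAdapterCreateCompletionResponse,
  type LlmAdapterCreateEmbeddingRequest,
  type LlmAdapterCreateEmbeddingResponse,
  type LlmAdapterModel,
} from "./src/llm-adapter.ts";

// Illustrative stub adapter (not part of this repo): emits fixed tokens so the
// HTTP layer can be exercised without loading a model.
class EchoAdapter extends LlmAdapter {
  async listModels(): Promise<LlmAdapterModel[]> {
    return [{ id: "echo", created: 0, ownedBy: "example" }];
  }

  async createEmbedding(
    _request: LlmAdapterCreateEmbeddingRequest,
  ): Promise<LlmAdapterCreateEmbeddingResponse> {
    return [0];
  }

  async createCompletion(
    request: LlmAdapterCreateCompletionRequest,
    _abortSignal: AbortSignal,
    onData: (data: LlmAdapterCreateCompletionResponse) => void,
  ): Promise<void> {
    onData({ index: 0, text: request.prompt[0] ?? "", finishReason: "stop" });
  }

  async createChatCompletion(
    _request: LlmAdapterCreateChatCompletionRequest,
    _abortSignal: AbortSignal,
    onData: (data: LlmAdapterCreateChatCompletionResponse) => void,
  ): Promise<void> {
    onData({ index: 0, delta: { content: "echo" }, finishReason: "stop" });
  }
}

// Same wiring as src/cli/llmatic-start.js: override the two registrations that
// createContainer requires (llmConfig, llmAdapter), then resolve the server.
const container = await createContainer([
  { token: diTokens.llmConfig, resolver: () => awilix.asValue({}) },
  {
    token: diTokens.llmAdapter,
    resolver: () => awilix.asValue(new EchoAdapter()),
  },
]);

const server = container.resolve(diTokens.fastifyServer);
const response = await server.inject({ url: "/v1/models", method: "GET" });
console.log(response.statusCode, response.json());

The handler tests above follow the same pattern through createTestContainer: swap in a mock adapter, resolve fastifyServer, and exercise the OpenAI-compatible routes with inject().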
/src/handlers/retrieve-model.handler.test.ts: -------------------------------------------------------------------------------- 1 | import type { Cradle } from "../container.ts"; 2 | import type { LlmAdapter } from "../llm-adapter.ts"; 3 | import { createTestContainer } from "../test-support/test-container.ts"; 4 | import type { AwilixContainer } from "awilix"; 5 | import { type MockObject, expect, mockObject } from "earl"; 6 | import { test } from "node:test"; 7 | 8 | const testModelId = "test-model-id"; 9 | let testContainer: AwilixContainer | undefined; 10 | let llmAdapter: MockObject | undefined; 11 | 12 | await test("retrieveModelHandler", async (t) => { 13 | t.beforeEach(() => { 14 | llmAdapter = mockObject({ 15 | async listModels() { 16 | return [{ created: 0, id: testModelId, ownedBy: "ownedBy" }]; 17 | }, 18 | }); 19 | }); 20 | 21 | t.afterEach(async () => { 22 | await testContainer!.dispose(); 23 | testContainer = undefined; 24 | }); 25 | 26 | await t.test("should return a model when there is one", async (t) => { 27 | testContainer = await createTestContainer(llmAdapter!); 28 | const fastifyServer = testContainer.resolve("fastifyServer"); 29 | 30 | const response = await fastifyServer.inject({ 31 | url: `v1/models/${testModelId}`, 32 | method: "GET", 33 | headers: {}, 34 | }); 35 | 36 | expect(response.statusCode).toEqual(200); 37 | expect(llmAdapter!.listModels).toHaveBeenCalled(); 38 | }); 39 | 40 | await t.test("should return 404 when the model doesn't exist", async (t) => { 41 | testContainer = await createTestContainer(llmAdapter!); 42 | const fastifyServer = testContainer.resolve("fastifyServer"); 43 | 44 | const response = await fastifyServer.inject({ 45 | url: `v1/models/non-existent-model`, 46 | method: "GET", 47 | headers: {}, 48 | }); 49 | 50 | expect(response.statusCode).toEqual(404); 51 | expect(llmAdapter!.listModels).toHaveBeenCalled(); 52 | }); 53 | }); 54 | -------------------------------------------------------------------------------- /src/handlers/retrieve-model.handler.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { Cradle } from "../container.ts"; 3 | import type { LlmAdapter } from "../llm-adapter.ts"; 4 | import type { OperationHandler } from "../operation-handler.ts"; 5 | import type { RetrieveModelOkResponseObject } from "../types/retrieve-model.ts"; 6 | import type { RouteHandlerMethod } from "fastify"; 7 | 8 | type RequestParameters = { 9 | model: string; 10 | }; 11 | 12 | export default class RetrieveModelHandler implements OperationHandler { 13 | operationId = "retrieveModel"; 14 | readonly #llmAdapter: LlmAdapter; 15 | 16 | constructor({ llmAdapter }: Cradle) { 17 | this.#llmAdapter = llmAdapter; 18 | } 19 | 20 | handle: RouteHandlerMethod = async (request, reply) => { 21 | const parameters: RequestParameters = request.params as RequestParameters; 22 | 23 | const { model } = parameters; 24 | const adapterModels = await this.#llmAdapter.listModels(); 25 | const adapterModel = adapterModels.find( 26 | (adapterModel) => adapterModel.id === model, 27 | ); 28 | 29 | if (!adapterModel) { 30 | void reply.status(404); 31 | return; 32 | } 33 | 34 | const response: RetrieveModelOkResponseObject = { 35 | created: 0, 36 | id: adapterModel.id, 37 | object: "model", 38 | owned_by: adapterModel.ownedBy, 39 | 40 | // Not part of the spec 41 | permission: [], 42 | }; 43 | 44 | // To preserve model.permissions 45 | void reply.header("Content-Type", 
"application/json; charset=utf-8"); 46 | void reply.serializer(JSON.stringify); 47 | 48 | return response; 49 | }; 50 | } 51 | -------------------------------------------------------------------------------- /src/llama-node-core-llm-adapter.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable no-await-in-loop */ 2 | import { 3 | type FinishReason, 4 | LlmAdapter, 5 | type LlmAdapterCreateChatCompletionRequest, 6 | type LlmAdapterCreateChatCompletionResponse, 7 | type LlmAdapterCreateCompletionRequest, 8 | type LlmAdapterCreateCompletionResponse, 9 | type LlmAdapterCreateEmbeddingRequest, 10 | type LlmAdapterCreateEmbeddingResponse, 11 | type LlmAdapterModel, 12 | Role, 13 | } from "./llm-adapter.ts"; 14 | import { type Generate, ModelType } from "@llama-node/core"; 15 | import { type LLMError, LLM as LlamaNode } from "llama-node"; 16 | import { LLMRS, type LoadConfig } from "llama-node/dist/llm/llm-rs.js"; 17 | import { cpus } from "node:os"; 18 | import path from "node:path"; 19 | 20 | type LlamaNodeCoreLlmAdapterConfig = LoadConfig & Generate; 21 | 22 | export default class LlamaNodeCoreLlmAdapter extends LlmAdapter { 23 | readonly #llmConfig: LlamaNodeCoreLlmAdapterConfig; 24 | #loaded = false; 25 | readonly #llamaNode = new LlamaNode(LLMRS); 26 | 27 | constructor(llmConfig: LlamaNodeCoreLlmAdapterConfig) { 28 | super(); 29 | 30 | this.#llmConfig = { 31 | ...LlamaNodeCoreLlmAdapter.defaultConfig, 32 | ...llmConfig, 33 | }; 34 | } 35 | 36 | async createChatCompletion( 37 | createChatCompletionRequest: LlmAdapterCreateChatCompletionRequest, 38 | abortSignal: AbortSignal, 39 | onData: (data: LlmAdapterCreateChatCompletionResponse) => void, 40 | ): Promise { 41 | await this.#load(); 42 | 43 | const prompt = createChatCompletionRequest.messages 44 | .map(({ content, role }) => { 45 | if (role === Role.System) return `${content}\n`; 46 | return `${role ?? Role.User}: ${content}`; 47 | }) 48 | .join("\n") 49 | .concat(`\n${Role.Assistant}: `); 50 | 51 | const bufferedTokens: string[] = []; 52 | const flushBuffer = (index: number) => { 53 | while (bufferedTokens.length > 0) { 54 | onData({ 55 | index, 56 | delta: { content: bufferedTokens.shift() }, 57 | }); 58 | } 59 | }; 60 | 61 | for ( 62 | let index = 0; 63 | index < (createChatCompletionRequest.n ?? 
1); 64 | index += 1 65 | ) { 66 | let isFirstToken = true; 67 | 68 | await this.#invokeLlamaNode( 69 | { 70 | ...this.#openAiCompletionRequestToLlamaNodeInvocation( 71 | createChatCompletionRequest, 72 | ), 73 | prompt, 74 | }, 75 | abortSignal, 76 | ({ token, finishReason, stop }) => { 77 | if (isFirstToken) { 78 | onData({ 79 | index, 80 | delta: { role: Role.Assistant }, 81 | finishReason, 82 | }); 83 | 84 | isFirstToken = false; 85 | } 86 | 87 | if (["\n", Role.User, ":"].includes(token.trim())) { 88 | bufferedTokens.push(token); 89 | if (bufferedTokens.join("").trim() === `${Role.User}:`) { 90 | stop(); 91 | bufferedTokens.length = 0; 92 | } 93 | } else { 94 | flushBuffer(index); 95 | onData({ 96 | index, 97 | delta: { content: token }, 98 | finishReason, 99 | }); 100 | } 101 | }, 102 | () => { 103 | flushBuffer(index); 104 | onData({ 105 | index, 106 | delta: {}, 107 | finishReason: "stop", 108 | }); 109 | }, 110 | ); 111 | } 112 | } 113 | 114 | async listModels(): Promise { 115 | return [ 116 | { 117 | id: path.basename(this.#llmConfig.modelPath), 118 | created: 0, 119 | ownedBy: "unknown", 120 | }, 121 | ]; 122 | } 123 | 124 | async createEmbedding({ 125 | model, 126 | input, 127 | }: LlmAdapterCreateEmbeddingRequest): Promise { 128 | await this.#load(); 129 | 130 | return this.#llamaNode.getEmbedding({ 131 | ...this.#llmConfig, 132 | prompt: input, 133 | }); 134 | } 135 | 136 | async createCompletion( 137 | createCompletionRequest: LlmAdapterCreateCompletionRequest, 138 | abortSignal: AbortSignal, 139 | onData: (data: LlmAdapterCreateCompletionResponse) => void, 140 | ): Promise { 141 | await this.#load(); 142 | 143 | for ( 144 | let promptIndex = 0, index = 0; 145 | index < 146 | createCompletionRequest.prompt.length * (createCompletionRequest.n ?? 1); 147 | index += 1, 148 | promptIndex = (promptIndex + 1) % createCompletionRequest.prompt.length 149 | ) { 150 | const prompt = createCompletionRequest.prompt[promptIndex]; 151 | await this.#invokeLlamaNode( 152 | { 153 | ...this.#openAiCompletionRequestToLlamaNodeInvocation( 154 | createCompletionRequest, 155 | ), 156 | prompt, 157 | }, 158 | abortSignal, 159 | ({ token, finishReason }) => { 160 | onData({ 161 | index, 162 | text: token, 163 | finishReason, 164 | }); 165 | }, 166 | ); 167 | } 168 | } 169 | 170 | #openAiCompletionRequestToLlamaNodeInvocation( 171 | request: 172 | | LlmAdapterCreateCompletionRequest 173 | | LlmAdapterCreateChatCompletionRequest, 174 | ) { 175 | let temperature = request.temperature ?? this.#llmConfig.temperature; 176 | // Temp=0 leads to a crash 177 | if (request.temperature === 0) { 178 | temperature = 1e-5; 179 | } 180 | 181 | return { 182 | numPredict: request.maxTokens ?? this.#llmConfig.numPredict ?? undefined, 183 | temperature, 184 | topP: request.topP ?? 
this.#llmConfig.topP, 185 | } satisfies Partial<Generate>; 186 | } 187 | 188 | static get defaultConfig() { 189 | return { 190 | // Load config 191 | enableLogging: false, 192 | modelType: ModelType.Mpt, 193 | numCtxTokens: 4096, 194 | useMmap: true, 195 | 196 | // Generate config 197 | numThreads: cpus().length, 198 | numPredict: 32_768, 199 | batchSize: 128, 200 | repeatLastN: 64, 201 | repeatPenalty: 1.1, 202 | temperature: 0, 203 | topK: 40, 204 | topP: 0.95, 205 | seed: 0, 206 | float16: false, 207 | feedPrompt: true, 208 | } satisfies Partial<LlamaNodeCoreLlmAdapterConfig>; 209 | } 210 | 211 | async #load() { 212 | if (this.#loaded) return; 213 | 214 | await this.#llamaNode.load({ 215 | ...LlamaNodeCoreLlmAdapter.defaultConfig, 216 | ...this.#llmConfig, 217 | }); 218 | 219 | this.#loaded = true; 220 | } 221 | 222 | async #invokeLlamaNode( 223 | generateConfig: Partial<Generate>, 224 | callerAbortSignal: AbortSignal, 225 | onToken: ({ 226 | token, 227 | finishReason, 228 | stop, 229 | }: { 230 | token: string; 231 | finishReason: FinishReason; 232 | stop: () => void; 233 | }) => void, 234 | onComplete?: () => void, 235 | ) { 236 | let tokensGenerated = 0; 237 | const abortController = new AbortController(); 238 | 239 | const handleAbort = () => { 240 | callerAbortSignal.removeEventListener("abort", handleAbort); 241 | abortController.abort(); 242 | }; 243 | 244 | const stop = () => { 245 | abortController.abort(); 246 | }; 247 | 248 | callerAbortSignal.addEventListener("abort", handleAbort); 249 | 250 | return this.#llamaNode 251 | .createCompletion( 252 | { 253 | ...this.#llmConfig, 254 | ...generateConfig, 255 | }, 256 | ({ token, completed }) => { 257 | // "llama-node" always emits "\n\n\n" at the end of inference 258 | if (completed) { 259 | if (onComplete) onComplete(); 260 | return; 261 | } 262 | 263 | tokensGenerated += 1; 264 | 265 | let finishReason: FinishReason; 266 | if (tokensGenerated >= generateConfig.numPredict!)
{ 267 | finishReason = "length"; 268 | abortController.abort(); 269 | } 270 | 271 | onToken({ token, finishReason, stop }); 272 | }, 273 | abortController.signal, 274 | ) 275 | .catch((error: unknown) => { 276 | // Looks like LLMError is not exported as a Class 277 | if (Object.getPrototypeOf(error).constructor.name !== "LLMError") { 278 | throw error; 279 | } 280 | 281 | const llmError = error as LLMError; 282 | if (llmError.type !== ("Aborted" as LLMError["type"])) { 283 | throw llmError; 284 | } 285 | }) 286 | .finally(() => { 287 | callerAbortSignal.removeEventListener("abort", handleAbort); 288 | }); 289 | } 290 | } 291 | -------------------------------------------------------------------------------- /src/llm-adapter.ts: -------------------------------------------------------------------------------- 1 | export type FinishReason = undefined | "length" | "stop"; 2 | export enum Role { 3 | Assistant = "assistant", 4 | System = "system", 5 | User = "user", 6 | } 7 | 8 | export type LlmAdapterModel = { id: string; created: number; ownedBy: string }; 9 | 10 | export type LlmAdapterCreateEmbeddingRequest = { 11 | model: string; 12 | input: string; 13 | }; 14 | 15 | export type LlmAdapterCreateEmbeddingResponse = number[]; 16 | 17 | export type LlmAdapterCreateCompletionRequest = { 18 | bestOf?: number; 19 | echo?: boolean; 20 | frequencyPenalty?: number; 21 | logitBias?: Record<string, number>; 22 | logprobs?: number; 23 | maxTokens?: number; 24 | model: string; 25 | n?: number; 26 | presencePenalty?: number; 27 | // TODO: Support other types 28 | prompt: string[]; 29 | stop?: string[]; 30 | suffix?: string; 31 | temperature?: number; 32 | topP?: number; 33 | }; 34 | 35 | export type LlmAdapterCreateChatCompletionRequest = { 36 | frequencyPenalty?: number; 37 | logitBias?: Record<string, number>; 38 | maxTokens?: number; 39 | messages: Array<{ 40 | content: string; 41 | name?: string; 42 | role: Role; 43 | }>; 44 | model: string; 45 | n?: number; 46 | presencePenalty?: number; 47 | stop?: string[]; 48 | temperature?: number; 49 | topP?: number; 50 | }; 51 | 52 | export type LlmAdapterCreateCompletionResponse = { 53 | index: number; 54 | // TODO: Figure out the type 55 | logprobs?: unknown; 56 | text: string; 57 | finishReason: FinishReason; 58 | }; 59 | 60 | export type ChatCompletionDelta = { 61 | role?: Role; 62 | content?: string; 63 | }; 64 | 65 | export type LlmAdapterCreateChatCompletionResponse = { 66 | index: number; 67 | delta: ChatCompletionDelta; 68 | finishReason?: string; 69 | }; 70 | 71 | export abstract class LlmAdapter { 72 | static get defaultConfig(): Record<string, unknown> { 73 | throw new Error("Not implemented"); 74 | } 75 | 76 | abstract listModels(): Promise<LlmAdapterModel[]>; 77 | 78 | abstract createEmbedding({ 79 | model, 80 | input, 81 | }: LlmAdapterCreateEmbeddingRequest): Promise<LlmAdapterCreateEmbeddingResponse>; 82 | 83 | abstract createCompletion( 84 | createCompletionRequest: LlmAdapterCreateCompletionRequest, 85 | abortSignal: AbortSignal, 86 | onData: (data: LlmAdapterCreateCompletionResponse) => void, 87 | ): Promise<void>; 88 | 89 | abstract createChatCompletion( 90 | createChatCompletionRequest: LlmAdapterCreateChatCompletionRequest, 91 | abortSignal: AbortSignal, 92 | onData: (data: LlmAdapterCreateChatCompletionResponse) => void, 93 | ): Promise<void>; 94 | } 95 | -------------------------------------------------------------------------------- /src/operation-handler.ts: -------------------------------------------------------------------------------- 1 | import type { RouteHandlerMethod } from "fastify"; 2 | 3 | export type OperationHandler = { 4 | handle:
RouteHandlerMethod; 5 | get operationId(): string; 6 | }; 7 | -------------------------------------------------------------------------------- /src/sse-helper.ts: -------------------------------------------------------------------------------- 1 | /* eslint-disable @typescript-eslint/naming-convention */ 2 | import type { FastifyReply } from "fastify"; 3 | 4 | export class SseHelper { 5 | sse(reply: FastifyReply, data: unknown) { 6 | if (!reply.raw.headersSent) { 7 | const headers = { 8 | "Content-Type": "text/event-stream", 9 | Connection: "keep-alive", 10 | "Cache-Control": "no-cache", 11 | }; 12 | reply.raw.writeHead(200, headers); 13 | } 14 | 15 | const normalizedData = 16 | typeof data === "string" ? data : JSON.stringify(data); 17 | const payload = `data: ${normalizedData}\n\n`; 18 | reply.raw.write(payload); 19 | } 20 | } 21 | -------------------------------------------------------------------------------- /src/test-support/test-container.ts: -------------------------------------------------------------------------------- 1 | import { createContainer } from "../container.ts"; 2 | import type { LlmAdapter } from "../llm-adapter.ts"; 3 | import awilix from "awilix"; 4 | 5 | // TODO: Allow overriding sseHelper and add separate tests for stream=true and stream=false 6 | export const createTestContainer = async (llmAdapter: LlmAdapter) => { 7 | const container = createContainer([ 8 | { 9 | token: "llmConfig", 10 | resolver: () => awilix.asValue({}), 11 | }, 12 | { 13 | token: "llmAdapter", 14 | resolver: () => awilix.asValue(llmAdapter), 15 | }, 16 | ]); 17 | 18 | return container; 19 | }; 20 | -------------------------------------------------------------------------------- /src/types/create-chat-completion.ts: -------------------------------------------------------------------------------- 1 | export type CreateChatCompletionRequest = { 2 | /** 3 | * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing 4 | * frequency in the text so far, decreasing the model's likelihood to repeat the same line 5 | * verbatim. 6 | * 7 | * [See more information about frequency and presence 8 | * penalties.](/docs/api-reference/parameter-details) 9 | */ 10 | frequency_penalty?: number; 11 | /** 12 | * Modify the likelihood of specified tokens appearing in the completion. 13 | * 14 | * Accepts a JSON object that maps tokens (specified by their token ID in the tokenizer) to 15 | * an associated bias value from -100 to 100. Mathematically, the bias is added to the 16 | * logits generated by the model prior to sampling. The exact effect will vary per model, 17 | * but values between -1 and 1 should decrease or increase likelihood of selection; values 18 | * like -100 or 100 should result in a ban or exclusive selection of the relevant token. 19 | */ 20 | logit_bias?: { [key: string]: any }; 21 | /** 22 | * The maximum number of tokens allowed for the generated answer. By default, the number of 23 | * tokens the model can return will be (4096 - prompt tokens). 24 | */ 25 | max_tokens?: number; 26 | /** 27 | * The messages to generate chat completions for, in the [chat 28 | * format](/docs/guides/chat/introduction). 29 | */ 30 | messages: MessageElement[]; 31 | /** 32 | * ID of the model to use. Currently, only `gpt-3.5-turbo` and `gpt-3.5-turbo-0301` are 33 | * supported. 34 | */ 35 | model: string; 36 | /** 37 | * How many chat completion choices to generate for each input message. 38 | */ 39 | n?: number; 40 | /** 41 | * Number between -2.0 and 2.0.
Positive values penalize new tokens based on whether they 42 | * appear in the text so far, increasing the model's likelihood to talk about new topics. 43 | * 44 | * [See more information about frequency and presence 45 | * penalties.](/docs/api-reference/parameter-details) 46 | */ 47 | presence_penalty?: number; 48 | /** 49 | * Up to 4 sequences where the API will stop generating further tokens. 50 | */ 51 | stop?: string[] | string; 52 | /** 53 | * If set, partial message deltas will be sent, like in ChatGPT. Tokens will be sent as 54 | * data-only [server-sent 55 | * events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) 56 | * as they become available, with the stream terminated by a `data: [DONE]` message. 57 | */ 58 | stream?: boolean; 59 | /** 60 | * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the 61 | * output more random, while lower values like 0.2 will make it more focused and 62 | * deterministic. 63 | * 64 | * We generally recommend altering this or `top_p` but not both. 65 | */ 66 | temperature?: number; 67 | /** 68 | * An alternative to sampling with temperature, called nucleus sampling, where the model 69 | * considers the results of the tokens with top_p probability mass. So 0.1 means only the 70 | * tokens comprising the top 10% probability mass are considered. 71 | * 72 | * We generally recommend altering this or `temperature` but not both. 73 | */ 74 | top_p?: number; 75 | /** 76 | * A unique identifier representing your end-user, which can help OpenAI to monitor and 77 | * detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). 78 | */ 79 | user?: string; 80 | [property: string]: any; 81 | }; 82 | 83 | export type MessageElement = { 84 | /** 85 | * The contents of the message 86 | */ 87 | content: string; 88 | /** 89 | * The name of the user in a multi-user chat 90 | */ 91 | name?: string; 92 | /** 93 | * The role of the author of this message. 94 | */ 95 | role: Role; 96 | [property: string]: any; 97 | }; 98 | 99 | /** 100 | * The role of the author of this message. 101 | */ 102 | export enum Role { 103 | Assistant = "assistant", 104 | System = "system", 105 | User = "user", 106 | } 107 | 108 | export type CreateChatCompletionOkResponse = { 109 | choices: Choice[]; 110 | created: number; 111 | id: string; 112 | model: string; 113 | object: string; 114 | usage?: Usage; 115 | [property: string]: any; 116 | }; 117 | 118 | export type Choice = { 119 | finish_reason?: string; 120 | index?: number; 121 | message?: ChoiceMessage; 122 | [property: string]: any; 123 | }; 124 | 125 | export type ChoiceMessage = { 126 | /** 127 | * The contents of the message 128 | */ 129 | content: string; 130 | /** 131 | * The role of the author of this message. 132 | */ 133 | role: Role; 134 | [property: string]: any; 135 | }; 136 | 137 | export type Usage = { 138 | completion_tokens: number; 139 | prompt_tokens: number; 140 | total_tokens: number; 141 | [property: string]: any; 142 | }; 143 | -------------------------------------------------------------------------------- /src/types/create-completion.ts: -------------------------------------------------------------------------------- 1 | export type CreateCompletionRequest = { 2 | /** 3 | * Generates `best_of` completions server-side and returns the "best" (the one with the 4 | * highest log probability per token). Results cannot be streamed. 
5 | * 6 | * When used with `n`, `best_of` controls the number of candidate completions and `n` 7 | * specifies how many to return – `best_of` must be greater than `n`. 8 | * 9 | * **Note:** Because this parameter generates many completions, it can quickly consume your 10 | * token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` 11 | * and `stop`. 12 | */ 13 | best_of?: number; 14 | /** 15 | * Echo back the prompt in addition to the completion 16 | */ 17 | echo?: boolean; 18 | /** 19 | * Number between -2.0 and 2.0. Positive values penalize new tokens based on their existing 20 | * frequency in the text so far, decreasing the model's likelihood to repeat the same line 21 | * verbatim. 22 | * 23 | * [See more information about frequency and presence 24 | * penalties.](/docs/api-reference/parameter-details) 25 | */ 26 | frequency_penalty?: number; 27 | /** 28 | * Modify the likelihood of specified tokens appearing in the completion. 29 | * 30 | * Accepts a JSON object that maps tokens (specified by their token ID in the GPT tokenizer) 31 | * to an associated bias value from -100 to 100. You can use this [tokenizer 32 | * tool](/tokenizer?view=bpe) (which works for both GPT-2 and GPT-3) to convert text to 33 | * token IDs. Mathematically, the bias is added to the logits generated by the model prior 34 | * to sampling. The exact effect will vary per model, but values between -1 and 1 should 35 | * decrease or increase likelihood of selection; values like -100 or 100 should result in a 36 | * ban or exclusive selection of the relevant token. 37 | * 38 | * As an example, you can pass `{"50256": -100}` to prevent the <|endoftext|> token from 39 | * being generated. 40 | */ 41 | logit_bias?: { [key: string]: any }; 42 | /** 43 | * Include the log probabilities on the `logprobs` most likely tokens, as well as the chosen 44 | * tokens. For example, if `logprobs` is 5, the API will return a list of the 5 most likely 45 | * tokens. The API will always return the `logprob` of the sampled token, so there may be up 46 | * to `logprobs+1` elements in the response. 47 | * 48 | * The maximum value for `logprobs` is 5. If you need more than this, please contact us 49 | * through our [Help center](https://help.openai.com) and describe your use case. 50 | */ 51 | logprobs?: number; 52 | /** 53 | * The maximum number of [tokens](/tokenizer) to generate in the completion. 54 | * 55 | * The token count of your prompt plus `max_tokens` cannot exceed the model's context 56 | * length. Most models have a context length of 2048 tokens (except for the newest models, 57 | * which support 4096). 58 | */ 59 | max_tokens?: number; 60 | /** 61 | * ID of the model to use. You can use the [List models](/docs/api-reference/models/list) 62 | * API to see all of your available models, or see our [Model 63 | * overview](/docs/models/overview) for descriptions of them. 64 | */ 65 | model: string; 66 | /** 67 | * How many completions to generate for each prompt. 68 | * 69 | * **Note:** Because this parameter generates many completions, it can quickly consume your 70 | * token quota. Use carefully and ensure that you have reasonable settings for `max_tokens` 71 | * and `stop`. 72 | */ 73 | n?: number; 74 | /** 75 | * Number between -2.0 and 2.0. Positive values penalize new tokens based on whether they 76 | * appear in the text so far, increasing the model's likelihood to talk about new topics.
77 | * 78 | * [See more information about frequency and presence 79 | * penalties.](/docs/api-reference/parameter-details) 80 | */ 81 | presence_penalty?: number; 82 | /** 83 | * The prompt(s) to generate completions for, encoded as a string, array of strings, array 84 | * of tokens, or array of token arrays. 85 | * 86 | * Note that <|endoftext|> is the document separator that the model sees during training, so 87 | * if a prompt is not specified the model will generate as if from the beginning of a new 88 | * document. 89 | */ 90 | prompt?: Array<any> | string; 91 | /** 92 | * Up to 4 sequences where the API will stop generating further tokens. The returned text 93 | * will not contain the stop sequence. 94 | */ 95 | stop?: string[] | string; 96 | /** 97 | * Whether to stream back partial progress. If set, tokens will be sent as data-only 98 | * [server-sent 99 | * events](https://developer.mozilla.org/en-US/docs/Web/API/Server-sent_events/Using_server-sent_events#Event_stream_format) 100 | * as they become available, with the stream terminated by a `data: [DONE]` message. 101 | */ 102 | stream?: boolean; 103 | /** 104 | * The suffix that comes after a completion of inserted text. 105 | */ 106 | suffix?: string; 107 | /** 108 | * What sampling temperature to use, between 0 and 2. Higher values like 0.8 will make the 109 | * output more random, while lower values like 0.2 will make it more focused and 110 | * deterministic. 111 | * 112 | * We generally recommend altering this or `top_p` but not both. 113 | */ 114 | temperature?: number; 115 | /** 116 | * An alternative to sampling with temperature, called nucleus sampling, where the model 117 | * considers the results of the tokens with top_p probability mass. So 0.1 means only the 118 | * tokens comprising the top 10% probability mass are considered. 119 | * 120 | * We generally recommend altering this or `temperature` but not both. 121 | */ 122 | top_p?: number; 123 | /** 124 | * A unique identifier representing your end-user, which can help OpenAI to monitor and 125 | * detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). 126 | */ 127 | user?: string; 128 | [property: string]: any; 129 | }; 130 | 131 | export type CreateCompletionOkResponse = { 132 | choices: Choice[]; 133 | created: number; 134 | id: string; 135 | model: string; 136 | object: string; 137 | usage?: Usage; 138 | [property: string]: any; 139 | }; 140 | 141 | export type Choice = { 142 | finish_reason?: string; 143 | index?: number; 144 | logprobs?: Logprobs; 145 | text?: string; 146 | [property: string]: any; 147 | }; 148 | 149 | export type Logprobs = { 150 | text_offset?: number[]; 151 | token_logprobs?: number[]; 152 | tokens?: string[]; 153 | top_logprobs?: { [key: string]: any }[]; 154 | [property: string]: any; 155 | }; 156 | 157 | export type Usage = { 158 | completion_tokens: number; 159 | prompt_tokens: number; 160 | total_tokens: number; 161 | [property: string]: any; 162 | }; 163 | -------------------------------------------------------------------------------- /src/types/create-embedding.ts: -------------------------------------------------------------------------------- 1 | export type CreateEmbeddingRequest = { 2 | /** 3 | * Input text to get embeddings for, encoded as a string or array of tokens. To get 4 | * embeddings for multiple inputs in a single request, pass an array of strings or array of 5 | * token arrays. Each input must not exceed 8192 tokens in length. 6 | */ 7 | input: Array<any> | string; 8 | /** 9 | * ID of the model to use.
You can use the [List models](/docs/api-reference/models/list) 10 | * API to see all of your available models, or see our [Model 11 | * overview](/docs/models/overview) for descriptions of them. 12 | */ 13 | model: string; 14 | /** 15 | * A unique identifier representing your end-user, which can help OpenAI to monitor and 16 | * detect abuse. [Learn more](/docs/guides/safety-best-practices/end-user-ids). 17 | */ 18 | user?: string; 19 | }; 20 | 21 | export type CreateEmbeddingOkResponse = { 22 | data: Datum[]; 23 | model: string; 24 | object: string; 25 | usage: Usage; 26 | [property: string]: any; 27 | }; 28 | 29 | export type Datum = { 30 | embedding: number[]; 31 | index: number; 32 | object: string; 33 | [property: string]: any; 34 | }; 35 | 36 | export type Usage = { 37 | prompt_tokens: number; 38 | total_tokens: number; 39 | [property: string]: any; 40 | }; 41 | -------------------------------------------------------------------------------- /src/types/list-models.ts: -------------------------------------------------------------------------------- 1 | export type ListModelsOkResponse = { 2 | data: Array<ModelObject>; 3 | object: string; 4 | [property: string]: any; 5 | }; 6 | 7 | export type ModelObject = { 8 | created: number; 9 | id: string; 10 | object: string; 11 | owned_by: string; 12 | [property: string]: any; 13 | }; 14 | -------------------------------------------------------------------------------- /src/types/retrieve-model.ts: -------------------------------------------------------------------------------- 1 | export type RetrieveModelOkResponseObject = { 2 | created: number; 3 | id: string; 4 | object: string; 5 | owned_by: string; 6 | [property: string]: any; 7 | }; 8 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "target": "esnext", 4 | "module": "nodenext", 5 | "moduleResolution": "nodenext", 6 | "allowSyntheticDefaultImports": true, 7 | "allowImportingTsExtensions": true, 8 | "strictNullChecks": true, 9 | "noEmit": true, 10 | "skipLibCheck": true 11 | }, 12 | "exclude": ["examples/**/*"] 13 | } 14 | --------------------------------------------------------------------------------
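The abstract LlmAdapter class in src/llm-adapter.ts is the contract a custom back end implements: a model listing, an embedding method, and two streaming-style completion methods that report tokens through an onData callback and honour an AbortSignal. As a minimal illustrative sketch only (the file name echo-llm-adapter.ts, the EchoLlmAdapter class, and its trivial echo behaviour are invented here and are not part of the repository), an adapter that type-checks against that contract could look roughly like this:

// echo-llm-adapter.ts: hypothetical illustration, not a file in this repository.
import {
  LlmAdapter,
  Role,
  type LlmAdapterCreateChatCompletionRequest,
  type LlmAdapterCreateChatCompletionResponse,
  type LlmAdapterCreateCompletionRequest,
  type LlmAdapterCreateCompletionResponse,
  type LlmAdapterCreateEmbeddingRequest,
  type LlmAdapterCreateEmbeddingResponse,
  type LlmAdapterModel,
} from "./llm-adapter.ts";

export default class EchoLlmAdapter extends LlmAdapter {
  static get defaultConfig(): Record<string, unknown> {
    return {};
  }

  async listModels(): Promise<LlmAdapterModel[]> {
    return [{ id: "echo", created: 0, ownedBy: "example" }];
  }

  async createEmbedding({
    input,
  }: LlmAdapterCreateEmbeddingRequest): Promise<LlmAdapterCreateEmbeddingResponse> {
    // Placeholder embedding: one dimension per character, scaled to [0, 1].
    return [...input].map((character) => character.charCodeAt(0) / 255);
  }

  async createCompletion(
    request: LlmAdapterCreateCompletionRequest,
    abortSignal: AbortSignal,
    onData: (data: LlmAdapterCreateCompletionResponse) => void,
  ): Promise<void> {
    // Echo every prompt back word by word; a real adapter would also honour `n`.
    for (const [index, prompt] of request.prompt.entries()) {
      for (const word of prompt.split(" ")) {
        if (abortSignal.aborted) return;
        onData({ index, text: `${word} `, finishReason: undefined });
      }
      onData({ index, text: "", finishReason: "stop" });
    }
  }

  async createChatCompletion(
    request: LlmAdapterCreateChatCompletionRequest,
    abortSignal: AbortSignal,
    onData: (data: LlmAdapterCreateChatCompletionResponse) => void,
  ): Promise<void> {
    // Reply with the content of the last message, then signal completion.
    const lastMessage = request.messages.at(-1)?.content ?? "";
    onData({ index: 0, delta: { role: Role.Assistant } });
    if (!abortSignal.aborted) {
      onData({ index: 0, delta: { content: lastMessage } });
    }
    onData({ index: 0, delta: {}, finishReason: "stop" });
  }
}

A production adapter, like the llama-node one above, would additionally feed the abort signal into its inference loop and report a "length" finish reason when the token limit is reached.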