├── .changeset └── config.json ├── .eslintrc ├── .github ├── ISSUE_TEMPLATE │ ├── bug.yaml │ ├── config.yml │ └── question.yaml ├── banner_dark.png ├── banner_light.png └── workflows │ ├── build.yml │ ├── release.yml │ └── test.yml ├── .gitignore ├── .prettierignore ├── .prettierrc ├── CODE_OF_CONDUCT.md ├── CONTRIBUTING.md ├── LICENSE ├── LICENSES ├── Apache-2.0.txt ├── CC-BY-NC-SA-4.0.txt └── LicenseRef-LiveKitModelLicense.txt ├── NOTICE ├── README.md ├── REUSE.toml ├── agents ├── CHANGELOG.md ├── README.md ├── api-extractor.json ├── package.json ├── src │ ├── audio.ts │ ├── cli.ts │ ├── constants.ts │ ├── generator.ts │ ├── http_server.ts │ ├── index.ts │ ├── inference_runner.ts │ ├── ipc │ │ ├── index.ts │ │ ├── inference_executor.ts │ │ ├── inference_proc_executor.ts │ │ ├── inference_proc_lazy_main.ts │ │ ├── job_executor.ts │ │ ├── job_proc_executor.ts │ │ ├── job_proc_lazy_main.ts │ │ ├── message.ts │ │ ├── proc_pool.ts │ │ └── supervised_proc.ts │ ├── job.ts │ ├── llm │ │ ├── chat_context.ts │ │ ├── function_context.test.ts │ │ ├── function_context.ts │ │ ├── index.ts │ │ └── llm.ts │ ├── log.ts │ ├── metrics │ │ ├── base.ts │ │ ├── index.ts │ │ ├── usage_collector.ts │ │ └── utils.ts │ ├── multimodal │ │ ├── agent_playout.ts │ │ ├── index.ts │ │ └── multimodal_agent.ts │ ├── pipeline │ │ ├── agent_output.ts │ │ ├── agent_playout.ts │ │ ├── human_input.ts │ │ ├── index.ts │ │ ├── pipeline_agent.ts │ │ └── speech_handle.ts │ ├── plugin.ts │ ├── stt │ │ ├── index.ts │ │ ├── stream_adapter.ts │ │ └── stt.ts │ ├── tokenize │ │ ├── basic │ │ │ ├── basic.ts │ │ │ ├── hyphenator.ts │ │ │ ├── index.ts │ │ │ ├── paragraph.ts │ │ │ ├── sentence.ts │ │ │ └── word.ts │ │ ├── index.ts │ │ ├── token_stream.ts │ │ ├── tokenizer.test.ts │ │ └── tokenizer.ts │ ├── transcription.ts │ ├── tts │ │ ├── index.ts │ │ ├── stream_adapter.ts │ │ └── tts.ts │ ├── utils.ts │ ├── vad.ts │ ├── version.ts │ └── worker.ts ├── tsconfig.json └── tsup.config.ts ├── api-extractor-shared.json 
├── examples ├── CHANGELOG.md ├── package.json ├── src │ ├── multimodal_agent.ts │ ├── outbound.ts │ ├── pipeline_voice_agent.ts │ ├── stt.ts │ └── tts.ts └── tsconfig.json ├── package.json ├── plugins ├── cartesia │ ├── CHANGELOG.md │ ├── README.md │ ├── api-extractor.json │ ├── package.json │ ├── src │ │ ├── index.ts │ │ ├── models.ts │ │ ├── tts.test.ts │ │ └── tts.ts │ ├── tsconfig.json │ └── tsup.config.ts ├── deepgram │ ├── CHANGELOG.md │ ├── README.md │ ├── api-extractor.json │ ├── package.json │ ├── src │ │ ├── index.ts │ │ ├── models.ts │ │ ├── stt.test.ts │ │ └── stt.ts │ ├── tsconfig.json │ └── tsup.config.ts ├── elevenlabs │ ├── CHANGELOG.md │ ├── README.md │ ├── api-extractor.json │ ├── package.json │ ├── src │ │ ├── index.ts │ │ ├── models.ts │ │ ├── tts.test.ts │ │ └── tts.ts │ ├── tsconfig.json │ └── tsup.config.ts ├── livekit │ ├── .gitattributes │ ├── CHANGELOG.md │ ├── README.md │ ├── api-extractor.json │ ├── package.json │ ├── src │ │ ├── index.ts │ │ ├── onnxruntime.d.ts │ │ ├── turn_detector.onnx │ │ └── turn_detector.ts │ ├── tsconfig.json │ └── tsup.config.ts ├── neuphonic │ ├── CHANGELOG.md │ ├── README.md │ ├── api-extractor.json │ ├── package.json │ ├── src │ │ ├── index.ts │ │ ├── models.ts │ │ ├── tts.test.ts │ │ └── tts.ts │ ├── tsconfig.json │ └── tsup.config.ts ├── openai │ ├── CHANGELOG.md │ ├── README.md │ ├── api-extractor.json │ ├── package.json │ ├── src │ │ ├── index.ts │ │ ├── llm.test.ts │ │ ├── llm.ts │ │ ├── models.ts │ │ ├── realtime │ │ │ ├── api_proto.ts │ │ │ ├── index.ts │ │ │ └── realtime_model.ts │ │ ├── stt.test.ts │ │ ├── stt.ts │ │ ├── tts.test.ts │ │ └── tts.ts │ ├── tsconfig.json │ └── tsup.config.ts ├── resemble │ ├── CHANGELOG.md │ ├── README.md │ ├── api-extractor.json │ ├── package.json │ ├── src │ │ ├── index.ts │ │ ├── models.ts │ │ ├── tts.test.ts │ │ └── tts.ts │ ├── tsconfig.json │ └── tsup.config.ts ├── silero │ ├── .gitattributes │ ├── CHANGELOG.md │ ├── README.md │ ├── api-extractor.json │ ├── 
package.json │ ├── src │ │ ├── index.ts │ │ ├── onnx_model.ts │ │ ├── onnxruntime.d.ts │ │ ├── silero_vad.onnx │ │ └── vad.ts │ ├── tsconfig.json │ └── tsup.config.ts └── test │ ├── .gitattributes │ ├── README.md │ ├── package.json │ ├── src │ ├── index.ts │ ├── llm.ts │ ├── long.wav │ ├── stt.ts │ └── tts.ts │ ├── tsconfig.json │ └── tsup.config.ts ├── pnpm-lock.yaml ├── pnpm-workspace.yaml ├── tsconfig.json ├── tsup.config.ts ├── turbo.json └── vitest.workspace.ts /.changeset/config.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://unpkg.com/@changesets/config@2.2.0/schema.json", 3 | "changelog": [ 4 | "@livekit/changesets-changelog-github", 5 | { 6 | "repo": "livekit/agents-js" 7 | } 8 | ], 9 | "commit": false, 10 | "ignore": ["livekit-agents-examples"], 11 | "linked": [], 12 | "access": "public", 13 | "baseBranch": "main", 14 | "updateInternalDependencies": "patch", 15 | "___experimentalUnsafeOptions_WILL_CHANGE_IN_PATCH": { 16 | "onlyUpdatePeerDependentsWhenOutOfRange": true 17 | } 18 | } 19 | -------------------------------------------------------------------------------- /.eslintrc: -------------------------------------------------------------------------------- 1 | { 2 | "plugins": ["@typescript-eslint/eslint-plugin", "eslint-plugin-tsdoc"], 3 | "extends": [ 4 | "turbo", 5 | "prettier", 6 | "plugin:prettier/recommended", 7 | "plugin:@typescript-eslint/recommended", 8 | ], 9 | "env": { 10 | "node": true, 11 | }, 12 | "parserOptions": { 13 | "ecmaVersion": 2022, 14 | "ecmaFeatures": {}, 15 | }, 16 | "settings": {}, 17 | "rules": { 18 | "tsdoc/syntax": "warn", 19 | "space-before-function-parens": 0, 20 | "@typescript-eslint/no-unused-vars": "error", 21 | "import/export": 0, 22 | "@typescript-eslint/ban-ts-comment": "warn", 23 | "@typescript-eslint/no-empty-interface": "warn", 24 | "@typescript-eslint/consistent-type-imports": "warn", 25 | "@typescript-eslint/no-explicit-any": "warn", 26 | }, 27 
| } 28 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/bug.yaml: -------------------------------------------------------------------------------- 1 | name: Bug Report 2 | description: Let us know about an issue so we can fix it 3 | labels: ["bug"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Hello! Thanks for taking the time to file a bug report. 9 | 10 | Before creating this issue, we kindly ask that you use the search functionality to see if anyone else has already reported this issue. 11 | - type: textarea 12 | attributes: 13 | label: Describe the bug 14 | description: What happened? What were you trying to do? How did it break? Which errors were emitted? Provide as much information as possible. 15 | validations: 16 | required: true 17 | - type: textarea 18 | attributes: 19 | label: Relevant log output 20 | - type: textarea 21 | attributes: 22 | label: Describe your environment 23 | description: | 24 | What OS are you running on? Are you using the latest version of the Agents framework published on npmjs.com? 25 | You can get all of your relevant environment information by running `npx envinfo --system --binaries --npmPackages "@livekit/*"`. 26 | validations: 27 | required: true 28 | - type: textarea 29 | attributes: 30 | label: Minimal reproducible example 31 | description: | 32 | If possible, provide an example we can run on our end that can reasonably reproduce the issue that you're running into. 33 | - type: textarea 34 | attributes: 35 | label: Additional information 36 | description: If you have any other information to provide, such as hunches about where the error could come from, do so here. 
37 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/config.yml: -------------------------------------------------------------------------------- 1 | blank_issues_enabled: false 2 | contact_links: 3 | - name: Slack community chat 4 | url: https://livekit.io/join-slack 5 | about: Ask questions and discuss with other LiveKit users in real time 6 | - name: LiveKit Agents for Python 7 | url: https://github.com/livekit/agents 8 | about: The canonical version of LiveKit Agents. Direct feature requests here 9 | -------------------------------------------------------------------------------- /.github/ISSUE_TEMPLATE/question.yaml: -------------------------------------------------------------------------------- 1 | name: Question 2 | description: Ask for help with an issue you're facing 3 | labels: ["question"] 4 | body: 5 | - type: markdown 6 | attributes: 7 | value: | 8 | Hello! Thanks for taking the time to ask a question. 9 | 10 | Before creating this issue, we kindly ask that you use the search functionality to see if anyone else has already asked this question. 11 | Feel free to join us in the `#agents` channel on [our Slack](https://livekit.io/join-slack), and ask your question there to get quicker help from us and the community. 
12 | - type: textarea 13 | attributes: 14 | label: Your question 15 | validations: 16 | required: true 17 | -------------------------------------------------------------------------------- /.github/banner_dark.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit/agents-js/bae12f562fc81e18a440260dec8c399df410a8ee/.github/banner_dark.png -------------------------------------------------------------------------------- /.github/banner_light.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/livekit/agents-js/bae12f562fc81e18a440260dec8c399df410a8ee/.github/banner_light.png -------------------------------------------------------------------------------- /.github/workflows/build.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | name: Build 6 | on: 7 | push: 8 | branches: [next, main] 9 | pull_request: 10 | branches: [next, main] 11 | 12 | jobs: 13 | reuse: 14 | name: REUSE-3.2 15 | runs-on: ubuntu-latest 16 | steps: 17 | - uses: actions/checkout@v4 18 | - uses: fsfe/reuse-action@v4 19 | lint: 20 | name: Formatting 21 | runs-on: ubuntu-latest 22 | steps: 23 | - uses: actions/checkout@v4 24 | - uses: pnpm/action-setup@v4 25 | - name: Setup node 26 | uses: actions/setup-node@v4 27 | with: 28 | node-version: 20 29 | cache: pnpm 30 | - name: Install dependencies 31 | run: pnpm install --frozen-lockfile 32 | - name: Lint 33 | run: pnpm lint 34 | - name: Prettier 35 | run: pnpm format:check 36 | build: 37 | name: Build 38 | runs-on: ubuntu-latest 39 | steps: 40 | - uses: actions/checkout@v4 41 | - uses: pnpm/action-setup@v4 42 | - name: Setup node 43 | uses: actions/setup-node@v4 44 | with: 45 | node-version: 20 46 | cache: pnpm 47 | - name: Install dependencies 48 | run: pnpm install 
--frozen-lockfile 49 | - name: Build 50 | run: pnpm build 51 | -------------------------------------------------------------------------------- /.github/workflows/release.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | name: Release 6 | 7 | on: 8 | push: 9 | branches: 10 | - main 11 | 12 | concurrency: ${{ github.workflow }}-${{ github.ref }} 13 | 14 | jobs: 15 | release: 16 | name: Release 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | lfs: true 22 | - uses: pnpm/action-setup@v4 23 | - name: Use Node.js 20 24 | uses: actions/setup-node@v4 25 | with: 26 | node-version: 20 27 | cache: pnpm 28 | - name: Install dependencies 29 | run: pnpm install --frozen-lockfile 30 | - name: Create Release Pull Request or Publish to npm 31 | id: changesets 32 | uses: changesets/action@v1 33 | with: 34 | publish: pnpm ci:publish 35 | env: 36 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 37 | NPM_TOKEN: ${{ secrets.NPM_TOKEN }} 38 | - name: Build docs 39 | if: steps.changesets.outputs.published == 'true' 40 | run: pnpm doc 41 | - name: S3 upload 42 | if: steps.changesets.outputs.published == 'true' 43 | run: aws s3 cp docs/ s3://livekit-docs/agents-js --recursive 44 | env: 45 | AWS_ACCESS_KEY_ID: ${{ secrets.DOCS_DEPLOY_AWS_ACCESS_KEY }} 46 | AWS_SECRET_ACCESS_KEY: ${{ secrets.DOCS_DEPLOY_AWS_API_SECRET }} 47 | AWS_DEFAULT_REGION: "us-east-1" 48 | -------------------------------------------------------------------------------- /.github/workflows/test.yml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | name: Test 6 | on: 7 | push: 8 | branches: [next, main] 9 | pull_request: 10 | branches: [next, main] 11 | 12 | jobs: 13 | build: 14 | name: Test 15 | # Don't run tests for PRs on forks 16 | if: github.event_name != 'pull_request' || github.event.pull_request.head.repo.fork == false 17 | runs-on: ubuntu-latest 18 | steps: 19 | - uses: actions/checkout@v4 20 | with: 21 | lfs: true 22 | - uses: pnpm/action-setup@v4 23 | - name: Setup node 24 | uses: actions/setup-node@v4 25 | with: 26 | node-version: 20 27 | cache: pnpm 28 | - name: Install dependencies 29 | run: pnpm install --frozen-lockfile 30 | - name: Build 31 | run: pnpm build 32 | - name: Check which tests to run 33 | uses: dorny/paths-filter@v3 34 | id: filter 35 | with: 36 | filters: | 37 | agents-or-tests: 38 | - 'agents/**' 39 | - 'plugins/test/**' 40 | plugins: 41 | - 'plugins/**' 42 | - name: Test agents 43 | if: steps.filter.outputs.agents-or-tests == 'true' || github.event_name != 'pull_request' 44 | run: pnpm test agents 45 | - name: Test all plugins 46 | if: steps.filter.outputs.agents-or-tests == 'true' || github.event_name != 'pull_request' 47 | env: 48 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 49 | ELEVEN_API_KEY: ${{ secrets.ELEVEN_API_KEY }} 50 | DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} 51 | CARTESIA_API_KEY: ${{ secrets.CARTESIA_API_KEY }} 52 | NEUPHONIC_API_KEY: ${{ secrets.NEUPHONIC_API_KEY }} 53 | RESEMBLE_API_KEY: ${{ secrets.RESEMBLE_API_KEY }} 54 | run: pnpm test plugins 55 | - name: Test specific plugins 56 | if: steps.filter.outputs.agents-or-tests == 'false' && steps.filter.outputs.plugins == 'true' && github.event_name == 'pull_request' 57 | env: 58 | OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }} 59 | ELEVEN_API_KEY: ${{ secrets.ELEVEN_API_KEY }} 60 | DEEPGRAM_API_KEY: ${{ secrets.DEEPGRAM_API_KEY }} 61 | CARTESIA_API_KEY: ${{ secrets.CARTESIA_API_KEY }} 62 | NEUPHONIC_API_KEY: ${{ secrets.NEUPHONIC_API_KEY }} 63 | 
RESEMBLE_API_KEY: ${{ secrets.RESEMBLE_API_KEY }} 64 | run: | 65 | plugins=$(git diff-tree --name-only --no-commit-id -r ${{ github.sha }} | grep '^plugins.*\.ts$' | cut -d/ -f2 | sort -u | tr '\n' ' ') 66 | read -ra plugins <<< "$plugins" 67 | for plugin in "${plugins[@]}"; do 68 | pnpm test $plugin 69 | done 70 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Based on https://raw.githubusercontent.com/github/gitignore/main/Node.gitignore 2 | 3 | # Logs 4 | 5 | logs 6 | *.log 7 | npm-debug.log* 8 | yarn-debug.log* 9 | yarn-error.log* 10 | lerna-debug.log* 11 | .pnpm-debug.log* 12 | 13 | # Caches 14 | 15 | .cache 16 | 17 | # Diagnostic reports (https://nodejs.org/api/report.html) 18 | 19 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 20 | 21 | # Runtime data 22 | 23 | pids 24 | *.pid 25 | *.seed 26 | *.pid.lock 27 | 28 | # Directory for instrumented libs generated by jscoverage/JSCover 29 | 30 | lib-cov 31 | 32 | # Coverage directory used by tools like istanbul 33 | 34 | coverage 35 | *.lcov 36 | 37 | # nyc test coverage 38 | 39 | .nyc_output 40 | 41 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 42 | 43 | .grunt 44 | 45 | # Bower dependency directory (https://bower.io/) 46 | 47 | bower_components 48 | 49 | # node-waf configuration 50 | 51 | .lock-wscript 52 | 53 | # Compiled binary addons (https://nodejs.org/api/addons.html) 54 | 55 | build/Release 56 | 57 | # Dependency directories 58 | 59 | node_modules/ 60 | jspm_packages/ 61 | 62 | # Snowpack dependency directory (https://snowpack.dev/) 63 | 64 | web_modules/ 65 | 66 | # TypeScript cache 67 | 68 | *.tsbuildinfo 69 | 70 | # Optional npm cache directory 71 | 72 | .npm 73 | 74 | # Optional eslint cache 75 | 76 | .eslintcache 77 | 78 | # Optional stylelint cache 79 | 80 | .stylelintcache 81 | 82 | # Microbundle cache 83 | 84 | .rpt2_cache/ 85 | 
.rts2_cache_cjs/ 86 | .rts2_cache_es/ 87 | .rts2_cache_umd/ 88 | 89 | # Optional REPL history 90 | 91 | .node_repl_history 92 | 93 | # Output of 'npm pack' 94 | 95 | *.tgz 96 | 97 | # Yarn Integrity file 98 | 99 | .yarn-integrity 100 | 101 | # dotenv environment variable files 102 | 103 | .env 104 | .env.development.local 105 | .env.test.local 106 | .env.production.local 107 | .env.local 108 | 109 | # parcel-bundler cache (https://parceljs.org/) 110 | 111 | .parcel-cache 112 | 113 | # Next.js build output 114 | 115 | .next 116 | out 117 | 118 | # Nuxt.js build / generate output 119 | 120 | .nuxt 121 | dist 122 | 123 | # Gatsby files 124 | 125 | # Comment in the public line in if your project uses Gatsby and not Next.js 126 | 127 | # https://nextjs.org/blog/next-9-1#public-directory-support 128 | 129 | # public 130 | 131 | # vuepress build output 132 | 133 | .vuepress/dist 134 | 135 | # vuepress v2.x temp and cache directory 136 | 137 | .temp 138 | 139 | # Docusaurus cache and generated files 140 | 141 | .docusaurus 142 | 143 | # Serverless directories 144 | 145 | .serverless/ 146 | 147 | # FuseBox cache 148 | 149 | .fusebox/ 150 | 151 | # DynamoDB Local files 152 | 153 | .dynamodb/ 154 | 155 | # TernJS port file 156 | 157 | .tern-port 158 | 159 | # Stores VSCode versions used for testing VSCode extensions 160 | 161 | .vscode-test 162 | 163 | # yarn v2 164 | 165 | .yarn/cache 166 | .yarn/unplugged 167 | .yarn/build-state.yml 168 | .yarn/install-state.gz 169 | .pnp.* 170 | 171 | # IntelliJ based IDEs 172 | .idea 173 | 174 | # Finder (MacOS) folder config 175 | .DS_Store 176 | 177 | # turbo 178 | .turbo 179 | 180 | # API extractor 181 | temp 182 | 183 | # typedoc 184 | docs 185 | 186 | # direnv 187 | .direnv 188 | -------------------------------------------------------------------------------- /.prettierignore: -------------------------------------------------------------------------------- 1 | dist 2 | node_modules 3 | pnpm-lock.yaml 
-------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "singleQuote": true, 3 | "trailingComma": "all", 4 | "semi": true, 5 | "tabWidth": 2, 6 | "printWidth": 100, 7 | "importOrder": ["", "^[./]"], 8 | "importOrderSeparation": false, 9 | "importOrderSortSpecifiers": true, 10 | "importOrderParserPlugins": ["typescript"], 11 | "plugins": ["@trivago/prettier-plugin-sort-imports"] 12 | } 13 | -------------------------------------------------------------------------------- /CODE_OF_CONDUCT.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | # Code of Conduct 8 | 9 | ## Our Pledge 10 | 11 | We are committed to providing a welcoming, respectful, and harassment-free 12 | environment for everyone, regardless of background, experience, or identity. We 13 | strive to foster a positive and inclusive community where all participants feel 14 | valued and empowered to contribute. 
15 | 16 | ## Our Standards 17 | 18 | ### Expected behavior 19 | 20 | * Demonstrating empathy and kindness toward other people 21 | * Being respectful of differing opinions, viewpoints, and experiences 22 | * Giving and gracefully accepting constructive feedback 23 | * Accepting responsibility and apologizing to those affected by our mistakes, 24 | and learning from the experience 25 | * Focusing on what is best not just for us as individuals, but for the overall 26 | community 27 | 28 | ### Unacceptable behavior 29 | 30 | * Harassment, discrimination, or offensive comments regarding identity, 31 | appearance, or background 32 | * Publishing others' private information, such as a physical or email address, 33 | without their explicit permission 34 | * Personal attacks, insults, or disruptive behavior that undermines the 35 | community 36 | * Posting content or engaging in activities that are inappropriate, unlawful, or 37 | harmful 38 | 39 | ## Enforcement Responsibilities 40 | 41 | Community leaders are responsible for clarifying and enforcing our standards of 42 | acceptable behavior and will take appropriate and fair corrective action in 43 | response to any behavior that they deem inappropriate, threatening, offensive, 44 | or harmful. 45 | 46 | Community leaders have the right and responsibility to remove, edit, or reject 47 | comments, commits, code, wiki edits, issues, and other contributions that are 48 | not aligned to this Code of Conduct, and will communicate reasons for moderation 49 | decisions when appropriate. 50 | 51 | ## Scope 52 | 53 | This Code of Conduct applies within all community spaces, and also applies when 54 | an individual is officially representing the community in public spaces. 55 | Examples of representing our community include using an official email address, 56 | posting via an official social media account, or acting as an appointed 57 | representative at an online or offline event. 
58 | 59 | ## Enforcement 60 | 61 | Instances of abusive, harassing, or otherwise unacceptable behavior may be 62 | reported to the community leaders responsible for enforcement at 63 | . 64 | All complaints will be reviewed and investigated promptly and fairly. 65 | 66 | All community leaders are obligated to respect the privacy and security of the 67 | reporter of any incident. 68 | 69 | ## Enforcement Guidelines 70 | 71 | Violations of this Code of Conduct may result in removal from the community, 72 | project, or repository. Severe violations may result in a permanent ban. 73 | 74 | ## Attribution 75 | 76 | This Code of Conduct is adapted from the [Contributor Covenant][homepage], 77 | version 2.1, available at 78 | [https://www.contributor-covenant.org/version/2/1/code_of_conduct.html][v2.1]. 79 | It has been subtly adapted for formatting and brevity, as well as changing the 80 | actions taken after a violation. 81 | 82 | Community Impact Guidelines were inspired by 83 | [Mozilla's code of conduct enforcement ladder][Mozilla CoC]. 84 | 85 | For answers to common questions about this code of conduct, see the FAQ at 86 | [https://www.contributor-covenant.org/faq][FAQ]. Translations are available at 87 | [https://www.contributor-covenant.org/translations][translations]. 
88 | 89 | [homepage]: https://www.contributor-covenant.org 90 | [v2.1]: https://www.contributor-covenant.org/version/2/1/code_of_conduct.html 91 | [Mozilla CoC]: https://github.com/mozilla/diversity 92 | [FAQ]: https://www.contributor-covenant.org/faq 93 | [translations]: https://www.contributor-covenant.org/translations 94 | 95 | -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | 6 | 7 | # Contributing to agents-js 8 | 9 | The LiveKit Node Agents framework is an open-source project, and we welcome any contribution from 10 | anyone willing to work in good faith with the community. No contribution is too small! 11 | 12 | ## Code of Conduct 13 | 14 | The Node Agents project has a [Code of Conduct](/CODE_OF_CONDUCT.md) to which all contributors must 15 | adhere. 16 | 17 | ## Contribute code 18 | 19 | There are many ways you can contribute code to the project: 20 | 21 | - **Write a plugin**: if there is a TTS/STT/LLM provider you use that isn't on our plugins list, 22 | feel free to write a plugin for it! Refer to the source code of similar plugins to see how they're 23 | built. 24 | 25 | - **Fix bugs**: we strive to make this framework as reliable as possible, and we'd appreciate your 26 | help with squashing bugs and improving stability. Follow the guidelines below for information 27 | about authoring pull requests. 28 | 29 | - **Add new features**: we're open to adding new features to the framework, though we ask that you 30 | open an issue first to discuss the viability and scope of the new functionality before starting 31 | work. 32 | 33 | Our continuous integration requires a few additional code quality steps for your pull request to 34 | be approved: 35 | 36 | - Base your work off of the `next` branch instead of `main`. `next` is the most up-to-date branch. 
37 | 38 | - When creating a new file, make sure to add SPDX headers for [REUSE-3.2](https://reuse.software) 39 | compliance. If you don't know what that means, just copy the first three lines from any other 40 | TypeScript file in the repository and paste them at the top of your file. 41 | 42 | - Run `pnpm -w format:write` and `pnpm -w lint:fix` before committing your changes to ensure 43 | consistent file formatting and best practices. 44 | 45 | - If writing new methods/interfaces/enums/classes, document them. This project uses 46 | [TypeDoc](https://typedoc.org) for automatic API documentation generation, and every new addition 47 | has to be properly documented. 48 | 49 | - On your first pull request, the CLA Assistant bot will give you a link to sign this project's 50 | Contributor License Agreement, required to add your code to the repository. 51 | 52 | - There's no need to mess around with `CHANGELOG.md` or package manifests — we have a bot handle 53 | that for us. A maintainer will add the necessary notes before merging. 54 | 55 | ## Assist others in the community 56 | 57 | If you can't contribute code, you can still help us greatly by helping out community members who 58 | may have questions about the framework and how to use it. Join the `#agents` channel on 59 | [our Slack](https://livekit.io/join-slack). 60 | -------------------------------------------------------------------------------- /NOTICE: -------------------------------------------------------------------------------- 1 | Copyright 2024 LiveKit, Inc. 2 | 3 | Licensed under the Apache License, Version 2.0 (the "License"); 4 | you may not use this file except in compliance with the License. 
5 | You may obtain a copy of the License at 6 | 7 | http://www.apache.org/licenses/LICENSE-2.0 8 | 9 | Unless required by applicable law or agreed to in writing, software 10 | distributed under the License is distributed on an "AS IS" BASIS, 11 | WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 | See the License for the specific language governing permissions and 13 | limitations under the License. 14 | -------------------------------------------------------------------------------- /REUSE.toml: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | version = 1 6 | SPDX-PackageName = "agents-js" 7 | SPDX-PackageSupplier = "LiveKit, Inc. " 8 | SPDX-PackageDownloadLocation = "https://github.com/livekit/agents-js" 9 | 10 | # trivial files 11 | [[annotations]] 12 | path = [".gitignore", "flake.lock", ".envrc", "packages/livekit-rtc/.gitignore", ".changeset/**", "**/CHANGELOG.md", "NOTICE", ".github/**"] 13 | SPDX-FileCopyrightText = "2024 LiveKit, Inc." 14 | SPDX-License-Identifier = "Apache-2.0" 15 | 16 | # pnpm files 17 | [[annotations]] 18 | path = ["pnpm-workspace.yaml", "pnpm-lock.yaml"] 19 | SPDX-FileCopyrightText = "2024 LiveKit, Inc." 20 | SPDX-License-Identifier = "Apache-2.0" 21 | 22 | # project configuration files 23 | [[annotations]] 24 | path = [".prettierrc", ".prettierignore", ".eslintrc", "**.json", "**/tsup.config.ts"] 25 | SPDX-FileCopyrightText = "2024 LiveKit, Inc." 26 | SPDX-License-Identifier = "Apache-2.0" 27 | 28 | # silero onnx file 29 | [[annotations]] 30 | path = ["**/silero_vad.onnx"] 31 | SPDX-FileCopyrightText = "2024 Silero Team" 32 | SPDX-License-Identifier = "CC-BY-NC-SA-4.0" 33 | 34 | # turn detector onnx file 35 | [[annotations]] 36 | path = ["**/turn_detector.onnx"] 37 | SPDX-FileCopyrightText = "2024 LiveKit, Inc." 
38 | SPDX-License-Identifier = "LicenseRef-LiveKitModelLicense" 39 | 40 | # testing files 41 | [[annotations]] 42 | path = ["**/.gitattributes", "**.wav"] 43 | SPDX-FileCopyrightText = "2024 LiveKit, Inc." 44 | SPDX-License-Identifier = "Apache-2.0" 45 | -------------------------------------------------------------------------------- /agents/README.md: -------------------------------------------------------------------------------- 1 | 6 | # LiveKit Agents for Node.js 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 11 | 12 | This package contains the main LiveKit Agents SDK. Refer to the 13 | [documentation](https://docs.livekit.io/agents/overview/) for information on how 14 | to use it, or browse the [API 15 | reference](https://docs.livekit.io/agents-js/modules/agents.html). See the 16 | [repository](https://github.com/livekit/agents-js) for more information about 17 | the framework as a whole. 18 | -------------------------------------------------------------------------------- /agents/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 
14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /agents/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents", 3 | "version": "0.7.6", 4 | "description": "LiveKit Agents - Node.js", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.ts\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder ../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/rtc-node": "^0.13.12", 34 | "@microsoft/api-extractor": "^7.35.0", 35 | "@types/node": "^22.5.5", 36 | "@types/ws": "^8.5.10", 37 | "tsup": "^8.4.0", 38 | "typescript": "^5.0.0" 39 | }, 40 | "dependencies": { 41 | "@livekit/mutex": "^1.1.1", 42 | "@livekit/protocol": "^1.29.1", 43 | "@livekit/typed-emitter": "^3.0.0", 44 | "commander": "^12.0.0", 45 | "livekit-server-sdk": "^2.9.2", 46 | "pino": "^8.19.0", 47 | "pino-pretty": "^11.0.0", 48 | "ws": "^8.16.0", 49 | "zod": "^3.23.8" 50 | }, 51 | "peerDependencies": { 52 | "@livekit/rtc-node": "^0.13.12" 53 | } 54 | } 55 | 
/**
 * AudioByteStream translates between LiveKit AudioFrame packets and raw byte data.
 *
 * Bytes are accumulated across `write` calls and emitted as fixed-size frames of
 * interleaved 16-bit samples; whatever does not fill a whole frame stays buffered
 * until the next `write` or until `flush` is called.
 */
export class AudioByteStream {
  #sampleRate: number;
  #numChannels: number;
  #bytesPerFrame: number;
  #buf: Int8Array;
  #logger = log();

  /**
   * @param sampleRate - sample rate handed to every emitted frame
   * @param numChannels - number of interleaved channels in the byte stream
   * @param samplesPerChannel - samples per channel in each emitted frame; defaults to
   *   `sampleRate / 10` (100ms) when `null`
   */
  constructor(sampleRate: number, numChannels: number, samplesPerChannel: number | null = null) {
    this.#sampleRate = sampleRate;
    this.#numChannels = numChannels;

    if (samplesPerChannel === null) {
      samplesPerChannel = Math.floor(sampleRate / 10); // 100ms by default
    }

    this.#bytesPerFrame = numChannels * samplesPerChannel * 2; // 2 bytes per sample (Int16)
    this.#buf = new Int8Array();
  }

  /**
   * Appends raw bytes to the internal buffer and returns every complete frame that can
   * now be produced.
   *
   * @param data - raw bytes of interleaved Int16 samples
   * @returns zero or more full-size frames, in stream order
   */
  write(data: ArrayBuffer): AudioFrame[] {
    const incoming = new Int8Array(data);

    // Concatenate with typed-array copies instead of spreading both arrays into an
    // intermediate JS array, which boxes every byte.
    const merged = new Int8Array(this.#buf.length + incoming.length);
    merged.set(this.#buf, 0);
    merged.set(incoming, this.#buf.length);

    const frames: AudioFrame[] = [];
    let offset = 0;
    while (merged.length - offset >= this.#bytesPerFrame) {
      // slice() copies, so the Int16Array view below covers exactly one frame
      const frameData = merged.slice(offset, offset + this.#bytesPerFrame);
      offset += this.#bytesPerFrame;

      frames.push(
        new AudioFrame(
          new Int16Array(frameData.buffer),
          this.#sampleRate,
          this.#numChannels,
          // 2 bytes per sample, and the samples are shared between all channels
          frameData.length / (2 * this.#numChannels),
        ),
      );
    }

    this.#buf = merged.slice(offset);
    return frames;
  }

  /**
   * Emits whatever is still buffered as one final, possibly shorter, frame and empties
   * the buffer.
   *
   * @returns a single frame, or an empty array when nothing is buffered or when the
   *   buffered bytes do not form a whole number of samples for every channel (those
   *   bytes are discarded with a warning)
   */
  flush(): AudioFrame[] {
    // Take ownership of the pending bytes up front so that no path can emit them twice.
    const pending = this.#buf;
    this.#buf = new Int8Array();

    if (pending.length === 0) {
      return [];
    }

    if (pending.length % (2 * this.#numChannels) !== 0) {
      this.#logger.warn('AudioByteStream: incomplete frame during flush, dropping');
      return [];
    }

    return [
      new AudioFrame(
        new Int16Array(pending.buffer),
        this.#sampleRate,
        this.#numChannels,
        pending.length / (2 * this.#numChannels),
      ),
    ];
  }
}
/** Answers a liveness probe with a plain-text `200 OK`. */
const healthCheck = async (res: ServerResponse) => {
  res.writeHead(200);
  res.end('OK');
};

/** JSON payload served on `/worker`, as produced by the worker listener. */
interface WorkerResponse {
  agent_name: string;
  worker_type: string;
  active_jobs: number;
}

/**
 * Minimal HTTP server exposing two routes: `/` (health check) and `/worker` (JSON
 * returned by the listener passed to the constructor). Every other URL gets a 404.
 */
export class HTTPServer {
  host: string;
  port: number;
  app: Server;
  #logger = log();

  /**
   * @param host - interface to bind to
   * @param port - port to bind to
   * @param workerListener - called on every `/worker` request; its return value is
   *   serialized as the response body
   */
  constructor(host: string, port: number, workerListener: () => WorkerResponse) {
    this.host = host;
    this.port = port;

    this.app = createServer((req: IncomingMessage, res: ServerResponse) => {
      if (req.url === '/') {
        void healthCheck(res);
      } else if (req.url === '/worker') {
        res.writeHead(200, { 'Content-Type': 'application/json' });
        res.end(JSON.stringify(workerListener()));
      } else {
        res.writeHead(404);
        res.end('not found');
      }
    });
  }

  /**
   * Starts listening on the configured host and port.
   *
   * @returns a promise that resolves once the server is listening and rejects if the
   *   server emits `'error'` before that (for example when the port is already in use)
   */
  async run(): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      // listen() never passes an error to its callback; startup failures are reported
      // through the 'error' event instead.
      const onStartupError = (err: Error) => reject(err);
      this.app.once('error', onStartupError);

      this.app.listen(this.port, this.host, () => {
        this.app.off('error', onStartupError);
        const address = this.app.address();
        if (address !== null && typeof address !== 'string') {
          this.#logger.info(`Server is listening on port ${address.port}`);
        }
        resolve();
      });
    });
  }

  /**
   * Stops the server.
   *
   * @returns a promise that resolves once the server has closed and rejects with the
   *   error reported by `Server.close`, if any
   */
  async close(): Promise<void> {
    return new Promise<void>((resolve, reject) => {
      this.app.close((err?: Error) => {
        if (err) {
          reject(err);
          return;
        }
        resolve();
      });
    });
  }
}
/**
 * Base class for inference runners, plus a static registry that maps an inference
 * method name to the import path of the module implementing it.
 *
 * @internal
 */
export abstract class InferenceRunner {
  static INFERENCE_METHOD: string;
  static registeredRunners: { [id: string]: string } = {};

  /**
   * Records `importPath` as the implementation of `method`.
   *
   * @param method - inference method name used as the registry key
   * @param importPath - module path stored for that method
   * @throws Error when `method` has already been registered
   */
  static registerRunner(method: string, importPath: string) {
    // Own-property check: a truthiness test would report inherited keys such as
    // "toString" as registered, and would let an empty import path be overwritten.
    if (Object.prototype.hasOwnProperty.call(InferenceRunner.registeredRunners, method)) {
      throw new Error(`Inference runner ${method} already registered`);
    }
    InferenceRunner.registeredRunners[method] = importPath;
  }

  /** Prepares the runner for use. */
  abstract initialize(): Promise<void>;
  /** Runs one inference over `data` and resolves with its result. */
  abstract run(data: unknown): Promise<unknown>;
  /** Releases whatever the runner holds. */
  abstract close(): Promise<void>;
}
/** Shape of a settled inference request, mirroring the `inferenceResponse` IPC payload. */
type InferenceOutcome = { requestId: string; data: unknown; error?: Error };

/**
 * A deferred inference result: `promise` settles when `resolve` is called.
 *
 * The `promise` field initializer replaces `resolve` on the instance with the promise's
 * own resolver; the method declared below only gives the property its type.
 */
class PendingInference {
  promise = new Promise<InferenceOutcome>((resolve) => {
    this.resolve = resolve;
  });
  resolve(arg: InferenceOutcome) {
    arg;
  }
}

/**
 * Renders an inference error for inclusion in an exception message.
 *
 * Real `Error` instances and primitives keep their `String()` form. Plain objects are
 * shown by their `message` field or as JSON, because an error that crossed the process
 * boundary may arrive as a plain object and would otherwise print as "[object Object]".
 */
const formatInferenceError = (error: unknown): string => {
  if (error instanceof Error || typeof error !== 'object' || error === null) {
    return String(error);
  }
  const { message } = error as { message?: unknown };
  if (typeof message === 'string') {
    return message;
  }
  try {
    return JSON.stringify(error);
  } catch {
    return String(error);
  }
};

/**
 * Inference executor that forwards requests to a forked child process running
 * `inference_proc_lazy_main.js` and matches responses to callers by request id.
 */
export class InferenceProcExecutor extends SupervisedProc implements InferenceExecutor {
  #runners: { [id: string]: string };
  #activeRequests: { [id: string]: PendingInference } = {};
  #logger = log();

  /**
   * @param runners - map of inference method name to module path, passed to the child
   *   process as a JSON argument; the remaining options are forwarded unchanged to
   *   `SupervisedProc`
   */
  constructor({
    runners,
    initializeTimeout,
    closeTimeout,
    memoryWarnMB,
    memoryLimitMB,
    pingInterval,
    pingTimeout,
    highPingThreshold,
  }: {
    runners: { [id: string]: string };
    initializeTimeout: number;
    closeTimeout: number;
    memoryWarnMB: number;
    memoryLimitMB: number;
    pingInterval: number;
    pingTimeout: number;
    highPingThreshold: number;
  }) {
    super(
      initializeTimeout,
      closeTimeout,
      memoryWarnMB,
      memoryLimitMB,
      pingInterval,
      pingTimeout,
      highPingThreshold,
    );
    this.#runners = runners;
  }

  /** Forks the inference entry point, handing it the runner map as its only argument. */
  createProcess(): ChildProcess {
    return fork(new URL(import.meta.resolve('./inference_proc_lazy_main.js')), [
      JSON.stringify(this.#runners),
    ]);
  }

  /** Routes `inferenceResponse` messages from the child to the caller awaiting them. */
  async mainTask(proc: ChildProcess) {
    proc.on('message', (msg: IPCMessage) => {
      switch (msg.case) {
        case 'inferenceResponse': {
          const pending = this.#activeRequests[msg.value.requestId];
          delete this.#activeRequests[msg.value.requestId];
          if (!pending) {
            this.#logger
              .child({ requestId: msg.value.requestId })
              .warn('received unexpected inference response');
            return;
          }

          pending.resolve(msg.value);
          break;
        }
      }
    });
  }

  /**
   * Runs `method` in the inference process.
   *
   * @param method - inference method name the child process should dispatch on
   * @param data - payload passed to the runner
   * @returns the `data` field of the child's response
   * @throws Error when the process is not running, when the request cannot be
   *   delivered, or when the response carries an error
   */
  async doInference(method: string, data: unknown): Promise<unknown> {
    const proc = this.proc;
    if (!proc) {
      throw new Error(`inference of ${method} failed: inference process is not running`);
    }

    const requestId = 'inference_req_' + randomUUID();
    const fut = new PendingInference();
    // Register before sending so the response can never arrive unmatched.
    this.#activeRequests[requestId] = fut;

    proc.send({ case: 'inferenceRequest', value: { requestId, method, data } }, (err) => {
      if (err) {
        // Delivery failed, so no response will ever come: settle the request here
        // instead of leaving the caller waiting forever.
        delete this.#activeRequests[requestId];
        fut.resolve({ requestId, data: undefined, error: err });
      }
    });

    const res = await fut.promise;
    if (res.error) {
      throw new Error(`inference of ${method} failed: ${formatInferenceError(res.error)}`);
    }
    return res.data;
  }
}
18 | process.on('SIGINT', () => { 19 | logger.info('SIGINT received in inference proc'); 20 | }); 21 | 22 | // don't do anything on SIGTERM 23 | // Render uses SIGTERM in autoscale, this ensures the processes are properly drained if needed 24 | process.on('SIGTERM', () => { 25 | logger.info('SIGTERM received in inference proc'); 26 | }); 27 | 28 | await once(process, 'message').then(([msg]: IPCMessage[]) => { 29 | msg = msg!; 30 | if (msg.case !== 'initializeRequest') { 31 | throw new Error('first message must be InitializeRequest'); 32 | } 33 | initializeLogger(msg.value.loggerOptions); 34 | }); 35 | const logger = log().child({ pid: process.pid }); 36 | 37 | const runners: { [id: string]: InferenceRunner } = await Promise.all( 38 | Object.entries(JSON.parse(process.argv[2]!)).map(async ([k, v]) => { 39 | return [k, await import(v as string).then((m) => new m.default())]; 40 | }), 41 | ).then(Object.fromEntries); 42 | 43 | await Promise.all( 44 | Object.entries(runners).map(async ([runner, v]) => { 45 | logger.child({ runner }).debug('initializing inference runner'); 46 | await v.initialize(); 47 | }), 48 | ); 49 | logger.debug('all inference runners initialized'); 50 | process.send({ case: 'initializeResponse' }); 51 | 52 | const orphanedTimeout = setTimeout(() => { 53 | logger.warn('inference process orphaned, shutting down.'); 54 | process.exit(); 55 | }, ORPHANED_TIMEOUT); 56 | 57 | const handleInferenceRequest = async ({ 58 | method, 59 | requestId, 60 | data, 61 | }: { 62 | method: string; 63 | requestId: string; 64 | data: unknown; 65 | }) => { 66 | if (!runners[method]) { 67 | logger.child({ method }).warn('unknown inference method'); 68 | } 69 | 70 | try { 71 | const resp = await runners[method]!.run(data); 72 | process.send!({ case: 'inferenceResponse', value: { requestId, data: resp } }); 73 | } catch (error) { 74 | process.send!({ case: 'inferenceResponse', value: { requestId, error } }); 75 | } 76 | }; 77 | 78 | const messageHandler = (msg: 
IPCMessage) => { 79 | switch (msg.case) { 80 | case 'pingRequest': 81 | orphanedTimeout.refresh(); 82 | process.send!({ 83 | case: 'pongResponse', 84 | value: { lastTimestamp: msg.value.timestamp, timestamp: Date.now() }, 85 | }); 86 | break; 87 | case 'shutdownRequest': 88 | logger.info('inference process received shutdown request'); 89 | clearTimeout(orphanedTimeout); 90 | // Remove our message handler to stop processing new messages 91 | process.off('message', messageHandler); 92 | Promise.all(Object.values(runners).map((r) => r.close())) 93 | .then(() => { 94 | logger.info('Inference runners closed'); 95 | process.send!({ case: 'done' }); 96 | join.resolve(); 97 | }) 98 | .catch((err) => { 99 | logger.error('Error closing inference runners:', err); 100 | }); 101 | break; 102 | case 'inferenceRequest': 103 | handleInferenceRequest(msg.value); 104 | } 105 | }; 106 | 107 | process.on('message', messageHandler); 108 | 109 | await join.await; 110 | 111 | logger.info('Inference process shutdown'); 112 | 113 | return process.exitCode; 114 | } 115 | })(); 116 | -------------------------------------------------------------------------------- /agents/src/ipc/job_executor.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
/**
 * Job executor backed by a forked child process running `job_proc_lazy_main.js`.
 *
 * Besides launching the job, it relays `inferenceRequest` messages coming from the job
 * process to the optional inference executor and sends the outcome back.
 */
export class JobProcExecutor extends SupervisedProc implements JobExecutor {
  #userArgs?: any;
  #jobStatus?: JobStatus;
  #runningJob?: RunningJobInfo;
  #agent: string;
  #inferenceExecutor?: InferenceExecutor;
  #inferenceTasks: Promise<void>[] = [];
  #logger = log();

  /**
   * @param agent - value passed to the child process as its only argument
   * @param inferenceExecutor - executor used to serve the job's inference requests, if any;
   *   the remaining parameters are forwarded unchanged to `SupervisedProc`
   */
  constructor(
    agent: string,
    inferenceExecutor: InferenceExecutor | undefined,
    initializeTimeout: number,
    closeTimeout: number,
    memoryWarnMB: number,
    memoryLimitMB: number,
    pingInterval: number,
    pingTimeout: number,
    highPingThreshold: number,
  ) {
    super(
      initializeTimeout,
      closeTimeout,
      memoryWarnMB,
      memoryLimitMB,
      pingInterval,
      pingTimeout,
      highPingThreshold,
    );
    this.#agent = agent;
    this.#inferenceExecutor = inferenceExecutor;
  }

  /**
   * Status of the launched job.
   *
   * @throws Error when no job has been launched yet
   */
  get status(): JobStatus {
    // Compare against undefined: JobStatus.RUNNING is the first member of a numeric
    // enum (0), so a truthiness test would treat a running job as "not available".
    if (this.#jobStatus !== undefined) {
      return this.#jobStatus;
    }
    throw new Error('job status not available');
  }

  get userArguments(): any {
    return this.#userArgs;
  }

  set userArguments(args: any) {
    this.#userArgs = args;
  }

  /** The job handed to `launchJob`, or `undefined` before that. */
  get runningJob(): RunningJobInfo | undefined {
    return this.#runningJob;
  }

  /** Forks the job entry point, handing it the agent as its only argument. */
  createProcess(): ChildProcess {
    return fork(new URL(import.meta.resolve('./job_proc_lazy_main.js')), [this.#agent]);
  }

  /** Starts one inference task for every `inferenceRequest` received from the child. */
  async mainTask(proc: ChildProcess) {
    proc.on('message', (msg: IPCMessage) => {
      switch (msg.case) {
        case 'inferenceRequest': {
          const task = this.#doInferenceTask(proc, msg.value);
          this.#inferenceTasks.push(task);
          // Forget the task once it settles so the list only holds in-flight requests.
          void task.finally(() => {
            const idx = this.#inferenceTasks.indexOf(task);
            if (idx !== -1) {
              this.#inferenceTasks.splice(idx, 1);
            }
          });
          break;
        }
      }
    });
  }

  /**
   * Serves one inference request and replies to the child with either the result or
   * the error, always echoing the request id.
   */
  async #doInferenceTask(
    proc: ChildProcess,
    req: { method: string; requestId: string; data: unknown },
  ) {
    if (!this.#inferenceExecutor) {
      this.#logger.warn('inference request received but no inference executor');
      proc.send({
        case: 'inferenceResponse',
        value: { requestId: req.requestId, error: new Error('no inference executor') },
      });
      return;
    }

    try {
      const data = await this.#inferenceExecutor.doInference(req.method, req.data);
      proc.send({ case: 'inferenceResponse', value: { requestId: req.requestId, data } });
    } catch (error) {
      proc.send({ case: 'inferenceResponse', value: { requestId: req.requestId, error } });
    }
  }

  /**
   * Hands a job to the child process and marks it as running.
   *
   * @param info - job to start
   * @throws Error when a job is already running or the process is not initialized
   */
  async launchJob(info: RunningJobInfo) {
    if (this.#runningJob) {
      throw Error('process already has a running job');
    }
    if (!this.init.done) {
      throw Error('process not initialized');
    }
    this.#jobStatus = JobStatus.RUNNING;
    this.#runningJob = info;

    this.proc!.send({ case: 'startJobRequest', value: { runningJob: info } });
  }
}
/** Removes the first occurrence of `item` from `list`; does nothing when it is absent. */
const removeFrom = <T>(list: T[], item: T): void => {
  const idx = list.indexOf(item);
  if (idx !== -1) {
    list.splice(idx, 1);
  }
};

/**
 * Pool of job processes.
 *
 * With `numIdleProcesses > 0` the pool keeps that many initialized processes queued up
 * for `launchJob` to consume; otherwise every `launchJob` call starts a fresh process.
 */
export class ProcPool {
  agent: string;
  initializeTimeout: number;
  closeTimeout: number;
  executors: JobExecutor[] = [];
  tasks: Promise<void>[] = [];
  started = false;
  closed = false;
  controller = new AbortController();
  initMutex = new Mutex();
  procMutex?: MultiMutex;
  procUnlock?: () => void;
  warmedProcQueue = new Queue<JobExecutor>();
  inferenceExecutor?: InferenceExecutor;
  memoryWarnMB: number;
  memoryLimitMB: number;

  /**
   * @param agent - forwarded to every `JobProcExecutor`
   * @param numIdleProcesses - number of pre-initialized processes to keep ready;
   *   `0` disables pre-warming
   * @param initializeTimeout - forwarded to every `JobProcExecutor`
   * @param closeTimeout - forwarded to every `JobProcExecutor`
   * @param inferenceExecutor - forwarded to every `JobProcExecutor`
   * @param memoryWarnMB - forwarded to every `JobProcExecutor`
   * @param memoryLimitMB - forwarded to every `JobProcExecutor`
   */
  constructor(
    agent: string,
    numIdleProcesses: number,
    initializeTimeout: number,
    closeTimeout: number,
    inferenceExecutor: InferenceExecutor | undefined,
    memoryWarnMB: number,
    memoryLimitMB: number,
  ) {
    this.agent = agent;
    if (numIdleProcesses > 0) {
      this.procMutex = new MultiMutex(numIdleProcesses);
    }
    this.initializeTimeout = initializeTimeout;
    this.closeTimeout = closeTimeout;
    this.inferenceExecutor = inferenceExecutor;
    this.memoryWarnMB = memoryWarnMB;
    this.memoryLimitMB = memoryLimitMB;
  }

  /** Executors currently tracked by the pool. */
  get processes(): JobExecutor[] {
    return this.executors;
  }

  /** Finds the executor whose running job has the given id, or `null`. */
  getByJobId(id: string): JobExecutor | null {
    return this.executors.find((x) => x.runningJob && x.runningJob.job.id === id) || null;
  }

  /** Builds an executor with the pool's settings and the fixed ping parameters. */
  #createExecutor(): JobProcExecutor {
    return new JobProcExecutor(
      this.agent,
      this.inferenceExecutor,
      this.initializeTimeout,
      this.closeTimeout,
      this.memoryWarnMB,
      this.memoryLimitMB,
      2500,
      60000,
      500,
    );
  }

  /**
   * Launches `info` on a warmed process when pre-warming is enabled, otherwise on a
   * process started and initialized for this call.
   */
  async launchJob(info: RunningJobInfo) {
    let proc: JobExecutor;
    if (this.procMutex) {
      proc = await this.warmedProcQueue.get();
      // A warmed process was consumed: release the slot so run() can warm another.
      if (this.procUnlock) {
        this.procUnlock();
        this.procUnlock = undefined;
      }
    } else {
      proc = this.#createExecutor();
      this.executors.push(proc);
      await proc.start();
      await proc.initialize();
    }
    await proc.launchJob(info);
  }

  /**
   * Starts and initializes one process, queues it as warmed, and stays pending until
   * that process has exited. Initialization is serialized through `initMutex`.
   */
  async procWatchTask() {
    const proc = this.#createExecutor();
    this.executors.push(proc);

    try {
      const unlock = await this.initMutex.lock();
      try {
        if (this.closed) {
          return;
        }

        await proc.start();
        try {
          await proc.initialize();
          await this.warmedProcQueue.put(proc);
        } catch {
          // Initialization failed: release the slot so run() can try another process.
          if (this.procUnlock) {
            this.procUnlock();
            this.procUnlock = undefined;
          }
        }
      } finally {
        // Always release, including on the early return and when start() throws;
        // otherwise every later watcher would wait on this mutex forever.
        unlock();
      }

      await proc.join();
    } finally {
      // Remove only this executor; splice() without a delete count would also drop
      // every executor that was added after it.
      removeFrom(this.executors, proc);
    }
  }

  /** Starts the warming loop; later calls are no-ops. */
  start() {
    if (this.started) {
      return;
    }

    this.started = true;
    this.run(this.controller.signal);
  }

  /**
   * Warming loop: acquires a `procMutex` slot, then spawns a watch task for it, until
   * `signal` is aborted. Returns immediately when pre-warming is disabled.
   */
  async run(signal: AbortSignal) {
    if (this.procMutex) {
      while (!signal.aborted) {
        this.procUnlock = await this.procMutex.lock();
        const task = this.procWatchTask();
        this.tasks.push(task);
        void task.finally(() => removeFrom(this.tasks, task));
      }
    }
  }

  /**
   * Stops the warming loop, closes every known executor, and waits for the outstanding
   * watch tasks. Does nothing if the pool was never started.
   */
  async close() {
    if (!this.started) {
      return;
    }
    this.closed = true;
    this.controller.abort();

    // Warmed processes are also listed in `executors`; close each one exactly once.
    const toClose = new Set<JobExecutor>([...this.warmedProcQueue.items, ...this.executors]);
    await Promise.allSettled([...toClose].map((e) => e.close()));
    await Promise.allSettled(this.tasks);
  }
}
/** Author of a chat message. */
export enum ChatRole {
  SYSTEM,
  USER,
  ASSISTANT,
  TOOL,
}

/** An image attached to a chat message, either as a string or as a video frame. */
export interface ChatImage {
  image: string | VideoFrame;
  inferenceWidth?: number;
  inferenceHeight?: number;
  /**
   * @internal
   * Used by LLM implementations to store a processed version of the image for later use.
   */
  cache: { [id: string | number | symbol]: any };
}

/** Audio attached to a chat message, as one frame or a list of frames. */
export interface ChatAudio {
  frame: AudioFrame | AudioFrame[];
}

export type ChatContent = string | ChatImage | ChatAudio;

/** Values used by `ChatMessage.create` for every option the caller leaves out. */
const defaultCreateChatMessage: { text: string; images: ChatImage[]; role: ChatRole } = {
  text: '',
  images: [],
  role: ChatRole.SYSTEM,
};

/** A single immutable entry in a chat context. */
export class ChatMessage {
  readonly role: ChatRole;
  readonly id?: string;
  readonly name?: string;
  readonly content?: ChatContent | ChatContent[];
  readonly toolCalls?: FunctionCallInfo[];
  readonly toolCallId?: string;
  readonly toolException?: Error;

  /** @internal */
  constructor({
    role,
    id,
    name,
    content,
    toolCalls,
    toolCallId,
    toolException,
  }: {
    role: ChatRole;
    id?: string;
    name?: string;
    content?: ChatContent | ChatContent[];
    toolCalls?: FunctionCallInfo[];
    toolCallId?: string;
    toolException?: Error;
  }) {
    this.role = role;
    this.id = id;
    this.name = name;
    this.content = content;
    this.toolCalls = toolCalls;
    this.toolCallId = toolCallId;
    this.toolException = toolException;
  }

  /**
   * Builds the TOOL message that reports the outcome of a function call.
   *
   * The content is the function's result when there is one, otherwise the text
   * `Error: <error>`.
   *
   * @throws TypeError when `func` carries neither a result nor an error
   */
  static createToolFromFunctionResult(func: CallableFunctionResult): ChatMessage {
    // Presence is tested with `== null` rather than truthiness so that legitimate
    // falsy results (0, false, '') are neither rejected nor replaced by an error text.
    const hasResult = func.result != null;
    const hasError = func.error != null;
    if (!hasResult && !hasError) {
      throw new TypeError('CallableFunctionResult must include result or error');
    }

    return new ChatMessage({
      role: ChatRole.TOOL,
      name: func.name,
      content: hasResult ? func.result : `Error: ${func.error}`,
      toolCallId: func.toolCallId,
      toolException: func.error,
    });
  }

  /** Builds the ASSISTANT message that carries a list of tool calls and optional text. */
  static createToolCalls(toolCalls: FunctionCallInfo[], text = '') {
    return new ChatMessage({
      role: ChatRole.ASSISTANT,
      toolCalls,
      content: text,
    });
  }

  /**
   * Builds a message from text and/or images.
   *
   * Without images the content is the text itself; with images it is a list holding
   * the text (when non-empty) followed by the images. Options that are missing or
   * `undefined` fall back to empty text, no images and the SYSTEM role.
   */
  static create(
    options: Partial<{
      text?: string;
      images: ChatImage[];
      role: ChatRole;
    }>,
  ): ChatMessage {
    // `??` instead of object spread: an explicit `images: undefined` must not replace
    // the default list, or the length check below would throw.
    const text = options.text ?? defaultCreateChatMessage.text;
    const images = options.images ?? defaultCreateChatMessage.images;
    const role = options.role ?? defaultCreateChatMessage.role;

    if (!images.length) {
      return new ChatMessage({
        role,
        content: text,
      });
    } else {
      return new ChatMessage({
        role,
        content: [...(text ? [text] : []), ...images],
      });
    }
  }

  /**
   * Returns a copy of this message.
   *
   * The `content` and `toolCalls` lists are duplicated so that changing a list on the
   * copy leaves this message untouched; the entries themselves are shared.
   */
  copy(): ChatMessage {
    return new ChatMessage({
      role: this.role,
      id: this.id,
      name: this.name,
      content: Array.isArray(this.content) ? [...this.content] : this.content,
      toolCalls: this.toolCalls ? [...this.toolCalls] : undefined,
      toolCallId: this.toolCallId,
      toolException: this.toolException,
    });
  }
}

/** An ordered list of chat messages plus free-form metadata. */
export class ChatContext {
  messages: ChatMessage[] = [];
  metadata: { [id: string]: any } = {};

  /** Appends a message built with `ChatMessage.create` and returns this context. */
  append(msg: { text?: string; images?: ChatImage[]; role: ChatRole }): ChatContext {
    this.messages.push(ChatMessage.create(msg));
    return this;
  }

  /**
   * Returns a copy of this context: every message is copied with `ChatMessage.copy`
   * and the metadata is duplicated with `structuredClone`.
   */
  copy(): ChatContext {
    const ctx = new ChatContext();
    ctx.messages.push(...this.messages.map((msg) => msg.copy()));
    ctx.metadata = structuredClone(this.metadata);
    return ctx;
  }
}
// heavily inspired by Vercel AI's `tool()`:
// https://github.com/vercel/ai/blob/3b0983b/packages/ai/core/tool/tool.ts

/* eslint-disable @typescript-eslint/no-explicit-any */

/** Type reinforcement for the callable function's execute parameters. */
export type inferParameters<P extends z.ZodTypeAny> = z.infer<P>;

/** Raw OpenAI-adherent function parameters. */
export type OpenAIFunctionParameters = {
  type: 'object';
  properties: { [id: string]: any };
  required: string[];
  additionalProperties: boolean;
};

/** A definition for a function callable by the LLM. */
export interface CallableFunction<P extends z.ZodTypeAny = any, R = any> {
  description: string;
  parameters: OpenAIFunctionParameters | P;
  execute: (args: inferParameters<P>) => PromiseLike<R>;
}

/** A function that has been called but is not yet running */
export interface FunctionCallInfo<P extends z.ZodTypeAny = any, R = any> {
  name: string;
  func: CallableFunction<P, R>;
  toolCallId: string;
  rawParams: string;
  params: inferParameters<P>;
  task?: PromiseLike<CallableFunctionResult>;
}

/** The result of a ran FunctionCallInfo. */
export interface CallableFunctionResult {
  name: string;
  toolCallId: string;
  result?: any;
  error?: any;
}

/** An object containing callable functions and their names */
export type FunctionContext = {
  [name: string]: CallableFunction<any, any>;
};

// XXX: Zod is victim to the dual-package hazard. this is a hacky sorta-fix
// until Zod v4.0.0 is released.
// https://github.com/colinhacks/zod/issues/2241#issuecomment-2142688925
/**
 * Name-based stand-in for `instanceof`: walks the constructor chain of `value` and
 * reports whether any constructor in it has the same name as `target`.
 *
 * Returns `false` for `null`, `undefined` and objects without a constructor.
 */
const looksLikeInstanceof = <T>(
  value: unknown,
  target: new (...args: any[]) => T,
): value is T => {
  if (value === null || value === undefined) return false;

  let current: unknown = (value as { constructor?: unknown }).constructor;
  // The chain ends at the first link that is not a named function.
  while (typeof current === 'function' && current.name) {
    if (current.name === target.name) return true;
    current = Object.getPrototypeOf(current);
  }
  return false;
};

/**
 * Converts a Zod object schema into an OpenAI-style parameter description.
 *
 * Enums, arrays and nested objects are described recursively; every other type is
 * reported by its Zod type name, lower-cased and without the `zod` prefix. Fields that
 * are not optional are listed in `required`.
 *
 * @internal
 */
export const oaiParams = (p: z.AnyZodObject) => {
  const properties: Record<string, any> = {};
  const requiredProperties: string[] = [];

  const processZodType = (field: z.ZodTypeAny): any => {
    // Same name-based check as for the other Zod types below, so an optional field
    // created by a second copy of Zod is still unwrapped.
    const isOptional = looksLikeInstanceof(field, z.ZodOptional);
    const nestedField = isOptional ? field._def.innerType : field;
    const description = field._def.description;

    if (looksLikeInstanceof(nestedField, z.ZodEnum)) {
      return {
        type: typeof nestedField._def.values[0],
        ...(description && { description }),
        enum: nestedField._def.values,
      };
    } else if (looksLikeInstanceof(nestedField, z.ZodArray)) {
      const elementType = nestedField._def.type;
      return {
        type: 'array',
        ...(description && { description }),
        items: processZodType(elementType),
      };
    } else if (looksLikeInstanceof(nestedField, z.ZodObject)) {
      const { properties, required } = oaiParams(nestedField);
      return {
        type: 'object',
        ...(description && { description }),
        properties,
        required,
      };
    } else {
      let type = nestedField._def.typeName.toLowerCase();
      type = type.includes('zod') ? type.substring(3) : type;
      return {
        type,
        ...(description && { description }),
      };
    }
  };

  for (const key in p.shape) {
    const field = p.shape[key];
    properties[key] = processZodType(field);

    if (!looksLikeInstanceof(field, z.ZodOptional)) {
      requiredProperties.push(key);
    }
  }

  const type = 'object' as const;
  return {
    type,
    properties,
    required: requiredProperties,
  };
};

/**
 * Resolves a tool call requested by the model into a `FunctionCallInfo`.
 *
 * @param fncCtx - functions available to the model
 * @param toolCallId - id of the tool call, copied into the result
 * @param fncName - name of the requested function
 * @param rawArgs - JSON-encoded arguments; kept verbatim in `rawParams` and parsed
 *   into `params`
 * @throws Error when `fncName` is not in `fncCtx`
 * @throws SyntaxError when `rawArgs` is not valid JSON
 *
 * @internal
 */
export const oaiBuildFunctionInfo = (
  fncCtx: FunctionContext,
  toolCallId: string,
  fncName: string,
  rawArgs: string,
): FunctionCallInfo => {
  const func = fncCtx[fncName];
  if (!func) {
    throw new Error(`AI function ${fncName} not found`);
  }

  return {
    name: fncName,
    func,
    toolCallId,
    rawParams: rawArgs,
    params: JSON.parse(rawArgs),
  };
};
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | export { 5 | type CallableFunction, 6 | type FunctionCallInfo, 7 | type CallableFunctionResult, 8 | type FunctionContext, 9 | type inferParameters, 10 | oaiParams, 11 | oaiBuildFunctionInfo, 12 | } from './function_context.js'; 13 | 14 | export { 15 | type ChatImage, 16 | type ChatAudio, 17 | type ChatContent, 18 | ChatRole, 19 | ChatMessage, 20 | ChatContext, 21 | } from './chat_context.js'; 22 | 23 | export { 24 | type ChoiceDelta, 25 | type CompletionUsage, 26 | type Choice, 27 | type ChatChunk, 28 | type LLMCallbacks, 29 | LLMEvent, 30 | LLM, 31 | LLMStream, 32 | } from './llm.js'; 33 | -------------------------------------------------------------------------------- /agents/src/llm/llm.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import type { TypedEventEmitter as TypedEmitter } from '@livekit/typed-emitter'; 5 | import { EventEmitter } from 'node:events'; 6 | import type { LLMMetrics } from '../metrics/base.js'; 7 | import { AsyncIterableQueue } from '../utils.js'; 8 | import type { ChatContext, ChatRole } from './chat_context.js'; 9 | import type { FunctionCallInfo, FunctionContext } from './function_context.js'; 10 | 11 | export interface ChoiceDelta { 12 | role: ChatRole; 13 | content?: string; 14 | toolCalls?: FunctionCallInfo[]; 15 | } 16 | 17 | export interface CompletionUsage { 18 | completionTokens: number; 19 | promptTokens: number; 20 | totalTokens: number; 21 | } 22 | 23 | export interface Choice { 24 | delta: ChoiceDelta; 25 | index: number; 26 | } 27 | 28 | export interface ChatChunk { 29 | requestId: string; 30 | choices: Choice[]; 31 | usage?: CompletionUsage; 32 | } 33 | 34 | export enum LLMEvent { 35 | METRICS_COLLECTED, 36 | } 37 | 38 | export type LLMCallbacks = { 39 | [LLMEvent.METRICS_COLLECTED]: (metrics: LLMMetrics) => void; 40 | }; 41 | 42 | export abstract class LLM extends (EventEmitter as new () => TypedEmitter) { 43 | /** 44 | * Returns a {@link LLMStream} that can be used to push text and receive LLM responses. 
45 | */ 46 | abstract chat({ 47 | chatCtx, 48 | fncCtx, 49 | temperature, 50 | n, 51 | parallelToolCalls, 52 | }: { 53 | chatCtx: ChatContext; 54 | fncCtx?: FunctionContext; 55 | temperature?: number; 56 | n?: number; 57 | parallelToolCalls?: boolean; 58 | }): LLMStream; 59 | } 60 | 61 | export abstract class LLMStream implements AsyncIterableIterator { 62 | protected output = new AsyncIterableQueue(); 63 | protected queue = new AsyncIterableQueue(); 64 | protected closed = false; 65 | protected _functionCalls: FunctionCallInfo[] = []; 66 | abstract label: string; 67 | 68 | #llm: LLM; 69 | #chatCtx: ChatContext; 70 | #fncCtx?: FunctionContext; 71 | 72 | constructor(llm: LLM, chatCtx: ChatContext, fncCtx?: FunctionContext) { 73 | this.#llm = llm; 74 | this.#chatCtx = chatCtx; 75 | this.#fncCtx = fncCtx; 76 | this.monitorMetrics(); 77 | } 78 | 79 | protected async monitorMetrics() { // forwards every queued chunk to `output`, then emits METRICS_COLLECTED once the queue is closed 80 | const startTime = process.hrtime.bigint(); 81 | let ttft: bigint | undefined; 82 | let requestId = ''; 83 | let usage: CompletionUsage | undefined; 84 | 85 | for await (const ev of this.queue) { 86 | this.output.put(ev); 87 | requestId = ev.requestId; 88 | if (ttft === undefined) { 89 | ttft = process.hrtime.bigint() - startTime; 90 | } 91 | if (ev.usage) { 92 | usage = ev.usage; 93 | } 94 | } 95 | this.output.close(); 96 | 97 | const duration = process.hrtime.bigint() - startTime; 98 | const metrics: LLMMetrics = { 99 | timestamp: Date.now(), 100 | requestId, 101 | ttft: ttft === undefined ? -1 : Math.trunc(Number(ttft 
/ BigInt(1000000))), // -1 when the queue closed before any chunk arrived (dividing undefined by a BigInt would throw) 102 | duration: Math.trunc(Number(duration / BigInt(1000000))), 103 | cancelled: false, // XXX(nbsp) 104 | label: this.label, 105 | completionTokens: usage?.completionTokens || 0, 106 | promptTokens: usage?.promptTokens || 0, 107 | totalTokens: usage?.totalTokens || 0, 108 | tokensPerSecond: 109 | duration > BigInt(0) ? (usage?.completionTokens || 0) / (Number(duration) / 1e9) : 0, // fractional seconds: truncating to whole seconds divided by zero for sub-second requests 110 | }; 111 | this.#llm.emit(LLMEvent.METRICS_COLLECTED, metrics); 112 | } 113 | 114 | /** List of called functions from this stream. */ 115 | get functionCalls(): FunctionCallInfo[] { 116 | return this._functionCalls; 117 | } 118 | 119 | /** The function context of this stream. */ 120 | get fncCtx(): FunctionContext | undefined { 121 | return this.#fncCtx; 122 | } 123 | 124 | /** The initial chat context of this stream. */ 125 | get chatCtx(): ChatContext { 126 | return this.#chatCtx; 127 | } 128 | 129 | /** Execute all deferred functions of this stream concurrently. */ 130 | executeFunctions(): FunctionCallInfo[] { 131 | this._functionCalls.forEach( 132 | (f) => 133 | (f.task = f.func.execute(f.params).then( 134 | (result) => ({ name: f.name, toolCallId: f.toolCallId, result }), 135 | (error) => ({ name: f.name, toolCallId: f.toolCallId, error }), 136 | )), 137 | ); 138 | return this._functionCalls; 139 | } 140 | 141 | next(): Promise> { 142 | return this.output.next(); 143 | } 144 | 145 | close() { 146 | this.output.close(); 147 | this.queue.close(); 148 | this.closed = true; 149 | } 150 | 151 | [Symbol.asyncIterator](): LLMStream { 152 | return this; 153 | } 154 | } 155 | -------------------------------------------------------------------------------- /agents/src/log.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import type { Logger } from 'pino'; 5 | import { pino } from 'pino'; 6 | 7 | /** @internal */ 8 | export type LoggerOptions = { 9 | pretty: boolean; 10 | level?: string; 11 | }; 12 | 13 | /** @internal */ 14 | export let loggerOptions: LoggerOptions; 15 | 16 | /** @internal */ 17 | let logger: Logger | undefined = undefined; 18 | 19 | /** @internal */ 20 | export const log = () => { 21 | if (!logger) { 22 | throw new TypeError('logger not initialized. did you forget to run initializeLogger()?'); 23 | } 24 | return logger; 25 | }; 26 | 27 | /** @internal */ 28 | export const initializeLogger = ({ pretty, level }: LoggerOptions) => { 29 | loggerOptions = { pretty, level }; 30 | logger = pino( 31 | pretty 32 | ? { 33 | transport: { 34 | target: 'pino-pretty', 35 | options: { 36 | colorize: true, 37 | }, 38 | }, 39 | } 40 | : {}, 41 | ); 42 | if (level) { 43 | logger.level = level; 44 | } 45 | }; 46 | -------------------------------------------------------------------------------- /agents/src/metrics/base.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export interface LLMMetrics { 6 | requestId: string; 7 | timestamp: number; 8 | ttft: number; 9 | duration: number; 10 | label: string; 11 | cancelled: boolean; 12 | completionTokens: number; 13 | promptTokens: number; 14 | totalTokens: number; 15 | tokensPerSecond: number; 16 | error?: Error; 17 | } 18 | 19 | export interface STTMetrics { 20 | requestId: string; 21 | timestamp: number; 22 | duration: number; 23 | label: string; 24 | audioDuration: number; 25 | streamed: boolean; 26 | error?: Error; 27 | } 28 | 29 | export interface TTSMetrics { 30 | requestId: string; 31 | timestamp: number; 32 | ttfb: number; 33 | duration: number; 34 | label: string; 35 | audioDuration: number; 36 | cancelled: boolean; 37 | charactersCount: number; 38 | streamed: boolean; 39 | error?: Error; 40 | } 41 | 42 | export interface VADMetrics { 43 | timestamp: number; 44 | idleTime: number; 45 | inferenceDurationTotal: number; 46 | inferenceCount: number; 47 | label: string; 48 | } 49 | 50 | export interface PipelineEOUMetrics { 51 | /** 52 | * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics 53 | */ 54 | sequenceId: string; 55 | /** Timestamp of when the event was recorded */ 56 | timestamp: number; 57 | /** Amount of time between the end of speech from VAD and the decision to end the user's turn */ 58 | endOfUtteranceDelay: number; 59 | /** 60 | * Time taken to obtain the transcript after the end of the user's speech. 61 | * 62 | * @remarks 63 | * May be 0 if the transcript was already available. 
64 | */ 65 | transcriptionDelay: number; 66 | } 67 | 68 | export interface PipelineLLMMetrics extends LLMMetrics { 69 | /** 70 | * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics 71 | */ 72 | sequenceId: string; 73 | } 74 | 75 | export interface PipelineTTSMetrics extends TTSMetrics { 76 | /** 77 | * Unique identifier shared across different metrics to combine related STT, LLM, and TTS metrics 78 | */ 79 | sequenceId: string; 80 | } 81 | 82 | export type PipelineSTTMetrics = STTMetrics; 83 | export type PipelineVADMetrics = VADMetrics; 84 | 85 | export class MultimodalLLMError extends Error { 86 | type?: string; 87 | reason?: string; 88 | code?: string; 89 | constructor({ 90 | type, 91 | reason, 92 | code, 93 | message, 94 | }: { type?: string; reason?: string; code?: string; message?: string } = {}) { 95 | super(message); 96 | this.type = type; 97 | this.reason = reason; 98 | this.code = code; 99 | } 100 | } 101 | 102 | export interface MultimodalLLMMetrics extends LLMMetrics { 103 | inputTokenDetails: { 104 | cachedTokens: number; 105 | textTokens: number; 106 | audioTokens: number; 107 | }; 108 | outputTokenDetails: { 109 | textTokens: number; 110 | audioTokens: number; 111 | }; 112 | } 113 | 114 | export type AgentMetrics = 115 | | STTMetrics 116 | | LLMMetrics 117 | | TTSMetrics 118 | | VADMetrics 119 | | PipelineSTTMetrics 120 | | PipelineEOUMetrics 121 | | PipelineLLMMetrics 122 | | PipelineTTSMetrics 123 | | PipelineVADMetrics 124 | | MultimodalLLMMetrics; 125 | -------------------------------------------------------------------------------- /agents/src/metrics/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export type { 6 | AgentMetrics, 7 | STTMetrics, 8 | LLMMetrics, 9 | TTSMetrics, 10 | VADMetrics, 11 | PipelineSTTMetrics, 12 | PipelineEOUMetrics, 13 | PipelineLLMMetrics, 14 | PipelineTTSMetrics, 15 | PipelineVADMetrics, 16 | MultimodalLLMMetrics, 17 | } from './base.js'; 18 | export { MultimodalLLMError } from './base.js'; 19 | export { type UsageSummary, UsageCollector } from './usage_collector.js'; 20 | export { logMetrics } from './utils.js'; 21 | -------------------------------------------------------------------------------- /agents/src/metrics/usage_collector.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import type { AgentMetrics } from './base.js'; 5 | import { isLLMMetrics, isSTTMetrics, isTTSMetrics } from './utils.js'; 6 | 7 | export interface UsageSummary { 8 | llmPromptTokens: number; 9 | llmCompletionTokens: number; 10 | ttsCharactersCount: number; 11 | sttAudioDuration: number; 12 | } 13 | 14 | export class UsageCollector { 15 | #summary: UsageSummary; 16 | 17 | constructor() { 18 | this.#summary = { 19 | llmPromptTokens: 0, 20 | llmCompletionTokens: 0, 21 | ttsCharactersCount: 0, 22 | sttAudioDuration: 0, 23 | }; 24 | } 25 | 26 | collect(metrics: AgentMetrics) { 27 | if (isLLMMetrics(metrics)) { 28 | this.#summary.llmPromptTokens += metrics.promptTokens; 29 | this.#summary.llmCompletionTokens += metrics.completionTokens; 30 | } else if (isTTSMetrics(metrics)) { 31 | this.#summary.ttsCharactersCount += metrics.charactersCount; 32 | } else if (isSTTMetrics(metrics)) { 33 | this.#summary.sttAudioDuration += metrics.audioDuration; 34 | } 35 | } 36 | 37 | get summary(): UsageSummary { 38 | return { ...this.#summary }; 39 | } 40 | } 41 | -------------------------------------------------------------------------------- /agents/src/metrics/utils.ts: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { log } from '../log.js'; 5 | import type { 6 | AgentMetrics, 7 | LLMMetrics, 8 | PipelineEOUMetrics, 9 | PipelineLLMMetrics, 10 | PipelineTTSMetrics, 11 | STTMetrics, 12 | TTSMetrics, 13 | VADMetrics, 14 | } from './base.js'; 15 | 16 | export const logMetrics = (metrics: AgentMetrics) => { 17 | const logger = log(); 18 | if (isPipelineLLMMetrics(metrics)) { 19 | logger 20 | .child({ 21 | sequenceId: metrics.sequenceId, 22 | ttft: metrics.ttft, 23 | inputTokens: metrics.promptTokens, 24 | outputTokens: metrics.completionTokens, 25 | tokensPerSecond: metrics.tokensPerSecond, 26 | }) 27 | .info('Pipeline LLM metrics'); 28 | } else if (isLLMMetrics(metrics)) { 29 | logger 30 | .child({ 31 | ttft: metrics.ttft, 32 | inputTokens: metrics.promptTokens, 33 | outputTokens: metrics.completionTokens, 34 | tokensPerSecond: metrics.tokensPerSecond, 35 | }) 36 | .info('LLM metrics'); 37 | } else if (isPipelineTTSMetrics(metrics)) { 38 | logger 39 | .child({ 40 | sequenceId: metrics.sequenceId, 41 | ttfb: metrics.ttfb, 42 | audioDuration: metrics.audioDuration, 43 | }) 44 | .info('Pipeline TTS metrics'); 45 | } else if (isTTSMetrics(metrics)) { 46 | logger 47 | .child({ 48 | ttfb: metrics.ttfb, 49 | audioDuration: metrics.audioDuration, 50 | }) 51 | .info('TTS metrics'); 52 | } else if (isPipelineEOUMetrics(metrics)) { 53 | logger 54 | .child({ 55 | sequenceId: metrics.sequenceId, 56 | endOfUtteranceDelay: metrics.endOfUtteranceDelay, 57 | transcriptionDelay: metrics.transcriptionDelay, 58 | }) 59 | .info('Pipeline EOU metrics'); 60 | } else if (isSTTMetrics(metrics)) { 61 | logger 62 | .child({ 63 | audioDuration: metrics.audioDuration, 64 | }) 65 | .info('STT metrics'); 66 | } 67 | }; 68 | 69 | export const isLLMMetrics = (metrics: AgentMetrics): metrics is LLMMetrics => { 70 | return 
typeof (metrics as LLMMetrics).ttft === 'number'; // presence check: a truthiness test would reject a legitimate 0 71 | }; 72 | 73 | export const isPipelineLLMMetrics = (metrics: AgentMetrics): metrics is PipelineLLMMetrics => { 74 | return isLLMMetrics(metrics) && !!(metrics as PipelineLLMMetrics).sequenceId; 75 | }; 76 | 77 | export const isVADMetrics = (metrics: AgentMetrics): metrics is VADMetrics => { 78 | return typeof (metrics as VADMetrics).inferenceCount === 'number'; // 0 inferences is still a VAD metric 79 | }; 80 | 81 | export const isPipelineEOUMetrics = (metrics: AgentMetrics): metrics is PipelineEOUMetrics => { 82 | return typeof (metrics as PipelineEOUMetrics).endOfUtteranceDelay === 'number'; // a 0 delay is still an EOU metric 83 | }; 84 | 85 | export const isTTSMetrics = (metrics: AgentMetrics): metrics is TTSMetrics => { 86 | return typeof (metrics as TTSMetrics).ttfb === 'number'; // a 0 ttfb is still a TTS metric 87 | }; 88 | 89 | export const isPipelineTTSMetrics = (metrics: AgentMetrics): metrics is PipelineTTSMetrics => { 90 | return isTTSMetrics(metrics) && !!(metrics as PipelineTTSMetrics).sequenceId; 91 | }; 92 | 93 | export const isSTTMetrics = (metrics: AgentMetrics): metrics is STTMetrics => { 94 | return !( 95 | isLLMMetrics(metrics) || 96 | isVADMetrics(metrics) || 97 | isPipelineEOUMetrics(metrics) || 98 | isTTSMetrics(metrics) 99 | ); 100 | }; 101 | -------------------------------------------------------------------------------- /agents/src/multimodal/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | export * from './multimodal_agent.js'; 5 | export * from './agent_playout.js'; 6 | -------------------------------------------------------------------------------- /agents/src/pipeline/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export { 6 | type AgentState, 7 | type BeforeTTSCallback, 8 | type BeforeLLMCallback, 9 | type VPACallbacks, 10 | type AgentTranscriptionOptions, 11 | type VPAOptions, 12 | VPAEvent, 13 | VoicePipelineAgent, 14 | AgentCallContext, 15 | } from './pipeline_agent.js'; 16 | -------------------------------------------------------------------------------- /agents/src/pipeline/speech_handle.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { randomUUID } from 'crypto'; 5 | import type { ChatMessage, LLMStream } from '../llm/index.js'; 6 | import { AsyncIterableQueue, Future } from '../utils.js'; 7 | import type { SynthesisHandle } from './agent_output.js'; 8 | 9 | export class SpeechHandle { 10 | #id: string; 11 | #allowInterruptions: boolean; 12 | #addToChatCtx: boolean; 13 | #isReply: boolean; 14 | #userQuestion: string; 15 | #userCommitted = false; 16 | #initFut = new Future(); 17 | #doneFut = new Future(); 18 | #speechCommitted = false; 19 | #source?: string | LLMStream | AsyncIterable; 20 | #synthesisHandle?: SynthesisHandle; 21 | #initialized = false; 22 | #fncNestedDepth: number; 23 | #fncExtraToolsMesages?: ChatMessage[]; 24 | #nestedSpeechHandles: SpeechHandle[] = []; 25 | #nestedSpeechChanged = new AsyncIterableQueue(); 26 | #nestedSpeechFinished = false; 27 | 28 | constructor( 29 | id: string, 30 | allowInterruptions: boolean, 31 | addToChatCtx: boolean, 32 | isReply: boolean, 33 | userQuestion: string, 34 | fncNestedDepth = 0, 35 | extraToolsMessages: ChatMessage[] | undefined = undefined, 36 | ) { 37 | this.#id = id; 38 | this.#allowInterruptions = allowInterruptions; 39 | this.#addToChatCtx = addToChatCtx; 40 | this.#isReply = isReply; 41 | this.#userQuestion = userQuestion; 42 | this.#fncNestedDepth = fncNestedDepth; 43 | this.#fncExtraToolsMesages = 
extraToolsMessages; 44 | } 45 | 46 | static createAssistantReply( 47 | allowInterruptions: boolean, 48 | addToChatCtx: boolean, 49 | userQuestion: string, 50 | ): SpeechHandle { 51 | return new SpeechHandle(randomUUID(), allowInterruptions, addToChatCtx, true, userQuestion); 52 | } 53 | 54 | static createAssistantSpeech(allowInterruptions: boolean, addToChatCtx: boolean): SpeechHandle { 55 | return new SpeechHandle(randomUUID(), allowInterruptions, addToChatCtx, false, ''); 56 | } 57 | 58 | static createToolSpeech( 59 | allowInterruptions: boolean, 60 | addToChatCtx: boolean, 61 | fncNestedDepth: number, 62 | extraToolsMessages: ChatMessage[], 63 | ): SpeechHandle { 64 | return new SpeechHandle( 65 | randomUUID(), 66 | allowInterruptions, 67 | addToChatCtx, 68 | false, 69 | '', 70 | fncNestedDepth, 71 | extraToolsMessages, 72 | ); 73 | } 74 | 75 | async waitForInitialization() { 76 | await this.#initFut.await; 77 | } 78 | 79 | initialize(source: string | LLMStream | AsyncIterable, synthesisHandle: SynthesisHandle) { 80 | if (this.interrupted) { 81 | throw new Error('speech was interrupted'); 82 | } 83 | 84 | this.#source = source; 85 | this.#synthesisHandle = synthesisHandle; 86 | this.#initialized = true; 87 | this.#initFut.resolve(); 88 | } 89 | 90 | markUserCommitted() { 91 | this.#userCommitted = true; 92 | } 93 | 94 | markSpeechCommitted() { 95 | this.#speechCommitted = true; 96 | } 97 | 98 | get userCommitted(): boolean { 99 | return this.#userCommitted; 100 | } 101 | 102 | get speechCommitted(): boolean { 103 | return this.#speechCommitted; 104 | } 105 | 106 | get id(): string { 107 | return this.#id; 108 | } 109 | 110 | get allowInterruptions(): boolean { 111 | return this.#allowInterruptions; 112 | } 113 | 114 | get addToChatCtx(): boolean { 115 | return this.#addToChatCtx; 116 | } 117 | 118 | get source(): string | LLMStream | AsyncIterable { 119 | if (!this.#source) { 120 | throw new Error('speech not initialized'); 121 | } 122 | return this.#source; 123 
| } 124 | 125 | get synthesisHandle(): SynthesisHandle { 126 | if (!this.#synthesisHandle) { 127 | throw new Error('speech not initialized'); 128 | } 129 | return this.#synthesisHandle; 130 | } 131 | 132 | set synthesisHandle(handle: SynthesisHandle) { 133 | this.#synthesisHandle = handle; 134 | } 135 | 136 | get initialized(): boolean { 137 | return this.#initialized; 138 | } 139 | 140 | get isReply(): boolean { 141 | return this.#isReply; 142 | } 143 | 144 | get userQuestion(): string { 145 | return this.#userQuestion; 146 | } 147 | 148 | get interrupted(): boolean { 149 | return !!this.#synthesisHandle?.interrupted; 150 | } 151 | 152 | get fncNestedDepth(): number { 153 | return this.#fncNestedDepth; 154 | } 155 | 156 | get extraToolsMessages(): ChatMessage[] | undefined { 157 | return this.#fncExtraToolsMesages; 158 | } 159 | 160 | addNestedSpeech(handle: SpeechHandle) { 161 | this.#nestedSpeechHandles.push(handle); 162 | this.#nestedSpeechChanged.put(); 163 | } 164 | 165 | get nestedSpeechHandles(): SpeechHandle[] { 166 | return this.#nestedSpeechHandles; 167 | } 168 | 169 | async nestedSpeechChanged() { 170 | await this.#nestedSpeechChanged.next(); 171 | } 172 | 173 | get nestedSpeechFinished(): boolean { 174 | return this.#nestedSpeechFinished; 175 | } 176 | 177 | markNestedSpeechFinished() { 178 | this.#nestedSpeechFinished = true; 179 | } 180 | 181 | join() { 182 | return this.#doneFut.await; 183 | } 184 | 185 | setDone() { 186 | this.#doneFut.resolve(); 187 | } 188 | 189 | interrupt() { 190 | if (!this.#allowInterruptions) { 191 | throw new Error('interruptions are not allowed'); 192 | } 193 | this.cancel(); 194 | } 195 | 196 | cancel() { 197 | this.#initFut.reject(new Error()); 198 | this.#nestedSpeechChanged.close(); 199 | this.#synthesisHandle?.interrupt(); 200 | } 201 | } 202 | -------------------------------------------------------------------------------- /agents/src/plugin.ts: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export abstract class Plugin { 6 | registeredPlugins: Plugin[] = []; 7 | #title: string; 8 | #version: string; 9 | 10 | constructor(title: string, version: string) { 11 | this.#title = title; 12 | this.#version = version; 13 | } 14 | 15 | public static registerPlugins(plugin: Plugin) { 16 | plugin.registeredPlugins.push(plugin); 17 | } 18 | 19 | abstract downloadFiles(): void; 20 | 21 | get title(): string { 22 | return this.#title; 23 | } 24 | 25 | get version(): string { 26 | return this.#version; 27 | } 28 | } 29 | -------------------------------------------------------------------------------- /agents/src/stt/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export { 6 | type SpeechEvent, 7 | type SpeechData, 8 | type STTCapabilities, 9 | type RecognitionUsage, 10 | type STTCallbacks, 11 | SpeechEventType, 12 | STT, 13 | SpeechStream, 14 | } from './stt.js'; 15 | export { StreamAdapter, StreamAdapterWrapper } from './stream_adapter.js'; 16 | -------------------------------------------------------------------------------- /agents/src/stt/stream_adapter.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import type { AudioFrame } from '@livekit/rtc-node'; 5 | import { log } from '../log.js'; 6 | import type { VAD, VADStream } from '../vad.js'; 7 | import { VADEventType } from '../vad.js'; 8 | import type { SpeechEvent } from './stt.js'; 9 | import { STT, SpeechEventType, SpeechStream } from './stt.js'; 10 | 11 | export class StreamAdapter extends STT { 12 | #stt: STT; 13 | #vad: VAD; 14 | label: string; 15 | 16 | constructor(stt: STT, vad: VAD) { 17 | super({ streaming: true, interimResults: false }); 18 | this.#stt = stt; 19 | this.#vad = vad; 20 | this.label = `stt.StreamAdapter<${this.#stt.label}>`; 21 | 22 | this.#stt.on(SpeechEventType.METRICS_COLLECTED, (metrics) => { 23 | this.emit(SpeechEventType.METRICS_COLLECTED, metrics); 24 | }); 25 | } 26 | 27 | _recognize(frame: AudioFrame): Promise { 28 | return this.#stt.recognize(frame); 29 | } 30 | 31 | stream(): StreamAdapterWrapper { 32 | return new StreamAdapterWrapper(this.#stt, this.#vad); 33 | } 34 | } 35 | 36 | export class StreamAdapterWrapper extends SpeechStream { 37 | #stt: STT; 38 | #vadStream: VADStream; 39 | label: string; 40 | 41 | constructor(stt: STT, vad: VAD) { 42 | super(stt); 43 | this.#stt = stt; 44 | this.#vadStream = vad.stream(); 45 | this.label = `stt.StreamAdapterWrapper<${this.#stt.label}>`; 46 | 47 | this.#run(); 48 | } 49 | 50 | async monitorMetrics() { 51 | return; // do nothing 52 | } 53 | 54 | async #run() { 55 | const forwardInput = async () => { 56 | for await (const input of this.input) { 57 | if (input === SpeechStream.FLUSH_SENTINEL) { 58 | this.#vadStream.flush(); 59 | } else { 60 | this.#vadStream.pushFrame(input); 61 | } 62 | } 63 | this.#vadStream.endInput(); 64 | }; 65 | 66 | const recognize = async () => { 67 | for await (const ev of this.#vadStream) { 68 | switch (ev.type) { 69 | case VADEventType.START_OF_SPEECH: 70 | this.output.put({ type: SpeechEventType.START_OF_SPEECH }); 71 | break; 72 | case 
VADEventType.END_OF_SPEECH: 73 | this.output.put({ type: SpeechEventType.END_OF_SPEECH }); 74 | 75 | try { 76 | const event = await this.#stt.recognize(ev.frames); 77 | if (!event.alternatives![0].text) { 78 | continue; 79 | } 80 | 81 | this.output.put(event); 82 | break; 83 | } catch (error) { 84 | let logger = log(); 85 | if (error instanceof Error) { 86 | logger = logger.child({ error: error.message }); 87 | } else { 88 | logger = logger.child({ error }); 89 | } 90 | logger.error(`${this.label}: provider recognize task failed`); 91 | continue; 92 | } 93 | } 94 | } 95 | }; 96 | 97 | Promise.all([forwardInput(), recognize()]); 98 | } 99 | } 100 | -------------------------------------------------------------------------------- /agents/src/tokenize/basic/basic.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { BufferedSentenceStream, BufferedWordStream } from '../token_stream.js'; 5 | import * as tokenizer from '../tokenizer.js'; 6 | import { hyphenator } from './hyphenator.js'; 7 | import { splitParagraphs } from './paragraph.js'; 8 | import { splitSentences } from './sentence.js'; 9 | import { splitWords } from './word.js'; 10 | 11 | interface TokenizerOptions { 12 | language: string; 13 | minSentenceLength: number; 14 | streamContextLength: number; 15 | } 16 | 17 | export class SentenceTokenizer extends tokenizer.SentenceTokenizer { 18 | #config: TokenizerOptions; 19 | 20 | constructor(language = 'en-US', minSentenceLength = 20, streamContextLength = 10) { 21 | super(); 22 | this.#config = { 23 | language, 24 | minSentenceLength, 25 | streamContextLength, 26 | }; 27 | } 28 | 29 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 30 | tokenize(text: string, language?: string): string[] { 31 | return splitSentences(text, this.#config.minSentenceLength).map((tok) => tok[0]); 32 | } 33 | 34 | // 
eslint-disable-next-line @typescript-eslint/no-unused-vars 35 | stream(language?: string): tokenizer.SentenceStream { 36 | return new BufferedSentenceStream( 37 | (text: string) => splitSentences(text, this.#config.minSentenceLength), 38 | this.#config.minSentenceLength, 39 | this.#config.streamContextLength, 40 | ); 41 | } 42 | } 43 | 44 | export class WordTokenizer extends tokenizer.WordTokenizer { 45 | #ignorePunctuation: boolean; 46 | 47 | constructor(ignorePunctuation = true) { 48 | super(); 49 | this.#ignorePunctuation = ignorePunctuation; 50 | } 51 | 52 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 53 | tokenize(text: string, language?: string): string[] { 54 | return splitWords(text, this.#ignorePunctuation).map((tok) => tok[0]); 55 | } 56 | 57 | // eslint-disable-next-line @typescript-eslint/no-unused-vars 58 | stream(language?: string): tokenizer.WordStream { 59 | return new BufferedWordStream( 60 | (text: string) => splitWords(text, this.#ignorePunctuation), 61 | 1, 62 | 1, 63 | ); 64 | } 65 | } 66 | 67 | export const hyphenateWord = (word: string): string[] => { 68 | return hyphenator.hyphenateWord(word); 69 | }; 70 | 71 | export { splitWords }; 72 | 73 | export const tokenizeParagraphs = (text: string): string[] => { 74 | return splitParagraphs(text).map((tok) => tok[0]); 75 | }; 76 | -------------------------------------------------------------------------------- /agents/src/tokenize/basic/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export { 6 | SentenceTokenizer, 7 | WordTokenizer, 8 | tokenizeParagraphs, 9 | hyphenateWord, 10 | splitWords, 11 | } from './basic.js'; 12 | -------------------------------------------------------------------------------- /agents/src/tokenize/basic/paragraph.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | /** 6 | * Split the text into paragraphs. 7 | */ 8 | export const splitParagraphs = (text: string): [string, number, number][] => { 9 | const re = /\n\s*\n/g; 10 | const splits = Array.from(text.matchAll(re)); 11 | 12 | const paragraphs: [string, number, number][] = []; 13 | let start = 0; 14 | 15 | // no splits (single paragraph) 16 | if (splits.length === 0) { 17 | const stripped = text.trim(); 18 | if (!stripped) return paragraphs; 19 | 20 | const start = text.indexOf(stripped); 21 | return [[stripped, start, start + stripped.length]]; 22 | } 23 | 24 | for (const split of splits) { 25 | const end = split.index!; 26 | const paragraph = text.slice(start, end).trim(); 27 | if (paragraph) { 28 | const paragraphStart = start + text.slice(start, end).indexOf(paragraph); 29 | const paragraphEnd = paragraphStart + paragraph.length; 30 | paragraphs.push([paragraph, paragraphStart, paragraphEnd]); 31 | } 32 | start = end + split[0].length; 33 | } 34 | 35 | const lastParagraph = text.slice(start).trim(); 36 | if (lastParagraph) { 37 | const paragraphStart = start + text.slice(start).indexOf(lastParagraph); 38 | const paragraphEnd = paragraphStart + lastParagraph.length; 39 | paragraphs.push([lastParagraph, paragraphStart, paragraphEnd]); 40 | } 41 | 42 | return paragraphs; 43 | }; 44 | -------------------------------------------------------------------------------- /agents/src/tokenize/basic/sentence.ts: 
/**
 * Split the text into sentences.
 *
 * The text is rewritten with two sentinel tokens: `<prd>` stands in for a period that
 * must NOT end a sentence (titles, abbreviations, decimals, domains, ellipses) and
 * `<stop>` marks a sentence boundary. After all rules ran, `<prd>` is turned back into
 * `.` and the text is cut at every `<stop>`.
 *
 * Every rewrite preserves the text length, so the returned offsets line up with the
 * input text.
 *
 * @param text - text to split; newlines are treated as spaces
 * @param minLength - consecutive sentences are merged until the merged text is longer than this
 * @returns tuples of `[sentence, start, end]`
 */
export const splitSentences = (text: string, minLength = 20): [string, number, number][] => {
  const PRD = '<prd>';
  const STOP = '<stop>';

  const alphabets = /([A-Za-z])/g;
  const prefixes = /(Mr|St|Mrs|Ms|Dr)[.]/g;
  const suffixes = /(Inc|Ltd|Jr|Sr|Co)/g;
  const starters =
    /(Mr|Mrs|Ms|Dr|Prof|Capt|Cpt|Lt|He\s|She\s|It\s|They\s|Their\s|Our\s|We\s|But\s|However\s|That\s|This\s|Wherever)/g;
  const acronyms = /([A-Z][.][A-Z][.](?:[A-Z][.])?)/g;
  const websites = /[.](com|net|org|io|gov|edu|me)/g;
  const digits = /([0-9])/g;
  const dots = /\.{2,}/g;

  text = text.replace(/\n/g, ' ');
  text = text.replace(prefixes, `$1${PRD}`);
  // `websites` has a single capture group, so the TLD is `$1`.
  text = text.replace(websites, `${PRD}$1`);
  text = text.replace(new RegExp(`${digits.source}[.]${digits.source}`, 'g'), `$1${PRD}$2`);
  text = text.replace(dots, (match) => PRD.repeat(match.length));
  text = text.replace(/Ph\.D\./g, `Ph${PRD}D${PRD}`);
  text = text.replace(new RegExp(`\\s${alphabets.source}[.] `, 'g'), ` $1${PRD} `);
  text = text.replace(
    new RegExp(`${acronyms.source} ${starters.source}`, 'g'),
    `$1${STOP} $2`,
  );
  text = text.replace(
    new RegExp(`${alphabets.source}[.]${alphabets.source}[.]${alphabets.source}[.]`, 'g'),
    `$1${PRD}$2${PRD}$3${PRD}`,
  );
  text = text.replace(
    new RegExp(`${alphabets.source}[.]${alphabets.source}[.]`, 'g'),
    `$1${PRD}$2${PRD}`,
  );
  // The three patterns below consume a leading space; put it back so words are not
  // glued together and offsets stay aligned with the input.
  text = text.replace(
    new RegExp(` ${suffixes.source}[.] ${starters.source}`, 'g'),
    ` $1${STOP} $2`,
  );
  text = text.replace(new RegExp(` ${suffixes.source}[.]`, 'g'), ` $1${PRD}`);
  text = text.replace(new RegExp(` ${alphabets.source}[.]`, 'g'), ` $1${PRD}`);
  // Move terminal punctuation outside closing quotes so the quote stays with its sentence.
  text = text.replace(/\.”/g, '”.');
  text = text.replace(/\."/g, '".');
  text = text.replace(/!"/g, '"!');
  text = text.replace(/\?"/g, '"?');
  text = text.replace(/\./g, `.${STOP}`);
  text = text.replace(/\?/g, `?${STOP}`);
  text = text.replace(/!/g, `!${STOP}`);
  text = text.split(PRD).join('.');

  const split = text.split(STOP);
  text = split.join('');

  const sentences: [string, number, number][] = [];
  let buf = '';
  let start = 0;
  let end = 0;
  for (const match of split) {
    const sentence = match.trim();
    if (!sentence) continue;

    buf += ' ' + sentence;
    end += match.length;
    if (buf.length > minLength) {
      // buf always starts with the joining space; drop it.
      sentences.push([buf.slice(1), start, end]);
      start = end;
      buf = '';
    }
  }

  if (buf) {
    // NOTE(review): this trailing end offset is `text.length - 1` while the offsets above
    // are exclusive ends — kept as-is for compatibility; confirm which one callers expect.
    sentences.push([buf.slice(1), start, text.length - 1]);
  }

  return sentences;
};
/**
 * Split the text into words.
 *
 * A word is a maximal run of non-whitespace characters. Each result is
 * `[word, start, end]` where `start`/`end` are the offsets of the raw run in `text`
 * (`end` exclusive); the offsets always describe the raw run, even when punctuation
 * has been removed from `word`.
 *
 * @param text - text to split
 * @param ignorePunctuation - when true, every character listed in `PUNCTUATIONS` is
 *   removed from the returned words
 */
export const splitWords = (text: string, ignorePunctuation = true): [string, number, number][] => {
  // Built once per call. Characters that are special inside a character class
  // (`\`, `]`, `^`, `-`) are escaped; unescaped, the `\` entry would escape the
  // following `]` and backslashes would never be stripped.
  const punctuation = ignorePunctuation
    ? new RegExp(
        `[${PUNCTUATIONS.map((p: string) => p.replace(/[\\\]^-]/g, '\\$&')).join('')}]`,
        'g',
      )
    : undefined;

  const words: [string, number, number][] = [];
  for (const match of text.matchAll(/\S+/g)) {
    const raw = match[0];
    const start = match.index ?? 0;
    const end = start + raw.length;
    words.push([punctuation ? raw.replace(punctuation, '') : raw, start, end]);
  }

  return words;
};
/**
 * Incremental tokenizer: text is pushed in arbitrary chunks and complete tokens are
 * emitted on an async queue as soon as they can no longer change.
 *
 * The tokenize function is re-run over the pending input; every token except the last
 * one is considered complete (the last one may still grow with the next push).
 */
export class BufferedTokenStream implements AsyncIterableIterator<TokenData> {
  protected queue = new AsyncIterableQueue<TokenData>();
  protected closed = false;

  #func: TokenizeFunc;
  #minTokenLength: number;
  #minContextLength: number;
  #inBuf = '';
  #outBuf = '';
  #currentSegmentId: string;

  /**
   * @param func - tokenizer returning either plain strings or `[text, start, end]` tuples
   * @param minTokenLength - tokens are merged until the emitted text reaches this length
   * @param minContextLength - nothing is tokenized until this much input is buffered
   */
  constructor(func: TokenizeFunc, minTokenLength: number, minContextLength: number) {
    this.#func = func;
    this.#minTokenLength = minTokenLength;
    this.#minContextLength = minContextLength;

    this.#currentSegmentId = randomUUID();
  }

  /** Push a string of text into the token stream */
  pushText(text: string) {
    if (this.closed) {
      throw new Error('Stream is closed');
    }

    this.#inBuf += text;
    if (this.#inBuf.length < this.#minContextLength) return;

    while (true) {
      const tokens = this.#func(this.#inBuf);
      // The last token may be incomplete; keep it buffered.
      if (tokens.length <= 1) break;

      if (this.#outBuf) this.#outBuf += ' ';

      const tok = tokens[0]!;
      this.#outBuf += typeof tok === 'string' ? tok : tok[0];

      if (this.#outBuf.length >= this.#minTokenLength) {
        this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });
        this.#outBuf = '';
      }

      if (typeof tok === 'string') {
        // No offsets available: locate the token text and drop everything up to its end.
        this.#inBuf = this.#inBuf
          .slice(Math.max(0, this.#inBuf.indexOf(tok)) + tok.length)
          .trimStart();
      } else {
        // tok[2] is the end offset of the token within the input buffer.
        this.#inBuf = this.#inBuf.slice(tok[2]);
      }
    }
  }

  /** Flush the stream, causing it to process all pending text */
  flush() {
    if (this.closed) {
      throw new Error('Stream is closed');
    }

    if (this.#inBuf || this.#outBuf) {
      const tokens = this.#func(this.#inBuf);
      // An empty array is truthy, so check the length: otherwise a trailing space
      // is appended to the pending output when nothing is left to tokenize.
      if (tokens.length > 0) {
        if (this.#outBuf) this.#outBuf += ' ';

        const texts: string[] = [];
        for (const tok of tokens) {
          texts.push(typeof tok === 'string' ? tok : tok[0]);
        }
        this.#outBuf += texts.join(' ');
      }

      if (this.#outBuf) {
        this.queue.put({ token: this.#outBuf, segmentId: this.#currentSegmentId });
      }

      // Everything pushed after a flush belongs to a new segment.
      this.#currentSegmentId = randomUUID();
    }

    this.#inBuf = '';
    this.#outBuf = '';
  }

  /** Mark the input as ended and forbid additional pushes */
  endInput() {
    if (this.closed) {
      throw new Error('Stream is closed');
    }
    this.flush();
    this.close();
  }

  next(): Promise<IteratorResult<TokenData>> {
    return this.queue.next();
  }

  /** Close both the input and output of the token stream */
  close() {
    this.queue.close();
    this.closed = true;
  }

  [Symbol.asyncIterator](): BufferedTokenStream {
    return this;
  }
}

/** {@link SentenceStream} that delegates all buffering to a {@link BufferedTokenStream}. */
export class BufferedSentenceStream extends SentenceStream {
  #stream: BufferedTokenStream;

  constructor(func: TokenizeFunc, minTokenLength: number, minContextLength: number) {
    super();
    this.#stream = new BufferedTokenStream(func, minTokenLength, minContextLength);
  }

  pushText(text: string) {
    this.#stream.pushText(text);
  }

  flush() {
    this.#stream.flush();
  }

  /**
   * Flush pending text and end the inner stream. Text is buffered in the inner stream,
   * not in the base-class input queue, so the inherited `endInput` would drop it.
   */
  endInput() {
    this.#stream.endInput();
  }

  close() {
    super.close();
    this.#stream.close();
  }

  next(): Promise<IteratorResult<TokenData>> {
    return this.#stream.next();
  }
}

/** {@link WordStream} that delegates all buffering to a {@link BufferedTokenStream}. */
export class BufferedWordStream extends WordStream {
  #stream: BufferedTokenStream;

  constructor(func: TokenizeFunc, minTokenLength: number, minContextLength: number) {
    super();
    this.#stream = new BufferedTokenStream(func, minTokenLength, minContextLength);
  }

  pushText(text: string) {
    this.#stream.pushText(text);
  }

  flush() {
    this.#stream.flush();
  }

  /** Flush pending text and end the inner stream. */
  endInput() {
    this.#stream.endInput();
  }

  close() {
    // Also close the base class so its `closed` getter reports the real state.
    super.close();
    this.#stream.close();
  }

  next(): Promise<IteratorResult<TokenData>> {
    return this.#stream.next();
  }
}
/**
 * Base class for streaming sentence tokenizers.
 *
 * Text and flush requests are placed on `input`; produced tokens are read from `queue`
 * through the async-iterator interface.
 */
export abstract class SentenceStream {
  protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
  protected input = new AsyncIterableQueue<string | typeof SentenceStream.FLUSH_SENTINEL>();
  protected queue = new AsyncIterableQueue<TokenData>();
  #closed = false;

  get closed(): boolean {
    return this.#closed;
  }

  /** Throw when the stream can no longer accept input. */
  #assertOpen() {
    if (this.input.closed) {
      throw new Error('Input is closed');
    }
    if (this.#closed) {
      throw new Error('Stream is closed');
    }
  }

  /** Push a string of text to the tokenizer */
  pushText(text: string) {
    this.#assertOpen();
    this.input.put(text);
  }

  /** Flush the tokenizer, causing it to process all pending text */
  flush() {
    this.#assertOpen();
    this.input.put(SentenceStream.FLUSH_SENTINEL);
  }

  /** Mark the input as ended and forbid additional pushes */
  endInput() {
    this.#assertOpen();
    this.input.close();
  }

  next(): Promise<IteratorResult<TokenData>> {
    return this.queue.next();
  }

  /** Close both the input and output of the tokenizer stream */
  close() {
    this.input.close();
    this.queue.close();
    this.#closed = true;
  }

  [Symbol.asyncIterator](): SentenceStream {
    return this;
  }
}
/**
 * Base class for streaming word tokenizers.
 *
 * Text and flush requests are placed on `input`; produced tokens are read from `queue`
 * through the async-iterator interface.
 */
export abstract class WordStream {
  protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
  protected input = new AsyncIterableQueue<string | typeof WordStream.FLUSH_SENTINEL>();
  protected queue = new AsyncIterableQueue<TokenData>();
  #closed = false;

  get closed(): boolean {
    return this.#closed;
  }

  /** Throw when the stream can no longer accept input. */
  #assertOpen() {
    if (this.input.closed) {
      throw new Error('Input is closed');
    }
    if (this.#closed) {
      throw new Error('Stream is closed');
    }
  }

  /** Push a string of text to the tokenizer */
  pushText(text: string) {
    this.#assertOpen();
    this.input.put(text);
  }

  /** Flush the tokenizer, causing it to process all pending text */
  flush() {
    this.#assertOpen();
    this.input.put(WordStream.FLUSH_SENTINEL);
  }

  /** Mark the input as ended and forbid additional pushes */
  endInput() {
    this.#assertOpen();
    this.input.close();
  }

  next(): Promise<IteratorResult<TokenData>> {
    return this.queue.next();
  }

  /** Close both the input and output of the tokenizer stream */
  close() {
    this.input.close();
    this.queue.close();
    this.#closed = true;
  }

  [Symbol.asyncIterator](): WordStream {
    return this;
  }
}
/**
 * Wraps a TTS so it can be used through the streaming interface: pushed text is cut
 * into sentences by the given tokenizer and each sentence is synthesized with the
 * wrapped TTS's `synthesize`.
 */
export class StreamAdapter extends TTS {
  #tts: TTS;
  #sentenceTokenizer: SentenceTokenizer;
  label: string;

  constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {
    super(tts.sampleRate, tts.numChannels, { streaming: true });
    this.#tts = tts;
    this.#sentenceTokenizer = sentenceTokenizer;
    this.label = `tts.StreamAdapter<${this.#tts.label}>`;

    // Re-emit the wrapped TTS's metrics from this adapter.
    this.#tts.on(TTSEvent.METRICS_COLLECTED, (metrics) => {
      this.emit(TTSEvent.METRICS_COLLECTED, metrics);
    });
  }

  synthesize(text: string): ChunkedStream {
    return this.#tts.synthesize(text);
  }

  stream(): StreamAdapterWrapper {
    return new StreamAdapterWrapper(this.#tts, this.#sentenceTokenizer);
  }
}

/** Stream returned by {@link StreamAdapter.stream}. */
export class StreamAdapterWrapper extends SynthesizeStream {
  #tts: TTS;
  #sentenceStream: SentenceStream;
  label: string;

  constructor(tts: TTS, sentenceTokenizer: SentenceTokenizer) {
    super(tts);
    this.#tts = tts;
    this.#sentenceStream = sentenceTokenizer.stream();
    this.label = `tts.StreamAdapterWrapper<${this.#tts.label}>`;

    this.#run();
  }

  async monitorMetrics() {
    return; // do nothing
  }

  /** Run the input-forwarding and synthesis loops until both have finished. */
  async #run() {
    // Feed pushed text / flush requests into the sentence tokenizer.
    const forwardInput = async () => {
      for await (const input of this.input) {
        if (input === SynthesizeStream.FLUSH_SENTINEL) {
          this.#sentenceStream.flush();
        } else {
          this.#sentenceStream.pushText(input);
        }
      }
      this.#sentenceStream.endInput();
      this.#sentenceStream.close();
    };

    // Synthesize every sentence and forward the audio to the output.
    const synthesize = async () => {
      for await (const ev of this.#sentenceStream) {
        for await (const audio of this.#tts.synthesize(ev.token)) {
          this.output.put(audio);
        }
      }
      this.output.put(SynthesizeStream.END_OF_STREAM);
    };

    // Awaited so this method only settles once both loops are done.
    await Promise.all([forwardInput(), synthesize()]);
  }
}
*/ 29 | silenceDuration: number; 30 | /** 31 | * List of audio frames associated with the speech. 32 | * 33 | * @remarks 34 | * - For `start_of_speech` events, this contains the audio chunks that triggered the detection. 35 | * - For `inference_done` events, this contains the audio chunks that were processed. 36 | * - For `end_of_speech` events, this contains the complete user speech. 37 | */ 38 | frames: AudioFrame[]; 39 | /** Probability that speech is present (only for `INFERENCE_DONE` events). */ 40 | probability: number; 41 | /** Time taken to perform the inference, in seconds (only for `INFERENCE_DONE` events). */ 42 | inferenceDuration: number; 43 | /** Indicates whether speech was detected in the frames. */ 44 | speaking: boolean; 45 | /** Threshold used to detect silence. */ 46 | rawAccumulatedSilence: number; 47 | /** Threshold used to detect speech. */ 48 | rawAccumulatedSpeech: number; 49 | } 50 | 51 | export interface VADCapabilities { 52 | updateInterval: number; 53 | } 54 | 55 | export type VADCallbacks = { 56 | [VADEventType.METRICS_COLLECTED]: (metrics: VADMetrics) => void; 57 | }; 58 | 59 | export abstract class VAD extends (EventEmitter as new () => TypedEmitter) { 60 | #capabilities: VADCapabilities; 61 | abstract label: string; 62 | 63 | constructor(capabilities: VADCapabilities) { 64 | super(); 65 | this.#capabilities = capabilities; 66 | } 67 | 68 | get capabilities(): VADCapabilities { 69 | return this.#capabilities; 70 | } 71 | 72 | /** 73 | * Returns a {@link VADStream} that can be used to push audio frames and receive VAD events. 
/**
 * Base class for VAD streams.
 *
 * Audio frames are placed on `input`; implementations publish events on `queue`.
 * `monitorMetrics` forwards every event from `queue` to `output` (which is what
 * iteration reads from) and emits metrics on the owning VAD along the way.
 */
export abstract class VADStream implements AsyncIterableIterator<VADEvent> {
  protected static readonly FLUSH_SENTINEL = Symbol('FLUSH_SENTINEL');
  protected input = new AsyncIterableQueue<AudioFrame | typeof VADStream.FLUSH_SENTINEL>();
  protected queue = new AsyncIterableQueue<VADEvent>();
  protected output = new AsyncIterableQueue<VADEvent>();
  protected closed = false;
  #vad: VAD;
  #lastActivityTime = BigInt(0);

  constructor(vad: VAD) {
    this.#vad = vad;
    this.monitorMetrics();
  }

  /**
   * Forward events to `output` and emit `METRICS_COLLECTED` once
   * `1 / updateInterval` inferences have been counted.
   */
  protected async monitorMetrics() {
    let inferenceDurationTotal = 0;
    let inferenceCount = 0;

    for await (const event of this.queue) {
      this.output.put(event);
      switch (event.type) {
        case VADEventType.INFERENCE_DONE:
          // Inference statistics are only carried by INFERENCE_DONE events.
          // NOTE(review): inferenceDuration is documented in seconds on VADEvent —
          // confirm the unit VADMetrics.inferenceDurationTotal expects.
          inferenceDurationTotal += event.inferenceDuration;
          inferenceCount++;
          if (inferenceCount >= 1 / this.#vad.capabilities.updateInterval) {
            this.#vad.emit(VADEventType.METRICS_COLLECTED, {
              timestamp: Date.now(),
              // hrtime is in nanoseconds; convert the idle span to whole milliseconds
              idleTime: Math.trunc(
                Number((process.hrtime.bigint() - this.#lastActivityTime) / BigInt(1000000)),
              ),
              inferenceDurationTotal,
              inferenceCount,
              label: this.#vad.label,
            });

            inferenceCount = 0;
            inferenceDurationTotal = 0;
          }
          break;
        case VADEventType.START_OF_SPEECH:
        case VADEventType.END_OF_SPEECH:
          this.#lastActivityTime = process.hrtime.bigint();
          break;
      }
    }
    this.output.close();
  }

  /** Push an audio frame to the VAD */
  pushFrame(frame: AudioFrame) {
    if (this.input.closed) {
      throw new Error('Input is closed');
    }
    if (this.closed) {
      throw new Error('Stream is closed');
    }
    this.input.put(frame);
  }

  /** Queue a flush request behind the frames pushed so far */
  flush() {
    if (this.input.closed) {
      throw new Error('Input is closed');
    }
    if (this.closed) {
      throw new Error('Stream is closed');
    }
    this.input.put(VADStream.FLUSH_SENTINEL);
  }

  /** Mark the input as ended and forbid additional pushes */
  endInput() {
    if (this.input.closed) {
      throw new Error('Input is closed');
    }
    if (this.closed) {
      throw new Error('Stream is closed');
    }
    this.input.close();
  }

  next(): Promise<IteratorResult<VADEvent>> {
    return this.output.next();
  }

  /** Close the input and both event queues */
  close() {
    this.input.close();
    this.queue.close();
    this.output.close();
    this.closed = true;
  }

  [Symbol.asyncIterator](): VADStream {
    return this;
  }
}
[#88](https://github.com/livekit/agents-js/pull/88) ([@bcherry](https://github.com/bcherry)) 8 | 9 | - Updated dependencies [[`56333dd89486a1a10157f57576447d3bb7cb83c3`](https://github.com/livekit/agents-js/commit/56333dd89486a1a10157f57576447d3bb7cb83c3), [`07b4d4b123955bd850a208471d651810e075f0af`](https://github.com/livekit/agents-js/commit/07b4d4b123955bd850a208471d651810e075f0af)]: 10 | - @livekit/agents@0.3.2 11 | - @livekit/agents-plugin-openai@0.3.2 12 | 13 | ## null 14 | 15 | ### Patch Changes 16 | 17 | - Updated dependencies [[`5ec3db8f645d5e45b673318816e2746c3f6ccb1b`](https://github.com/livekit/agents-js/commit/5ec3db8f645d5e45b673318816e2746c3f6ccb1b), [`d3db7cf19c696f611b5717ff8d510b2f910da712`](https://github.com/livekit/agents-js/commit/d3db7cf19c696f611b5717ff8d510b2f910da712), [`c0cce8a0f71cd8def7052917d8a6479e06178447`](https://github.com/livekit/agents-js/commit/c0cce8a0f71cd8def7052917d8a6479e06178447), [`e748aa4f7be76361c5fcafb03bdb760314b29a9f`](https://github.com/livekit/agents-js/commit/e748aa4f7be76361c5fcafb03bdb760314b29a9f), [`b35952ca243fecb087c898b670f5db0eaa1949bf`](https://github.com/livekit/agents-js/commit/b35952ca243fecb087c898b670f5db0eaa1949bf), [`4edacb8ba7dbbdd060dfedffe3116f1af4739b52`](https://github.com/livekit/agents-js/commit/4edacb8ba7dbbdd060dfedffe3116f1af4739b52)]: 18 | - @livekit/agents-plugin-openai@0.3.1 19 | - @livekit/agents@0.3.1 20 | 21 | ## null 22 | 23 | ### Minor Changes 24 | 25 | - Maximize self-import compatibility - [#69](https://github.com/livekit/agents-js/pull/69) ([@bcherry](https://github.com/bcherry)) 26 | 27 | - omniassistant overhaul - [#65](https://github.com/livekit/agents-js/pull/65) ([@nbsp](https://github.com/nbsp)) 28 | 29 | ### Patch Changes 30 | 31 | - update rtc-node to 0.9.0 - [#73](https://github.com/livekit/agents-js/pull/73) ([@nbsp](https://github.com/nbsp)) 32 | 33 | - Rename to MultimodalAgent, move to main package - [#74](https://github.com/livekit/agents-js/pull/74) 
([@bcherry](https://github.com/bcherry)) 34 | 35 | - Updated dependencies [[`4e6babac612c20b1a8d9121d39fe57902d22228f`](https://github.com/livekit/agents-js/commit/4e6babac612c20b1a8d9121d39fe57902d22228f), [`9cb2313f06f9d013ca3b08980a7ade1b6b43a04a`](https://github.com/livekit/agents-js/commit/9cb2313f06f9d013ca3b08980a7ade1b6b43a04a), [`08b9a329c05a6a1369de7682f555445f669fea79`](https://github.com/livekit/agents-js/commit/08b9a329c05a6a1369de7682f555445f669fea79), [`d703265a57c4491d7799936117a8a2b8ad527653`](https://github.com/livekit/agents-js/commit/d703265a57c4491d7799936117a8a2b8ad527653), [`5cbd46c715ded05107cd78492d85551c2ce924ae`](https://github.com/livekit/agents-js/commit/5cbd46c715ded05107cd78492d85551c2ce924ae), [`eee688907aafdef8ca2856929b8eb10ba72e8dee`](https://github.com/livekit/agents-js/commit/eee688907aafdef8ca2856929b8eb10ba72e8dee), [`9cb2313f06f9d013ca3b08980a7ade1b6b43a04a`](https://github.com/livekit/agents-js/commit/9cb2313f06f9d013ca3b08980a7ade1b6b43a04a), [`856ebe2294962f64b81c8f635bd762b513b2faac`](https://github.com/livekit/agents-js/commit/856ebe2294962f64b81c8f635bd762b513b2faac), [`c509b62972892ea3945403ef0cd50c2ece3fd4f2`](https://github.com/livekit/agents-js/commit/c509b62972892ea3945403ef0cd50c2ece3fd4f2), [`45cb43f41a5d53a048eef392bb81313ad5e95121`](https://github.com/livekit/agents-js/commit/45cb43f41a5d53a048eef392bb81313ad5e95121), [`eb7e73173c46dbbcee4e728299b8fe05fb8fdc01`](https://github.com/livekit/agents-js/commit/eb7e73173c46dbbcee4e728299b8fe05fb8fdc01)]: 36 | - @livekit/agents-plugin-openai@0.3.0 37 | - @livekit/agents@0.3.0 38 | 39 | ## null 40 | 41 | ### Minor Changes 42 | 43 | - bump underlying dependencies - [`be7160d39ea57239a51fbf6ad2cbea1342cc1889`](https://github.com/livekit/agents-js/commit/be7160d39ea57239a51fbf6ad2cbea1342cc1889) ([@bcherry](https://github.com/bcherry)) 44 | fix load calculation 45 | report worker status 46 | 47 | ### Patch Changes 48 | 49 | - Fix assistant startup process - 
[#36](https://github.com/livekit/agents-js/pull/36) ([@bcherry](https://github.com/bcherry)) 50 | 51 | - Send agent transcript progressively and handle interruptions - [#40](https://github.com/livekit/agents-js/pull/40) ([@bcherry](https://github.com/bcherry)) 52 | 53 | - Updated dependencies [[`1c8caf04c148dfa57af4e844b6538d97d6be652a`](https://github.com/livekit/agents-js/commit/1c8caf04c148dfa57af4e844b6538d97d6be652a), [`5923b1a796642bec4892f41545ea1be1c6b9fb36`](https://github.com/livekit/agents-js/commit/5923b1a796642bec4892f41545ea1be1c6b9fb36), [`ccff5ce34d071a0fb449da5ce77938e346679b1b`](https://github.com/livekit/agents-js/commit/ccff5ce34d071a0fb449da5ce77938e346679b1b), [`be7160d39ea57239a51fbf6ad2cbea1342cc1889`](https://github.com/livekit/agents-js/commit/be7160d39ea57239a51fbf6ad2cbea1342cc1889), [`5c320c88a04ffd8b7753696d4172a610fbe1bc2b`](https://github.com/livekit/agents-js/commit/5c320c88a04ffd8b7753696d4172a610fbe1bc2b), [`24a4f58a23d4a3aad8620fcccabdab5d2e1152c7`](https://github.com/livekit/agents-js/commit/24a4f58a23d4a3aad8620fcccabdab5d2e1152c7), [`1063d2a25c4a01022948699e673d267d04c1ec05`](https://github.com/livekit/agents-js/commit/1063d2a25c4a01022948699e673d267d04c1ec05), [`36c553a60fef7621b9c4232b5c79555b2f83aad8`](https://github.com/livekit/agents-js/commit/36c553a60fef7621b9c4232b5c79555b2f83aad8), [`7e6bb7fbf661e6c6aa012f6a362b84d542d2c84e`](https://github.com/livekit/agents-js/commit/7e6bb7fbf661e6c6aa012f6a362b84d542d2c84e)]: 54 | - @livekit/agents@0.2.0 55 | - @livekit/agents-plugin-openai@0.2.0 56 | -------------------------------------------------------------------------------- /examples/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "private": true, 3 | "name": "livekit-agents-examples", 4 | "type": "module", 5 | "scripts": { 6 | "build": "tsc", 7 | "clean": "rm -rf dist", 8 | "clean:build": "pnpm clean && pnpm build", 9 | "lint": "eslint -f unix \"src/**/*.ts\"", 10 | 
/**
 * Example multimodal agent: waits for a participant, starts a realtime model session
 * (Azure OpenAI when AZURE_OPENAI_ENDPOINT is set, OpenAI otherwise) with a `weather`
 * function, and asks the model to greet the user.
 */
export default defineAgent({
  entry: async (ctx: JobContext) => {
    await ctx.connect();

    console.log('waiting for participant');
    const participant = await ctx.waitForParticipant();
    console.log(`starting assistant example agent for ${participant.identity}`);

    let model: openai.realtime.RealtimeModel;

    if (process.env.AZURE_OPENAI_ENDPOINT) {
      model = openai.realtime.RealtimeModel.withAzure({
        baseURL: process.env.AZURE_OPENAI_ENDPOINT,
        azureDeployment: process.env.AZURE_OPENAI_DEPLOYMENT || '',
        apiKey: process.env.AZURE_OPENAI_API_KEY,
        entraToken: process.env.AZURE_OPENAI_ENTRA_TOKEN,
        instructions: 'You are a helpful assistant.',
      });
    } else {
      model = new openai.realtime.RealtimeModel({
        instructions: 'You are a helpful assistant.',
      });
    }

    const fncCtx: llm.FunctionContext = {
      weather: {
        description: 'Get the weather in a location',
        parameters: z.object({
          location: z.string().describe('The location to get the weather for'),
        }),
        execute: async ({ location }) => {
          console.debug(`executing weather function for ${location}`);
          // `location` comes from the model: encode it so characters such as `/`, `?`
          // or `#` cannot change the request path or query.
          const response = await fetch(
            `https://wttr.in/${encodeURIComponent(location)}?format=%C+%t`,
          );
          if (!response.ok) {
            throw new Error(`Weather API returned status: ${response.status}`);
          }
          const weather = await response.text();
          return `The weather in ${location} right now is ${weather}.`;
        },
      },
    };

    const agent = new multimodal.MultimodalAgent({
      model,
      fncCtx,
    });

    const session = (await agent.start(ctx.room, participant)) as openai.realtime.RealtimeSession;

    // Seed the conversation with a user message, then request the first response.
    session.conversation.item.create(
      llm.ChatMessage.create({
        role: llm.ChatRole.USER,
        text: 'Say "How can I help you today?"',
      }),
    );
    session.response.create();
  },
});

cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) }));
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { 5 | type JobContext, 6 | type JobProcess, 7 | WorkerOptions, 8 | cli, 9 | defineAgent, 10 | llm, 11 | pipeline, 12 | } from '@livekit/agents'; 13 | import * as deepgram from '@livekit/agents-plugin-deepgram'; 14 | import * as elevenlabs from '@livekit/agents-plugin-elevenlabs'; 15 | import * as openai from '@livekit/agents-plugin-openai'; 16 | import * as silero from '@livekit/agents-plugin-silero'; 17 | import { SipClient } from 'livekit-server-sdk'; 18 | import { fileURLToPath } from 'node:url'; 19 | // Example worker: creates a SIP participant for an outbound call, then runs a voice pipeline agent with it. 20 | export default defineAgent({ 21 | prewarm: async (proc: JobProcess) => { 22 | proc.userData.vad = await silero.VAD.load(); 23 | }, 24 | entry: async (ctx: JobContext) => { 25 | const vad = ctx.proc.userData.vad! as silero.VAD; 26 | 27 | await ctx.connect(); 28 | 29 | const sipClient = new SipClient( 30 | process.env.LIVEKIT_URL ?? '', 31 | process.env.LIVEKIT_API_KEY, 32 | process.env.LIVEKIT_API_SECRET, 33 | ); 34 | // Placeholders: replace both '...' values below before running this example. 35 | const trunkId = '...'; // create this with the CLI: https://docs.livekit.io/agents/quickstarts/outbound-calls/ 36 | const phoneNumber = '...'; // read this from the metadata or hardcode it - e.g.: 'tel:+43.....' 37 | const roomName = ctx.room.name ?? 
''; 38 | const participantIdentity = 'Example participant identity'; 39 | 40 | const sipParticipantOptions = { 41 | participantIdentity, 42 | participantName: 'Example participant name', 43 | }; 44 | // Dial out: create the SIP participant in this room under participantIdentity. 45 | console.log('creating SIP participant for outbound call'); 46 | await sipClient.createSipParticipant(trunkId, phoneNumber, roomName, sipParticipantOptions); 47 | // Wait for the participant with that identity to join before starting the agent. 48 | const participant = await ctx.waitForParticipant(participantIdentity); 49 | 50 | const initialContext = new llm.ChatContext().append({ 51 | role: llm.ChatRole.SYSTEM, 52 | text: 'You are a helpful assistant.', 53 | }); 54 | 55 | const agent = new pipeline.VoicePipelineAgent( 56 | vad, 57 | new deepgram.STT(), 58 | new openai.LLM(), 59 | new elevenlabs.TTS(), 60 | { 61 | chatCtx: initialContext, 62 | }, 63 | ); 64 | 65 | agent.start(ctx.room, participant); 66 | 67 | await agent.say('Hello - how can I help?', true); 68 | }, 69 | }); 70 | 71 | cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) })); 72 | -------------------------------------------------------------------------------- /examples/src/pipeline_voice_agent.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import type { JobProcess } from '@livekit/agents'; 5 | import { 6 | AutoSubscribe, 7 | type JobContext, 8 | WorkerOptions, 9 | cli, 10 | defineAgent, 11 | llm, 12 | pipeline, 13 | } from '@livekit/agents'; 14 | import * as deepgram from '@livekit/agents-plugin-deepgram'; 15 | import * as livekit from '@livekit/agents-plugin-livekit'; 16 | import * as openai from '@livekit/agents-plugin-openai'; 17 | import * as silero from '@livekit/agents-plugin-silero'; 18 | import { fileURLToPath } from 'node:url'; 19 | import { z } from 'zod'; 20 | // Example worker: voice pipeline agent (VAD, STT, LLM, TTS) with a weather tool and a turn detector. 21 | export default defineAgent({ 22 | prewarm: async (proc: JobProcess) => { 23 | proc.userData.vad = await silero.VAD.load(); 24 | }, 25 | entry: async (ctx: JobContext) => { 26 | const vad = ctx.proc.userData.vad! as silero.VAD; 27 | const initialContext = new llm.ChatContext().append({ 28 | role: llm.ChatRole.SYSTEM, 29 | text: 30 | 'You are a voice assistant created by LiveKit. Your interface with users will be voice. 
' + 31 | 'You should use short and concise responses, and avoiding usage of unpronounceable ' + 32 | 'punctuation.', 33 | }); 34 | // Connect with audio-only auto-subscription, then wait for a participant to join. 35 | await ctx.connect(undefined, AutoSubscribe.AUDIO_ONLY); 36 | console.log('waiting for participant'); 37 | const participant = await ctx.waitForParticipant(); 38 | console.log(`starting assistant example agent for ${participant.identity}`); 39 | // Weather tool exposed to the LLM (backed by wttr.in). `location` is model-supplied free text, so it is URL-encoded before being placed in the request path. 40 | const fncCtx: llm.FunctionContext = { 41 | weather: { 42 | description: 'Get the weather in a location', 43 | parameters: z.object({ 44 | location: z.string().describe('The location to get the weather for'), 45 | }), 46 | execute: async ({ location }) => { 47 | console.debug(`executing weather function for ${location}`); 48 | const response = await fetch(`https://wttr.in/${encodeURIComponent(location)}?format=%C+%t`); 49 | if (!response.ok) { 50 | throw new Error(`Weather API returned status: ${response.status}`); 51 | } 52 | const weather = await response.text(); 53 | return `The weather in ${location} right now is ${weather}.`; 54 | }, 55 | }, 56 | }; 57 | // Pipeline: the prewarmed VAD, Deepgram STT, OpenAI LLM and TTS, with the livekit plugin's EOUModel as turn detector. 58 | const agent = new pipeline.VoicePipelineAgent( 59 | vad, 60 | new deepgram.STT(), 61 | new openai.LLM(), 62 | new openai.TTS(), 63 | { chatCtx: initialContext, fncCtx, turnDetector: new livekit.turnDetector.EOUModel() }, 64 | ); 65 | agent.start(ctx.room, participant); 66 | 67 | await agent.say('Hey, how can I help you today', true); 68 | }, 69 | }); 70 | 71 | cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) })); 72 | -------------------------------------------------------------------------------- /examples/src/stt.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { type JobContext, WorkerOptions, cli, defineAgent, stt } from '@livekit/agents'; 5 | import { STT } from '@livekit/agents-plugin-deepgram'; 6 | import type { Track } from '@livekit/rtc-node'; 7 | import { AudioStream, RoomEvent, TrackKind } from '@livekit/rtc-node'; 8 | import { fileURLToPath } from 'node:url'; 9 | // Example worker: transcribes every subscribed audio track with Deepgram STT and logs final transcripts. 10 | export default defineAgent({ 11 | entry: async (ctx: JobContext) => { 12 | await ctx.connect(); 13 | console.log('starting STT example agent'); 14 | // Pipes one track's audio frames into a new STT stream and prints each final transcript; rejects if either direction fails. 15 | const transcribeTrack = async (track: Track) => { 16 | const audioStream = new AudioStream(track); 17 | const sttStream = new STT({ sampleRate: 48000 }).stream(); 18 | 19 | const sendTask = async () => { 20 | for await (const event of audioStream) { 21 | sttStream.pushFrame(event); 22 | } 23 | }; 24 | 25 | const recvTask = async () => { 26 | for await (const event of sttStream) { 27 | if (event.type === stt.SpeechEventType.FINAL_TRANSCRIPT) { 28 | console.log(event.alternatives![0].text); 29 | } 30 | } 31 | }; 32 | // Await both directions so a failure in either one rejects transcribeTrack instead of becoming an unhandled rejection. 33 | await Promise.all([sendTask(), recvTask()]); 34 | }; 35 | 36 | ctx.room.on(RoomEvent.TrackSubscribed, async (track: Track) => { 37 | if (track.kind === TrackKind.KIND_AUDIO) { 38 | transcribeTrack(track).catch((error) => console.error('STT transcription failed', error)); 39 | } 40 | }); 41 | }, 42 | }); 43 | 44 | cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) })); 45 | -------------------------------------------------------------------------------- /examples/src/tts.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { type JobContext, WorkerOptions, cli, defineAgent } from '@livekit/agents'; 5 | import { SynthesizeStream, TTS } from '@livekit/agents-plugin-elevenlabs'; 6 | import { 7 | AudioSource, 8 | LocalAudioTrack, 9 | RoomEvent, 10 | TrackPublishOptions, 11 | TrackSource, 12 | } from '@livekit/rtc-node'; 13 | import { fileURLToPath } from 'node:url'; 14 | // Example worker: publishes a local audio track and speaks two phrases through the ElevenLabs TTS plugin. 15 | export default defineAgent({ 16 | entry: async (ctx: JobContext) => { 17 | await ctx.connect(); 18 | 19 | console.log('starting TTS example agent'); 20 | // NOTE(review): 22050 and 1 look like sample rate (Hz) and channel count - confirm against AudioSource and the TTS output format. 21 | const source = new AudioSource(22050, 1); 22 | const track = LocalAudioTrack.createAudioTrack('agent-mic', source); 23 | const options = new TrackPublishOptions(); 24 | options.source = TrackSource.SOURCE_MICROPHONE; 25 | 26 | await ctx.room.localParticipant?.publishTrack(track, options); 27 | const stream = new TTS().stream(); 28 | // On LocalTrackSubscribed: push two phrases about 2 s apart, flushing after each, then end the stream's input. 29 | ctx.room.on(RoomEvent.LocalTrackSubscribed, async () => { 30 | console.log('speaking "Hello!"'); 31 | stream.pushText('Hello!'); 32 | stream.flush(); 33 | 34 | await new Promise((resolve) => setTimeout(resolve, 2000)); 35 | 36 | console.log('speaking "Goodbye!"'); 37 | stream.pushText('Goodbye!'); 38 | stream.flush(); 39 | stream.endInput(); 40 | }); 41 | // Forward each synthesized frame to the audio source, skipping the END_OF_STREAM marker. 42 | for await (const audio of stream) { 43 | if (audio !== SynthesizeStream.END_OF_STREAM) { 44 | await source.captureFrame(audio.frame); 45 | } 46 | } 47 | }, 48 | }); 49 | 50 | cli.runApp(new WorkerOptions({ agent: fileURLToPath(import.meta.url) })); 51 | -------------------------------------------------------------------------------- /examples/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../tsconfig.json", 3 | "include": ["./src"], 4 | "compilerOptions": { 5 | // match output dir to input dir. e.g. 
dist/index instead of dist/src/index 6 | "rootDir": "./src", 7 | "declarationDir": "./dist", 8 | "outDir": "./dist" 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-monorepo", 3 | "private": true, 4 | "type": "module", 5 | "scripts": { 6 | "build": "turbo run build", 7 | "clean": "turbo run clean", 8 | "clean:build": "turbo run clean:build", 9 | "build:agents": "turbo run build --filter=@livekit/agents...", 10 | "build:plugins": "turbo run build --filter=@livekit/agents-plugin-*...", 11 | "ci:publish": "pnpm build && changeset publish", 12 | "api:check": "turbo run api:check", 13 | "api:update": "turbo run api:update", 14 | "format:check": "prettier --check \"**/src/**/*.{ts,tsx,md,json}\"", 15 | "format:write": "prettier --write \"**/src/**/*.{ts,tsx,md,json}\"", 16 | "lint": "turbo lint", 17 | "lint:fix": "turbo lint -- --fix", 18 | "test": "vitest run", 19 | "test:watch": "vitest", 20 | "doc": "typedoc && mkdir -p docs/assets/github && cp .github/*.png docs/assets/github/ && find docs -name '*.html' -type f -exec sed -i.bak 's|=\"/.github/|=\"assets/github/|g' {} + && find docs -name '*.bak' -delete", 21 | "examples:minimal": "pnpm exec tsx examples/src/multimodal_agent.ts" 22 | }, 23 | "devDependencies": { 24 | "@changesets/cli": "^2.27.1", 25 | "@livekit/changesets-changelog-github": "^0.0.4", 26 | "@rushstack/heft": "^0.66.0", 27 | "@trivago/prettier-plugin-sort-imports": "^4.3.0", 28 | "@typescript-eslint/eslint-plugin": "^6.21.0", 29 | "@typescript-eslint/parser": "^6.21.0", 30 | "eslint": "^8.56.0", 31 | "eslint-config-next": "^14.1.0", 32 | "eslint-config-prettier": "^8.10.0", 33 | "eslint-config-standard": "^17.1.0", 34 | "eslint-config-turbo": "^1.12.2", 35 | "eslint-plugin-import": "^2.29.1", 36 | "eslint-plugin-n": "^16.6.2", 37 | "eslint-plugin-prettier": "^5.1.3", 38 | 
"eslint-plugin-promise": "^6.1.1", 39 | "eslint-plugin-standard": "^5.0.0", 40 | "eslint-plugin-tsdoc": "^0.2.17", 41 | "prettier": "^3.2.5", 42 | "turbo": "^1.13.3", 43 | "typedoc": "^0.25.13", 44 | "typescript": "^5.4.5", 45 | "vitest": "^1.6.0" 46 | }, 47 | "packageManager": "pnpm@9.7.0" 48 | } 49 | -------------------------------------------------------------------------------- /plugins/cartesia/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # @livekit/agents-plugin-cartesia 2 | 3 | ## 0.1.3 4 | 5 | ### Patch Changes 6 | 7 | - add updateOptions - [#375](https://github.com/livekit/agents-js/pull/375) ([@nbsp](https://github.com/nbsp)) 8 | 9 | - Updated dependencies [[`5f805961b91f121c4a7d83a04b648b1122b15dba`](https://github.com/livekit/agents-js/commit/5f805961b91f121c4a7d83a04b648b1122b15dba), [`a0c49c9b7b771bb67748fb7c75c5dd2fe9f90e8a`](https://github.com/livekit/agents-js/commit/a0c49c9b7b771bb67748fb7c75c5dd2fe9f90e8a), [`c06c84fa79ecdfae9170a5255a10315cf64216aa`](https://github.com/livekit/agents-js/commit/c06c84fa79ecdfae9170a5255a10315cf64216aa)]: 10 | - @livekit/agents@0.7.4 11 | 12 | ## 0.1.2 13 | 14 | ### Patch Changes 15 | 16 | - bump to use 0.7.0 - [#316](https://github.com/livekit/agents-js/pull/316) ([@nbsp](https://github.com/nbsp)) 17 | 18 | - Updated dependencies [[`724c02bb7a91c27d6c8daf961842fb9f0934770c`](https://github.com/livekit/agents-js/commit/724c02bb7a91c27d6c8daf961842fb9f0934770c), [`7398cffad62b17c79b5fe2f0ca4e99e548560367`](https://github.com/livekit/agents-js/commit/7398cffad62b17c79b5fe2f0ca4e99e548560367), [`6ed0c90d1bab013854c416768b10ef96f3227d68`](https://github.com/livekit/agents-js/commit/6ed0c90d1bab013854c416768b10ef96f3227d68), [`33c241960f0e8f325f534d2406f42148a4486b5a`](https://github.com/livekit/agents-js/commit/33c241960f0e8f325f534d2406f42148a4486b5a)]: 19 | - @livekit/agents@0.7.1 20 | 21 | ## 0.1.1 22 | 23 | ### Patch Changes 24 | 25 | - update rtc-node to 
0.13.2 to fix issue with e2ee - [#258](https://github.com/livekit/agents-js/pull/258) ([@nbsp](https://github.com/nbsp)) 26 | 27 | - Updated dependencies [[`dedb1cf139c8af4ce8709c86440c818157f5b475`](https://github.com/livekit/agents-js/commit/dedb1cf139c8af4ce8709c86440c818157f5b475), [`f3258b948539406213c15f8e817449b2588cde84`](https://github.com/livekit/agents-js/commit/f3258b948539406213c15f8e817449b2588cde84)]: 28 | - @livekit/agents@0.6.2 29 | 30 | ## 0.1.0 31 | 32 | ### Minor Changes 33 | 34 | - plugins(cartesia): init with TTS - [#217](https://github.com/livekit/agents-js/pull/217) ([@nbsp](https://github.com/nbsp)) 35 | 36 | ### Patch Changes 37 | 38 | - Updated dependencies [[`4a66a82fc2fd0a25e30bdaa0bd095804c65ee101`](https://github.com/livekit/agents-js/commit/4a66a82fc2fd0a25e30bdaa0bd095804c65ee101), [`01aaa85445bbb8f30afe9c16360afb5a45c38e9e`](https://github.com/livekit/agents-js/commit/01aaa85445bbb8f30afe9c16360afb5a45c38e9e), [`4b7504654c73d9111d39e90d325d5f660b2c8ad9`](https://github.com/livekit/agents-js/commit/4b7504654c73d9111d39e90d325d5f660b2c8ad9)]: 39 | - @livekit/agents@0.6.1 40 | -------------------------------------------------------------------------------- /plugins/cartesia/README.md: -------------------------------------------------------------------------------- 1 | 6 | # Cartesia plugin for LiveKit Agents 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 11 | 12 | This package contains the Cartesia plugin, which allows for voice synthesis. 13 | Refer to the [documentation](https://docs.livekit.io/agents/overview/) for 14 | information on how to use it, or browse the [API 15 | reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_cartesia.html). 
16 | See the [repository](https://github.com/livekit/agents-js) for more information 17 | about the framework as a whole. 18 | -------------------------------------------------------------------------------- /plugins/cartesia/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 
14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /plugins/cartesia/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugin-cartesia", 3 | "version": "0.1.3", 4 | "description": "Cartesia plugin for LiveKit Node Agents", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/agents": "workspace:^x", 34 | "@livekit/agents-plugin-openai": "workspace:^x", 35 | "@livekit/agents-plugins-test": "workspace:^x", 36 | "@livekit/rtc-node": "^0.13.11", 37 | "@microsoft/api-extractor": "^7.35.0", 38 | "@types/ws": "^8.5.10", 39 | "tsup": "^8.3.5", 40 | "typescript": "^5.0.0" 41 | }, 42 | "dependencies": { 43 | "ws": "^8.16.0" 44 | }, 45 | "peerDependencies": { 46 | "@livekit/agents": "workspace:^x", 47 | "@livekit/rtc-node": "^0.13.11" 48 | } 49 | } 50 | 
-------------------------------------------------------------------------------- /plugins/cartesia/src/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export * from './tts.js'; 6 | -------------------------------------------------------------------------------- /plugins/cartesia/src/models.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export type TTSModels = 'sonic-english' | 'sonic-multilingual'; 6 | 7 | export type TTSLanguages = 'en' | 'es' | 'fr' | 'de' | 'pt' | 'zh' | 'ja'; 8 | 9 | export const TTSDefaultVoiceId = 'c2ac25f9-ecc4-4f56-9095-651354df60c0'; 10 | 11 | export type TTSVoiceSpeed = 'fastest' | 'fast' | 'normal' | 'slow' | 'slowest'; 12 | 13 | export type TTSVoiceEmotion = 14 | | 'anger:lowest' 15 | | 'anger:low' 16 | | 'anger' 17 | | 'anger:high' 18 | | 'anger:highest' 19 | | 'positivity:lowest' 20 | | 'positivity:low' 21 | | 'positivity' 22 | | 'positivity:high' 23 | | 'positivity:highest' 24 | | 'surprise:lowest' 25 | | 'surprise:low' 26 | | 'surprise' 27 | | 'surprise:high' 28 | | 'surprise:highest' 29 | | 'sadness:lowest' 30 | | 'sadness:low' 31 | | 'sadness' 32 | | 'sadness:high' 33 | | 'sadness:highest' 34 | | 'curiosity:lowest' 35 | | 'curiosity:low' 36 | | 'curiosity' 37 | | 'curiosity:high' 38 | | 'curiosity:highest'; 39 | 40 | export type TTSEncoding = 41 | // XXX(nbsp): not yet supported 42 | // | 'pcm_f32le' 43 | // | 'pcm_mulaw' 44 | // | 'pcm_alaw' 45 | 'pcm_s16le'; 46 | -------------------------------------------------------------------------------- /plugins/cartesia/src/tts.test.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { STT } from '@livekit/agents-plugin-openai'; 5 | import { tts } from '@livekit/agents-plugins-test'; 6 | import { describe } from 'vitest'; 7 | import { TTS } from './tts.js'; 8 | 9 | describe('Cartesia', async () => { 10 | await tts(new TTS(), new STT()); 11 | }); 12 | -------------------------------------------------------------------------------- /plugins/cartesia/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["./src"], 4 | "compilerOptions": { 5 | // match output dir to input dir. e.g. dist/index instead of dist/src/index 6 | "rootDir": "./src", 7 | "declarationDir": "./dist", 8 | "outDir": "./dist" 9 | }, 10 | "typedocOptions": { 11 | "name": "plugins/agents-plugin-cartesia", 12 | "entryPointStrategy": "resolve", 13 | "readme": "none", 14 | "entryPoints": ["src/index.ts"] 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /plugins/cartesia/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | import defaults from '../../tsup.config'; 4 | 5 | export default defineConfig({ 6 | ...defaults, 7 | }); 8 | -------------------------------------------------------------------------------- /plugins/deepgram/README.md: -------------------------------------------------------------------------------- 1 | 6 | # Deepgram plugin for LiveKit Agents 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 11 | 12 | This package contains the Deepgram plugin, which allows for speech recognition. 
13 | Refer to the [documentation](https://docs.livekit.io/agents/overview/) for 14 | information on how to use it, or browse the [API 15 | reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_deepgram.html). 16 | See the [repository](https://github.com/livekit/agents-js) for more information 17 | about the framework as a whole. 18 | -------------------------------------------------------------------------------- /plugins/deepgram/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 
14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /plugins/deepgram/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugin-deepgram", 3 | "version": "0.5.6", 4 | "description": "Deepgram plugin for LiveKit Agents for Node.js", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/agents": "workspace:^x", 34 | "@livekit/agents-plugin-silero": "workspace:^x", 35 | "@livekit/agents-plugins-test": "workspace:^x", 36 | "@livekit/rtc-node": "^0.13.11", 37 | "@microsoft/api-extractor": "^7.35.0", 38 | "@types/ws": "^8.5.10", 39 | "tsup": "^8.3.5", 40 | "typescript": "^5.0.0" 41 | }, 42 | "dependencies": { 43 | "ws": "^8.16.0" 44 | }, 45 | "peerDependencies": { 46 | "@livekit/agents": "workspace:^x", 47 | "@livekit/rtc-node": "^0.13.11" 48 | } 49 | } 50 | 
-------------------------------------------------------------------------------- /plugins/deepgram/src/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export * from './stt.js'; 6 | -------------------------------------------------------------------------------- /plugins/deepgram/src/models.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export type STTModels = 6 | | 'nova-general' 7 | | 'nova-phonecall' 8 | | 'nova-meeting' 9 | | 'nova-2-general' 10 | | 'nova-2-meeting' 11 | | 'nova-2-phonecall' 12 | | 'nova-2-finance' 13 | | 'nova-2-conversationalai' 14 | | 'nova-2-voicemail' 15 | | 'nova-2-video' 16 | | 'nova-2-medical' 17 | | 'nova-2-drivethru' 18 | | 'nova-2-automotive' 19 | | 'nova-3-general' 20 | | 'enhanced-general' 21 | | 'enhanced-meeting' 22 | | 'enhanced-phonecall' 23 | | 'enhanced-finance' 24 | | 'base' 25 | | 'meeting' 26 | | 'phonecall' 27 | | 'finance' 28 | | 'conversationalai' 29 | | 'voicemail' 30 | | 'video' 31 | | 'whisper-tiny' 32 | | 'whisper-base' 33 | | 'whisper-small' 34 | | 'whisper-medium' 35 | | 'whisper-large'; 36 | 37 | export type STTLanguages = 38 | | 'da' 39 | | 'de' 40 | | 'en' 41 | | 'en-AU' 42 | | 'en-GB' 43 | | 'en-IN' 44 | | 'en-NZ' 45 | | 'en-US' 46 | | 'es' 47 | | 'es-419' 48 | | 'es-LATAM' 49 | | 'fr' 50 | | 'fr-CA' 51 | | 'hi' 52 | | 'hi-Latn' 53 | | 'id' 54 | | 'it' 55 | | 'ja' 56 | | 'ko' 57 | | 'nl' 58 | | 'no' 59 | | 'pl' 60 | | 'pt' 61 | | 'pt-BR' 62 | | 'ru' 63 | | 'sv' 64 | | 'ta' 65 | | 'taq' 66 | | 'th' 67 | | 'tr' 68 | | 'uk' 69 | | 'zh' 70 | | 'zh-CN' 71 | | 'zh-TW'; 72 | -------------------------------------------------------------------------------- /plugins/deepgram/src/stt.test.ts: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { initializeLogger } from '@livekit/agents'; 5 | import { VAD } from '@livekit/agents-plugin-silero'; 6 | import { stt } from '@livekit/agents-plugins-test'; 7 | import { describe } from 'vitest'; 8 | import { STT } from './stt.js'; 9 | 10 | describe('Deepgram', async () => { 11 | initializeLogger({ pretty: false }); 12 | await stt(new STT(), await VAD.load(), { nonStreaming: false }); 13 | }); 14 | -------------------------------------------------------------------------------- /plugins/deepgram/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["./src"], 4 | "compilerOptions": { 5 | // match output dir to input dir. e.g. dist/index instead of dist/src/index 6 | "rootDir": "./src", 7 | "declarationDir": "./dist", 8 | "outDir": "./dist" 9 | }, 10 | "typedocOptions": { 11 | "name": "plugins/agents-plugin-deepgram", 12 | "entryPointStrategy": "resolve", 13 | "readme": "none", 14 | "entryPoints": ["src/index.ts"] 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /plugins/deepgram/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | import defaults from '../../tsup.config'; 4 | 5 | export default defineConfig({ 6 | ...defaults, 7 | }); 8 | -------------------------------------------------------------------------------- /plugins/elevenlabs/README.md: -------------------------------------------------------------------------------- 1 | 6 | # ElevenLabs plugin for LiveKit Agents 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. 
Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 11 | 12 | This package contains the ElevenLabs plugin, which allows for voice synthesis. 13 | Refer to the [documentation](https://docs.livekit.io/agents/overview/) for 14 | information on how to use it, or browse the [API 15 | reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_elevenlabs.html). 16 | See the [repository](https://github.com/livekit/agents-js) for more information 17 | about the framework as a whole. 18 | -------------------------------------------------------------------------------- /plugins/elevenlabs/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 
14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /plugins/elevenlabs/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugin-elevenlabs", 3 | "version": "0.6.2", 4 | "description": "ElevenLabs plugin for LiveKit Node Agents", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/agents": "workspace:^x", 34 | "@livekit/agents-plugin-openai": "workspace:^x", 35 | "@livekit/agents-plugins-test": "workspace:^x", 36 | "@livekit/rtc-node": "^0.13.11", 37 | "@microsoft/api-extractor": "^7.35.0", 38 | "@types/ws": "^8.5.10", 39 | "tsup": "^8.3.5", 40 | "typescript": "^5.0.0" 41 | }, 42 | "dependencies": { 43 | "ws": "^8.16.0" 44 | }, 45 | "peerDependencies": { 46 | "@livekit/agents": "workspace:^x", 47 | "@livekit/rtc-node": "^0.13.11" 48 | } 49 | } 50 | 
-------------------------------------------------------------------------------- /plugins/elevenlabs/src/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export * from './tts.js'; 6 | -------------------------------------------------------------------------------- /plugins/elevenlabs/src/models.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export type TTSModels = 6 | | 'eleven_monolingual_v1' 7 | | 'eleven_multilingual_v1' 8 | | 'eleven_multilingual_v2' 9 | | 'eleven_flash_v2' 10 | | 'eleven_flash_v2_5' 11 | | 'eleven_turbo_v2' 12 | | 'eleven_turbo_v2_5'; 13 | 14 | export type TTSEncoding = 15 | // XXX(nbsp): MP3 is not yet supported 16 | // | 'mp3_22050_32' 17 | // | 'mp3_44100_32' 18 | // | 'mp3_44100_64' 19 | // | 'mp3_44100_96' 20 | // | 'mp3_44100_128' 21 | // | 'mp3_44100_192' 22 | 'pcm_16000' | 'pcm_22050' | 'pcm_44100'; 23 | -------------------------------------------------------------------------------- /plugins/elevenlabs/src/tts.test.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { STT } from '@livekit/agents-plugin-openai'; 5 | import { tts } from '@livekit/agents-plugins-test'; 6 | import { describe } from 'vitest'; 7 | import { TTS } from './tts.js'; 8 | 9 | describe('ElevenLabs', async () => { 10 | await tts(new TTS(), new STT(), { nonStreaming: false }); 11 | }); 12 | -------------------------------------------------------------------------------- /plugins/elevenlabs/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["./src"], 4 | "compilerOptions": { 5 | // match output dir to input dir. e.g. dist/index instead of dist/src/index 6 | "rootDir": "./src", 7 | "declarationDir": "./dist", 8 | "outDir": "./dist" 9 | }, 10 | "typedocOptions": { 11 | "name": "plugins/agents-plugin-elevenlabs", 12 | "entryPointStrategy": "resolve", 13 | "readme": "none", 14 | "entryPoints": ["src/index.ts"] 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /plugins/elevenlabs/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | import defaults from '../../tsup.config'; 4 | 5 | export default defineConfig({ 6 | ...defaults, 7 | }); 8 | -------------------------------------------------------------------------------- /plugins/livekit/.gitattributes: -------------------------------------------------------------------------------- 1 | src/turn_detector.onnx filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /plugins/livekit/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # @livekit/agents-plugin-livekit 2 | 3 | ## 0.1.1 4 | 5 | ### Patch Changes 6 | 7 | - cleanup resources with onnx runtime - 
[#377](https://github.com/livekit/agents-js/pull/377) ([@Shubhrakanti](https://github.com/Shubhrakanti)) 8 | 9 | - Updated dependencies [[`ae508d76d6b521218f8320f8dba1b1d0fdad79a2`](https://github.com/livekit/agents-js/commit/ae508d76d6b521218f8320f8dba1b1d0fdad79a2), [`e1dd8f86660b4f96585833dc896b719dd3ad54b2`](https://github.com/livekit/agents-js/commit/e1dd8f86660b4f96585833dc896b719dd3ad54b2)]: 10 | - @livekit/agents@0.7.5 11 | 12 | ## 0.1.0 13 | 14 | ### Minor Changes 15 | 16 | - feat: add turn detector - [#225](https://github.com/livekit/agents-js/pull/225) ([@nbsp](https://github.com/nbsp)) 17 | 18 | ### Patch Changes 19 | 20 | - Updated dependencies [[`4681792123ebf7eb6f75d89efe32ec11cb1ee179`](https://github.com/livekit/agents-js/commit/4681792123ebf7eb6f75d89efe32ec11cb1ee179), [`3e1b2d0fd07a5fab53bf20c151faad3fd9bfa77d`](https://github.com/livekit/agents-js/commit/3e1b2d0fd07a5fab53bf20c151faad3fd9bfa77d), [`b0fa6007372dc798e222487e87f7b80f1a64ac4e`](https://github.com/livekit/agents-js/commit/b0fa6007372dc798e222487e87f7b80f1a64ac4e), [`c2794335d5395744e9ba0c6691a4ff6bb7c28e40`](https://github.com/livekit/agents-js/commit/c2794335d5395744e9ba0c6691a4ff6bb7c28e40), [`a3d025047e62d89e935b878502735a0768076d7c`](https://github.com/livekit/agents-js/commit/a3d025047e62d89e935b878502735a0768076d7c), [`629c737098b6b6636356527bbe8a4e81d8b6f047`](https://github.com/livekit/agents-js/commit/629c737098b6b6636356527bbe8a4e81d8b6f047)]: 21 | - @livekit/agents@0.7.0 22 | -------------------------------------------------------------------------------- /plugins/livekit/README.md: -------------------------------------------------------------------------------- 1 | 6 | # Additional utilities for LiveKit Agents 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 
11 | 12 | This package contains additional utilities provided by LiveKit. 13 | Refer to the [documentation](https://docs.livekit.io/agents/overview/) for 14 | information on how to use it, or browse the [API 15 | reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_livekit.html). 16 | See the [repository](https://github.com/livekit/agents-js) for more information 17 | about the framework as a whole. 18 | -------------------------------------------------------------------------------- /plugins/livekit/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 
14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /plugins/livekit/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugin-livekit", 3 | "version": "0.1.1", 4 | "description": "Additional utilities for LiveKit Node Agents", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\" && cp src/turn_detector.onnx dist/", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/agents": "workspace:^x", 34 | "@microsoft/api-extractor": "^7.35.0", 35 | "onnxruntime-common": "^1.19.2", 36 | "tsup": "^8.3.5", 37 | "typescript": "^5.0.0" 38 | }, 39 | "peerDependencies": { 40 | "@livekit/agents": "workspace:^x" 41 | }, 42 | "dependencies": { 43 | "@huggingface/transformers": "^3.2.1", 44 | "onnxruntime-node": "^1.19.2" 45 | } 46 | } 47 | -------------------------------------------------------------------------------- /plugins/livekit/src/index.ts: 
-------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { InferenceRunner } from '@livekit/agents'; 5 | import * as turnDetector from './turn_detector.js'; 6 | 7 | InferenceRunner.registerRunner( 8 | turnDetector.EOURunner.INFERENCE_METHOD, 9 | import.meta.resolve('./turn_detector.js'), 10 | ); 11 | export { turnDetector }; 12 | -------------------------------------------------------------------------------- /plugins/livekit/src/onnxruntime.d.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | // https://github.com/microsoft/onnxruntime/issues/17979 6 | declare module 'onnxruntime-node' { 7 | export * from 'onnxruntime-common'; 8 | } 9 | -------------------------------------------------------------------------------- /plugins/livekit/src/turn_detector.onnx: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:22e48174ef25bf87243a6400ca4e0bdfc1425ef82337456d15d98df6e1977000 3 | size 65712276 4 | -------------------------------------------------------------------------------- /plugins/livekit/src/turn_detector.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
//
// SPDX-License-Identifier: Apache-2.0
import type { PreTrainedTokenizer } from '@huggingface/transformers';
import { AutoTokenizer } from '@huggingface/transformers';
import type { ipc } from '@livekit/agents';
import { CurrentJobContext, InferenceRunner, llm, log } from '@livekit/agents';
import { fileURLToPath } from 'node:url';
import { InferenceSession, Tensor } from 'onnxruntime-node';

/** Number of most recent chat messages forwarded to the end-of-utterance model. */
const MAX_HISTORY = 4;

/** Marker that closes a message in the rendered chat template. */
const END_OF_MESSAGE_TOKEN = '<|im_end|>';

/** Plain role/content pairs, the shape handed to the tokenizer's chat template. */
type RawChatContext = { role: string; content: string }[];

/**
 * Inference runner that scores how likely the current utterance is finished.
 *
 * The tokenizer download and the ONNX session creation are both started in the
 * constructor and resolved lazily, so constructing the runner never blocks.
 */
export class EOURunner extends InferenceRunner {
  /** Key under which this runner is registered and looked up. */
  static INFERENCE_METHOD = 'lk_end_of_utterance';
  #tokenizerPromise: Promise<PreTrainedTokenizer>;
  #session: Promise<InferenceSession>;
  #tokenizer?: PreTrainedTokenizer;
  #logger = log();

  constructor() {
    super();
    this.#tokenizerPromise = AutoTokenizer.from_pretrained('livekit/turn-detector', {
      revision: 'v1.2.0',
      // local_files_only: true, // TODO(nbsp): can't find it
    });
    // The model file is resolved relative to this module, wherever it ends up on disk.
    this.#session = InferenceSession.create(
      fileURLToPath(new URL('turn_detector.onnx', import.meta.url)),
      {
        executionProviders: [{ name: 'cpu' }],
      },
    );
  }

  /** Waits for the tokenizer to finish loading and caches it. */
  async initialize(): Promise<void> {
    this.#tokenizer = await this.#tokenizerPromise;
  }

  /**
   * Runs the model on a conversation excerpt.
   *
   * @param data - role/content messages, oldest first
   * @returns the first value of the model's `prob` output
   * @throws Error if the model produces no numeric `prob` output
   */
  async run(data: RawChatContext): Promise<number> {
    // Tolerate callers that did not await initialize(): fall back to the pending load.
    const tokenizer = this.#tokenizer ?? (await this.#tokenizerPromise);
    this.#tokenizer = tokenizer;

    const text = this.#formatChatContext(tokenizer, data);
    const startTime = Date.now();
    const inputs = tokenizer.encode(text, { add_special_tokens: false });
    const session = await this.#session;
    const outputs = await session.run(
      { input_ids: new Tensor('int64', inputs, [1, inputs.length]) },
      ['prob'],
    );
    const endTime = Date.now();

    const eouProbability = outputs.prob?.data[0];
    if (typeof eouProbability !== 'number') {
      throw new Error('turn detector model did not return a numeric "prob" output');
    }

    this.#logger
      .child({ eouProbability, input: text, duration: endTime - startTime })
      .debug('eou prediction');
    return eouProbability;
  }

  /**
   * Renders the messages through the tokenizer's chat template and cuts the
   * text at the last end-of-message marker, so the current utterance is left
   * open for the model to judge.
   */
  #formatChatContext(tokenizer: PreTrainedTokenizer, ctx: RawChatContext): string {
    // Messages with empty content are skipped.
    const nonEmpty = ctx.filter((msg) => Boolean(msg.content));

    const convoText = tokenizer.apply_chat_template(nonEmpty, {
      add_generation_prompt: false,
      tokenize: false,
    }) as string;

    // remove EOU token from current utterance
    const eouIndex = convoText.lastIndexOf(END_OF_MESSAGE_TOKEN);
    // Without this guard a missing marker (-1) would make slice() drop the last character.
    return eouIndex === -1 ? convoText : convoText.slice(0, eouIndex);
  }

  /** Releases the ONNX session. */
  async close(): Promise<void> {
    const session = await this.#session;
    await session.release();
  }
}

/**
 * Job-side handle for the end-of-utterance model. Predictions are delegated to
 * the inference executor of the current job context.
 */
export class EOUModel {
  /** Threshold exposed to consumers; this class itself never applies it. */
  readonly unlikelyThreshold: number;
  #executor: ipc.InferenceExecutor;

  constructor(unlikelyThreshold = 0.15) {
    this.unlikelyThreshold = unlikelyThreshold;
    this.#executor = CurrentJobContext.getCurrent().inferenceExecutor;
  }

  /**
   * @param language - language tag such as `en-US`
   * @returns `true` when the part before the first `-` is `en` or `english`
   *   (case-insensitive); `false` otherwise or when no language is given
   */
  supportsLanguage(language?: string): boolean {
    if (!language) return false;
    const primary = language.toLowerCase().split('-')[0];
    return primary === 'en' || primary === 'english';
  }

  /**
   * Collects the textual user/assistant messages of `chatCtx`, keeps the last
   * {@link MAX_HISTORY} of them and asks the runner for a prediction.
   *
   * @param chatCtx - conversation so far
   * @returns the value produced by the runner registered under
   *   `EOURunner.INFERENCE_METHOD`
   */
  async predictEndOfTurn(chatCtx: llm.ChatContext): Promise<number> {
    const messages: RawChatContext = [];

    for (const msg of chatCtx.messages) {
      const isAssistant = msg.role === llm.ChatRole.ASSISTANT;
      if (!isAssistant && msg.role !== llm.ChatRole.USER) {
        continue;
      }
      const role = isAssistant ? 'assistant' : 'user';

      // Content may be a single string or a list of parts; only string parts are kept.
      const parts = Array.isArray(msg.content) ? msg.content : [msg.content];
      for (const part of parts) {
        if (typeof part === 'string') {
          messages.push({ role, content: part });
        }
      }
    }

    const result = await this.#executor.doInference(
      EOURunner.INFERENCE_METHOD,
      messages.slice(-MAX_HISTORY),
    );
    // EOURunner.run resolves to a number; the executor's result is not typed that precisely.
    return result as number;
  }
}

export default EOURunner;
[`a7350c92f8968e0fd833e7679a607eaf9a1d7e7f`](https://github.com/livekit/agents-js/commit/a7350c92f8968e0fd833e7679a607eaf9a1d7e7f), [`2dcfeab76ace2e1851993771d769ebcb7c188144`](https://github.com/livekit/agents-js/commit/2dcfeab76ace2e1851993771d769ebcb7c188144), [`2bb936c55233ac0747582a5045caa595c6338651`](https://github.com/livekit/agents-js/commit/2bb936c55233ac0747582a5045caa595c6338651)]: 12 | - @livekit/agents@0.7.2 13 | -------------------------------------------------------------------------------- /plugins/neuphonic/README.md: -------------------------------------------------------------------------------- 1 | 6 | # Neuphonic plugin for LiveKit Agents 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 11 | 12 | This package contains the Neuphonic plugin, which allows for voice synthesis. 13 | Refer to the [documentation](https://docs.livekit.io/agents/overview/) for 14 | information on how to use it. 15 | See the [repository](https://github.com/livekit/agents-js) for more information 16 | about the framework as a whole. 17 | -------------------------------------------------------------------------------- /plugins/neuphonic/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. 
Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /plugins/neuphonic/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugin-neuphonic", 3 | "version": "0.1.0", 4 | "description": "Neuphonic plugin for LiveKit Node Agents", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/agents": "workspace:^x", 34 | "@livekit/agents-plugin-openai": "workspace:^x", 35 | "@livekit/agents-plugins-test": "workspace:^x", 36 | "@livekit/rtc-node": "^0.13.11", 37 | "@microsoft/api-extractor": "^7.35.0", 38 | "@types/ws": "^8.5.10", 39 | "tsup": "^8.3.5", 40 | "typescript": "^5.0.0" 41 | }, 42 | "dependencies": { 43 | "ws": "^8.16.0" 44 | }, 45 | "peerDependencies": { 46 | "@livekit/agents": "workspace:^x", 
47 | "@livekit/rtc-node": "^0.13.11" 48 | } 49 | } 50 | -------------------------------------------------------------------------------- /plugins/neuphonic/src/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export * from './tts.js'; 6 | -------------------------------------------------------------------------------- /plugins/neuphonic/src/models.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export type TTSEncodings = 'pcm_linear' | 'pcm_mulaw'; 6 | 7 | export type TTSModels = 'neu-fast' | 'neu-hq'; 8 | 9 | export type TTSLangCodes = 'en' | 'nl' | 'es' | 'de' | 'hi' | 'en-hi' | 'ar'; 10 | -------------------------------------------------------------------------------- /plugins/neuphonic/src/tts.test.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { STT } from '@livekit/agents-plugin-openai'; 5 | import { tts } from '@livekit/agents-plugins-test'; 6 | import { describe } from 'vitest'; 7 | import { TTS } from './tts.js'; 8 | 9 | describe.skip('Neuphonic', async () => { 10 | await tts(new TTS(), new STT()); 11 | }); 12 | -------------------------------------------------------------------------------- /plugins/neuphonic/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["./src"], 4 | "compilerOptions": { 5 | // match output dir to input dir. e.g. 
dist/index instead of dist/src/index 6 | "rootDir": "./src", 7 | "declarationDir": "./dist", 8 | "outDir": "./dist" 9 | }, 10 | "typedocOptions": { 11 | "name": "plugins/agents-plugin-neuphonic", 12 | "entryPointStrategy": "resolve", 13 | "readme": "none", 14 | "entryPoints": ["src/index.ts"] 15 | } 16 | } 17 | -------------------------------------------------------------------------------- /plugins/neuphonic/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | import defaults from '../../tsup.config'; 4 | 5 | export default defineConfig({ 6 | ...defaults, 7 | }); 8 | -------------------------------------------------------------------------------- /plugins/openai/README.md: -------------------------------------------------------------------------------- 1 | 6 | # OpenAI plugin for LiveKit Agents 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 11 | 12 | This package contains the OpenAI plugin, which allows for TTS, STT, LLM, as well 13 | as using the Realtime API. Refer to the 14 | [documentation](https://docs.livekit.io/agents/overview/) for information on how 15 | to use it, or browse the [API 16 | reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_openai.html). 17 | See the [repository](https://github.com/livekit/agents-js) for more information 18 | about the framework as a whole. 19 | -------------------------------------------------------------------------------- /plugins/openai/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. 
For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /plugins/openai/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugin-openai", 3 | "version": "0.9.1", 4 | "description": "OpenAI plugin for LiveKit Node Agents", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder 
../../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/agents": "workspace:^x", 34 | "@livekit/agents-plugin-silero": "workspace:^x", 35 | "@livekit/agents-plugins-test": "workspace:^x", 36 | "@livekit/rtc-node": "^0.13.11", 37 | "@microsoft/api-extractor": "^7.35.0", 38 | "@types/ws": "^8.5.10", 39 | "tsup": "^8.3.5", 40 | "typescript": "^5.0.0" 41 | }, 42 | "dependencies": { 43 | "openai": "^4.91.1", 44 | "sharp": "^0.33.5", 45 | "ws": "^8.16.0" 46 | }, 47 | "peerDependencies": { 48 | "@livekit/agents": "workspace:^x", 49 | "@livekit/rtc-node": "^0.13.11" 50 | } 51 | } 52 | -------------------------------------------------------------------------------- /plugins/openai/src/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | export * as realtime from './realtime/index.js'; 5 | export * from './models.js'; 6 | export { type LLMOptions, LLM, LLMStream } from './llm.js'; 7 | export { type STTOptions, STT } from './stt.js'; 8 | export { type TTSOptions, TTS, ChunkedStream } from './tts.js'; 9 | -------------------------------------------------------------------------------- /plugins/openai/src/llm.test.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { llm } from '@livekit/agents-plugins-test'; 5 | import { describe } from 'vitest'; 6 | import { LLM } from './llm.js'; 7 | 8 | describe('OpenAI', async () => { 9 | await llm(new LLM()); 10 | }); 11 | -------------------------------------------------------------------------------- /plugins/openai/src/models.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export type ChatModels = 6 | | 'gpt-4o' 7 | | 'gpt-4o-2024-05-13' 8 | | 'gpt-4o-mini' 9 | | 'gpt-4o-mini-2024-07-18' 10 | | 'gpt-4-turbo' 11 | | 'gpt-4-turbo-2024-04-09' 12 | | 'gpt-4-turbo-preview' 13 | | 'gpt-4-0125-preview' 14 | | 'gpt-4-1106-preview' 15 | | 'gpt-4-vision-preview' 16 | | 'gpt-4-1106-vision-preview' 17 | | 'gpt-4' 18 | | 'gpt-4-0314' 19 | | 'gpt-4-0613' 20 | | 'gpt-4-32k' 21 | | 'gpt-4-32k-0314' 22 | | 'gpt-4-32k-0613' 23 | | 'gpt-3.5-turbo' 24 | | 'gpt-3.5-turbo-16k' 25 | | 'gpt-3.5-turbo-0301' 26 | | 'gpt-3.5-turbo-0613' 27 | | 'gpt-3.5-turbo-1106' 28 | | 'gpt-3.5-turbo-16k-0613'; 29 | 30 | export type WhisperModels = 'whisper-1'; 31 | 32 | export type TTSModels = 'tts-1' | 'tts-1-hd' | 'gpt-4o-mini-tts'; 33 | 34 | export type TTSVoices = 35 | | 'alloy' 36 | | 'ash' 37 | | 'ballad' 38 | | 'coral' 39 | | 'echo' 40 | | 'fable' 41 | | 'nova' 42 | | 'onyx' 43 | | 'sage' 44 | | 'shimmer' 45 | | 'verse'; 46 | 47 | // adapters for OpenAI-compatible LLMs, TTSs, STTs 48 | 49 | export type TelnyxChatModels = 50 | | 'meta-llama/Meta-Llama-3.1-8B-Instruct' 51 | | 'meta-llama/Meta-Llama-3.1-70B-Instruct'; 52 | 53 | export type CerebrasChatModels = 'llama3.1-8b' | 'llama3.1-70b'; 54 | 55 | export type PerplexityChatModels = 56 | | 'llama-3.1-sonar-small-128k-online' 57 | | 'llama-3.1-sonar-small-128k-chat' 58 | | 'llama-3.1-sonar-large-128k-online' 59 | | 'llama-3.1-sonar-large-128k-chat' 60 | | 'llama-3.1-8b-instruct' 61 | | 'llama-3.1-70b-instruct'; 62 | 63 | export type GroqChatModels = 64 | | 'llama-3.1-405b-reasoning' 65 | | 'llama-3.1-70b-versatile' 66 | | 'llama-3.1-8b-instant' 67 | | 'llama-3.3-70b-versatile' 68 | | 'llama3-groq-70b-8192-tool-use-preview' 69 | | 'llama3-groq-8b-8192-tool-use-preview' 70 | | 'llama-guard-3-8b' 71 | | 'llama3-70b-8192' 72 | | 'llama3-8b-8192' 73 | | 'mixtral-8x7b-32768' 74 | | 'gemma-7b-it' 75 | | 'gemma2-9b-it'; 76 | 77 | export type GroqAudioModels = 78 | | 
'whisper-large-v3' 79 | | 'distil-whisper-large-v3-en' 80 | | 'whisper-large-v3-turbo'; 81 | 82 | export type DeepSeekChatModels = 'deepseek-coder' | 'deepseek-chat'; 83 | 84 | export type TogetherChatModels = 85 | | 'garage-bAInd/Platypus2-70B-instruct' 86 | | 'google/gemma-2-27b-it' 87 | | 'google/gemma-2-9b-it' 88 | | 'google/gemma-2b-it' 89 | | 'google/gemma-7b-it' 90 | | 'lmsys/vicuna-13b-v1.5' 91 | | 'lmsys/vicuna-7b-v1.5' 92 | | 'meta-llama/Llama-2-13b-chat-hf' 93 | | 'meta-llama/Llama-2-70b-chat-hf' 94 | | 'meta-llama/Llama-2-7b-chat-hf' 95 | | 'meta-llama/Llama-3-70b-chat-hf' 96 | | 'meta-llama/Llama-3-8b-chat-hf' 97 | | 'meta-llama/Meta-Llama-3-70B-Instruct-Lite' 98 | | 'meta-llama/Meta-Llama-3-70B-Instruct-Turbo' 99 | | 'meta-llama/Meta-Llama-3-8B-Instruct-Lite' 100 | | 'meta-llama/Meta-Llama-3-8B-Instruct-Turbo' 101 | | 'meta-llama/Meta-Llama-3.1-405B-Instruct-Turbo' 102 | | 'meta-llama/Meta-Llama-3.1-70B-Instruct-Turbo' 103 | | 'meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo' 104 | | 'mistralai/Mistral-7B-Instruct-v0.1' 105 | | 'mistralai/Mistral-7B-Instruct-v0.2' 106 | | 'mistralai/Mistral-7B-Instruct-v0.3' 107 | | 'mistralai/Mixtral-8x22B-Instruct-v0.1' 108 | | 'mistralai/Mixtral-8x7B-Instruct-v0.1' 109 | | 'openchat/openchat-3.5-1210' 110 | | 'snorkelai/Snorkel-Mistral-PairRM-DPO' 111 | | 'teknium/OpenHermes-2-Mistral-7B' 112 | | 'teknium/OpenHermes-2p5-Mistral-7B' 113 | | 'togethercomputer/Llama-2-7B-32K-Instruct' 114 | | 'togethercomputer/RedPajama-INCITE-7B-Chat' 115 | | 'togethercomputer/RedPajama-INCITE-Chat-3B-v1' 116 | | 'togethercomputer/StripedHyena-Nous-7B' 117 | | 'togethercomputer/alpaca-7b' 118 | | 'upstage/SOLAR-10.7B-Instruct-v1.0' 119 | | 'zero-one-ai/Yi-34B-Chat'; 120 | 121 | export type OctoChatModels = 122 | | 'meta-llama-3-70b-instruct' 123 | | 'meta-llama-3.1-405b-instruct' 124 | | 'meta-llama-3.1-70b-instruct' 125 | | 'meta-llama-3.1-8b-instruct' 126 | | 'mistral-7b-instruct' 127 | | 'mixtral-8x7b-instruct' 128 | | 
'wizardlm-2-8x22bllamaguard-2-7b'; 129 | 130 | export type XAIChatModels = 'grok-2' | 'grok-2-mini' | 'grok-2-mini-public' | 'grok-2-public'; 131 | 132 | export type MetaChatModels = 133 | | 'Llama-4-Scout-17B-16E-Instruct-FP8' 134 | | 'Llama-4-Maverick-17B-128E-Instruct-FP8' 135 | | 'Llama-3.3-70B-Instruct' 136 | | 'Llama-3.3-8B-Instruct'; 137 | -------------------------------------------------------------------------------- /plugins/openai/src/realtime/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | export * from './api_proto.js'; 5 | export * from './realtime_model.js'; 6 | -------------------------------------------------------------------------------- /plugins/openai/src/stt.test.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { VAD } from '@livekit/agents-plugin-silero'; 5 | import { stt } from '@livekit/agents-plugins-test'; 6 | import { describe } from 'vitest'; 7 | import { STT } from './stt.js'; 8 | 9 | describe('OpenAI', async () => { 10 | await stt(new STT(), await VAD.load(), { streaming: false }); 11 | }); 12 | -------------------------------------------------------------------------------- /plugins/openai/src/stt.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { type AudioBuffer, mergeFrames, stt } from '@livekit/agents'; 5 | import type { AudioFrame } from '@livekit/rtc-node'; 6 | import { OpenAI } from 'openai'; 7 | import type { GroqAudioModels, WhisperModels } from './models.js'; 8 | 9 | export interface STTOptions { 10 | apiKey?: string; 11 | language: string; 12 | prompt?: string; 13 | detectLanguage: boolean; 14 | model: WhisperModels | string; 15 | baseURL?: string; 16 | client?: OpenAI; 17 | } 18 | 19 | const defaultSTTOptions: STTOptions = { 20 | apiKey: process.env.OPENAI_API_KEY, 21 | language: 'en', 22 | detectLanguage: false, 23 | model: 'whisper-1', 24 | }; 25 | 26 | export class STT extends stt.STT { 27 | #opts: STTOptions; 28 | #client: OpenAI; 29 | label = 'openai.STT'; 30 | 31 | /** 32 | * Create a new instance of OpenAI STT. 33 | * 34 | * @remarks 35 | * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the 36 | * `OPENAI_API_KEY` environmental variable. 37 | */ 38 | constructor(opts: Partial = defaultSTTOptions) { 39 | super({ streaming: false, interimResults: false }); 40 | 41 | this.#opts = { ...defaultSTTOptions, ...opts }; 42 | if (this.#opts.apiKey === undefined) { 43 | throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY'); 44 | } 45 | 46 | this.#client = 47 | this.#opts.client || 48 | new OpenAI({ 49 | baseURL: opts.baseURL, 50 | apiKey: opts.apiKey, 51 | }); 52 | } 53 | 54 | /** 55 | * Create a new instance of Groq STT. 56 | * 57 | * @remarks 58 | * `apiKey` must be set to your Groq API key, either using the argument or by setting the 59 | * `GROQ_API_KEY` environmental variable. 
60 | */ 61 | static withGroq( 62 | opts: Partial<{ 63 | model: string | GroqAudioModels; 64 | apiKey?: string; 65 | baseURL?: string; 66 | client: OpenAI; 67 | language: string; 68 | detectLanguage: boolean; 69 | }> = {}, 70 | ): STT { 71 | const apiKey = opts.apiKey || process.env.GROQ_API_KEY; 72 | if (apiKey === undefined) { 73 | throw new Error('Groq API key is required, whether as an argument or as $GROQ_API_KEY'); 74 | } 75 | // The resolved key is passed alongside `opts` instead of being written back into the caller's object. 76 | return new STT({ 77 | model: 'whisper-large-v3-turbo', 78 | baseURL: 'https://api.groq.com/openai/v1', 79 | ...opts, apiKey, 80 | }); 81 | } 82 | 83 | #sanitizeOptions(language?: string): STTOptions { 84 | if (language) { 85 | return { ...this.#opts, language }; 86 | } else { 87 | return this.#opts; 88 | } 89 | } 90 | 91 | #createWav(frame: AudioFrame): Buffer { 92 | const bitsPerSample = 16; 93 | const byteRate = (frame.sampleRate * frame.channels * bitsPerSample) / 8; 94 | const blockAlign = (frame.channels * bitsPerSample) / 8; 95 | // 44-byte PCM WAV header; the payload appended below is limited to the bytes the sample view covers, since frame.data may not span its whole ArrayBuffer. 96 | const header = Buffer.alloc(44); 97 | header.write('RIFF', 0); 98 | header.writeUInt32LE(36 + frame.data.byteLength, 4); 99 | header.write('WAVE', 8); 100 | header.write('fmt ', 12); 101 | header.writeUInt32LE(16, 16); 102 | header.writeUInt16LE(1, 20); 103 | header.writeUInt16LE(frame.channels, 22); 104 | header.writeUInt32LE(frame.sampleRate, 24); 105 | header.writeUInt32LE(byteRate, 28); 106 | header.writeUInt16LE(blockAlign, 32); 107 | header.writeUInt16LE(bitsPerSample, 34); 108 | header.write('data', 36); 109 | header.writeUInt32LE(frame.data.byteLength, 40); 110 | return Buffer.concat([header, Buffer.from(frame.data.buffer, frame.data.byteOffset, frame.data.byteLength)]); 111 | } 112 | 113 | async _recognize(buffer: AudioBuffer, language?: string): Promise<stt.SpeechEvent> { 114 | const config = this.#sanitizeOptions(language); 115 | buffer = mergeFrames(buffer); 116 | const file = new File([this.#createWav(buffer)], 'audio.wav', { type: 'audio/wav' }); 117 | const resp = await this.#client.audio.transcriptions.create({ 118 | file, 119 | model: this.#opts.model, 120
| language: config.language, 121 | prompt: config.prompt, 122 | response_format: 'json', 123 | }); 124 | // Report the language that was actually sent with the request (per-call override or configured default). 125 | return { 126 | type: stt.SpeechEventType.FINAL_TRANSCRIPT, 127 | alternatives: [ 128 | { 129 | text: resp.text || '', 130 | language: config.language, 131 | startTime: 0, 132 | endTime: 0, 133 | confidence: 0, 134 | }, 135 | ], 136 | }; 137 | } 138 | 139 | /** This method throws an error; streaming is unsupported on OpenAI STT. */ 140 | stream(): stt.SpeechStream { 141 | throw new Error('Streaming is not supported on OpenAI STT'); 142 | } 143 | } 144 | -------------------------------------------------------------------------------- /plugins/openai/src/tts.test.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { tts } from '@livekit/agents-plugins-test'; 5 | import { describe } from 'vitest'; 6 | import { STT } from './stt.js'; 7 | import { TTS } from './tts.js'; 8 | 9 | describe('OpenAI', async () => { 10 | await tts(new TTS(), new STT(), { streaming: false }); 11 | }); 12 | -------------------------------------------------------------------------------- /plugins/openai/src/tts.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { AudioByteStream, tts } from '@livekit/agents'; 5 | import type { AudioFrame } from '@livekit/rtc-node'; 6 | import { randomUUID } from 'crypto'; 7 | import { OpenAI } from 'openai'; 8 | import type { TTSModels, TTSVoices } from './models.js'; 9 | 10 | const OPENAI_TTS_SAMPLE_RATE = 24000; 11 | const OPENAI_TTS_CHANNELS = 1; 12 | 13 | export interface TTSOptions { 14 | model: TTSModels | string; 15 | voice: TTSVoices; 16 | speed: number; 17 | instructions?: string; 18 | baseURL?: string; 19 | client?: OpenAI; 20 | apiKey?: string; 21 | } 22 | 23 | const defaultTTSOptions: TTSOptions = { 24 | apiKey: process.env.OPENAI_API_KEY, 25 | model: 'tts-1', 26 | voice: 'alloy', 27 | speed: 1, 28 | }; 29 | 30 | export class TTS extends tts.TTS { 31 | #opts: TTSOptions; 32 | #client: OpenAI; 33 | label = 'openai.TTS'; 34 | 35 | /** 36 | * Create a new instance of OpenAI TTS. 37 | * 38 | * @remarks 39 | * `apiKey` must be set to your OpenAI API key, either using the argument or by setting the 40 | * `OPENAI_API_KEY` environmental variable. 
41 | */ 42 | constructor(opts: Partial<TTSOptions> = defaultTTSOptions) { 43 | super(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS, { streaming: false }); 44 | 45 | this.#opts = { ...defaultTTSOptions, ...opts }; 46 | if (this.#opts.apiKey === undefined) { 47 | throw new Error('OpenAI API key is required, whether as an argument or as $OPENAI_API_KEY'); 48 | } 49 | // Build the client from the merged options so the key validated above is the one actually used. 50 | this.#client = 51 | this.#opts.client || 52 | new OpenAI({ 53 | baseURL: this.#opts.baseURL, 54 | apiKey: this.#opts.apiKey, 55 | }); 56 | } 57 | 58 | updateOptions(opts: { model?: TTSModels | string; voice?: TTSVoices; speed?: number }) { 59 | this.#opts = { ...this.#opts, ...opts }; 60 | } 61 | 62 | synthesize(text: string): ChunkedStream { 63 | return new ChunkedStream( 64 | this, 65 | text, 66 | this.#client.audio.speech.create({ 67 | input: text, 68 | model: this.#opts.model, 69 | voice: this.#opts.voice, 70 | instructions: this.#opts.instructions, 71 | response_format: 'pcm', 72 | speed: this.#opts.speed, 73 | }), 74 | ); 75 | } 76 | 77 | stream(): tts.SynthesizeStream { 78 | throw new Error('Streaming is not supported on OpenAI TTS'); 79 | } 80 | } 81 | 82 | export class ChunkedStream extends tts.ChunkedStream { 83 | label = 'openai.ChunkedStream'; 84 | 85 | // set Promise to any because OpenAI returns an annoying Response type 86 | constructor(tts: TTS, text: string, stream: Promise<any>) { 87 | super(text, tts); 88 | this.#run(stream); 89 | } 90 | 91 | async #run(stream: Promise<any>) { 92 | const buffer = await stream.then((r) => r.arrayBuffer()); 93 | const requestId = randomUUID(); 94 | const audioByteStream = new AudioByteStream(OPENAI_TTS_SAMPLE_RATE, OPENAI_TTS_CHANNELS); 95 | const frames = audioByteStream.write(buffer); 96 | 97 | let lastFrame: AudioFrame | undefined; 98 | const sendLastFrame = (segmentId: string, final: boolean) => { 99 | if (lastFrame) { 100 | this.queue.put({ requestId, segmentId, frame: lastFrame, final }); 101 | lastFrame = undefined; 102 | } 103 | }; 104 | 105 | for (const frame of frames) { 106 
| sendLastFrame(requestId, false); 107 | lastFrame = frame; 108 | } 109 | sendLastFrame(requestId, true); 110 | 111 | this.queue.close(); 112 | } 113 | } 114 | -------------------------------------------------------------------------------- /plugins/openai/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["./src"], 4 | "compilerOptions": { 5 | // match output dir to input dir. e.g. dist/index instead of dist/src/index 6 | "rootDir": "./src", 7 | "declarationDir": "./dist", 8 | "outDir": "./dist" 9 | }, 10 | "typedocOptions": { 11 | "name": "plugins/agents-plugin-openai", 12 | "entryPointStrategy": "resolve", 13 | "entryPoints": ["src/index.ts"] 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /plugins/openai/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | import defaults from '../../tsup.config'; 4 | 5 | export default defineConfig({ 6 | ...defaults, 7 | }); 8 | -------------------------------------------------------------------------------- /plugins/resemble/CHANGELOG.md: -------------------------------------------------------------------------------- 1 | # @livekit/agents-plugin-resemble 2 | 3 | ## 0.1.0 4 | 5 | ### Minor Changes 6 | 7 | - initial version - [#343](https://github.com/livekit/agents-js/pull/343) ([@nbsp](https://github.com/nbsp)) 8 | 9 | ### Patch Changes 10 | 11 | - Updated dependencies [[`d44445934cc291df987013068f5c43491634dfa1`](https://github.com/livekit/agents-js/commit/d44445934cc291df987013068f5c43491634dfa1), [`a7350c92f8968e0fd833e7679a607eaf9a1d7e7f`](https://github.com/livekit/agents-js/commit/a7350c92f8968e0fd833e7679a607eaf9a1d7e7f), [`2dcfeab76ace2e1851993771d769ebcb7c188144`](https://github.com/livekit/agents-js/commit/2dcfeab76ace2e1851993771d769ebcb7c188144), 
[`2bb936c55233ac0747582a5045caa595c6338651`](https://github.com/livekit/agents-js/commit/2bb936c55233ac0747582a5045caa595c6338651)]: 12 | - @livekit/agents@0.7.2 13 | -------------------------------------------------------------------------------- /plugins/resemble/README.md: -------------------------------------------------------------------------------- 1 | 6 | # Resemble plugin for LiveKit Agents 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 11 | 12 | This package contains the Resemble plugin, which allows for voice synthesis. 13 | Refer to the [documentation](https://docs.livekit.io/agents/overview/) for 14 | information on how to use it, or browse the [API 15 | reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_resemble.html). 16 | See the [repository](https://github.com/livekit/agents-js) for more information 17 | about the framework as a whole. 18 | -------------------------------------------------------------------------------- /plugins/resemble/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 
14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /plugins/resemble/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugin-resemble", 3 | "version": "0.1.0", 4 | "description": "Resemble plugin for LiveKit Node Agents", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\"", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/agents": "workspace:^", 34 | "@livekit/agents-plugin-openai": "workspace:^", 35 | "@livekit/agents-plugins-test": "workspace:^", 36 | "@livekit/rtc-node": "^0.13.11", 37 | "@microsoft/api-extractor": "^7.35.0", 38 | "@types/ws": "^8.5.10", 39 | "tsup": "^8.3.5", 40 | "typescript": "^5.0.0" 41 | }, 42 | "dependencies": { 43 | "ws": "^8.16.0" 44 | }, 45 | "peerDependencies": { 46 | "@livekit/agents": "workspace:^x", 47 | "@livekit/rtc-node": "^0.13.11" 48 | } 49 | } 50 | 
-------------------------------------------------------------------------------- /plugins/resemble/src/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | export * from './tts.js'; 6 | export * from './models.js'; 7 | -------------------------------------------------------------------------------- /plugins/resemble/src/models.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | /** 6 | * Output format for Resemble TTS 7 | */ 8 | export type OutputFormat = 'wav'; 9 | 10 | export type Precision = 'PCM_16'; 11 | -------------------------------------------------------------------------------- /plugins/resemble/src/tts.test.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { STT } from '@livekit/agents-plugin-openai'; 5 | import { tts } from '@livekit/agents-plugins-test'; 6 | import { describe } from 'vitest'; 7 | import { TTS } from './tts.js'; 8 | 9 | describe('Resemble', async () => { 10 | await tts(new TTS(), new STT(), { nonStreaming: true }); 11 | }); 12 | -------------------------------------------------------------------------------- /plugins/resemble/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["src/**/*"], 4 | "compilerOptions": { 5 | "baseUrl": ".", 6 | "outDir": "dist", 7 | "rootDir": "src" 8 | }, 9 | "typedocOptions": { 10 | "name": "plugins/agents-plugin-resemble", 11 | "entryPointStrategy": "resolve", 12 | "readme": "none", 13 | "entryPoints": ["src/index.ts"] 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /plugins/resemble/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | import defaults from '../../tsup.config'; 4 | 5 | export default defineConfig({ 6 | ...defaults, 7 | }); 8 | -------------------------------------------------------------------------------- /plugins/silero/.gitattributes: -------------------------------------------------------------------------------- 1 | # SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | # 3 | # SPDX-License-Identifier: Apache-2.0 4 | 5 | **/*.onnx filter=lfs diff=lfs merge=lfs -text 6 | -------------------------------------------------------------------------------- /plugins/silero/README.md: -------------------------------------------------------------------------------- 1 | 6 | # Silero plugin for LiveKit Agents 7 | 8 | The Agents Framework is designed for building realtime, programmable 9 | participants that run on servers. 
Use it to create conversational, multi-modal 10 | voice agents that can see, hear, and understand. 11 | 12 | This package contains the Silero plugin, providing voice activity detection. 13 | Refer to the [documentation](https://docs.livekit.io/agents/overview/) for 14 | information on how to use it, or browse the [API 15 | reference](https://docs.livekit.io/agents-js/modules/plugins_agents_plugin_silero.html). 16 | See the [repository](https://github.com/livekit/agents-js) for more information 17 | about the framework as a whole. 18 | -------------------------------------------------------------------------------- /plugins/silero/api-extractor.json: -------------------------------------------------------------------------------- 1 | /** 2 | * Config file for API Extractor. For more info, please visit: https://api-extractor.com 3 | */ 4 | { 5 | "$schema": "https://developer.microsoft.com/json-schemas/api-extractor/v7/api-extractor.schema.json", 6 | 7 | /** 8 | * Optionally specifies another JSON config file that this file extends from. This provides a way for 9 | * standard settings to be shared across multiple projects. 10 | * 11 | * If the path starts with "./" or "../", the path is resolved relative to the folder of the file that contains 12 | * the "extends" field. Otherwise, the first path segment is interpreted as an NPM package name, and will be 13 | * resolved using NodeJS require(). 
14 | * 15 | * SUPPORTED TOKENS: none 16 | * DEFAULT VALUE: "" 17 | */ 18 | "extends": "../../api-extractor-shared.json", 19 | "mainEntryPointFilePath": "./dist/index.d.ts" 20 | } 21 | -------------------------------------------------------------------------------- /plugins/silero/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugin-silero", 3 | "version": "0.5.6", 4 | "description": "Silero voice activity detection LiveKit Node Agents", 5 | "main": "dist/index.js", 6 | "require": "dist/index.cjs", 7 | "types": "dist/index.d.ts", 8 | "exports": { 9 | ".": { 10 | "types": "./dist/index.d.ts", 11 | "import": "./dist/index.js", 12 | "require": "./dist/index.cjs" 13 | } 14 | }, 15 | "author": "LiveKit", 16 | "type": "module", 17 | "repository": "git@github.com:livekit/agents-js.git", 18 | "license": "Apache-2.0", 19 | "files": [ 20 | "dist", 21 | "src", 22 | "README.md" 23 | ], 24 | "scripts": { 25 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\" && cp src/silero_vad.onnx dist/", 26 | "clean": "rm -rf dist", 27 | "clean:build": "pnpm clean && pnpm build", 28 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"", 29 | "api:check": "api-extractor run --typescript-compiler-folder ../../node_modules/typescript", 30 | "api:update": "api-extractor run --local --typescript-compiler-folder ../../node_modules/typescript --verbose" 31 | }, 32 | "devDependencies": { 33 | "@livekit/agents": "workspace:^x", 34 | "@livekit/rtc-node": "^0.13.11", 35 | "@microsoft/api-extractor": "^7.35.0", 36 | "@types/ws": "^8.5.10", 37 | "onnxruntime-common": "^1.19.2", 38 | "tsup": "^8.3.5", 39 | "typescript": "^5.0.0" 40 | }, 41 | "dependencies": { 42 | "onnxruntime-node": "^1.19.2", 43 | "ws": "^8.16.0" 44 | }, 45 | "peerDependencies": { 46 | "@livekit/agents": "workspace:^x", 47 | "@livekit/rtc-node": "^0.13.11" 48 | } 49 | } 50 | 
-------------------------------------------------------------------------------- /plugins/silero/src/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | export { VAD, VADStream } from './vad.js'; 5 | -------------------------------------------------------------------------------- /plugins/silero/src/onnx_model.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { fileURLToPath } from 'node:url'; 5 | import { InferenceSession, Tensor } from 'onnxruntime-node'; 6 | 7 | export type SampleRate = 8000 | 16000; 8 | 9 | export const newInferenceSession = (forceCPU: boolean) => { 10 | return InferenceSession.create(fileURLToPath(new URL('silero_vad.onnx', import.meta.url).href), { 11 | interOpNumThreads: 1, 12 | intraOpNumThreads: 1, 13 | executionMode: 'sequential', 14 | executionProviders: forceCPU ? 
[{ name: 'cpu' }] : undefined, 15 | }); 16 | }; 17 | 18 | export class OnnxModel { 19 | #session: InferenceSession; 20 | #sampleRate: number; 21 | #windowSizeSamples: number; 22 | #contextSize: number; 23 | #sampleRateNd: BigInt64Array; 24 | #context: Float32Array; 25 | // #state: Float32Array; 26 | #rnnState: Float32Array; 27 | #inputBuffer: Float32Array; 28 | 29 | constructor(session: InferenceSession, sampleRate: SampleRate) { 30 | this.#session = session; 31 | this.#sampleRate = sampleRate; 32 | 33 | switch (sampleRate) { 34 | case 8000: 35 | this.#windowSizeSamples = 256; 36 | this.#contextSize = 32; 37 | break; 38 | case 16000: 39 | this.#windowSizeSamples = 512; 40 | this.#contextSize = 64; 41 | break; 42 | } 43 | 44 | this.#sampleRateNd = BigInt64Array.from([BigInt(sampleRate)]); 45 | this.#context = new Float32Array(this.#contextSize); 46 | this.#rnnState = new Float32Array(2 * 1 * 128); 47 | this.#inputBuffer = new Float32Array(this.#contextSize + this.#windowSizeSamples); 48 | } 49 | 50 | get sampleRate(): number { 51 | return this.#sampleRate; 52 | } 53 | 54 | get windowSizeSamples(): number { 55 | return this.#windowSizeSamples; 56 | } 57 | 58 | get contextSize(): number { 59 | return this.#contextSize; 60 | } 61 | 62 | async run(x: Float32Array): Promise<number> { 63 | this.#inputBuffer.set(this.#context, 0); 64 | this.#inputBuffer.set(x, this.#contextSize); 65 | // #inputBuffer is now laid out as [previous context | new window]. 66 | return await this.#session 67 | .run({ 68 | input: new Tensor('float32', this.#inputBuffer, [ 69 | 1, 70 | this.#contextSize + this.#windowSizeSamples, 71 | ]), 72 | state: new Tensor('float32', this.#rnnState, [2, 1, 128]), 73 | sr: new Tensor('int64', this.#sampleRateNd), 74 | }) 75 | .then((result) => { 76 | // Carry the tail of this input forward as the next call's context; it is copied because #inputBuffer is overwritten on the next run. NOTE(review): #rnnState is never refreshed from the session outputs — confirm whether the model's state output should be fed back here. 77 | this.#context = this.#inputBuffer.slice(-this.#contextSize); 78 | return (result.output!.data as Float32Array).at(0)!; 79 | }); 80 | } 81 | } 82 | 
-------------------------------------------------------------------------------- /plugins/silero/src/onnxruntime.d.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | 5 | // https://github.com/microsoft/onnxruntime/issues/17979 6 | declare module 'onnxruntime-node' { 7 | export * from 'onnxruntime-common'; 8 | } 9 | -------------------------------------------------------------------------------- /plugins/silero/src/silero_vad.onnx: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:6b99cbfd39246b6706f98ec13c7c50c6b299181f2474fa05cbc8046acc274396 3 | size 2313101 4 | -------------------------------------------------------------------------------- /plugins/silero/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["./src"], 4 | "compilerOptions": { 5 | // match output dir to input dir. e.g. 
dist/index instead of dist/src/index 6 | "rootDir": "./src", 7 | "declarationDir": "./dist", 8 | "outDir": "./dist" 9 | }, 10 | "typedocOptions": { 11 | "name": "plugins/agents-plugin-silero", 12 | "entryPointStrategy": "resolve", 13 | "entryPoints": ["src/index.ts"] 14 | } 15 | } 16 | -------------------------------------------------------------------------------- /plugins/silero/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | import defaults from '../../tsup.config'; 4 | 5 | export default defineConfig({ 6 | ...defaults, 7 | }); 8 | -------------------------------------------------------------------------------- /plugins/test/.gitattributes: -------------------------------------------------------------------------------- 1 | *.wav filter=lfs diff=lfs merge=lfs -text 2 | -------------------------------------------------------------------------------- /plugins/test/README.md: -------------------------------------------------------------------------------- 1 | 6 | # Testing suite for LiveKit Agents plugins 7 | 8 | This package contains testing facilities for LiveKit Agents plugins. To use the 9 | tests, install this package as a dev dependency, and refer to other plugins for 10 | usage information. 
11 | -------------------------------------------------------------------------------- /plugins/test/package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "@livekit/agents-plugins-test", 3 | "version": "0.0.0", 4 | "description": "Testing suite for LiveKit Agents plugins", 5 | "author": "LiveKit", 6 | "type": "module", 7 | "private": true, 8 | "repository": "git@github.com:livekit/agents-js.git", 9 | "license": "Apache-2.0", 10 | "main": "dist/index.js", 11 | "require": "dist/index.cjs", 12 | "types": "dist/index.d.ts", 13 | "exports": { 14 | ".": { 15 | "types": "./dist/index.d.ts", 16 | "import": "./dist/index.js", 17 | "require": "./dist/index.cjs" 18 | } 19 | }, 20 | "files": [ 21 | "src", 22 | "dist", 23 | "README.md" 24 | ], 25 | "scripts": { 26 | "build": "tsup --onSuccess \"tsc --declaration --emitDeclarationOnly\" && cp src/long.wav dist/", 27 | "lint": "eslint -f unix \"src/**/*.{ts,js}\"" 28 | }, 29 | "devDependencies": { 30 | "@livekit/agents": "workspace:^x", 31 | "@livekit/rtc-node": "^0.13.11", 32 | "@types/node": "^22.5.5", 33 | "tsup": "^8.3.5", 34 | "typescript": "^5.0.0" 35 | }, 36 | "dependencies": { 37 | "fastest-levenshtein": "^1.0.16", 38 | "vitest": "^1.6.0", 39 | "zod": "^3.23.8" 40 | }, 41 | "peerDependencies": { 42 | "@livekit/agents": "workspace:^x", 43 | "@livekit/rtc-node": "^0.13.11" 44 | } 45 | } 46 | -------------------------------------------------------------------------------- /plugins/test/src/index.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 
2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | export { tts } from './tts.js'; 5 | export { llm } from './llm.js'; 6 | export { stt } from './stt.js'; 7 | -------------------------------------------------------------------------------- /plugins/test/src/long.wav: -------------------------------------------------------------------------------- 1 | version https://git-lfs.github.com/spec/v1 2 | oid sha256:8ab87c695f2525c7553a4f0044335ab9466576f1115a4299e3fac3d3f8d9b795 3 | size 2398244 4 | -------------------------------------------------------------------------------- /plugins/test/src/tts.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import type { stt } from '@livekit/agents'; 5 | import { type AudioBuffer, initializeLogger, tokenize, tts as ttslib } from '@livekit/agents'; 6 | import type { AudioFrame } from '@livekit/rtc-node'; 7 | import { distance } from 'fastest-levenshtein'; 8 | import { describe, expect, it } from 'vitest'; 9 | 10 | const TEXT = 11 | 'The people who are crazy enough to think they can change the world are the ones who do.'; 12 | 13 | const validate = async (frames: AudioBuffer, stt: stt.STT, text: string, threshold: number) => { 14 | const event = await stt.recognize(frames); 15 | const eventText = event.alternatives![0].text.toLowerCase().replace(/\s/g, ' ').trim(); 16 | text = text.toLowerCase().replace(/\s/g, ' ').trim(); 17 | expect(distance(text, eventText) / text.length).toBeLessThanOrEqual(threshold); 18 | }; 19 | 20 | export const tts = async ( 21 | tts: ttslib.TTS, 22 | stt: stt.STT, 23 | supports: Partial<{ streaming: boolean; nonStreaming: boolean }> = {}, 24 | ) => { 25 | initializeLogger({ pretty: false }); 26 | supports = { streaming: true, nonStreaming: true, ...supports }; 27 | describe('TTS', () => { 28 | it.skipIf(!supports.nonStreaming)('should properly synthesize text', async 
() => { 29 | const synthesize = tts.synthesize(TEXT); 30 | const frames = await synthesize.collect(); 31 | synthesize.close(); 32 | await validate(frames, stt, TEXT, 0.2); 33 | }); 34 | 35 | it('should properly stream synthesize text', async () => { 36 | let stream: ttslib.SynthesizeStream; 37 | if (supports.streaming) { 38 | stream = tts.stream(); 39 | } else { 40 | stream = new ttslib.StreamAdapter(tts, new tokenize.basic.SentenceTokenizer()).stream(); 41 | } 42 | 43 | const pattern = [1, 2, 4]; 44 | let text = TEXT; 45 | const chunks = []; 46 | const patternIter = Array(Math.ceil(text.length / pattern.reduce((sum, num) => sum + num, 0))) 47 | .fill(pattern) 48 | .flat() 49 | [Symbol.iterator](); 50 | 51 | for (const size of patternIter) { 52 | if (!text) break; 53 | chunks.push(text.slice(undefined, size)); 54 | text = text.slice(size); 55 | } 56 | 57 | for (const chunk of chunks) { 58 | stream.pushText(chunk); 59 | } 60 | stream.flush(); 61 | stream.endInput(); 62 | 63 | const frames: AudioFrame[] = []; 64 | for await (const event of stream) { 65 | if (event === ttslib.SynthesizeStream.END_OF_STREAM) break; 66 | frames.push(event.frame); 67 | } 68 | 69 | await validate(frames, stt, TEXT, 0.2); 70 | stream.close(); 71 | }); 72 | }); 73 | }; 74 | -------------------------------------------------------------------------------- /plugins/test/tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "extends": "../../tsconfig.json", 3 | "include": ["./src"], 4 | "compilerOptions": { 5 | // match output dir to input dir. e.g. 
dist/index instead of dist/src/index 6 | "rootDir": "./src", 7 | "declarationDir": "./dist", 8 | "outDir": "./dist", 9 | } 10 | } 11 | -------------------------------------------------------------------------------- /plugins/test/tsup.config.ts: -------------------------------------------------------------------------------- 1 | import { defineConfig } from 'tsup'; 2 | 3 | import defaults from '../../tsup.config'; 4 | 5 | export default defineConfig({ 6 | ...defaults, 7 | }); 8 | 9 | -------------------------------------------------------------------------------- /pnpm-workspace.yaml: -------------------------------------------------------------------------------- 1 | packages: 2 | - "agents" 3 | - "plugins/*" 4 | - "examples" 5 | -------------------------------------------------------------------------------- /tsconfig.json: -------------------------------------------------------------------------------- 1 | { 2 | "compilerOptions": { 3 | "rootDir": ".", 4 | "baseUrl": ".", 5 | "target": "es2022", 6 | "module": "node16", 7 | "declaration": true, 8 | "declarationMap": true, 9 | "sourceMap": true, 10 | "moduleResolution": "node16", 11 | "strict": true, 12 | "esModuleInterop": true, 13 | "skipLibCheck": true, 14 | "forceConsistentCasingInFileNames": true, 15 | "verbatimModuleSyntax": true, 16 | "isolatedModules": true, 17 | "noUncheckedIndexedAccess": true, 18 | "paths": { 19 | "@livekit/agents": ["agents/src"], 20 | "@livekit/agents-plugin-*": ["plugins/*/src"] 21 | } 22 | }, 23 | 24 | "typedocOptions": { 25 | "entryPoints": ["agents", "plugins/*"], 26 | "entryPointStrategy": "packages", 27 | "name": "LiveKit Agents", 28 | "exclude": ["plugins/test"], 29 | "excludeInternal": true, 30 | "excludePrivate": true, 31 | "excludeProtected": true, 32 | "excludeExternals": true, 33 | "includeVersion": true, 34 | "out": "docs", 35 | "theme": "default" 36 | } 37 | } 38 | -------------------------------------------------------------------------------- /tsup.config.ts: 
-------------------------------------------------------------------------------- 1 | import { Options } from 'tsup'; 2 | 3 | const defaultOptions: Options = { 4 | entry: ['src/**/*.ts'], 5 | format: ['cjs', 'esm'], 6 | splitting: false, 7 | sourcemap: true, 8 | // for the type maps to work, we use tsc's declaration-only command 9 | dts: false, 10 | clean: true, 11 | target: 'node16', 12 | bundle: false, 13 | shims: true, 14 | esbuildOptions: (options, context) => { 15 | if (context.format === 'esm') { 16 | options.packages = 'external'; 17 | } 18 | }, 19 | plugins: [ 20 | { 21 | // https://github.com/egoist/tsup/issues/953#issuecomment-2294998890 22 | // ensuring that all local requires/imports in `.cjs` files import from `.cjs` files. 23 | // require('./path') → require('./path.cjs') in `.cjs` files 24 | // require('../path') → require('../path.cjs') in `.cjs` files 25 | // from './path' → from './path.cjs' in `.cjs` files 26 | // from '../path' → from '../path.cjs' in `.cjs` files 27 | name: 'fix-cjs-imports', 28 | renderChunk(code) { 29 | if (this.format === 'cjs') { 30 | const regexCjs = /require\((?<quote>['"])(?<import>\.[^'"]+)\.js['"]\)/g; 31 | const regexDynamic = /import\((?<quote>['"])(?<import>\.[^'"]+)\.js['"]\)/g; 32 | const regexEsm = /from(?<space>[\s]*)(?<quote>['"])(?<import>\.[^'"]+)\.js['"]/g; 33 | return { 34 | code: code 35 | .replace(regexCjs, 'require($<quote>$<import>.cjs$<quote>)') 36 | .replace(regexDynamic, 'import($<quote>$<import>.cjs$<quote>)') 37 | .replace(regexEsm, 'from$<space>$<quote>$<import>.cjs$<quote>'), 38 | }; 39 | } 40 | }, 41 | }, 42 | ], 43 | }; 44 | export default defaultOptions; 45 | -------------------------------------------------------------------------------- /turbo.json: -------------------------------------------------------------------------------- 1 | { 2 | "$schema": "https://turborepo.org/schema.json", 3 | "globalEnv": [ 4 | "AZURE_API_KEY", 5 | "AZURE_OPENAI_API_KEY", 6 | "AZURE_OPENAI_DEPLOYMENT", 7 | "AZURE_OPENAI_ENDPOINT", 8 | "AZURE_OPENAI_ENTRA_TOKEN", 9 | "CARTESIA_API_KEY", 10 | "CEREBRAS_API_KEY", 11 | "DEEPGRAM_API_KEY", 
12 | "DEEPSEEK_API_KEY", 13 | "ELEVEN_API_KEY", 14 | "FIREWORKS_API_KEY", 15 | "GROQ_API_KEY", 16 | "LIVEKIT_API_KEY", 17 | "LIVEKIT_API_SECRET", 18 | "LIVEKIT_URL", 19 | "LLAMA_API_KEY", 20 | "LOG_LEVEL", 21 | "OCTOAI_TOKEN", 22 | "OPENAI_API_KEY", 23 | "PERPLEXITY_API_KEY", 24 | "TELNYX_API_KEY", 25 | "TOGETHER_API_KEY", 26 | "XAI_API_KEY", 27 | "NEUPHONIC_API_KEY", 28 | "RESEMBLE_API_KEY" 29 | ], 30 | "pipeline": { 31 | "build": { 32 | "dependsOn": ["^build"], 33 | "outputs": ["dist/**"] 34 | }, 35 | "clean": { 36 | "dependsOn": ["^clean"], 37 | "outputs": [""] 38 | }, 39 | "clean:build": { 40 | "dependsOn": ["^clean:build"], 41 | "outputs": ["dist/**"] 42 | }, 43 | "lint": { 44 | "outputs": [] 45 | }, 46 | "api:check": { 47 | "cache": false, 48 | "dependsOn": ["^build"] 49 | }, 50 | "api:update": { 51 | "dependsOn": ["^build"] 52 | } 53 | } 54 | } 55 | -------------------------------------------------------------------------------- /vitest.workspace.ts: -------------------------------------------------------------------------------- 1 | // SPDX-FileCopyrightText: 2024 LiveKit, Inc. 2 | // 3 | // SPDX-License-Identifier: Apache-2.0 4 | import { defineWorkspace } from 'vitest/config'; 5 | 6 | // defineWorkspace provides a nice type hinting DX 7 | export default defineWorkspace([ 8 | 'packages/*', 9 | { 10 | test: { 11 | include: ['**/*.test.ts'], 12 | // it is recommended to define a name when using inline configs 13 | name: 'nodejs', 14 | environment: 'node', 15 | testTimeout: 60_000, 16 | }, 17 | }, 18 | ]); 19 | --------------------------------------------------------------------------------