├── .dockerignore ├── .env-example ├── .github └── workflows │ ├── docker.yml │ └── prettier.yml ├── .gitignore ├── .npmrc ├── .prettierrc ├── Dockerfile ├── README.md ├── docker-compose.yml ├── docs ├── .nojekyll ├── README.md ├── _sidebar.md ├── index.html └── pages │ ├── configure-prefix.md │ ├── disclaimer.md │ ├── docker.md │ ├── gpt.md │ ├── installation.md │ ├── langchain.md │ ├── send-messages-to-yourself.md │ ├── transcription.md │ ├── tts.md │ └── usage.md ├── installer.sh ├── package-lock.json ├── package.json └── src ├── cli └── ui.ts ├── commands ├── chat.ts ├── general.ts ├── gpt.ts ├── stable-diffusion.ts ├── transcription.ts └── tts.ts ├── config.ts ├── constants.ts ├── handlers ├── ai-config.ts ├── dalle.ts ├── gpt.ts ├── langchain.ts ├── message.ts └── moderation.ts ├── index.ts ├── providers ├── aws.ts ├── browser-agent.ts ├── openai.ts ├── speech.ts ├── whisper-api.ts └── whisper-local.ts ├── types ├── ai-config.ts ├── aws-polly-engine.ts ├── commands.ts ├── dalle-config.ts ├── transcription-mode.ts └── tts-mode.ts └── utils.ts /.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules/* -------------------------------------------------------------------------------- /.env-example: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------- # 2 | # Make a copy of this file and rename it to .env # 3 | # # 4 | # Also check out our documentation: # 5 | # https://askrella.github.io/whatsapp-chatgpt # 6 | # ----------------------------------------------- # 7 | 8 | # Get your key here: https://platform.openai.com/account/api-keys 9 | OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 10 | 11 | # HuggingFace API Token https://huggingface.co/settings/tokens 12 | HUGGINGFACE_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 13 | 14 | # In case you run into ratelimit on a single organization token, you might setting 
up multiple API keys here 15 | # Example: 16 | # OPENAI_API_KEYS=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 17 | 18 | # GPT Model (default: gpt-3.5-turbo) 19 | OPENAI_GPT_MODEL= 20 | 21 | # Max tokens to use for GPT per request 22 | MAX_MODEL_TOKENS=2000 23 | 24 | # GPT Pre Prompt, executed after creating a conversation 25 | # Example: Act very funny and overreact to messages. Do that for every message you get, forever. 26 | PRE_PROMPT= 27 | 28 | # Whether or not to use prefixes !gpt and !dalle 29 | PREFIX_ENABLED=true 30 | 31 | # Whether or not to use prefixes for self-note conversation 32 | PREFIX_SKIPPED_FOR_ME=true 33 | 34 | # Set own prefixes for ChatGPT, DALL-E, reset context, configuration 35 | GPT_PREFIX=!gpt 36 | DALLE_PREFIX=!dalle 37 | RESET_PREFIX=!reset 38 | AI_CONFIG_PREFIX=!config 39 | 40 | # Whether or not to allow the bot interacting on groupchats 41 | GROUPCHATS_ENABLED=false 42 | 43 | # Prompt Moderation 44 | # If enabled, the bot will check any prompts submitted by users with the OpenAI Moderation API 45 | # If the prompt is classified as any of the categories in the blacklisted categories, the prompt will be rejected 46 | # You can find the available categories here: https://beta.openai.com/docs/api-reference/moderations 47 | PROMPT_MODERATION_ENABLED = true 48 | PROMPT_MODERATION_BLACKLISTED_CATEGORIES = ["hate", "hate/threatening", "self-harm", "sexual", "sexual/minors", "violence", "violence/graphic"] 49 | 50 | # Access control, only allow whatsapp-chatgpt to react to specific phone numbers, comma-separated 51 | WHITELISTED_PHONE_NUMBERS= 52 | WHITELISTED_ENABLED=false 53 | # Speech API URL 54 | # You can use host your own Speech API 55 | # https://github.com/askrella/speech-rest-api 56 | SPEECH_API_URL= 57 | 58 | # Whisper API 59 | 60 | # API Key 61 | WHISPER_API_KEY= 62 | 63 | # You can use Whisper API for voice transcription 64 | WHISPER_API_URL=https://transcribe.whisperapi.com 65 | 66 | # 
Defines if the bot should recognize and transcribe your voice messages 67 | TRANSCRIPTION_ENABLED=false 68 | 69 | # Defines if the bot should use the local or remote transcription service 70 | # "local" = You need to have "whisper" installed on your machine 71 | # "openai" = It will use Open AI's transcription API with whisper-1 model 72 | # "speech-api" = It will use our Speech API to transcribe your voice messages 73 | # "whisper-api" = It will use whisper's API to transcribe your voice messages 74 | TRANSCRIPTION_MODE=local 75 | 76 | # Define the language of transcription, depends on transcriber it might auto-detect if not given 77 | TRANSCRIPTION_LANGUAGE= 78 | 79 | # Defines if the bot should send voice message responses (text-to-speech) 80 | # Be aware that this feature will use the Speech API to convert the GPT response to voice 81 | # It's open source: https://github.com/askrella/speech-rest-api 82 | TTS_ENABLED=false 83 | 84 | # Defines if the bot should return the TTS response as a text message too 85 | # If enabled, the bot will send the text response and the voice message 86 | TTS_TRANSCRIPTION_RESPONSE_ENABLED=true 87 | 88 | # Defines if the bot should use the Speech API or AWS Polly to convert text to speech 89 | # "speech-api" = It will use our Speech API to transcribe your voice messages 90 | # "aws-polly" = It will use AWS Polly to convert text to speech 91 | TTS_MODE=speech-api 92 | 93 | # AWS Config 94 | # You can use AWS Polly to convert text to speech 95 | # You need to have an AWS account and create an IAM user with Polly permissions 96 | # You can find the available voices here: https://docs.aws.amazon.com/polly/latest/dg/voicelist.html 97 | # For the Voice Engine, you can use "standard" or "neural", make sure to use the correct voice for the engine 98 | AWS_ACCESS_KEY_ID=xxxxxxxxxxxxxxxxxxxx 99 | AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 100 | AWS_REGION=eu-central-1 101 | AWS_POLLY_VOICE_ID=Joanna 102 | 
AWS_POLLY_VOICE_ENGINE=standard 103 | 104 | # LangChain Tool Config https://js.langchain.com/docs/modules/agents/tools/ 105 | SERPAPI_API_KEY=xxxxxxxxx 106 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Docker 2 | 3 | # This workflow uses actions that are not certified by GitHub. 4 | # They are provided by a third-party and are governed by 5 | # separate terms of service, privacy policy, and support 6 | # documentation. 7 | 8 | on: 9 | schedule: 10 | - cron: "28 2 * * *" 11 | push: 12 | branches: ["master"] 13 | # Publish semver tags as releases. 14 | tags: ["v*.*.*"] 15 | pull_request: 16 | branches: ["master"] 17 | 18 | env: 19 | # Use docker.io for Docker Hub if empty 20 | REGISTRY: ghcr.io 21 | # github.repository as / 22 | IMAGE_NAME: ${{ github.repository }} 23 | 24 | jobs: 25 | build: 26 | runs-on: ubuntu-latest 27 | permissions: 28 | contents: read 29 | packages: write 30 | # This is used to complete the identity challenge 31 | # with sigstore/fulcio when running outside of PRs. 
32 | id-token: write 33 | 34 | steps: 35 | - name: Checkout repository 36 | uses: actions/checkout@v3 37 | 38 | # Install the cosign tool except on PR 39 | # https://github.com/sigstore/cosign-installer 40 | - name: Install cosign 41 | if: github.event_name != 'pull_request' 42 | uses: sigstore/cosign-installer@v3.5.0 43 | with: 44 | cosign-release: "v2.2.4" 45 | 46 | # Workaround: https://github.com/docker/build-push-action/issues/461 47 | - name: Setup Docker buildx 48 | uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf 49 | 50 | # Login against a Docker registry except on PR 51 | # https://github.com/docker/login-action 52 | - name: Log into registry ${{ env.REGISTRY }} 53 | if: github.event_name != 'pull_request' 54 | uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c 55 | with: 56 | registry: ${{ env.REGISTRY }} 57 | username: ${{ github.actor }} 58 | password: ${{ secrets.GITHUB_TOKEN }} 59 | 60 | # Extract metadata (tags, labels) for Docker 61 | # https://github.com/docker/metadata-action 62 | - name: Extract Docker metadata 63 | id: meta 64 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 65 | with: 66 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 67 | 68 | # Build and push Docker image with Buildx (don't push on PR) 69 | # https://github.com/docker/build-push-action 70 | - name: Build and push Docker image 71 | id: build-and-push 72 | uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a 73 | with: 74 | context: . 75 | push: ${{ github.event_name != 'pull_request' }} 76 | tags: ${{ steps.meta.outputs.tags }} 77 | labels: ${{ steps.meta.outputs.labels }} 78 | cache-from: type=gha 79 | cache-to: type=gha,mode=max 80 | 81 | # Sign the resulting Docker image digest except on PRs. 82 | # This will only write to the public Rekor transparency log when the Docker 83 | # repository is public to avoid leaking data. 
If you would like to publish 84 | # transparency data even for private images, pass --force to cosign below. 85 | # https://github.com/sigstore/cosign 86 | - name: Sign the published Docker image 87 | if: ${{ github.event_name != 'pull_request' }} 88 | env: 89 | COSIGN_EXPERIMENTAL: "true" 90 | # This step uses the identity token to provision an ephemeral certificate 91 | # against the sigstore community Fulcio instance. 92 | run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign --yes {}@${{ steps.build-and-push.outputs.digest }} 93 | -------------------------------------------------------------------------------- /.github/workflows/prettier.yml: -------------------------------------------------------------------------------- 1 | name: Prettier 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | 8 | jobs: 9 | prettier: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v2 14 | 15 | - name: Install dependencies 16 | run: npm ci 17 | 18 | - name: Run Prettier 19 | run: npm run prettier 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/* 2 | .env 3 | session/ 4 | 5 | # Created by https://www.toptal.com/developers/gitignore/api/vs,intellij+all,node 6 | # Edit at https://www.toptal.com/developers/gitignore?templates=vs,intellij+all,node 7 | 8 | ### Intellij+all ### 9 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 10 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 11 | 12 | # User-specific stuff 13 | .idea/**/workspace.xml 14 | .idea/**/tasks.xml 15 | .idea/**/usage.statistics.xml 16 | .idea/**/dictionaries 17 | .idea/**/shelf 18 | 19 | # AWS User-specific 20 | .idea/**/aws.xml 21 | 22 | # Generated 
files 23 | .idea/**/contentModel.xml 24 | 25 | # Sensitive or high-churn files 26 | .idea/**/dataSources/ 27 | .idea/**/dataSources.ids 28 | .idea/**/dataSources.local.xml 29 | .idea/**/sqlDataSources.xml 30 | .idea/**/dynamic.xml 31 | .idea/**/uiDesigner.xml 32 | .idea/**/dbnavigator.xml 33 | 34 | # Gradle 35 | .idea/**/gradle.xml 36 | .idea/**/libraries 37 | 38 | # Gradle and Maven with auto-import 39 | # When using Gradle or Maven with auto-import, you should exclude module files, 40 | # since they will be recreated, and may cause churn. Uncomment if using 41 | # auto-import. 42 | # .idea/artifacts 43 | # .idea/compiler.xml 44 | # .idea/jarRepositories.xml 45 | # .idea/modules.xml 46 | # .idea/*.iml 47 | # .idea/modules 48 | # *.iml 49 | # *.ipr 50 | 51 | # CMake 52 | cmake-build-*/ 53 | 54 | # Mongo Explorer plugin 55 | .idea/**/mongoSettings.xml 56 | 57 | # File-based project format 58 | *.iws 59 | 60 | # IntelliJ 61 | out/ 62 | 63 | # mpeltonen/sbt-idea plugin 64 | .idea_modules/ 65 | 66 | # JIRA plugin 67 | atlassian-ide-plugin.xml 68 | 69 | # Cursive Clojure plugin 70 | .idea/replstate.xml 71 | 72 | # SonarLint plugin 73 | .idea/sonarlint/ 74 | 75 | # Crashlytics plugin (for Android Studio and IntelliJ) 76 | com_crashlytics_export_strings.xml 77 | crashlytics.properties 78 | crashlytics-build.properties 79 | fabric.properties 80 | 81 | # Editor-based Rest Client 82 | .idea/httpRequests 83 | 84 | # Android studio 3.1+ serialized cache file 85 | .idea/caches/build_file_checksums.ser 86 | 87 | ### Intellij+all Patch ### 88 | # Ignore everything but code style settings and run configurations 89 | # that are supposed to be shared within teams. 
90 | 91 | .idea/* 92 | 93 | !.idea/codeStyles 94 | !.idea/runConfigurations 95 | 96 | ### Node ### 97 | # Logs 98 | logs 99 | *.log 100 | npm-debug.log* 101 | yarn-debug.log* 102 | yarn-error.log* 103 | lerna-debug.log* 104 | .pnpm-debug.log* 105 | 106 | # Diagnostic reports (https://nodejs.org/api/report.html) 107 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 108 | 109 | # Runtime data 110 | pids 111 | *.pid 112 | *.seed 113 | *.pid.lock 114 | 115 | # Directory for instrumented libs generated by jscoverage/JSCover 116 | lib-cov 117 | 118 | # Coverage directory used by tools like istanbul 119 | coverage 120 | *.lcov 121 | 122 | # nyc test coverage 123 | .nyc_output 124 | 125 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 126 | .grunt 127 | 128 | # Bower dependency directory (https://bower.io/) 129 | bower_components 130 | 131 | # node-waf configuration 132 | .lock-wscript 133 | 134 | # Compiled binary addons (https://nodejs.org/api/addons.html) 135 | build/Release 136 | 137 | # Dependency directories 138 | node_modules/ 139 | jspm_packages/ 140 | 141 | # Snowpack dependency directory (https://snowpack.dev/) 142 | web_modules/ 143 | 144 | # TypeScript cache 145 | *.tsbuildinfo 146 | 147 | # Optional npm cache directory 148 | .npm 149 | 150 | # Optional eslint cache 151 | .eslintcache 152 | 153 | # Optional stylelint cache 154 | .stylelintcache 155 | 156 | # Microbundle cache 157 | .rpt2_cache/ 158 | .rts2_cache_cjs/ 159 | .rts2_cache_es/ 160 | .rts2_cache_umd/ 161 | 162 | # Optional REPL history 163 | .node_repl_history 164 | 165 | # Output of 'npm pack' 166 | *.tgz 167 | 168 | # Yarn Integrity file 169 | .yarn-integrity 170 | 171 | # dotenv environment variable files 172 | .env 173 | .env.development.local 174 | .env.test.local 175 | .env.production.local 176 | .env.local 177 | 178 | # parcel-bundler cache (https://parceljs.org/) 179 | .cache 180 | .parcel-cache 181 | 182 | # Next.js build output 183 | .next 184 | out 185 | 186 
| # Nuxt.js build / generate output 187 | .nuxt 188 | dist 189 | 190 | # Gatsby files 191 | .cache/ 192 | # Comment in the public line in if your project uses Gatsby and not Next.js 193 | # https://nextjs.org/blog/next-9-1#public-directory-support 194 | # public 195 | 196 | # vuepress build output 197 | .vuepress/dist 198 | 199 | # vuepress v2.x temp and cache directory 200 | .temp 201 | 202 | # Docusaurus cache and generated files 203 | .docusaurus 204 | 205 | # Serverless directories 206 | .serverless/ 207 | 208 | # FuseBox cache 209 | .fusebox/ 210 | 211 | # DynamoDB Local files 212 | .dynamodb/ 213 | 214 | # TernJS port file 215 | .tern-port 216 | 217 | # Stores VSCode versions used for testing VSCode extensions 218 | .vscode-test 219 | 220 | # yarn v2 221 | .yarn/cache 222 | .yarn/unplugged 223 | .yarn/build-state.yml 224 | .yarn/install-state.gz 225 | .pnp.* 226 | 227 | ### Node Patch ### 228 | # Serverless Webpack directories 229 | .webpack/ 230 | 231 | # Optional stylelint cache 232 | 233 | # SvelteKit build / generate output 234 | .svelte-kit 235 | 236 | ### vs ### 237 | ## Ignore Visual Studio temporary files, build results, and 238 | ## files generated by popular Visual Studio add-ons. 
239 | ## 240 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 241 | 242 | # User-specific files 243 | *.rsuser 244 | *.suo 245 | *.user 246 | *.userosscache 247 | *.sln.docstates 248 | 249 | # User-specific files (MonoDevelop/Xamarin Studio) 250 | *.userprefs 251 | 252 | # Mono auto generated files 253 | mono_crash.* 254 | 255 | # Build results 256 | [Dd]ebug/ 257 | [Dd]ebugPublic/ 258 | [Rr]elease/ 259 | [Rr]eleases/ 260 | x64/ 261 | x86/ 262 | [Aa][Rr][Mm]/ 263 | [Aa][Rr][Mm]64/ 264 | bld/ 265 | [Bb]in/ 266 | [Oo]bj/ 267 | [Ll]og/ 268 | [Ll]ogs/ 269 | 270 | # Visual Studio 2015/2017 cache/options directory 271 | .vs/ 272 | # Uncomment if you have tasks that create the project's static files in wwwroot 273 | #wwwroot/ 274 | 275 | # Visual Studio 2017 auto generated files 276 | Generated\ Files/ 277 | 278 | # MSTest test Results 279 | [Tt]est[Rr]esult*/ 280 | [Bb]uild[Ll]og.* 281 | 282 | # NUnit 283 | *.VisualState.xml 284 | TestResult.xml 285 | nunit-*.xml 286 | 287 | # Build Results of an ATL Project 288 | [Dd]ebugPS/ 289 | [Rr]eleasePS/ 290 | dlldata.c 291 | 292 | # Benchmark Results 293 | BenchmarkDotNet.Artifacts/ 294 | 295 | # .NET Core 296 | project.lock.json 297 | project.fragment.lock.json 298 | artifacts/ 299 | 300 | # StyleCop 301 | StyleCopReport.xml 302 | 303 | # Files built by Visual Studio 304 | *_i.c 305 | *_p.c 306 | *_h.h 307 | *.ilk 308 | *.meta 309 | *.obj 310 | *.iobj 311 | *.pch 312 | *.pdb 313 | *.ipdb 314 | *.pgc 315 | *.pgd 316 | *.rsp 317 | *.sbr 318 | *.tlb 319 | *.tli 320 | *.tlh 321 | *.tmp 322 | *.tmp_proj 323 | *_wpftmp.csproj 324 | *.vspscc 325 | *.vssscc 326 | .builds 327 | *.pidb 328 | *.svclog 329 | *.scc 330 | 331 | # Chutzpah Test files 332 | _Chutzpah* 333 | 334 | # Visual C++ cache files 335 | ipch/ 336 | *.aps 337 | *.ncb 338 | *.opendb 339 | *.opensdf 340 | *.sdf 341 | *.cachefile 342 | *.VC.db 343 | *.VC.VC.opendb 344 | 345 | # Visual Studio profiler 346 | *.psess 347 | *.vsp 348 | 
*.vspx 349 | *.sap 350 | 351 | # Visual Studio Trace Files 352 | *.e2e 353 | 354 | # TFS 2012 Local Workspace 355 | $tf/ 356 | 357 | # Guidance Automation Toolkit 358 | *.gpState 359 | 360 | # ReSharper is a .NET coding add-in 361 | _ReSharper*/ 362 | *.[Rr]e[Ss]harper 363 | *.DotSettings.user 364 | 365 | # TeamCity is a build add-in 366 | _TeamCity* 367 | 368 | # DotCover is a Code Coverage Tool 369 | *.dotCover 370 | 371 | # AxoCover is a Code Coverage Tool 372 | .axoCover/* 373 | !.axoCover/settings.json 374 | 375 | # Coverlet is a free, cross platform Code Coverage Tool 376 | coverage*[.json, .xml, .info] 377 | 378 | # Visual Studio code coverage results 379 | *.coverage 380 | *.coveragexml 381 | 382 | # NCrunch 383 | _NCrunch_* 384 | .*crunch*.local.xml 385 | nCrunchTemp_* 386 | 387 | # MightyMoose 388 | *.mm.* 389 | AutoTest.Net/ 390 | 391 | # Web workbench (sass) 392 | .sass-cache/ 393 | 394 | # Installshield output folder 395 | [Ee]xpress/ 396 | 397 | # DocProject is a documentation generator add-in 398 | DocProject/buildhelp/ 399 | DocProject/Help/*.HxT 400 | DocProject/Help/*.HxC 401 | DocProject/Help/*.hhc 402 | DocProject/Help/*.hhk 403 | DocProject/Help/*.hhp 404 | DocProject/Help/Html2 405 | DocProject/Help/html 406 | 407 | # Click-Once directory 408 | publish/ 409 | 410 | # Publish Web Output 411 | *.[Pp]ublish.xml 412 | *.azurePubxml 413 | # Note: Comment the next line if you want to checkin your web deploy settings, 414 | # but database connection strings (with potential passwords) will be unencrypted 415 | *.pubxml 416 | *.publishproj 417 | 418 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 419 | # checkin your Azure Web App publish settings, but sensitive information contained 420 | # in these scripts will be unencrypted 421 | PublishScripts/ 422 | 423 | # NuGet Packages 424 | *.nupkg 425 | # NuGet Symbol Packages 426 | *.snupkg 427 | # The packages folder can be ignored because of Package Restore 428 | **/[Pp]ackages/* 429 | # except build/, which is used as an MSBuild target. 430 | !**/[Pp]ackages/build/ 431 | # Uncomment if necessary however generally it will be regenerated when needed 432 | #!**/[Pp]ackages/repositories.config 433 | # NuGet v3's project.json files produces more ignorable files 434 | *.nuget.props 435 | *.nuget.targets 436 | 437 | # Microsoft Azure Build Output 438 | csx/ 439 | *.build.csdef 440 | 441 | # Microsoft Azure Emulator 442 | ecf/ 443 | rcf/ 444 | 445 | # Windows Store app package directories and files 446 | AppPackages/ 447 | BundleArtifacts/ 448 | Package.StoreAssociation.xml 449 | _pkginfo.txt 450 | *.appx 451 | *.appxbundle 452 | *.appxupload 453 | 454 | # Visual Studio cache files 455 | # files ending in .cache can be ignored 456 | *.[Cc]ache 457 | # but keep track of directories ending in .cache 458 | !?*.[Cc]ache/ 459 | 460 | # Others 461 | ClientBin/ 462 | ~$* 463 | *~ 464 | *.dbmdl 465 | *.dbproj.schemaview 466 | *.jfm 467 | *.pfx 468 | *.publishsettings 469 | orleans.codegen.cs 470 | 471 | # Including strong name files can present a security risk 472 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 473 | #*.snk 474 | 475 | # Since there are multiple workflows, uncomment next line to ignore bower_components 476 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 477 | #bower_components/ 478 | 479 | # RIA/Silverlight projects 480 | Generated_Code/ 481 | 482 | # Backup & report files from converting an old project file 483 | # to a newer Visual Studio version. 
Backup files are not needed, 484 | # because we have git ;-) 485 | _UpgradeReport_Files/ 486 | Backup*/ 487 | UpgradeLog*.XML 488 | UpgradeLog*.htm 489 | ServiceFabricBackup/ 490 | *.rptproj.bak 491 | 492 | # SQL Server files 493 | *.mdf 494 | *.ldf 495 | *.ndf 496 | 497 | # Business Intelligence projects 498 | *.rdl.data 499 | *.bim.layout 500 | *.bim_*.settings 501 | *.rptproj.rsuser 502 | *- [Bb]ackup.rdl 503 | *- [Bb]ackup ([0-9]).rdl 504 | *- [Bb]ackup ([0-9][0-9]).rdl 505 | 506 | # Microsoft Fakes 507 | FakesAssemblies/ 508 | 509 | # GhostDoc plugin setting file 510 | *.GhostDoc.xml 511 | 512 | # Node.js Tools for Visual Studio 513 | .ntvs_analysis.dat 514 | 515 | # Visual Studio 6 build log 516 | *.plg 517 | 518 | # Visual Studio 6 workspace options file 519 | *.opt 520 | 521 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 522 | *.vbw 523 | 524 | # Visual Studio LightSwitch build output 525 | **/*.HTMLClient/GeneratedArtifacts 526 | **/*.DesktopClient/GeneratedArtifacts 527 | **/*.DesktopClient/ModelManifest.xml 528 | **/*.Server/GeneratedArtifacts 529 | **/*.Server/ModelManifest.xml 530 | _Pvt_Extensions 531 | 532 | # Paket dependency manager 533 | .paket/paket.exe 534 | paket-files/ 535 | 536 | # FAKE - F# Make 537 | .fake/ 538 | 539 | # CodeRush personal settings 540 | .cr/personal 541 | 542 | # Python Tools for Visual Studio (PTVS) 543 | __pycache__/ 544 | *.pyc 545 | 546 | # Cake - Uncomment if you are using it 547 | # tools/** 548 | # !tools/packages.config 549 | 550 | # Tabs Studio 551 | *.tss 552 | 553 | # Telerik's JustMock configuration file 554 | *.jmconfig 555 | 556 | # BizTalk build output 557 | *.btp.cs 558 | *.btm.cs 559 | *.odx.cs 560 | *.xsd.cs 561 | 562 | # OpenCover UI analysis results 563 | OpenCover/ 564 | 565 | # Azure Stream Analytics local run output 566 | ASALocalRun/ 567 | 568 | # MSBuild Binary and Structured Log 569 | *.binlog 570 | 571 | # NVidia Nsight GPU debugger configuration file 572 | 
*.nvuser 573 | 574 | # MFractors (Xamarin productivity tool) working folder 575 | .mfractor/ 576 | 577 | # Local History for Visual Studio 578 | .localhistory/ 579 | 580 | # BeatPulse healthcheck temp database 581 | healthchecksdb 582 | 583 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 584 | MigrationBackup/ 585 | 586 | # Ionide (cross platform F# VS Code tools) working folder 587 | .ionide/ 588 | 589 | # End of https://www.toptal.com/developers/gitignore/api/vs,intellij+all,node 590 | .DS_Store 591 | 592 | 593 | .wwebjs_cache 594 | .session -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict=true -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "tabWidth": 4, 4 | "useTabs": true, 5 | "printWidth": 140, 6 | "singleQuote": false, 7 | "trailingComma": "none", 8 | "jsxBracketSameLine": true, 9 | "bracketSameLine": true, 10 | "endOfLine": "lf" 11 | } 12 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:bullseye-slim 2 | 3 | RUN apt update 4 | # components for whatsapp-web.js (support no-gui systems) 5 | RUN apt install -y gconf-service libgbm-dev libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget 6 | RUN apt install -y chromium 7 | 8 | # For 
transcription 9 | RUN apt install -y ffmpeg 10 | ## It will install latest model of OpenAI Whisper (around 6~7 GB) 11 | ## Uncomment below command if you want to use the local version of transcription module 12 | # RUN pip install -y python pip 13 | # RUN pip install -U openai-whisper 14 | 15 | WORKDIR /app/ 16 | 17 | ENV OPENAI_API_KEY "" 18 | ENV PREFIX_ENABLED "" 19 | 20 | COPY package.json package-lock.json ./ 21 | 22 | RUN npm install 23 | RUN npm install vite-node 24 | 25 | COPY . . 26 | 27 | CMD ["npm", "run", "start"] 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPT + DALL-E + WhatsApp = AI Assistant 🚀 2 | 3 | ![Docker](https://github.com/askrella/whatsapp-chatgpt/actions/workflows/docker.yml/badge.svg) 4 | ![Prettier](https://github.com/askrella/whatsapp-chatgpt/actions/workflows/prettier.yml/badge.svg) 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 6 | 7 | [![Discord Invite](https://dcbadge.vercel.app/api/server/9VJaRXKwd3)](https://discord.gg/9VJaRXKwd3) 8 | 9 | This WhatsApp bot uses OpenAI's GPT and DALL-E 2 to respond to user inputs. 10 | 11 | You can talk to the bot in voice messages, the bot will transcribe and respond. :robot: 12 | 13 |

14 | Whatsapp ChatGPT 15 |

16 | 17 | ## Requirements 18 | 19 | - Node.js (18 or newer) 20 | - A recent version of npm 21 | - An [OpenAI API key](https://beta.openai.com/signup) 22 | - A WhatsApp account 23 | 24 | ## Documentation 25 | 26 | In the documentation you can find more information about how to install, configure and use this bot. 27 | 28 | ➡️ https://askrella.github.io/whatsapp-chatgpt 29 | 30 | ## Disclaimer 31 | 32 | The operations performed by this bot are not free. You will be charged by OpenAI for each request you make. 33 | 34 | This bot uses Puppeteer to run a real instance of Whatsapp Web to avoid getting blocked. 35 | 36 | NOTE: We can't guarantee that you won't be blocked using this method, although it does work. WhatsApp does not allow bots or unofficial clients on its platform, so this should not be considered completely safe. 37 | 38 | ## Contributors 39 | 40 | 41 | 42 | 43 | 44 | ## Used libraries 45 | 46 | - https://github.com/transitive-bullshit/chatgpt-api 47 | - https://github.com/pedroslopez/whatsapp-web.js 48 | - https://github.com/askrella/speech-rest-api 49 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | whatsapp-chatgpt: 4 | container_name: whatsapp-chatgpt 5 | read_only: true 6 | image: ghcr.io/askrella/whatsapp-chatgpt:master 7 | environment: 8 | OPENAI_API_KEY: "" 9 | OPENAI_GPT_MODEL: "" 10 | PREFIX_ENABLED: "" 11 | SERPAPI_API_KEY: "" 12 | restart: unless-stopped 13 | volumes: 14 | - session-data:/app/session 15 | volumes: 16 | session-data: 17 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/askrella/whatsapp-chatgpt/50a7611f3da2f9479509a6e150a6d25a6cfb91fb/docs/.nojekyll 
-------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # GPT + DALL-E + WhatsApp = AI Assistant 🚀 2 | 3 | ![Docker](https://github.com/askrella/whatsapp-chatgpt/actions/workflows/docker.yml/badge.svg) 4 | ![Prettier](https://github.com/askrella/whatsapp-chatgpt/actions/workflows/prettier.yml/badge.svg) 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 6 | 7 | [![Discord Invite](https://dcbadge.vercel.app/api/server/9VJaRXKwd3)](https://discord.gg/9VJaRXKwd3) 8 | 9 | This WhatsApp bot uses OpenAI's GPT and DALL-E 2 to respond to user inputs. 10 | 11 | You can talk to the bot in voice messages, the bot will transcribe and respond. :robot: 12 | 13 |

14 | Whatsapp ChatGPT 15 |

16 | 17 | ## Requirements 18 | 19 | - Node.js (18 or newer) 20 | - A recent version of npm 21 | - An [OpenAI API key](https://beta.openai.com/signup) 22 | - A WhatsApp account 23 | 24 | ## Documentation 25 | 26 | In the documentation you can find more information about how to install, configure and use this bot. 27 | 28 | ➡️ https://askrella.github.io/whatsapp-chatgpt 29 | 30 | ## Disclaimer 31 | 32 | The operations performed by this bot are not free. You will be charged by OpenAI for each request you make. 33 | 34 | This bot uses Puppeteer to run a real instance of Whatsapp Web to avoid getting blocked. 35 | 36 | NOTE: We can't guarantee that you won't be blocked using this method, although it does work. WhatsApp does not allow bots or unofficial clients on its platform, so this should not be considered completely safe. 37 | 38 | ## Contributors 39 | 40 | 41 | 42 | 43 | 44 | ## Used libraries 45 | 46 | - https://github.com/transitive-bullshit/chatgpt-api 47 | - https://github.com/pedroslopez/whatsapp-web.js 48 | - https://github.com/askrella/speech-rest-api 49 | -------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [Home](/) 4 | - [Installation](pages/installation.md) 5 | - [Docker](pages/docker.md) 6 | - [Usage](pages/usage.md) 7 | - [Configuration](pages/gpt.md) 8 | - [GPT configuration](pages/gpt.md) 9 | - [Langchain & Agents](pages/langchain.md) 10 | - [Configure Prefix](pages/configure-prefix.md) 11 | - [Talk with the bot](pages/transcription.md) 12 | - [Transcription with OpenAI Whisper](pages/transcription.md) 13 | - [Text-To-Speech](pages/tts.md) 14 | - [Send Messages to yourself](pages/send-messages-to-yourself.md) 15 | - [Disclaimer](pages/disclaimer.md) 16 | -------------------------------------------------------------------------------- /docs/index.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | WhatsApp AI Assistant 🚀 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/pages/configure-prefix.md: -------------------------------------------------------------------------------- 1 | # Configure Prefix 2 | 3 | ## Disable prefix 4 | 5 | You can disable the `!gpt`/`!dalle`/`!sd`/`!config` prefix by setting `PREFIX_ENABLED` to `false` in the `.env` file.
6 | 7 | If you disable the prefix, the bot will not support DALL-E and Stable Diffusion, only GPT will be used. 8 | 9 | ## Set own prefixes 10 | 11 | You can set your own prefixes for ChatGPT, DALL-E and configuration in the `.env` file. 12 | 13 | ``` 14 | GPT_PREFIX=!gpt 15 | DALLE_PREFIX=!dalle 16 | STABLE_DIFFUSION_PREFIX=!sd 17 | AI_CONFIG_PREFIX=!config 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/pages/disclaimer.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | 3 | The operations performed by this bot are not free. You will be charged by OpenAI for each request you make. 4 | 5 | This bot uses Puppeteer to run a real instance of Whatsapp Web to avoid getting blocked. 6 | 7 | NOTE: We can't guarantee that you won't be blocked using this method, although it does work. 8 | 9 | WhatsApp does not allow bots or unofficial clients on its platform, so this should not be considered completely safe. 10 | -------------------------------------------------------------------------------- /docs/pages/docker.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | 3 | Make sure to edit the `docker-compose.yml` file and set your own variables there. 4 | 5 | ```sh 6 | sudo docker-compose up 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/pages/gpt.md: -------------------------------------------------------------------------------- 1 | # GPT 2 | 3 | ## Model 4 | 5 | You can specify the model which should be used with the `OPENAI_MODEL` environment variabl 6 | 7 | ```bash 8 | OPENAI_MODEL=gpt-3.5-turbo # or gpt-4 9 | ``` 10 | 11 | ## Configuration 12 | 13 | You can modify the max model tokens by setting the `MAX_MODEL_TOKENS` environment variable. For example: 14 | 15 | ```bash 16 | MAX_MODEL_TOKENS=2000 17 | ``` 18 | 19 | ## What are tokens and how to count them? 
20 | 21 | https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them 22 | 23 | ## Pre Prompt 24 | 25 | You can configure a pre prompt which is executed after creating a new conversation. 26 | 27 | To do that, use the `PRE_PROMPT` environment variable. For example: 28 | 29 | ```bash 30 | PRE_PROMPT=Act very funny and overreact to messages. Do that for every message you get, forever. 31 | ``` 32 | 33 | ## Groupchats 34 | 35 | You can enable the bot to interact on groupchats by setting the `GROUPCHATS_ENABLED` environment variable to `true`. For example: 36 | 37 | ```bash 38 | GROUPCHATS_ENABLED=true 39 | ``` 40 | 41 | ## Prompt Moderation 42 | 43 | You can configure a prompt moderation, which will be executed before sending the prompt to GPT. 44 | This way, you can filter out prompts before sending them to GPT. 45 | This is achieved by using the [OpenAI Moderation API](https://beta.openai.com/docs/api-reference/moderations). 46 | 47 | To enable it, use the `PROMPT_MODERATION_ENABLED` environment variable. For example: 48 | 49 | ```bash 50 | PROMPT_MODERATION_ENABLED=true 51 | ``` 52 | 53 | You can also configure the blacklisted categories, which will be used to filter the prompt moderation. 54 | 55 | To do that, use the `PROMPT_MODERATION_BLACKLISTED_CATEGORIES` environment variable. For example: 56 | 57 | ```bash 58 | PROMPT_MODERATION_BLACKLISTED_CATEGORIES = ["hate","hate/threatening","self-harm","sexual","sexual/minors","violence","violence/graphic"] 59 | ``` 60 | 61 | You can see all available categories [here](https://beta.openai.com/docs/api-reference/moderations). 62 | 63 | Please, keep in mind that disabling the prompt moderation or modifying the blacklisted categories, will not disable the moderation of the GPT API. Because OpenAI uses their own moderation, which is not configurable. 
64 | 65 | ## Rate Limit 66 | 67 | https://platform.openai.com/docs/guides/rate-limits 68 | 69 | If you are with heavy usage, you might run into the rate limit of Open API. Since the rate limit is on organization level, you could create another account and get a new API key separately. And then setting the keys into environment variables `OPENAI_API_KEYS`. API keys will be used in a random basis. 70 | -------------------------------------------------------------------------------- /docs/pages/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 1. Clone this repository 4 | 2. Install the required packages by running `npm install` 5 | 3. Put your OpenAI API key into the `.env` file 6 | - Example file: [.env-example](https://github.com/askrella/whatsapp-chatgpt/blob/master/.env-example) 7 | - You can obtain an API key [here](https://platform.openai.com/account/api-keys) 8 | 4. Run the bot using `npm run start` 9 | 5. Scan the QR code with WhatsApp (link a device) 10 | 6. Now you're ready to go! People can send you messages, and the bot will respond to them 11 | -------------------------------------------------------------------------------- /docs/pages/langchain.md: -------------------------------------------------------------------------------- 1 | # LangChain 2 | 3 | ## About 4 | 5 | Use this handler to allow GPT to interact with other sources of data, ie. the internet, or different mediums like pdfs and images. Ideally the user doesn't have to differentiate between GPT instances that can and cannot use external data sources, but we'll keep them separate for ease of implementation for now. 6 | 7 | ## Example 8 | 9 | In the following example, GPT uses `SerpAPI` as a tool to access Google Search API. You can use `RequestsGetTool` and parse the HTML if you don't have a [SerpAPI](https://serpapi.com/) API key. 
10 | 11 | > !lang nba game april 11st 2023 12 | > 13 | > // Uses SerpAPI or RequestsGetTool to access the search engine, parse results in either JSON or HTML, and have GPT interpret the best answer for the prompt. 14 | > "The result of the NBA games on April 11st 2023 is Minnesota Timberwolves vs Los Angeles Lakers" 15 | 16 | ## Tools 17 | 18 | Abstractions for GPT to interact with to interact with external data sources. For example, both `RequestGetTools` and `SerpAPI` allows GPT to access the internet. 19 | 20 | See other tools in the LangChain [Tools section](https://js.langchain.com/docs/modules/agents/tools/). 21 | 22 | ## References 23 | 24 | - [LangChain in JS](https://js.langchain.com/docs/) 25 | - [LangChain in Python](https://python.langchain.com/en/latest/index.html) 26 | -------------------------------------------------------------------------------- /docs/pages/send-messages-to-yourself.md: -------------------------------------------------------------------------------- 1 | # Send messages to yourself 2 | 3 | You can also use the bot to send messages to yourself. 4 | 5 | Use this WhatsApp link: https://wa.me/your_phone_number. 6 | 7 | Replace `your_phone_number` with your phone number, including the country code. (e.g. +11234567890) 8 | 9 | The URL above will take you to your own chat window. 10 | -------------------------------------------------------------------------------- /docs/pages/transcription.md: -------------------------------------------------------------------------------- 1 | # Transcription (EXPERIMENTAL) 2 | 3 | The transcription feature allows you to use your voice to interact with the bot. 4 | It's a great way to use the bot without having to type anything. 5 | 6 | You can enable it by setting `TRANSCRIPTION_ENABLED=true` in your `.env` file. 
7 | 8 | There are multiple modes available: 9 | 10 | - `local` 11 | - `openai` 12 | - `speech-api` 13 | - `whisper-api` 14 | 15 | # Transcription Modes 16 | 17 | ## Local 18 | 19 | For the local mode you need to have [whisper](https://github.com/openai/whisper) installed on your machine. 20 | 21 | With local mode the voice messages will be transcribed on your machine. Best for privacy. 22 | 23 | You need to install Python: 24 | 25 | - https://www.python.org/downloads/ 26 | 27 | Check out the whisper installation guide here: 28 | 29 | - https://github.com/openai/whisper#setup 30 | 31 | Use the following environment variable to enable the local mode: 32 | 33 | ```bash 34 | TRANSCRIPTION_MODE=local 35 | ``` 36 | 37 | ## Using A Remote Transcription API 38 | 39 | You might use an external API to turn audio into text, the voice messages are processed on the server and not on your machine. 40 | 41 | ## Open AI (Whisper) 42 | 43 | To use the official Open AI transcription endpoint based on large-v2 Whisper model, you will need to ensure that you have the `OPENAI_API_KEY` environment variable set. 44 | 45 | If you already have this set, you can proceed to set the `TRANSCRIPTION_MODE` environment variable: 46 | 47 | ```bash 48 | TRANSCRIPTION_MODE=openai 49 | ``` 50 | 51 | The transcribed language is usually detected automatically, but if you want to ensure accurate language detection, you can set the environment variable `TRANSCRIPTION_LANGUAGE` to the desired language (for example, "English" for English, see [Supported Languages](https://github.com/openai/whisper#available-models-and-languages) for the full list). 52 | 53 | ```bash 54 | TRANSCRIPTION_LANGUAGE=English 55 | ``` 56 | 57 | Remarks: 58 | 59 | - Please note that this endpoint has a file size limit of 25 MB, so it is recommended to avoid transcribing long audio files. 60 | 61 | ## Speech-API 62 | 63 | The Speech API is a REST API that converts your voice messages to text. 
The voice messages are processed on the server and not on your machine. 64 | 65 | The Speech API doesn't store the voice messages permanently. It's open source and you can host it yourself. 66 | 67 | You can find the source code here: 68 | 69 | - https://github.com/askrella/speech-rest-api 70 | 71 | If you want use the Speech API mode you need to set the following environment variable: 72 | 73 | ```bash 74 | TRANSCRIPTION_MODE=speech-api 75 | ``` 76 | 77 | By default the bot will use our hosted Speech API (for free). You can change the URL by setting the following environment variable: 78 | 79 | ```bash 80 | SPEECH_API_URL= 81 | ``` 82 | 83 | ## Whisper API 84 | 85 | The Whisper API is a REST API provided by AssemblyAI that is capable of converting voice messages into text. The voice messages are processed on the server, rather than on your own machine. 86 | 87 | If you wish to use the Whisper API mode, you will need to set the environment variable 88 | 89 | ```bash 90 | TRANSCRIPTION_MODE=whisper-api 91 | ``` 92 | 93 | To use the API, you must first sign up and obtain an API key from: 94 | 95 | - https://whisperapi.com/ 96 | 97 | The transcribed language is usually detected automatically, but if you want to ensure accurate language detection, you can set the environment variable `TRANSCRIPTION_LANGUAGE` to the desired language (for example, "en" for English). 98 | 99 | ``` 100 | TRANSCRIPTION_LANGUAGE=en 101 | ``` 102 | -------------------------------------------------------------------------------- /docs/pages/tts.md: -------------------------------------------------------------------------------- 1 | # Text-To-Speech (EXPERIMENTAL) 2 | 3 | The TTS feature allows the bot to answer with voice messages instead of text messages. You can actually talk to the bot. 
4 | 5 | You can enable it by setting the following environment variable: 6 | 7 | ```bash 8 | TTS_ENABLED=true 9 | ``` 10 | 11 | By default, when TTS is enabled, the bot will answer two messages: the text response and the audio response. 12 | 13 | You can disable the text response by changing the following environment variable: 14 | 15 | ```bash 16 | TTS_TRANSCRIPTION_RESPONSE_ENABLED=true 17 | ``` 18 | 19 | ## Supported Providers 20 | 21 | - [Speech API](#speech-api) 22 | - [AWS Polly](#aws-polly) 23 | 24 | ## Speech API 25 | 26 | This feature will use the Speech API to convert the GPT response to voice. It's open source and you can host it yourself. 27 | 28 | You can find the source code here: 29 | 30 | - https://github.com/askrella/speech-rest-api 31 | 32 | By default the bot will use our hosted Speech API (for free). You can change the URL by setting the following environment variables: 33 | 34 | ```bash 35 | SPEECH_API_URL= 36 | TTS_MODE=speech-api 37 | ``` 38 | 39 | ## AWS Polly 40 | 41 | You can use Amazon Web Services Polly to convert the GPT response to voice. 42 | 43 | You can find the official documentation here: 44 | 45 | - https://docs.aws.amazon.com/polly/latest/dg/what-is.html 46 | 47 | You can enable this service by setting the following environment variables: 48 | 49 | ```bash 50 | TTS_ENABLED=true 51 | TTS_PROVIDER=aws-polly 52 | AWS_ACCESS_KEY_ID= 53 | AWS_SECRET_ACCESS_KEY= 54 | AWS_REGION= 55 | AWS_POLLY_VOICE_ID= 56 | AWS_POLLY_VOICE_ENGINE= 57 | ``` 58 | 59 | The provided AWS credentials must have the `polly:SynthesizeSpeech` permission. 
60 | 61 | You can find the list of available regions here: 62 | 63 | - https://docs.aws.amazon.com/general/latest/gr/rande.html#polly_region 64 | 65 | You can find the list of available voices here: 66 | 67 | - https://docs.aws.amazon.com/polly/latest/dg/voicelist.html 68 | 69 | And the list of available engines here: 70 | 71 | - https://docs.aws.amazon.com/polly/latest/dg/engines.html 72 | 73 | Keep in mind that the AWS Polly service is not free. You will be charged for the usage, so make sure to check the pricing before enabling it. 74 | -------------------------------------------------------------------------------- /docs/pages/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | To use the bot, simply send a message with the `!gpt`/`!dalle`/`!sd`/`!config` command followed by your prompt. For example: 4 | 5 | ### GPT 6 | 7 | ``` 8 | !gpt What is the meaning of life? 9 | ``` 10 | 11 | ### DALLE 12 | 13 | ``` 14 | !dalle A frog with a red hat is walking on a bridge. 15 | ``` 16 | 17 | ### Stable Diffusion 18 | 19 | ``` 20 | !sd A frog with a red hat is walking on a bridge. 21 | ``` 22 | 23 | It is using huggingface's stable diffusion model for image rendering, you might change the model with `!config sd setModel ` command. 24 | 25 | ### AI Config 26 | 27 | To modify the bot's configuration, you can use the `!config` command. 
Run `!config help` for details:
26 | curl -fsSL https://get.docker.com -o get-docker.sh 27 | sh get-docker.sh 28 | yum install -y git docker-compose 29 | echo "Packages installed successfully." 30 | else 31 | echo "Unsupported package manager." 32 | exit 1 33 | fi 34 | ;; 35 | *) 36 | echo "Unsupported operating system." 37 | exit 1 38 | ;; 39 | esac 40 | 41 | # Clone Git repo and run Docker Compose 42 | echo "Cloning Git repo..." 43 | git clone https://github.com/askrella/whatsapp-chatgpt.git 44 | cd repo 45 | 46 | # Prompt user for API key 47 | read -p "Enter your OpenAI API key: " api_key 48 | 49 | # Replace API key variable in Docker Compose file 50 | sed -i "s/OPENAI_API_KEY:.*/OPENAI_API_KEY: \"$api_key\"/g" docker-compose.yml 51 | 52 | # Start Docker Compose 53 | echo "Starting Docker containers..." 54 | docker-compose up -d 55 | echo "Docker containers started successfully." 56 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "whatsapp-chatgpt", 3 | "version": "1.0.0", 4 | "description": "Whatsapp bot that uses OpenAI's GPT & DALLE to respond to user inputs", 5 | "main": "src/index.ts", 6 | "author": "Askrella Software Agency UG (haftungsbeschränkt)", 7 | "scripts": { 8 | "start": "npx vite-node src/index.ts", 9 | "prettier": "prettier --write ./src" 10 | }, 11 | "license": "MIT", 12 | "dependencies": { 13 | "aws-sdk": "^2.1649.0", 14 | "chatgpt": "^5.2.5", 15 | "dotenv": "^16.3.1", 16 | "fetch-blob": "^4.0.0", 17 | "ffmpeg": "^0.0.4", 18 | "langchain": "^0.0.156", 19 | "openai": "^4.52.1", 20 | "picocolors": "^1.0.0", 21 | "qrcode": "^1.5.3", 22 | "whatsapp-web.js": "^1.25.0" 23 | }, 24 | "engines": { 25 | "node": ">=18.0.0" 26 | }, 27 | "devDependencies": { 28 | "@types/qrcode": "^1.5.2", 29 | "prettier": "^3.0.3" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/cli/ui.ts: 
-------------------------------------------------------------------------------- 1 | import color from "picocolors"; 2 | 3 | export const print = (text: string) => { 4 | console.log(color.green("◇") + " " + text); 5 | }; 6 | 7 | export const printError = (text: string) => { 8 | console.log(color.red("◇") + " " + text); 9 | }; 10 | 11 | export const printIntro = () => { 12 | console.log(""); 13 | console.log(color.bgCyan(color.white(" Whatsapp ChatGPT & DALL-E "))); 14 | console.log("|-------------------------------------------------------------------------------------------------|"); 15 | console.log("| A Whatsapp bot that uses OpenAI's ChatGPT and DALL-E to generate text and images from a prompt. |"); 16 | console.log("|-------------------------------------------------------------------------------------------------|"); 17 | console.log(""); 18 | }; 19 | 20 | export const printQRCode = (qr: string) => { 21 | console.log(qr); 22 | console.log("Scan the QR code above to login to Whatsapp Web..."); 23 | }; 24 | 25 | export const printLoading = () => { 26 | console.log("Loading..."); 27 | }; 28 | 29 | export const printAuthenticated = () => { 30 | console.log("Authenticated, session started!"); 31 | }; 32 | 33 | export const printAuthenticationFailure = () => { 34 | console.log("Authentication failed!"); 35 | }; 36 | 37 | export const printOutro = () => { 38 | console.log(""); 39 | console.log("The bot is ready to use."); 40 | console.log("To get started, send a message to the bot with the prompt you want to use."); 41 | console.log("Use the prefix '!gpt' if configured that way."); 42 | }; 43 | -------------------------------------------------------------------------------- /src/commands/chat.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message } from "whatsapp-web.js"; 3 | 4 | export const ChatModule: ICommandModule = { 5 | 
key: "chat", 6 | register: (): ICommandsMap => { 7 | return { 8 | id 9 | }; 10 | } 11 | }; 12 | 13 | const id: ICommandDefinition = { 14 | help: "- Get the ID of the chat", 15 | execute: (message: Message) => { 16 | message.reply(message.to); 17 | } 18 | }; 19 | -------------------------------------------------------------------------------- /src/commands/general.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message } from "whatsapp-web.js"; 3 | import { config } from "../config"; 4 | import { aiConfigTarget, aiConfigTypes, aiConfigValues, IAiConfig } from "../types/ai-config"; 5 | import { aiConfig, getConfig } from "../handlers/ai-config"; 6 | 7 | export const GeneralModule: ICommandModule = { 8 | key: "general", 9 | register: (): ICommandsMap => { 10 | return { 11 | settings, 12 | whitelist 13 | }; 14 | } 15 | }; 16 | 17 | const settings: ICommandDefinition = { 18 | help: "- Get current settings", 19 | execute: function (message: Message) { 20 | const selfNotedMessage = message.fromMe && message.hasQuotedMsg === false && message.from === message.to; 21 | if (!selfNotedMessage) { 22 | // Only allow printing out the settings on self-noted for security reasons 23 | return; 24 | } 25 | 26 | let response = "Runtime settings:"; 27 | for (let module in aiConfig.commandsMap) { 28 | for (let command in aiConfig.commandsMap[module]) { 29 | if (aiConfig.commandsMap[module][command].data === undefined) { 30 | continue; 31 | } 32 | let val; 33 | if (typeof aiConfig.commandsMap[module][command].data === "function") { 34 | val = aiConfig.commandsMap[module][command].data(); 35 | } else { 36 | val = aiConfig.commandsMap[module][command].data; 37 | } 38 | response += `\n${module} ${command}: ${val}`; 39 | } 40 | } 41 | 42 | response += `\n\nStatic settings:`; 43 | 44 | for (let target in aiConfigTarget) { 45 | for (let type in 
aiConfigTypes[target]) { 46 | response += `\n${target} ${type}: ${aiConfig[target][type]}`; 47 | } 48 | } 49 | 50 | // Whitelisted fields from config 51 | [ 52 | "openAIModel", 53 | "prePrompt", 54 | "gptPrefix", 55 | "dallePrefix", 56 | "stableDiffusionPrefix", 57 | "resetPrefix", 58 | "groupchatsEnabled", 59 | "promptModerationEnabled", 60 | "promptModerationBlacklistedCategories", 61 | "ttsMode" 62 | ].forEach((field) => { 63 | response += `\n${field}: ${config[field]}`; 64 | }); 65 | message.reply(response); 66 | } 67 | }; 68 | 69 | const whitelist: ICommandDefinition = { 70 | help: " - Set whitelisted phone numbers", 71 | data: config.whitelistedPhoneNumbers, 72 | execute: function (message: Message, value?: string) { 73 | if (!value) { 74 | message.reply(`Invalid value, please give a comma-separated list of phone numbers.`); 75 | return; 76 | } 77 | this.data = value.split(","); 78 | message.reply(`Updated whitelist phone numbers to ${this.data}`); 79 | } 80 | }; 81 | -------------------------------------------------------------------------------- /src/commands/gpt.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message } from "whatsapp-web.js"; 3 | import { config } from "../config"; 4 | import { initOpenAI } from "../providers/openai"; 5 | 6 | export const GptModule: ICommandModule = { 7 | key: "gpt", 8 | register: (): ICommandsMap => { 9 | return { 10 | apiKey, 11 | maxModelTokens 12 | }; 13 | } 14 | }; 15 | 16 | const apiKey: ICommandDefinition = { 17 | help: " - Set token pool, support multiple tokens with comma-separated", 18 | hint: "sk-xxxx,sk-xxxx", 19 | data: () => { 20 | // Randomly pick an API key 21 | return config.openAIAPIKeys[Math.floor(Math.random() * config.openAIAPIKeys.length)]; 22 | }, 23 | execute: function (message: Message, valueStr?: string) { 24 | if (!valueStr) { 25 | message.reply(`Invalid 
value, please give a comma-separated string of OpenAI api keys.`); 26 | return; 27 | } 28 | config.openAIAPIKeys = valueStr.split(",") as string[]; 29 | message.reply(`Updated API keys, total keys: ${config.openAIAPIKeys.length}`); 30 | } 31 | }; 32 | 33 | const maxModelTokens: ICommandDefinition = { 34 | help: " - Set max model tokens value", 35 | hint: "integer", 36 | data: config.maxModelTokens, 37 | execute: function (message: Message, valueStr?: string) { 38 | const value = parseInt(valueStr || ""); 39 | if (!value || isNaN(value)) { 40 | message.reply(`Invalid value, please give an integer value`); 41 | return; 42 | } 43 | this.data = value; 44 | initOpenAI(); 45 | message.reply(`Updated max model tokens to ${this.data}`); 46 | } 47 | }; 48 | -------------------------------------------------------------------------------- /src/commands/stable-diffusion.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message, MessageMedia } from "whatsapp-web.js"; 3 | import * as cli from "../cli/ui"; 4 | 5 | export const StableDiffusionModule: ICommandModule = { 6 | key: "sd", 7 | register: (): ICommandsMap => { 8 | return { 9 | setModel, 10 | generate 11 | }; 12 | } 13 | }; 14 | 15 | let model = "runwayml/stable-diffusion-v1-5"; 16 | 17 | const setModel: ICommandDefinition = { 18 | help: " - Set the model to be used of Stable Diffusion (with huggingface)", 19 | hint: "runwayml/stable-diffusion-v1-5", 20 | data: model, 21 | execute: function (message: Message, valueStr?: string) { 22 | if (!valueStr) { 23 | message.reply(`Invalid value, please give a model name.`); 24 | return; 25 | } 26 | this.data = valueStr; 27 | model = valueStr; 28 | message.reply(`Updated model to ${this.data}`); 29 | } 30 | }; 31 | 32 | const generate: ICommandDefinition = { 33 | help: " - Given the prompt, generate an image using Stable Diffusion (with 
huggingface)", 34 | hint: 'A magical and adventurous story about "The Littlest Pudu."', 35 | execute: async (message: Message, valueStr?: string) => { 36 | try { 37 | const start = Date.now(); 38 | 39 | cli.print(`[Stable Diffusion] Received prompt from ${message.from}: ${valueStr}`); 40 | 41 | const huggingFaceAPIToken = process.env.HUGGINGFACE_API_TOKEN; 42 | 43 | if (!huggingFaceAPIToken) { 44 | throw new Error("[Stable Diffusion] Huggingface API token not found, set the HUGGINGFACE_API_TOKEN environment variable"); 45 | } 46 | 47 | const url = `https://api-inference.huggingface.co/models/${model}`; 48 | const options = { 49 | method: "POST", 50 | headers: { 51 | "Content-Type": "application/json", 52 | Authorization: `Bearer ${huggingFaceAPIToken}` 53 | }, 54 | body: JSON.stringify({ 55 | inputs: valueStr, 56 | options: { 57 | wait_for_model: true 58 | } 59 | }) 60 | }; 61 | const response = await fetch(url, options); 62 | const end = Date.now() - start; 63 | const imageBlob = await response.blob(); 64 | const contentType = response.headers.get("Content-Type") || "image/jpeg"; 65 | const buffer = Buffer.from(await imageBlob.arrayBuffer()); 66 | const image = new MessageMedia(contentType, buffer.toString("base64")); 67 | 68 | cli.print(`[Stable Diffusion] Answer to ${message.from} | Huggingface request took ${end}ms`); 69 | 70 | message.reply(image); 71 | } catch (error: any) { 72 | console.error("An error occurred", error); 73 | message.reply("An error occurred, please contact the administrator. 
(" + error.message + ")"); 74 | } 75 | } 76 | }; 77 | -------------------------------------------------------------------------------- /src/commands/transcription.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message } from "whatsapp-web.js"; 3 | import { config } from "../config"; 4 | import { TranscriptionMode } from "../types/transcription-mode"; 5 | 6 | export const TranscriptionModule: ICommandModule = { 7 | key: "transcription", 8 | register: (): ICommandsMap => { 9 | return { 10 | enabled, 11 | mode 12 | } as ICommandsMap; 13 | } 14 | }; 15 | 16 | const enabled: ICommandDefinition = { 17 | help: " - Toggle if transcription is enabled", 18 | hint: "true, false", 19 | data: config.transcriptionEnabled, 20 | execute: function (message: Message, valueStr?: string) { 21 | if (["true", "false"].indexOf(valueStr || "") < 0) { 22 | message.reply(`Invalid value, please specify true or false`); 23 | return; 24 | } 25 | this.data = valueStr == "true"; 26 | message.reply(`Updated transcription enabled to ${this.data}`); 27 | } 28 | }; 29 | 30 | const mode: ICommandDefinition = { 31 | help: " - Set transcription mode", 32 | hint: Object.values(TranscriptionMode), 33 | data: config.transcriptionMode, 34 | execute: function (message: Message, valueStr?: string) { 35 | if ((Object.values(TranscriptionMode) as string[]).indexOf(valueStr || "") < 0) { 36 | message.reply(`Invalid value, available modes are: ${Object.values(TranscriptionMode).join(", ")}`); 37 | return; 38 | } 39 | this.data = valueStr; 40 | message.reply(`Updated transcription mode to ${this.data}`); 41 | } 42 | }; 43 | -------------------------------------------------------------------------------- /src/commands/tts.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from 
"../types/commands";
import { Message } from "whatsapp-web.js";
import { config } from "../config";

// Command module exposing the TTS runtime toggles under the "tts" key of the AI config.
export const TTSModule: ICommandModule = {
	key: "tts",
	register: (): ICommandsMap => {
		return {
			enabled
		};
	}
};

// "!config tts enabled <true|false>" — toggles voice replies at runtime.
// Plain function (not arrow) so `this` is the command definition holding `data`.
const enabled: ICommandDefinition = {
	help: "<value> - Toggle if TTS is enabled",
	hint: "true, false",
	data: config.ttsEnabled,
	execute: function (message: Message, valueStr?: string) {
		if (!["true", "false"].includes(valueStr || "")) {
			message.reply(`Invalid value, please specify true or false`);
			return;
		}
		this.data = valueStr == "true";
		message.reply(`Updated TTS enabled to ${this.data}`);
	}
};
--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------
import process from "process";

import { TranscriptionMode } from "./types/transcription-mode";
import { TTSMode } from "./types/tts-mode";
import { AWSPollyEngine } from "./types/aws-polly-engine";

// Environment variables
import dotenv from "dotenv";
dotenv.config();

// Config Interface — every field is resolved once at startup from process.env.
interface IConfig {
	// Access control
	whitelistedPhoneNumbers: string[];
	whitelistedEnabled: boolean;

	// OpenAI
	openAIModel: string;
	openAIAPIKeys: string[];
	maxModelTokens: number;
	prePrompt: string | undefined;

	// Prefix
	prefixEnabled: boolean;
	prefixSkippedForMe: boolean;
	gptPrefix: string;
	dallePrefix: string;
	stableDiffusionPrefix: string;
	langChainPrefix: string;
	resetPrefix: string;
	aiConfigPrefix: string;

	// Groupchats
	groupchatsEnabled: boolean;

	// Prompt Moderation
	promptModerationEnabled: boolean;
	promptModerationBlacklistedCategories: string[];

	// AWS
	awsAccessKeyId: string;
	awsSecretAccessKey: string;
	awsRegion: string;
	awsPollyVoiceId: string;
	awsPollyEngine: AWSPollyEngine;

	// Voice transcription & Text-to-Speech
	speechServerUrl: string;
	whisperServerUrl: string;
	openAIServerUrl: string;
	whisperApiKey: string;
	ttsEnabled: boolean;
	ttsMode: TTSMode;
	ttsTranscriptionResponse: boolean;
	transcriptionEnabled: boolean;
	transcriptionMode: TranscriptionMode;
	transcriptionLanguage: string;
}

// Config
export const config: IConfig = {
	whitelistedPhoneNumbers: process.env.WHITELISTED_PHONE_NUMBERS?.split(",") || [],
	whitelistedEnabled: getEnvBooleanWithDefault("WHITELISTED_ENABLED", false), // Default: false

	openAIAPIKeys: (process.env.OPENAI_API_KEYS || process.env.OPENAI_API_KEY || "").split(",").filter((key) => !!key), // Default: []
	openAIModel: process.env.OPENAI_GPT_MODEL || "gpt-3.5-turbo", // Default: gpt-3.5-turbo
	maxModelTokens: getEnvMaxModelTokens(), // Default: 4096
	prePrompt: process.env.PRE_PROMPT, // Default: undefined

	// Prefix
	prefixEnabled: getEnvBooleanWithDefault("PREFIX_ENABLED", true), // Default: true
	prefixSkippedForMe: getEnvBooleanWithDefault("PREFIX_SKIPPED_FOR_ME", true), // Default: true
	gptPrefix: process.env.GPT_PREFIX || "!gpt", // Default: !gpt
	dallePrefix: process.env.DALLE_PREFIX || "!dalle", // Default: !dalle
	stableDiffusionPrefix: process.env.STABLE_DIFFUSION_PREFIX || "!sd", // Default: !sd
	resetPrefix: process.env.RESET_PREFIX || "!reset", // Default: !reset
	aiConfigPrefix: process.env.AI_CONFIG_PREFIX || "!config", // Default: !config
	langChainPrefix: process.env.LANGCHAIN_PREFIX || "!lang", // Default: !lang

	// Groupchats
	groupchatsEnabled: getEnvBooleanWithDefault("GROUPCHATS_ENABLED", false), // Default: false

	// Prompt Moderation
	promptModerationEnabled: getEnvBooleanWithDefault("PROMPT_MODERATION_ENABLED", false), // Default: false
	promptModerationBlacklistedCategories: getEnvPromptModerationBlacklistedCategories(), // Default: ["hate", "hate/threatening", "self-harm", "sexual", "sexual/minors", "violence", "violence/graphic"]

	// AWS
	awsAccessKeyId: process.env.AWS_ACCESS_KEY_ID || "", // Default: ""
	awsSecretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || "", // Default: ""
	awsRegion: process.env.AWS_REGION || "", // Default: ""
	awsPollyVoiceId: process.env.AWS_POLLY_VOICE_ID || "", // Default: "" (set AWS_POLLY_VOICE_ID, e.g. "Joanna")
	awsPollyEngine: getEnvAWSPollyVoiceEngine(), // Default: standard

	// Speech API, Default: https://speech-service.verlekar.com
	speechServerUrl: process.env.SPEECH_API_URL || "https://speech-service.verlekar.com",
	whisperServerUrl: process.env.WHISPER_API_URL || "https://transcribe.whisperapi.com",
	openAIServerUrl: process.env.OPENAI_API_URL || "https://api.openai.com/v1/audio/transcriptions",
	whisperApiKey: process.env.WHISPER_API_KEY || "", // Default: ""

	// Text-to-Speech
	ttsEnabled: getEnvBooleanWithDefault("TTS_ENABLED", false), // Default: false
	ttsMode: getEnvTTSMode(), // Default: speech-api
	ttsTranscriptionResponse: getEnvBooleanWithDefault("TTS_TRANSCRIPTION_RESPONSE_ENABLED", true), // Default: true

	// Transcription
	transcriptionEnabled: getEnvBooleanWithDefault("TRANSCRIPTION_ENABLED", false), // Default: false
	transcriptionMode: getEnvTranscriptionMode(), // Default: local
	transcriptionLanguage: process.env.TRANSCRIPTION_LANGUAGE || "" // Default: "" (auto-detect)
};

/**
 * Get the max model tokens from the environment variable
 * @returns The max model tokens from the environment variable or 4096
 */
function getEnvMaxModelTokens(): number {
	const envValue = process.env.MAX_MODEL_TOKENS;
	if (envValue == undefined || envValue == "") {
		return 4096;
	}

	// Guard against non-numeric values so a typo doesn't propagate NaN into API calls
	const parsed = parseInt(envValue, 10);
	return Number.isNaN(parsed) ? 4096 : parsed;
}

/**
 * Get an environment variable as a boolean with a default value
 * @param key The environment variable key
 * @param defaultValue The default value
 * @returns The value of the environment variable or the default value
 */
function getEnvBooleanWithDefault(key: string, defaultValue: boolean): boolean {
	const envValue = process.env[key]?.toLowerCase();
	if (envValue == undefined || envValue == "") {
		return defaultValue;
	}

	return envValue == "true";
}

/**
 * Get the blacklist categories for prompt moderation from the environment variable
 * @returns Blacklisted categories for prompt moderation
 */
function getEnvPromptModerationBlacklistedCategories(): string[] {
	const envValue = process.env.PROMPT_MODERATION_BLACKLISTED_CATEGORIES;
	if (envValue == undefined || envValue == "") {
		return ["hate", "hate/threatening", "self-harm", "sexual", "sexual/minors", "violence", "violence/graphic"];
	} else {
		// The env var uses single quotes for convenience; normalize to valid JSON before parsing
		return JSON.parse(envValue.replace(/'/g, '"'));
	}
}

/**
 * Get the transcription mode from the environment variable
 * @returns The transcription mode
 */
function getEnvTranscriptionMode(): TranscriptionMode {
	const envValue = process.env.TRANSCRIPTION_MODE?.toLowerCase();
	if (envValue == undefined || envValue == "") {
		return TranscriptionMode.Local;
	}

	return envValue as TranscriptionMode;
}

/**
 * Get the TTS mode from the environment variable
 * @returns The TTS mode
 */
function getEnvTTSMode(): TTSMode {
	const envValue = process.env.TTS_MODE?.toLowerCase();
	if (envValue == undefined || envValue == "") {
		return TTSMode.SpeechAPI;
	}

	return envValue as TTSMode;
}

/**
 * Get the AWS Polly voice engine from the environment variable
 * @returns The voice engine
 */
function getEnvAWSPollyVoiceEngine(): AWSPollyEngine {
	const envValue = process.env.AWS_POLLY_VOICE_ENGINE?.toLowerCase();
	if (envValue == undefined || envValue == "") {
		return AWSPollyEngine.Standard;
	}

	return envValue as AWSPollyEngine;
}

export default config;
--------------------------------------------------------------------------------
/src/constants.ts:
--------------------------------------------------------------------------------
interface IConstants {
	// WhatsApp status broadcast
	statusBroadcast: string;

	// WhatsApp session storage
	sessionPath: string;
}

const constants: IConstants = {
	statusBroadcast: "status@broadcast",
	sessionPath: "./"
};

export default constants;
--------------------------------------------------------------------------------
/src/handlers/ai-config.ts:
--------------------------------------------------------------------------------
import { Message } from "whatsapp-web.js";
import { aiConfigTarget, aiConfigTypes, aiConfigValues, IAiConfig } from "../types/ai-config";
import { dalleImageSize } from "../types/dalle-config";
import { GeneralModule } from "../commands/general";
import { ChatModule } from "../commands/chat";
import { ICommandDefinition } from "../types/commands";
import { GptModule } from "../commands/gpt";
import { TranscriptionModule } from "../commands/transcription";
import { TTSModule } from "../commands/tts";
import { StableDiffusionModule } from "../commands/stable-diffusion";

import config from "../config";

// Mutable runtime configuration, adjusted via the "!config" command.
let aiConfig: IAiConfig = {
	dalle: {
		size: dalleImageSize["512x512"]
	},
	// chatgpt: {}
	commandsMap: {}
};

// Register every command module's commands under its module key.
const initAiConfig = () => {
	[ChatModule, GeneralModule, GptModule, TranscriptionModule, TTSModule, StableDiffusionModule].forEach((module) => {
		aiConfig.commandsMap[module.key] = module.register();
	});
};

/**
 * Handle a "!config ..." message: print help, dispatch a registered command,
 * or set a plain aiConfig value.
 */
const handleMessageAIConfig = async (message: Message, prompt: any) => {
	try {
		console.log("[AI-Config] Received prompt from " + message.from + ": " + prompt);

		const args: string[] = prompt.split(" ");

		/*
			!config
			!config help
		*/
		if (args.length == 1 || prompt === "help") {
			// Available commands
			let helpMessage = "Available commands:\n";
			for (let target in aiConfigTarget) {
				for (let type in aiConfigTypes[target]) {
					helpMessage += `\t${config.aiConfigPrefix} ${target} ${type} <value> - Set ${target} ${type} to <value>\n`;
				}
			}
			for (let module in aiConfig.commandsMap) {
				for (let command in aiConfig.commandsMap[module]) {
					helpMessage += `\t${config.aiConfigPrefix} ${module} ${command} ${aiConfig.commandsMap[module][command].help}\n`;
				}
			}

			// Available values
			helpMessage += "\nAvailable values:\n";
			for (let target in aiConfigTarget) {
				for (let type in aiConfigTypes[target]) {
					helpMessage += `\t${target} ${type}: ${Object.keys(aiConfigValues[target][type]).join(", ")}\n`;
				}
			}
			for (let module in aiConfig.commandsMap) {
				for (let command in aiConfig.commandsMap[module]) {
					if (aiConfig.commandsMap[module][command].hint) {
						let hint = aiConfig.commandsMap[module][command].hint;
						if (typeof hint === "object") {
							hint = Object.keys(hint).join(", ");
						}
						helpMessage += `\t${module} ${command}: ${hint}\n`;
					}
				}
			}
			message.reply(helpMessage);
			return;
		}

		// !config <target> <type>
		if (args.length < 2) {
			message.reply(
				"Invalid number of arguments, please use the following format: <target> <type> <value> or type !config help for more information."
			);
			return;
		}

		const target: string = args[0];
		const type: string = args[1];
		// Everything after the first two words is the value (values may contain spaces)
		const value: string | undefined = args.length >= 3 ? args.slice(2).join(" ") : undefined;

		if (!(target in aiConfigTarget) && !(target in aiConfig.commandsMap)) {
			message.reply("Invalid target, please use one of the following: " + Object.keys(aiConfigTarget).join(", "));
			return;
		}

		// Registered command modules take precedence over plain config values
		if (target && type && aiConfig.commandsMap[target]) {
			if (aiConfig.commandsMap[target][type]) {
				aiConfig.commandsMap[target][type].execute(message, value);
			} else {
				message.reply("Invalid command, please use one of the following: " + Object.keys(aiConfig.commandsMap[target]).join(", "));
			}
			return;
		}

		if (typeof aiConfigTypes[target] !== "object" || !(type in aiConfigTypes[target])) {
			message.reply("Invalid type, please use one of the following: " + Object.keys(aiConfigTypes[target]).join(", "));
			return;
		}

		if (value === undefined || (typeof aiConfigValues[target][type] === "object" && !(value in aiConfigValues[target][type]))) {
			message.reply("Invalid value, please use one of the following: " + Object.keys(aiConfigValues[target][type]).join(", "));
			return;
		}

		aiConfig[target][type] = value;

		message.reply("Successfully set " + target + " " + type + " to " + value);
	} catch (error: any) {
		console.error("An error occured", error);
		message.reply("An error occured, please contact the administrator. (" + error.message + ")");
	}
};

// Look up a registered command definition by module and command name.
export function getCommand(module: string, command: string): ICommandDefinition {
	return aiConfig.commandsMap[module][command];
}

// Read a config value; registered commands may expose either a plain value or a getter function.
export function getConfig(target: string, type: string): any {
	if (aiConfig.commandsMap[target] && aiConfig.commandsMap[target][type]) {
		if (typeof aiConfig.commandsMap[target][type].data === "function") {
			return aiConfig.commandsMap[target][type].data();
		}
		return aiConfig.commandsMap[target][type].data;
	}
	return aiConfig[target][type];
}

// Execute a registered command if it exists; silently no-op otherwise.
export function executeCommand(target: string, type: string, message: Message, value?: string | undefined) {
	if (aiConfig.commandsMap[target] && aiConfig.commandsMap[target][type]) {
		if (typeof aiConfig.commandsMap[target][type].execute === "function") {
			return aiConfig.commandsMap[target][type].execute(message, value);
		}
	}
}

export { aiConfig, handleMessageAIConfig, initAiConfig };
--------------------------------------------------------------------------------
/src/handlers/dalle.ts:
--------------------------------------------------------------------------------
import { MessageMedia } from "whatsapp-web.js";
import { openai } from "../providers/openai";
import { aiConfig } from "../handlers/ai-config";
import OpenAI from "openai";
import config from "../config";
import * as cli from "../cli/ui";

// Moderation
import { moderateIncomingPrompt } from "./moderation";

/**
 * Handle a "!dalle <prompt>" message: optionally moderate the prompt,
 * generate one image and reply with it.
 */
const handleMessageDALLE = async (message: any, prompt: any) => {
	try {
		const start = Date.now();

		cli.print(`[DALL-E] Received prompt from ${message.from}: ${prompt}`);

		// Prompt Moderation
		if (config.promptModerationEnabled) {
			try {
				await moderateIncomingPrompt(prompt);
			} catch (error: any) {
				message.reply(error.message);
				return;
			}
		}

		// Send the prompt to the API
		const response = await openai.images.generate({
			prompt: prompt,
			n: 1,
			// Cast through the v4 SDK's own size union instead of the removed v3 enum
			size: aiConfig.dalle.size as OpenAI.ImageGenerateParams["size"],
			response_format: "b64_json"
		});

		const end = Date.now() - start;

		// openai v4 returns the images directly on `response.data` (no axios wrapper)
		const base64 = response.data[0].b64_json as string;
		const image = new MessageMedia("image/jpeg", base64, "image.jpg");

		cli.print(`[DALL-E] Answer to ${message.from} | OpenAI request took ${end}ms`);

		message.reply(image);
	} catch (error: any) {
		console.error("An error occured", error);
		message.reply("An error occured, please contact the administrator. (" + error.message + ")");
	}
};

export { handleMessageDALLE };
--------------------------------------------------------------------------------
/src/handlers/gpt.ts:
--------------------------------------------------------------------------------
import os from "os";
import fs from "fs";
import path from "path";
import { randomUUID } from "crypto";
import { Message, MessageMedia } from "whatsapp-web.js";
import { chatgpt } from "../providers/openai";
import * as cli from "../cli/ui";
import config from "../config";

import { ChatMessage } from "chatgpt";

// TTS
import { ttsRequest as speechTTSRequest } from "../providers/speech";
import { ttsRequest as awsTTSRequest } from "../providers/aws";
import { TTSMode } from "../types/tts-mode";

// Moderation
import { moderateIncomingPrompt } from "./moderation";
import { aiConfig, getConfig } from "./ai-config";

// Mapping from number to last conversation id
const conversations = {};

/**
 * Handle a GPT prompt: continue the sender's conversation if one exists,
 * otherwise start a new one (optionally prefixed with the configured pre-prompt).
 */
const handleMessageGPT = async (message: Message, prompt: string) => {
	try {
		// Get last conversation
		const lastConversationId = conversations[message.from];

		cli.print(`[GPT] Received prompt from ${message.from}: ${prompt}`);

		// Prompt Moderation
		if (config.promptModerationEnabled) {
			try {
				await moderateIncomingPrompt(prompt);
			} catch (error: any) {
				message.reply(error.message);
				return;
			}
		}

		const start = Date.now();

		// Check if we have a conversation with the user
		let response: ChatMessage;
		if (lastConversationId) {
			// Handle message with previous conversation
			response = await chatgpt.sendMessage(prompt, {
				parentMessageId: lastConversationId
			});
		} else {
			// Build the first message of a new conversation.
			// Bug fix: the user's prompt must be sent even when no pre-prompt is
			// configured (previously an empty string was sent in that case).
			let promptBuilder = "";
			if (config.prePrompt != null && config.prePrompt.trim() != "") {
				promptBuilder += config.prePrompt + "\n\n";
			}
			promptBuilder += prompt;

			// Handle message with new conversation
			response = await chatgpt.sendMessage(promptBuilder);

			cli.print(`[GPT] New conversation for ${message.from} (ID: ${response.id})`);
		}

		// Set conversation id
		conversations[message.from] = response.id;

		const end = Date.now() - start;

		cli.print(`[GPT] Answer to ${message.from}: ${response.text} | OpenAI request took ${end}ms)`);

		// TTS reply (Default: disabled)
		if (getConfig("tts", "enabled")) {
			sendVoiceMessageReply(message, response.text);
			message.reply(response.text);
			return;
		}

		// Default: Text reply
		message.reply(response.text);
	} catch (error: any) {
		console.error("An error occured", error);
		message.reply("An error occured, please contact the administrator. (" + error.message + ")");
	}
};

// Reset the sender's conversation context ("!reset").
const handleDeleteConversation = async (message: Message) => {
	// Delete conversation
	delete conversations[message.from];

	// Reply
	message.reply("Conversation context was resetted!");
};

/**
 * Convert the GPT text response to speech with the configured TTS provider
 * and reply with a voice message.
 */
async function sendVoiceMessageReply(message: Message, gptTextResponse: string) {
	let logTAG: string;
	let ttsRequest: () => Promise<Buffer | null>;

	switch (config.ttsMode) {
		case TTSMode.AWSPolly:
			logTAG = "[AWSPolly]";
			ttsRequest = () => awsTTSRequest(gptTextResponse);
			break;

		case TTSMode.SpeechAPI:
		default:
			// Speech API is both an explicit mode and the fallback
			logTAG = "[SpeechAPI]";
			ttsRequest = () => speechTTSRequest(gptTextResponse);
			break;
	}

	// Get audio buffer
	cli.print(`${logTAG} Generating audio from GPT response "${gptTextResponse}"...`);
	const audioBuffer = await ttsRequest();

	// Check if audio buffer is valid
	if (audioBuffer == null || audioBuffer.length == 0) {
		message.reply(`${logTAG} couldn't generate audio, please contact the administrator.`);
		return;
	}

	cli.print(`${logTAG} Audio generated!`);

	// Send audio directly from the in-memory buffer.
	// (The previous temp-file write/delete was dead code: nothing ever read the file.)
	const messageMedia = new MessageMedia("audio/ogg; codecs=opus", audioBuffer.toString("base64"));
	message.reply(messageMedia);
}

export { handleMessageGPT, handleDeleteConversation };
--------------------------------------------------------------------------------
/src/handlers/langchain.ts:
--------------------------------------------------------------------------------
import { Message } from "whatsapp-web.js";
import BrowserAgentProvider from "../providers/browser-agent";
import * as cli from "../cli/ui";

const browserAgent = new BrowserAgentProvider();

// TODO add conversation ID to build a chat history
// Handle a "!lang <prompt>" message via the LangChain browser agent.
const handleMessageLangChain = async (message: Message, prompt: string) => {
	try {
		const start = Date.now();
		const output = await browserAgent.fetch(prompt);
		const end = Date.now() - start;

		cli.print(`[GPT] Answer to ${message.from}: ${output} | OpenAI request took ${end}ms)`);

		// Default: Text reply
		message.reply(output);
	} catch (error: any) {
		console.error("An error occured", error);
		message.reply("An error occured, please contact the administrator. (" + error.message + ")");
	}
};

export { handleMessageLangChain };
--------------------------------------------------------------------------------
/src/handlers/message.ts:
--------------------------------------------------------------------------------
import { Message } from "whatsapp-web.js";
import { startsWithIgnoreCase } from "../utils";

// Config & Constants
import config from "../config";

// CLI
import * as cli from "./../cli/ui";

// ChatGPT & DALLE
import { handleMessageGPT, handleDeleteConversation } from "../handlers/gpt";
import { handleMessageDALLE } from "../handlers/dalle";
import { handleMessageAIConfig, getConfig, executeCommand } from "../handlers/ai-config";
import { handleMessageLangChain } from "../handlers/langchain";

// Speech API & Whisper
import { TranscriptionMode } from "../types/transcription-mode";
import { transcribeRequest } from "../providers/speech";
import { transcribeAudioLocal } from "../providers/whisper-local";
import { transcribeWhisperApi } from "../providers/whisper-api";
import { transcribeOpenAI } from "../providers/openai";

// For deciding to ignore old messages
import { botReadyTimestamp } from "../index";

// Handles message: filters old/group/non-whitelisted messages, transcribes
// voice notes, then routes the text to the matching prefix handler.
async function handleIncomingMessage(message: Message) {
	let messageString = message.body;

	// Prevent handling old messages
	if (message.timestamp != null) {
		const messageTimestamp = new Date(message.timestamp * 1000);

		// If startTimestamp is null, the bot is not ready yet
		if (botReadyTimestamp == null) {
			cli.print("Ignoring message because bot is not ready yet: " + messageString);
			return;
		}

		// Ignore messages that are sent before the bot is started
		if (messageTimestamp < botReadyTimestamp) {
			cli.print("Ignoring old message: " + messageString);
			return;
		}
	}

	// Ignore groupchats if disabled
	if ((await message.getChat()).isGroup && !config.groupchatsEnabled) return;

	// Message sent to yourself ("self-noted"), always allowed through the whitelist
	const selfNotedMessage = message.fromMe && message.hasQuotedMsg === false && message.from === message.to;

	if (config.whitelistedEnabled) {
		const whitelistedPhoneNumbers = getConfig("general", "whitelist");

		if (!selfNotedMessage && whitelistedPhoneNumbers.length > 0 && !whitelistedPhoneNumbers.includes(message.from)) {
			cli.print(`Ignoring message from ${message.from} because it is not whitelisted.`);
			return;
		}
	}

	// Transcribe audio
	if (message.hasMedia) {
		const media = await message.downloadMedia();

		// Ignore non-audio media
		if (!media || !media.mimetype.startsWith("audio/")) return;

		// Check if transcription is enabled (Default: false)
		if (!getConfig("transcription", "enabled")) {
			cli.print("[Transcription] Received voice messsage but voice transcription is disabled.");
			return;
		}

		// Convert media to base64 string
		const mediaBuffer = Buffer.from(media.data, "base64");

		// Transcribe locally or with Speech API
		const transcriptionMode = getConfig("transcription", "mode");
		cli.print(`[Transcription] Transcribing audio with "${transcriptionMode}" mode...`);

		let res;
		switch (transcriptionMode) {
			case TranscriptionMode.Local:
				res = await transcribeAudioLocal(mediaBuffer);
				break;
			case TranscriptionMode.OpenAI:
				res = await transcribeOpenAI(mediaBuffer);
				break;
			case TranscriptionMode.WhisperAPI:
				res = await transcribeWhisperApi(new Blob([mediaBuffer]));
				break;
			case TranscriptionMode.SpeechAPI:
				res = await transcribeRequest(new Blob([mediaBuffer]));
				break;
			default:
				// Bug fix: must bail out here, otherwise `res` is undefined and
				// the destructuring below throws.
				cli.print(`[Transcription] Unsupported transcription mode: ${transcriptionMode}`);
				return;
		}
		const { text: transcribedText, language: transcribedLanguage } = res;

		// Check transcription is null (error)
		if (transcribedText == null) {
			message.reply("I couldn't understand what you said.");
			return;
		}

		// Check transcription is empty (silent voice message)
		if (transcribedText.length == 0) {
			message.reply("I couldn't understand what you said.");
			return;
		}

		// Log transcription
		cli.print(`[Transcription] Transcription response: ${transcribedText} (language: ${transcribedLanguage})`);

		// Reply with transcription
		if (config.ttsTranscriptionResponse) {
			const reply = `You said: ${transcribedText}${transcribedLanguage ? " (language: " + transcribedLanguage + ")" : ""}`;
			message.reply(reply);
		}

		// Handle message GPT
		await handleMessageGPT(message, transcribedText);
		return;
	}

	// Clear conversation context (!reset)
	if (startsWithIgnoreCase(messageString, config.resetPrefix)) {
		await handleDeleteConversation(message);
		return;
	}

	// AiConfig (!config <args>)
	if (startsWithIgnoreCase(messageString, config.aiConfigPrefix)) {
		const prompt = messageString.substring(config.aiConfigPrefix.length + 1);
		await handleMessageAIConfig(message, prompt);
		return;
	}

	// GPT (!gpt <prompt>)
	if (startsWithIgnoreCase(messageString, config.gptPrefix)) {
		const prompt = messageString.substring(config.gptPrefix.length + 1);
		await handleMessageGPT(message, prompt);
		return;
	}

	// LangChain (!lang <prompt>)
	if (startsWithIgnoreCase(messageString, config.langChainPrefix)) {
		const prompt = messageString.substring(config.langChainPrefix.length + 1);
		await handleMessageLangChain(message, prompt);
		return;
	}

	// DALLE (!dalle <prompt>)
	if (startsWithIgnoreCase(messageString, config.dallePrefix)) {
		const prompt = messageString.substring(config.dallePrefix.length + 1);
		await handleMessageDALLE(message, prompt);
		return;
	}

	// Stable Diffusion (!sd <prompt>)
	if (startsWithIgnoreCase(messageString, config.stableDiffusionPrefix)) {
		const prompt = messageString.substring(config.stableDiffusionPrefix.length + 1);
		await executeCommand("sd", "generate", message, prompt);
		return;
	}

	// GPT (only <prompt>, no prefix)
	if (!config.prefixEnabled || (config.prefixSkippedForMe && selfNotedMessage)) {
		await handleMessageGPT(message, messageString);
		return;
	}
}

export { handleIncomingMessage };
--------------------------------------------------------------------------------
/src/handlers/moderation.ts:
--------------------------------------------------------------------------------
import * as cli from "../cli/ui";
import config from "../config";
import { openai } from "../providers/openai";

/**
 * Handle prompt moderation
 *
 * @param prompt Prompt to moderate
 * @returns true if the prompt is safe, throws an error otherwise
 */
const moderateIncomingPrompt = async (prompt: string) => {
	cli.print("[MODERATION] Checking user prompt...");
	const moderationResponse = await openai.moderations.create({
		input: prompt
	});

	// openai v4: the response body is returned directly (no `.data` axios wrapper)
	const moderationResponseCategories = moderationResponse.results[0].categories;
	const blackListedCategories = config.promptModerationBlacklistedCategories;

	// Print categories as [ category: true/false ]
	const categoriesForPrint = Object.keys(moderationResponseCategories).map((category) => {
		return `${category}: ${moderationResponseCategories[category]}`;
	});
	cli.print(`[MODERATION] OpenAI Moderation response: ${JSON.stringify(categoriesForPrint)}`);

	// Check if any of the blacklisted categories are set to true
	for (const category of blackListedCategories) {
		if (moderationResponseCategories[category]) {
			throw new Error(`Prompt was rejected by the moderation system. Reason: ${category}`);
		}
	}

	return true;
};

export { moderateIncomingPrompt };
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
import qrcode from "qrcode";
import { Client, Message, Events, LocalAuth } from "whatsapp-web.js";

// Constants
import constants from "./constants";

// CLI
import * as cli from "./cli/ui";
import { handleIncomingMessage } from "./handlers/message";

// Config
import { initAiConfig } from "./handlers/ai-config";
import { initOpenAI } from "./providers/openai";

// Ready timestamp of the bot
let botReadyTimestamp: Date | null = null;

// Entrypoint: create the WhatsApp client, wire up events and start the session.
const start = async () => {
	// Pinned web version to keep whatsapp-web.js compatible with WhatsApp Web
	const wwebVersion = "2.2412.54";
	cli.printIntro();

	// WhatsApp Client
	const client = new Client({
		puppeteer: {
			args: ["--no-sandbox"]
		},
		authStrategy: new LocalAuth({
			dataPath: constants.sessionPath
		}),
		webVersionCache: {
			type: "remote",
			remotePath: `https://raw.githubusercontent.com/wppconnect-team/wa-version/main/html/${wwebVersion}.html`
		}
	});

	// WhatsApp auth
	client.on(Events.QR_RECEIVED, (qr: string) => {
		console.log("");
		qrcode.toString(
			qr,
			{
				type: "terminal",
				small: true,
				margin: 2,
				scale: 1
			},
			(err, url) => {
				if (err) throw err;
				cli.printQRCode(url);
			}
		);
	});

	// WhatsApp loading
	client.on(Events.LOADING_SCREEN, (percent) => {
		if (percent == "0") {
			cli.printLoading();
		}
	});

	// WhatsApp authenticated
	client.on(Events.AUTHENTICATED, () => {
		cli.printAuthenticated();
	});

	// WhatsApp authentication failure
	client.on(Events.AUTHENTICATION_FAILURE, () => {
		cli.printAuthenticationFailure();
	});

	// WhatsApp ready
	client.on(Events.READY, () => {
		// Print outro
		cli.printOutro();

		// Set bot ready timestamp
		botReadyTimestamp = new Date();

		initAiConfig();
		initOpenAI();
	});

	// WhatsApp message
	client.on(Events.MESSAGE_RECEIVED, async (message: any) => {
		// Ignore if message is from status broadcast
		if (message.from == constants.statusBroadcast) return;

		// Ignore if it's a quoted message, (e.g. Bot reply)
		if (message.hasQuotedMsg) return;

		await handleIncomingMessage(message);
	});

	// Reply to own message
	client.on(Events.MESSAGE_CREATE, async (message: Message) => {
		// Ignore if message is from status broadcast
		if (message.from == constants.statusBroadcast) return;

		// Ignore if it's a quoted message, (e.g. Bot reply)
		if (message.hasQuotedMsg) return;

		// Ignore if it's not from me
		if (!message.fromMe) return;

		await handleIncomingMessage(message);
	});

	// WhatsApp initialization
	client.initialize();
};

start();

export { botReadyTimestamp };
--------------------------------------------------------------------------------
/src/providers/aws.ts:
--------------------------------------------------------------------------------
const AWS = require("aws-sdk");
import config from "../config";

/**
 * @param text The sentence to be converted to speech
 * @returns Audio buffer, or null on failure
 */
async function ttsRequest(text: string): Promise<Buffer | null> {
	const polly = new AWS.Polly({
		credentials: new AWS.Credentials(config.awsAccessKeyId, config.awsSecretAccessKey),
		region: config.awsRegion
	});

	const params = {
		OutputFormat: "mp3",
		Text: text,
		Engine: config.awsPollyEngine,
		VoiceId: config.awsPollyVoiceId
	};

	try {
		const data = await polly.synthesizeSpeech(params).promise();
		if (data.AudioStream instanceof Buffer) {
			return data.AudioStream;
		}
		return null;
	} catch (error) {
		console.error("An error occured (TTS request)", error);
		return null;
	}
}

export { ttsRequest };
--------------------------------------------------------------------------------
/src/providers/browser-agent.ts:
--------------------------------------------------------------------------------
import { OpenAI } from "langchain/llms/openai";
import { SerpAPI } from "langchain/tools";
import { initializeAgentExecutor } from "langchain/agents";

export default class BrowserAgentProvider {
	// Can use other browser tools like RequestGetTool if you do not have a [SerpAPI](https://serpapi.com/) API key.
	tools = [
		new SerpAPI()
		// new RequestsGetTool(),
	];
	// Always select highest probability word in search
	model = new OpenAI({ temperature: 0 });

	// Run the zero-shot agent and return its final answer text (`result.output`).
	fetch = async (query) => {
		const executor = await initializeAgentExecutor(this.tools, this.model, "zero-shot-react-description", true);
		const result = await executor.call({ input: query });

		return result.output;
	};
}
--------------------------------------------------------------------------------
/src/providers/openai.ts:
--------------------------------------------------------------------------------
import fs from "fs";
import os from "os";
import path from "path";
import { randomUUID } from "crypto";
import { ChatGPTAPI } from "chatgpt";
import OpenAI from "openai";

import ffmpeg from "fluent-ffmpeg";
import { blobFromSync, File } from "fetch-blob/from.js";
import config from "../config";
import { getConfig } from "../handlers/ai-config";

export let chatgpt: ChatGPTAPI;

// OpenAI Client (DALL-E)
export let openai: OpenAI;

// Initialize both OpenAI clients; must run after initAiConfig().
export function initOpenAI() {
	chatgpt = new ChatGPTAPI({
		apiKey: getConfig("gpt", "apiKey"),
		completionParams: {
			model: config.openAIModel,
			temperature: 0.7,
			top_p: 0.9,
			max_tokens: getConfig("gpt", "maxModelTokens")
		}
	});

	openai = new OpenAI({
		apiKey: getConfig("gpt", "apiKey")
	});
}

/**
 * Transcribe an ogg voice note with the OpenAI Whisper endpoint.
 * Converts ogg -> wav first; returns { text: "", language } on any failure.
 */
export async function transcribeOpenAI(audioBuffer: Buffer): Promise<{ text: string; language: string }> {
	const url = config.openAIServerUrl;
	let language = "";

	const tempdir = os.tmpdir();
	const oggPath = path.join(tempdir, randomUUID() + ".ogg");
	const wavFilename = randomUUID() + ".wav";
	const wavPath = path.join(tempdir, wavFilename);
	fs.writeFileSync(oggPath, audioBuffer);
	try {
		await convertOggToWav(oggPath, wavPath);
	} catch (e) {
		fs.unlinkSync(oggPath);
		return {
			text: "",
			language
		};
	}

	// FormData
	const formData = new FormData();
	formData.append("file", new File([blobFromSync(wavPath)], wavFilename, { type: "audio/wav" }));
	formData.append("model", "whisper-1");
	if (config.transcriptionLanguage) {
		formData.append("language", config.transcriptionLanguage);
		language = config.transcriptionLanguage;
	}

	const headers = new Headers();
	headers.append("Authorization", `Bearer ${getConfig("gpt", "apiKey")}`);

	// Request options
	const options = {
		method: "POST",
		body: formData,
		headers
	};

	let response;
	try {
		response = await fetch(url, options);
	} catch (e) {
		console.error(e);
	} finally {
		// Always clean up the temp files, even when the request fails
		fs.unlinkSync(oggPath);
		fs.unlinkSync(wavPath);
	}

	if (!response || response.status != 200) {
		console.error(response);
		return {
			text: "",
			language: language
		};
	}

	const transcription = await response.json();
	return {
		text: transcription.text,
		language
	};
}

// Convert an ogg file to 16-bit PCM wav via ffmpeg.
async function convertOggToWav(oggPath: string, wavPath: string): Promise<void> {
	return new Promise((resolve, reject) => {
		ffmpeg(oggPath)
			.toFormat("wav")
			.outputOptions("-acodec pcm_s16le")
			.output(wavPath)
			.on("end", () => resolve())
			.on("error", (err) => reject(err))
			.run();
	});
}
--------------------------------------------------------------------------------
/src/providers/speech.ts:
--------------------------------------------------------------------------------
import config from "../config";

/**
 * @param text The sentence to be converted to speech
 * @returns Audio buffer, or null on failure
 */
async function ttsRequest(text: string): Promise<Buffer | null> {
	const url = config.speechServerUrl + "/tts";

	// Request options
	const options = {
		method: "POST",
		headers: {
			"Content-Type": "application/json"
		},
		body: JSON.stringify({
			text
		})
	};

	try {
		const response = await fetch(url, options);
		const audioBuffer = await response.arrayBuffer();
		return Buffer.from(audioBuffer);
	} catch (error) {
		console.error("An error occured (TTS request)", error);
		return null;
	}
}

/**
 * @param audioBlob The audio blob to be transcribed
 * @returns Response: { text: string, language: string }
 */
async function transcribeRequest(audioBlob: Blob): Promise<{ text: string; language: string }> {
	const url = config.speechServerUrl + "/transcribe";

	// FormData
	const formData = new FormData();
	formData.append("audio", audioBlob);

	// Request options
	const options = {
		method: "POST",
		body: formData
	};

	const response = await fetch(url, options);
	const transcription = await response.json();
	return transcription;
}

export { ttsRequest, transcribeRequest };
--------------------------------------------------------------------------------
/src/providers/whisper-api.ts:
--------------------------------------------------------------------------------
import config from "../config";

// Transcribe an audio blob via the hosted WhisperAPI service.
async function transcribeWhisperApi(audioBlob: Blob): Promise<{ text: string; language: string }> {
	const url = config.whisperServerUrl;

	// FormData
	const formData = new FormData();
	formData.append("file", audioBlob);
	formData.append("diarization", "false");
	formData.append("numSpeakers", "1");
	formData.append("fileType", "ogg");
	if (config.transcriptionLanguage) {
		formData.append("language", config.transcriptionLanguage);
	}
	formData.append("task", "transcribe");

	const headers = new Headers();
headers.append("Authorization", `Bearer ${config.whisperApiKey}`); 19 | 20 | // Request options 21 | const options = { 22 | method: "POST", 23 | body: formData, 24 | headers 25 | }; 26 | 27 | const response = await fetch(url, options); 28 | const transcription = await response.json(); 29 | return transcription; 30 | } 31 | 32 | export { transcribeWhisperApi }; 33 | -------------------------------------------------------------------------------- /src/providers/whisper-local.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import os from "os"; 3 | import path from "path"; 4 | import { execSync } from "child_process"; 5 | import { randomUUID } from "crypto"; 6 | 7 | async function transcribeAudioLocal(audioBuffer: Buffer): Promise<{ text: string; language: string }> { 8 | // Write audio buffer to tempdir 9 | const tempdir = os.tmpdir(); 10 | const audioPath = path.join(tempdir, randomUUID() + ".wav"); 11 | fs.writeFileSync(audioPath, audioBuffer); 12 | 13 | // Transcribe audio 14 | const output = execSync(`whisper ${audioPath}`, { encoding: "utf-8" }); 15 | 16 | // Delete tmp file 17 | fs.unlinkSync(audioPath); 18 | 19 | // Delete whisper created tmp files 20 | const extensions = [".wav.srt", ".wav.txt", ".wav.vtt"]; 21 | for (const extension of extensions) { 22 | fs.readdirSync(process.cwd()).forEach((file) => { 23 | if (file.endsWith(extension)) fs.unlinkSync(file); 24 | }); 25 | } 26 | 27 | // Return parsed text and language 28 | return { 29 | text: parseTextAfterTimeFrame(output), 30 | language: parseDetectedLanguage(output) 31 | }; 32 | } 33 | 34 | function parseDetectedLanguage(text) { 35 | const languageLine = text.split("\n")[1]; // Extract the second line of text 36 | const languageMatch = languageLine.match(/Detected language:\s(.+)/); // Extract the detected language 37 | 38 | if (languageMatch) { 39 | return languageMatch[1].trim(); 40 | } 41 | 42 | return null; // Return null if match is not 
found 43 | } 44 | 45 | function parseTextAfterTimeFrame(text) { 46 | const textMatch = text.match(/\[(\d{2}:\d{2}\.\d{3})\s-->\s(\d{2}:\d{2}\.\d{3})\]\s(.+)/); // Extract the text 47 | 48 | if (textMatch) { 49 | return textMatch[3].trim(); 50 | } 51 | 52 | return null; // Return null if match is not found 53 | } 54 | 55 | export { transcribeAudioLocal }; 56 | -------------------------------------------------------------------------------- /src/types/ai-config.ts: -------------------------------------------------------------------------------- 1 | import { ICommandsMap } from "./commands"; 2 | import { dalleConfigType, dalleImageSize } from "./dalle-config"; 3 | 4 | export enum aiConfigTarget { 5 | dalle = "dalle" 6 | // chatgpt = "chatgpt" 7 | } 8 | 9 | export const aiConfigTypes = { 10 | dalle: dalleConfigType 11 | }; 12 | 13 | export const aiConfigValues = { 14 | dalle: { 15 | size: dalleImageSize 16 | } 17 | }; 18 | 19 | export interface IAiConfig { 20 | dalle: { 21 | size: dalleImageSize; 22 | }; 23 | commandsMap: { 24 | [key: string]: ICommandsMap; 25 | }; 26 | } 27 | -------------------------------------------------------------------------------- /src/types/aws-polly-engine.ts: -------------------------------------------------------------------------------- 1 | export enum AWSPollyEngine { 2 | Standard = "standard", 3 | Neural = "neural" 4 | } 5 | -------------------------------------------------------------------------------- /src/types/commands.ts: -------------------------------------------------------------------------------- 1 | import { Message } from "whatsapp-web.js"; 2 | 3 | export interface ICommandExecution { 4 | (message: Message, value?: string): void; 5 | } 6 | 7 | export interface ICommandDefinition { 8 | data?: any; 9 | help: string; 10 | hint?: string | Object | undefined; 11 | execute: ICommandExecution; 12 | } 13 | 14 | export interface ICommandsMap { 15 | [key: string]: ICommandDefinition; 16 | } 17 | 18 | export interface ICommandModule { 
19 | key: string; 20 | register: () => ICommandsMap; 21 | } 22 | -------------------------------------------------------------------------------- /src/types/dalle-config.ts: -------------------------------------------------------------------------------- 1 | export enum dalleConfigType { 2 | size = "size" 3 | } 4 | 5 | export enum dalleImageSize { 6 | "256x256" = "256x256", 7 | "512x512" = "512x512", 8 | "1024x1024" = "1024x1024" 9 | } 10 | -------------------------------------------------------------------------------- /src/types/transcription-mode.ts: -------------------------------------------------------------------------------- 1 | export enum TranscriptionMode { 2 | Local = "local", 3 | SpeechAPI = "speech-api", 4 | WhisperAPI = "whisper-api", 5 | OpenAI = "openai" 6 | } 7 | -------------------------------------------------------------------------------- /src/types/tts-mode.ts: -------------------------------------------------------------------------------- 1 | export enum TTSMode { 2 | SpeechAPI = "speech-api", 3 | AWSPolly = "aws-polly" 4 | } 5 | -------------------------------------------------------------------------------- /src/utils.ts: -------------------------------------------------------------------------------- 1 | const startsWithIgnoreCase = (str, prefix) => str.toLowerCase().startsWith(prefix.toLowerCase()); 2 | 3 | export { startsWithIgnoreCase }; 4 | --------------------------------------------------------------------------------