├── .dockerignore ├── .env-example ├── .github └── workflows │ ├── docker.yml │ └── prettier.yml ├── .gitignore ├── .npmrc ├── .prettierrc ├── Dockerfile ├── README.md ├── docker-compose.yml ├── docs ├── .nojekyll ├── README.md ├── _sidebar.md ├── index.html └── pages │ ├── configure-prefix.md │ ├── disclaimer.md │ ├── docker.md │ ├── gpt.md │ ├── installation.md │ ├── langchain.md │ ├── send-messages-to-yourself.md │ ├── transcription.md │ ├── tts.md │ └── usage.md ├── installer.sh ├── package-lock.json ├── package.json └── src ├── cli └── ui.ts ├── commands ├── chat.ts ├── general.ts ├── gpt.ts ├── stable-diffusion.ts ├── transcription.ts └── tts.ts ├── config.ts ├── constants.ts ├── handlers ├── ai-config.ts ├── dalle.ts ├── gpt.ts ├── langchain.ts ├── message.ts └── moderation.ts ├── index.ts ├── providers ├── aws.ts ├── browser-agent.ts ├── openai.ts ├── speech.ts ├── whisper-api.ts └── whisper-local.ts ├── types ├── ai-config.ts ├── aws-polly-engine.ts ├── commands.ts ├── dalle-config.ts ├── transcription-mode.ts └── tts-mode.ts └── utils.ts /.dockerignore: -------------------------------------------------------------------------------- 1 | node_modules/* -------------------------------------------------------------------------------- /.env-example: -------------------------------------------------------------------------------- 1 | # ----------------------------------------------- # 2 | # Make a copy of this file and rename it to .env # 3 | # # 4 | # Also check out our documentation: # 5 | # https://askrella.github.io/whatsapp-chatgpt # 6 | # ----------------------------------------------- # 7 | 8 | # Get your key here: https://platform.openai.com/account/api-keys 9 | OPENAI_API_KEY=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 10 | 11 | # HuggingFace API Token https://huggingface.co/settings/tokens 12 | HUGGINGFACE_API_TOKEN=hf_xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 13 | 14 | # In case you run into ratelimit on a single organization token, you might setting 
up multiple API keys here 15 | # Example: 16 | # OPENAI_API_KEYS=sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx,sk-xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 17 | 18 | # GPT Model (default: gpt-3.5-turbo) 19 | OPENAI_GPT_MODEL= 20 | 21 | # Max tokens to use for GPT per request 22 | MAX_MODEL_TOKENS=2000 23 | 24 | # GPT Pre Prompt, executed after creating a conversation 25 | # Example: Act very funny and overreact to messages. Do that for every message you get, forever. 26 | PRE_PROMPT= 27 | 28 | # Whether or not to use prefixes !gpt and !dalle 29 | PREFIX_ENABLED=true 30 | 31 | # Whether or not to use prefixes for self-note conversation 32 | PREFIX_SKIPPED_FOR_ME=true 33 | 34 | # Set own prefixes for ChatGPT, DALL-E, reset context, configuration 35 | GPT_PREFIX=!gpt 36 | DALLE_PREFIX=!dalle 37 | RESET_PREFIX=!reset 38 | AI_CONFIG_PREFIX=!config 39 | 40 | # Whether or not to allow the bot interacting on groupchats 41 | GROUPCHATS_ENABLED=false 42 | 43 | # Prompt Moderation 44 | # If enabled, the bot will check any prompts submitted by users with the OpenAI Moderation API 45 | # If the prompt is classified as any of the categories in the blacklisted categories, the prompt will be rejected 46 | # You can find the available categories here: https://beta.openai.com/docs/api-reference/moderations 47 | PROMPT_MODERATION_ENABLED = true 48 | PROMPT_MODERATION_BLACKLISTED_CATEGORIES = ["hate", "hate/threatening", "self-harm", "sexual", "sexual/minors", "violence", "violence/graphic"] 49 | 50 | # Access control, only allow whatsapp-chatgpt to react to specific phone numbers, comma-separated 51 | WHITELISTED_PHONE_NUMBERS= 52 | WHITELISTED_ENABLED=false 53 | # Speech API URL 54 | # You can use host your own Speech API 55 | # https://github.com/askrella/speech-rest-api 56 | SPEECH_API_URL= 57 | 58 | # Whisper API 59 | 60 | # API Key 61 | WHISPER_API_KEY= 62 | 63 | # You can use Whisper API for voice transcription 64 | WHISPER_API_URL=https://transcribe.whisperapi.com 65 | 66 | # 
Defines if the bot should recognize and transcribe your voice messages 67 | TRANSCRIPTION_ENABLED=false 68 | 69 | # Defines if the bot should use the local or remote transcription service 70 | # "local" = You need to have "whisper" installed on your machine 71 | # "openai" = It will use Open AI's transcription API with whisper-1 model 72 | # "speech-api" = It will use our Speech API to transcribe your voice messages 73 | # "whisper-api" = It will use whisper's API to transcribe your voice messages 74 | TRANSCRIPTION_MODE=local 75 | 76 | # Define the language of transcription, depends on transcriber it might auto-detect if not given 77 | TRANSCRIPTION_LANGUAGE= 78 | 79 | # Defines if the bot should send voice message responses (text-to-speech) 80 | # Be aware that this feature will use the Speech API to convert the GPT response to voice 81 | # It's open source: https://github.com/askrella/speech-rest-api 82 | TTS_ENABLED=false 83 | 84 | # Defines if the bot should return the TTS response as a text message too 85 | # If enabled, the bot will send the text response and the voice message 86 | TTS_TRANSCRIPTION_RESPONSE_ENABLED=true 87 | 88 | # Defines if the bot should use the Speech API or AWS Polly to convert text to speech 89 | # "speech-api" = It will use our Speech API to transcribe your voice messages 90 | # "aws-polly" = It will use AWS Polly to convert text to speech 91 | TTS_MODE=speech-api 92 | 93 | # AWS Config 94 | # You can use AWS Polly to convert text to speech 95 | # You need to have an AWS account and create an IAM user with Polly permissions 96 | # You can find the available voices here: https://docs.aws.amazon.com/polly/latest/dg/voicelist.html 97 | # For the Voice Engine, you can use "standard" or "neural", make sure to use the correct voice for the engine 98 | AWS_ACCESS_KEY_ID=xxxxxxxxxxxxxxxxxxxx 99 | AWS_SECRET_ACCESS_KEY=xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx 100 | AWS_REGION=eu-central-1 101 | AWS_POLLY_VOICE_ID=Joanna 102 | 
AWS_POLLY_VOICE_ENGINE=standard 103 | 104 | # LangChain Tool Config https://js.langchain.com/docs/modules/agents/tools/ 105 | SERPAPI_API_KEY=xxxxxxxxx 106 | -------------------------------------------------------------------------------- /.github/workflows/docker.yml: -------------------------------------------------------------------------------- 1 | name: Docker 2 | 3 | # This workflow uses actions that are not certified by GitHub. 4 | # They are provided by a third-party and are governed by 5 | # separate terms of service, privacy policy, and support 6 | # documentation. 7 | 8 | on: 9 | schedule: 10 | - cron: "28 2 * * *" 11 | push: 12 | branches: ["master"] 13 | # Publish semver tags as releases. 14 | tags: ["v*.*.*"] 15 | pull_request: 16 | branches: ["master"] 17 | 18 | env: 19 | # Use docker.io for Docker Hub if empty 20 | REGISTRY: ghcr.io 21 | # github.repository as / 22 | IMAGE_NAME: ${{ github.repository }} 23 | 24 | jobs: 25 | build: 26 | runs-on: ubuntu-latest 27 | permissions: 28 | contents: read 29 | packages: write 30 | # This is used to complete the identity challenge 31 | # with sigstore/fulcio when running outside of PRs. 
32 | id-token: write 33 | 34 | steps: 35 | - name: Checkout repository 36 | uses: actions/checkout@v3 37 | 38 | # Install the cosign tool except on PR 39 | # https://github.com/sigstore/cosign-installer 40 | - name: Install cosign 41 | if: github.event_name != 'pull_request' 42 | uses: sigstore/cosign-installer@v3.5.0 43 | with: 44 | cosign-release: "v2.2.4" 45 | 46 | # Workaround: https://github.com/docker/build-push-action/issues/461 47 | - name: Setup Docker buildx 48 | uses: docker/setup-buildx-action@79abd3f86f79a9d68a23c75a09a9a85889262adf 49 | 50 | # Login against a Docker registry except on PR 51 | # https://github.com/docker/login-action 52 | - name: Log into registry ${{ env.REGISTRY }} 53 | if: github.event_name != 'pull_request' 54 | uses: docker/login-action@28218f9b04b4f3f62068d7b6ce6ca5b26e35336c 55 | with: 56 | registry: ${{ env.REGISTRY }} 57 | username: ${{ github.actor }} 58 | password: ${{ secrets.GITHUB_TOKEN }} 59 | 60 | # Extract metadata (tags, labels) for Docker 61 | # https://github.com/docker/metadata-action 62 | - name: Extract Docker metadata 63 | id: meta 64 | uses: docker/metadata-action@98669ae865ea3cffbcbaa878cf57c20bbf1c6c38 65 | with: 66 | images: ${{ env.REGISTRY }}/${{ env.IMAGE_NAME }} 67 | 68 | # Build and push Docker image with Buildx (don't push on PR) 69 | # https://github.com/docker/build-push-action 70 | - name: Build and push Docker image 71 | id: build-and-push 72 | uses: docker/build-push-action@ac9327eae2b366085ac7f6a2d02df8aa8ead720a 73 | with: 74 | context: . 75 | push: ${{ github.event_name != 'pull_request' }} 76 | tags: ${{ steps.meta.outputs.tags }} 77 | labels: ${{ steps.meta.outputs.labels }} 78 | cache-from: type=gha 79 | cache-to: type=gha,mode=max 80 | 81 | # Sign the resulting Docker image digest except on PRs. 82 | # This will only write to the public Rekor transparency log when the Docker 83 | # repository is public to avoid leaking data. 
If you would like to publish 84 | # transparency data even for private images, pass --force to cosign below. 85 | # https://github.com/sigstore/cosign 86 | - name: Sign the published Docker image 87 | if: ${{ github.event_name != 'pull_request' }} 88 | env: 89 | COSIGN_EXPERIMENTAL: "true" 90 | # This step uses the identity token to provision an ephemeral certificate 91 | # against the sigstore community Fulcio instance. 92 | run: echo "${{ steps.meta.outputs.tags }}" | xargs -I {} cosign sign --yes {}@${{ steps.build-and-push.outputs.digest }} 93 | -------------------------------------------------------------------------------- /.github/workflows/prettier.yml: -------------------------------------------------------------------------------- 1 | name: Prettier 2 | on: 3 | push: 4 | branches: 5 | - master 6 | - main 7 | 8 | jobs: 9 | prettier: 10 | runs-on: ubuntu-latest 11 | steps: 12 | - name: Checkout code 13 | uses: actions/checkout@v2 14 | 15 | - name: Install dependencies 16 | run: npm ci 17 | 18 | - name: Run Prettier 19 | run: npm run prettier 20 | env: 21 | GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }} 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | node_modules/* 2 | .env 3 | session/ 4 | 5 | # Created by https://www.toptal.com/developers/gitignore/api/vs,intellij+all,node 6 | # Edit at https://www.toptal.com/developers/gitignore?templates=vs,intellij+all,node 7 | 8 | ### Intellij+all ### 9 | # Covers JetBrains IDEs: IntelliJ, RubyMine, PhpStorm, AppCode, PyCharm, CLion, Android Studio, WebStorm and Rider 10 | # Reference: https://intellij-support.jetbrains.com/hc/en-us/articles/206544839 11 | 12 | # User-specific stuff 13 | .idea/**/workspace.xml 14 | .idea/**/tasks.xml 15 | .idea/**/usage.statistics.xml 16 | .idea/**/dictionaries 17 | .idea/**/shelf 18 | 19 | # AWS User-specific 20 | .idea/**/aws.xml 21 | 22 | # Generated 
files 23 | .idea/**/contentModel.xml 24 | 25 | # Sensitive or high-churn files 26 | .idea/**/dataSources/ 27 | .idea/**/dataSources.ids 28 | .idea/**/dataSources.local.xml 29 | .idea/**/sqlDataSources.xml 30 | .idea/**/dynamic.xml 31 | .idea/**/uiDesigner.xml 32 | .idea/**/dbnavigator.xml 33 | 34 | # Gradle 35 | .idea/**/gradle.xml 36 | .idea/**/libraries 37 | 38 | # Gradle and Maven with auto-import 39 | # When using Gradle or Maven with auto-import, you should exclude module files, 40 | # since they will be recreated, and may cause churn. Uncomment if using 41 | # auto-import. 42 | # .idea/artifacts 43 | # .idea/compiler.xml 44 | # .idea/jarRepositories.xml 45 | # .idea/modules.xml 46 | # .idea/*.iml 47 | # .idea/modules 48 | # *.iml 49 | # *.ipr 50 | 51 | # CMake 52 | cmake-build-*/ 53 | 54 | # Mongo Explorer plugin 55 | .idea/**/mongoSettings.xml 56 | 57 | # File-based project format 58 | *.iws 59 | 60 | # IntelliJ 61 | out/ 62 | 63 | # mpeltonen/sbt-idea plugin 64 | .idea_modules/ 65 | 66 | # JIRA plugin 67 | atlassian-ide-plugin.xml 68 | 69 | # Cursive Clojure plugin 70 | .idea/replstate.xml 71 | 72 | # SonarLint plugin 73 | .idea/sonarlint/ 74 | 75 | # Crashlytics plugin (for Android Studio and IntelliJ) 76 | com_crashlytics_export_strings.xml 77 | crashlytics.properties 78 | crashlytics-build.properties 79 | fabric.properties 80 | 81 | # Editor-based Rest Client 82 | .idea/httpRequests 83 | 84 | # Android studio 3.1+ serialized cache file 85 | .idea/caches/build_file_checksums.ser 86 | 87 | ### Intellij+all Patch ### 88 | # Ignore everything but code style settings and run configurations 89 | # that are supposed to be shared within teams. 
90 | 91 | .idea/* 92 | 93 | !.idea/codeStyles 94 | !.idea/runConfigurations 95 | 96 | ### Node ### 97 | # Logs 98 | logs 99 | *.log 100 | npm-debug.log* 101 | yarn-debug.log* 102 | yarn-error.log* 103 | lerna-debug.log* 104 | .pnpm-debug.log* 105 | 106 | # Diagnostic reports (https://nodejs.org/api/report.html) 107 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 108 | 109 | # Runtime data 110 | pids 111 | *.pid 112 | *.seed 113 | *.pid.lock 114 | 115 | # Directory for instrumented libs generated by jscoverage/JSCover 116 | lib-cov 117 | 118 | # Coverage directory used by tools like istanbul 119 | coverage 120 | *.lcov 121 | 122 | # nyc test coverage 123 | .nyc_output 124 | 125 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 126 | .grunt 127 | 128 | # Bower dependency directory (https://bower.io/) 129 | bower_components 130 | 131 | # node-waf configuration 132 | .lock-wscript 133 | 134 | # Compiled binary addons (https://nodejs.org/api/addons.html) 135 | build/Release 136 | 137 | # Dependency directories 138 | node_modules/ 139 | jspm_packages/ 140 | 141 | # Snowpack dependency directory (https://snowpack.dev/) 142 | web_modules/ 143 | 144 | # TypeScript cache 145 | *.tsbuildinfo 146 | 147 | # Optional npm cache directory 148 | .npm 149 | 150 | # Optional eslint cache 151 | .eslintcache 152 | 153 | # Optional stylelint cache 154 | .stylelintcache 155 | 156 | # Microbundle cache 157 | .rpt2_cache/ 158 | .rts2_cache_cjs/ 159 | .rts2_cache_es/ 160 | .rts2_cache_umd/ 161 | 162 | # Optional REPL history 163 | .node_repl_history 164 | 165 | # Output of 'npm pack' 166 | *.tgz 167 | 168 | # Yarn Integrity file 169 | .yarn-integrity 170 | 171 | # dotenv environment variable files 172 | .env 173 | .env.development.local 174 | .env.test.local 175 | .env.production.local 176 | .env.local 177 | 178 | # parcel-bundler cache (https://parceljs.org/) 179 | .cache 180 | .parcel-cache 181 | 182 | # Next.js build output 183 | .next 184 | out 185 | 186 
| # Nuxt.js build / generate output 187 | .nuxt 188 | dist 189 | 190 | # Gatsby files 191 | .cache/ 192 | # Comment in the public line in if your project uses Gatsby and not Next.js 193 | # https://nextjs.org/blog/next-9-1#public-directory-support 194 | # public 195 | 196 | # vuepress build output 197 | .vuepress/dist 198 | 199 | # vuepress v2.x temp and cache directory 200 | .temp 201 | 202 | # Docusaurus cache and generated files 203 | .docusaurus 204 | 205 | # Serverless directories 206 | .serverless/ 207 | 208 | # FuseBox cache 209 | .fusebox/ 210 | 211 | # DynamoDB Local files 212 | .dynamodb/ 213 | 214 | # TernJS port file 215 | .tern-port 216 | 217 | # Stores VSCode versions used for testing VSCode extensions 218 | .vscode-test 219 | 220 | # yarn v2 221 | .yarn/cache 222 | .yarn/unplugged 223 | .yarn/build-state.yml 224 | .yarn/install-state.gz 225 | .pnp.* 226 | 227 | ### Node Patch ### 228 | # Serverless Webpack directories 229 | .webpack/ 230 | 231 | # Optional stylelint cache 232 | 233 | # SvelteKit build / generate output 234 | .svelte-kit 235 | 236 | ### vs ### 237 | ## Ignore Visual Studio temporary files, build results, and 238 | ## files generated by popular Visual Studio add-ons. 
239 | ## 240 | ## Get latest from https://github.com/github/gitignore/blob/master/VisualStudio.gitignore 241 | 242 | # User-specific files 243 | *.rsuser 244 | *.suo 245 | *.user 246 | *.userosscache 247 | *.sln.docstates 248 | 249 | # User-specific files (MonoDevelop/Xamarin Studio) 250 | *.userprefs 251 | 252 | # Mono auto generated files 253 | mono_crash.* 254 | 255 | # Build results 256 | [Dd]ebug/ 257 | [Dd]ebugPublic/ 258 | [Rr]elease/ 259 | [Rr]eleases/ 260 | x64/ 261 | x86/ 262 | [Aa][Rr][Mm]/ 263 | [Aa][Rr][Mm]64/ 264 | bld/ 265 | [Bb]in/ 266 | [Oo]bj/ 267 | [Ll]og/ 268 | [Ll]ogs/ 269 | 270 | # Visual Studio 2015/2017 cache/options directory 271 | .vs/ 272 | # Uncomment if you have tasks that create the project's static files in wwwroot 273 | #wwwroot/ 274 | 275 | # Visual Studio 2017 auto generated files 276 | Generated\ Files/ 277 | 278 | # MSTest test Results 279 | [Tt]est[Rr]esult*/ 280 | [Bb]uild[Ll]og.* 281 | 282 | # NUnit 283 | *.VisualState.xml 284 | TestResult.xml 285 | nunit-*.xml 286 | 287 | # Build Results of an ATL Project 288 | [Dd]ebugPS/ 289 | [Rr]eleasePS/ 290 | dlldata.c 291 | 292 | # Benchmark Results 293 | BenchmarkDotNet.Artifacts/ 294 | 295 | # .NET Core 296 | project.lock.json 297 | project.fragment.lock.json 298 | artifacts/ 299 | 300 | # StyleCop 301 | StyleCopReport.xml 302 | 303 | # Files built by Visual Studio 304 | *_i.c 305 | *_p.c 306 | *_h.h 307 | *.ilk 308 | *.meta 309 | *.obj 310 | *.iobj 311 | *.pch 312 | *.pdb 313 | *.ipdb 314 | *.pgc 315 | *.pgd 316 | *.rsp 317 | *.sbr 318 | *.tlb 319 | *.tli 320 | *.tlh 321 | *.tmp 322 | *.tmp_proj 323 | *_wpftmp.csproj 324 | *.vspscc 325 | *.vssscc 326 | .builds 327 | *.pidb 328 | *.svclog 329 | *.scc 330 | 331 | # Chutzpah Test files 332 | _Chutzpah* 333 | 334 | # Visual C++ cache files 335 | ipch/ 336 | *.aps 337 | *.ncb 338 | *.opendb 339 | *.opensdf 340 | *.sdf 341 | *.cachefile 342 | *.VC.db 343 | *.VC.VC.opendb 344 | 345 | # Visual Studio profiler 346 | *.psess 347 | *.vsp 348 | 
*.vspx 349 | *.sap 350 | 351 | # Visual Studio Trace Files 352 | *.e2e 353 | 354 | # TFS 2012 Local Workspace 355 | $tf/ 356 | 357 | # Guidance Automation Toolkit 358 | *.gpState 359 | 360 | # ReSharper is a .NET coding add-in 361 | _ReSharper*/ 362 | *.[Rr]e[Ss]harper 363 | *.DotSettings.user 364 | 365 | # TeamCity is a build add-in 366 | _TeamCity* 367 | 368 | # DotCover is a Code Coverage Tool 369 | *.dotCover 370 | 371 | # AxoCover is a Code Coverage Tool 372 | .axoCover/* 373 | !.axoCover/settings.json 374 | 375 | # Coverlet is a free, cross platform Code Coverage Tool 376 | coverage*[.json, .xml, .info] 377 | 378 | # Visual Studio code coverage results 379 | *.coverage 380 | *.coveragexml 381 | 382 | # NCrunch 383 | _NCrunch_* 384 | .*crunch*.local.xml 385 | nCrunchTemp_* 386 | 387 | # MightyMoose 388 | *.mm.* 389 | AutoTest.Net/ 390 | 391 | # Web workbench (sass) 392 | .sass-cache/ 393 | 394 | # Installshield output folder 395 | [Ee]xpress/ 396 | 397 | # DocProject is a documentation generator add-in 398 | DocProject/buildhelp/ 399 | DocProject/Help/*.HxT 400 | DocProject/Help/*.HxC 401 | DocProject/Help/*.hhc 402 | DocProject/Help/*.hhk 403 | DocProject/Help/*.hhp 404 | DocProject/Help/Html2 405 | DocProject/Help/html 406 | 407 | # Click-Once directory 408 | publish/ 409 | 410 | # Publish Web Output 411 | *.[Pp]ublish.xml 412 | *.azurePubxml 413 | # Note: Comment the next line if you want to checkin your web deploy settings, 414 | # but database connection strings (with potential passwords) will be unencrypted 415 | *.pubxml 416 | *.publishproj 417 | 418 | # Microsoft Azure Web App publish settings. 
Comment the next line if you want to 419 | # checkin your Azure Web App publish settings, but sensitive information contained 420 | # in these scripts will be unencrypted 421 | PublishScripts/ 422 | 423 | # NuGet Packages 424 | *.nupkg 425 | # NuGet Symbol Packages 426 | *.snupkg 427 | # The packages folder can be ignored because of Package Restore 428 | **/[Pp]ackages/* 429 | # except build/, which is used as an MSBuild target. 430 | !**/[Pp]ackages/build/ 431 | # Uncomment if necessary however generally it will be regenerated when needed 432 | #!**/[Pp]ackages/repositories.config 433 | # NuGet v3's project.json files produces more ignorable files 434 | *.nuget.props 435 | *.nuget.targets 436 | 437 | # Microsoft Azure Build Output 438 | csx/ 439 | *.build.csdef 440 | 441 | # Microsoft Azure Emulator 442 | ecf/ 443 | rcf/ 444 | 445 | # Windows Store app package directories and files 446 | AppPackages/ 447 | BundleArtifacts/ 448 | Package.StoreAssociation.xml 449 | _pkginfo.txt 450 | *.appx 451 | *.appxbundle 452 | *.appxupload 453 | 454 | # Visual Studio cache files 455 | # files ending in .cache can be ignored 456 | *.[Cc]ache 457 | # but keep track of directories ending in .cache 458 | !?*.[Cc]ache/ 459 | 460 | # Others 461 | ClientBin/ 462 | ~$* 463 | *~ 464 | *.dbmdl 465 | *.dbproj.schemaview 466 | *.jfm 467 | *.pfx 468 | *.publishsettings 469 | orleans.codegen.cs 470 | 471 | # Including strong name files can present a security risk 472 | # (https://github.com/github/gitignore/pull/2483#issue-259490424) 473 | #*.snk 474 | 475 | # Since there are multiple workflows, uncomment next line to ignore bower_components 476 | # (https://github.com/github/gitignore/pull/1529#issuecomment-104372622) 477 | #bower_components/ 478 | 479 | # RIA/Silverlight projects 480 | Generated_Code/ 481 | 482 | # Backup & report files from converting an old project file 483 | # to a newer Visual Studio version. 
Backup files are not needed, 484 | # because we have git ;-) 485 | _UpgradeReport_Files/ 486 | Backup*/ 487 | UpgradeLog*.XML 488 | UpgradeLog*.htm 489 | ServiceFabricBackup/ 490 | *.rptproj.bak 491 | 492 | # SQL Server files 493 | *.mdf 494 | *.ldf 495 | *.ndf 496 | 497 | # Business Intelligence projects 498 | *.rdl.data 499 | *.bim.layout 500 | *.bim_*.settings 501 | *.rptproj.rsuser 502 | *- [Bb]ackup.rdl 503 | *- [Bb]ackup ([0-9]).rdl 504 | *- [Bb]ackup ([0-9][0-9]).rdl 505 | 506 | # Microsoft Fakes 507 | FakesAssemblies/ 508 | 509 | # GhostDoc plugin setting file 510 | *.GhostDoc.xml 511 | 512 | # Node.js Tools for Visual Studio 513 | .ntvs_analysis.dat 514 | 515 | # Visual Studio 6 build log 516 | *.plg 517 | 518 | # Visual Studio 6 workspace options file 519 | *.opt 520 | 521 | # Visual Studio 6 auto-generated workspace file (contains which files were open etc.) 522 | *.vbw 523 | 524 | # Visual Studio LightSwitch build output 525 | **/*.HTMLClient/GeneratedArtifacts 526 | **/*.DesktopClient/GeneratedArtifacts 527 | **/*.DesktopClient/ModelManifest.xml 528 | **/*.Server/GeneratedArtifacts 529 | **/*.Server/ModelManifest.xml 530 | _Pvt_Extensions 531 | 532 | # Paket dependency manager 533 | .paket/paket.exe 534 | paket-files/ 535 | 536 | # FAKE - F# Make 537 | .fake/ 538 | 539 | # CodeRush personal settings 540 | .cr/personal 541 | 542 | # Python Tools for Visual Studio (PTVS) 543 | __pycache__/ 544 | *.pyc 545 | 546 | # Cake - Uncomment if you are using it 547 | # tools/** 548 | # !tools/packages.config 549 | 550 | # Tabs Studio 551 | *.tss 552 | 553 | # Telerik's JustMock configuration file 554 | *.jmconfig 555 | 556 | # BizTalk build output 557 | *.btp.cs 558 | *.btm.cs 559 | *.odx.cs 560 | *.xsd.cs 561 | 562 | # OpenCover UI analysis results 563 | OpenCover/ 564 | 565 | # Azure Stream Analytics local run output 566 | ASALocalRun/ 567 | 568 | # MSBuild Binary and Structured Log 569 | *.binlog 570 | 571 | # NVidia Nsight GPU debugger configuration file 572 | 
*.nvuser 573 | 574 | # MFractors (Xamarin productivity tool) working folder 575 | .mfractor/ 576 | 577 | # Local History for Visual Studio 578 | .localhistory/ 579 | 580 | # BeatPulse healthcheck temp database 581 | healthchecksdb 582 | 583 | # Backup folder for Package Reference Convert tool in Visual Studio 2017 584 | MigrationBackup/ 585 | 586 | # Ionide (cross platform F# VS Code tools) working folder 587 | .ionide/ 588 | 589 | # End of https://www.toptal.com/developers/gitignore/api/vs,intellij+all,node 590 | .DS_Store 591 | 592 | 593 | .wwebjs_cache 594 | .session -------------------------------------------------------------------------------- /.npmrc: -------------------------------------------------------------------------------- 1 | engine-strict=true -------------------------------------------------------------------------------- /.prettierrc: -------------------------------------------------------------------------------- 1 | { 2 | "semi": true, 3 | "tabWidth": 4, 4 | "useTabs": true, 5 | "printWidth": 140, 6 | "singleQuote": false, 7 | "trailingComma": "none", 8 | "jsxBracketSameLine": true, 9 | "bracketSameLine": true, 10 | "endOfLine": "lf" 11 | } 12 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | FROM node:bullseye-slim 2 | 3 | RUN apt update 4 | # components for whatsapp-web.js (support no-gui systems) 5 | RUN apt install -y gconf-service libgbm-dev libasound2 libatk1.0-0 libc6 libcairo2 libcups2 libdbus-1-3 libexpat1 libfontconfig1 libgcc1 libgconf-2-4 libgdk-pixbuf2.0-0 libglib2.0-0 libgtk-3-0 libnspr4 libpango-1.0-0 libpangocairo-1.0-0 libstdc++6 libx11-6 libx11-xcb1 libxcb1 libxcomposite1 libxcursor1 libxdamage1 libxext6 libxfixes3 libxi6 libxrandr2 libxrender1 libxss1 libxtst6 ca-certificates fonts-liberation libappindicator1 libnss3 lsb-release xdg-utils wget 6 | RUN apt install -y chromium 7 | 8 | # For 
transcription 9 | RUN apt install -y ffmpeg 10 | ## It will install latest model of OpenAI Whisper (around 6~7 GB) 11 | ## Uncomment below command if you want to use the local version of transcription module 12 | # RUN pip install -y python pip 13 | # RUN pip install -U openai-whisper 14 | 15 | WORKDIR /app/ 16 | 17 | ENV OPENAI_API_KEY "" 18 | ENV PREFIX_ENABLED "" 19 | 20 | COPY package.json package-lock.json ./ 21 | 22 | RUN npm install 23 | RUN npm install vite-node 24 | 25 | COPY . . 26 | 27 | CMD ["npm", "run", "start"] 28 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GPT + DALL-E + WhatsApp = AI Assistant 🚀 2 | 3 | ![Docker](https://github.com/askrella/whatsapp-chatgpt/actions/workflows/docker.yml/badge.svg) 4 | ![Prettier](https://github.com/askrella/whatsapp-chatgpt/actions/workflows/prettier.yml/badge.svg) 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 6 | 7 | [![Discord Invite](https://dcbadge.vercel.app/api/server/9VJaRXKwd3)](https://discord.gg/9VJaRXKwd3) 8 | 9 | This WhatsApp bot uses OpenAI's GPT and DALL-E 2 to respond to user inputs. 10 | 11 | You can talk to the bot in voice messages, the bot will transcribe and respond. :robot: 12 | 13 |

14 | Whatsapp ChatGPT 15 |

16 | 17 | ## Requirements 18 | 19 | - Node.js (18 or newer) 20 | - A recent version of npm 21 | - An [OpenAI API key](https://beta.openai.com/signup) 22 | - A WhatsApp account 23 | 24 | ## Documentation 25 | 26 | In the documentation you can find more information about how to install, configure and use this bot. 27 | 28 | ➡️ https://askrella.github.io/whatsapp-chatgpt 29 | 30 | ## Disclaimer 31 | 32 | The operations performed by this bot are not free. You will be charged by OpenAI for each request you make. 33 | 34 | This bot uses Puppeteer to run a real instance of Whatsapp Web to avoid getting blocked. 35 | 36 | NOTE: We can't guarantee that you won't be blocked using this method, although it does work. WhatsApp does not allow bots or unofficial clients on its platform, so this should not be considered completely safe. 37 | 38 | ## Contributors 39 | 40 | 41 | 42 | 43 | 44 | ## Used libraries 45 | 46 | - https://github.com/transitive-bullshit/chatgpt-api 47 | - https://github.com/pedroslopez/whatsapp-web.js 48 | - https://github.com/askrella/speech-rest-api 49 | -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | version: "3" 2 | services: 3 | whatsapp-chatgpt: 4 | container_name: whatsapp-chatgpt 5 | read_only: true 6 | image: ghcr.io/askrella/whatsapp-chatgpt:master 7 | environment: 8 | OPENAI_API_KEY: "" 9 | OPENAI_GPT_MODEL: "" 10 | PREFIX_ENABLED: "" 11 | SERPAPI_API_KEY: "" 12 | restart: unless-stopped 13 | volumes: 14 | - session-data:/app/session 15 | volumes: 16 | session-data: 17 | -------------------------------------------------------------------------------- /docs/.nojekyll: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/askrella/whatsapp-chatgpt/50a7611f3da2f9479509a6e150a6d25a6cfb91fb/docs/.nojekyll 
-------------------------------------------------------------------------------- /docs/README.md: -------------------------------------------------------------------------------- 1 | # GPT + DALL-E + WhatsApp = AI Assistant 🚀 2 | 3 | ![Docker](https://github.com/askrella/whatsapp-chatgpt/actions/workflows/docker.yml/badge.svg) 4 | ![Prettier](https://github.com/askrella/whatsapp-chatgpt/actions/workflows/prettier.yml/badge.svg) 5 | [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT) 6 | 7 | [![Discord Invite](https://dcbadge.vercel.app/api/server/9VJaRXKwd3)](https://discord.gg/9VJaRXKwd3) 8 | 9 | This WhatsApp bot uses OpenAI's GPT and DALL-E 2 to respond to user inputs. 10 | 11 | You can talk to the bot in voice messages, the bot will transcribe and respond. :robot: 12 | 13 |

14 | Whatsapp ChatGPT 15 |

16 | 17 | ## Requirements 18 | 19 | - Node.js (18 or newer) 20 | - A recent version of npm 21 | - An [OpenAI API key](https://beta.openai.com/signup) 22 | - A WhatsApp account 23 | 24 | ## Documentation 25 | 26 | In the documentation you can find more information about how to install, configure and use this bot. 27 | 28 | ➡️ https://askrella.github.io/whatsapp-chatgpt 29 | 30 | ## Disclaimer 31 | 32 | The operations performed by this bot are not free. You will be charged by OpenAI for each request you make. 33 | 34 | This bot uses Puppeteer to run a real instance of Whatsapp Web to avoid getting blocked. 35 | 36 | NOTE: We can't guarantee that you won't be blocked using this method, although it does work. WhatsApp does not allow bots or unofficial clients on its platform, so this should not be considered completely safe. 37 | 38 | ## Contributors 39 | 40 | 41 | 42 | 43 | 44 | ## Used libraries 45 | 46 | - https://github.com/transitive-bullshit/chatgpt-api 47 | - https://github.com/pedroslopez/whatsapp-web.js 48 | - https://github.com/askrella/speech-rest-api 49 | -------------------------------------------------------------------------------- /docs/_sidebar.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | - [Home](/) 4 | - [Installation](pages/installation.md) 5 | - [Docker](pages/docker.md) 6 | - [Usage](pages/usage.md) 7 | - [Configuration](pages/gpt.md) 8 | - [GPT configuration](pages/gpt.md) 9 | - [Langchain & Agents](pages/langchain.md) 10 | - [Configure Prefix](pages/configure-prefix.md) 11 | - [Talk with the bot](pages/transcription.md) 12 | - [Transcription with OpenAI Whisper](pages/transcription.md) 13 | - [Text-To-Speech](pages/tts.md) 14 | - [Send Messages to yourself](pages/send-messages-to-yourself.md) 15 | - [Disclaimer](pages/disclaimer.md) 16 | -------------------------------------------------------------------------------- /docs/index.html: 
-------------------------------------------------------------------------------- 1 | 2 | 3 | 4 | 5 | WhatsApp AI Assistant 🚀 6 | 7 | 8 | 9 | 10 | 11 | 12 |
13 | 20 | 21 | 22 | 23 | 24 | -------------------------------------------------------------------------------- /docs/pages/configure-prefix.md: -------------------------------------------------------------------------------- 1 | # Configure Prefix 2 | 3 | ## Disable prefix 4 | 5 | You can disable the `!gpt`/`!dalle`/`!sd`/`!config` prefix by setting `PREFIX_ENABLED` to `false` in the `.env` file.
6 | 7 | If you disable the prefix, the bot will not support DALL-E and Stable Diffusion, only GPT will be used. 8 | 9 | ## Set own prefixes 10 | 11 | You can set your own prefixes for ChatGPT, DALL-E and configuration in the `.env` file. 12 | 13 | ``` 14 | GPT_PREFIX=!gpt 15 | DALLE_PREFIX=!dalle 16 | STABLE_DIFFUSION_PREFIX=!sd 17 | AI_CONFIG_PREFIX=!config 18 | ``` 19 | -------------------------------------------------------------------------------- /docs/pages/disclaimer.md: -------------------------------------------------------------------------------- 1 | # Disclaimer 2 | 3 | The operations performed by this bot are not free. You will be charged by OpenAI for each request you make. 4 | 5 | This bot uses Puppeteer to run a real instance of Whatsapp Web to avoid getting blocked. 6 | 7 | NOTE: We can't guarantee that you won't be blocked using this method, although it does work. 8 | 9 | WhatsApp does not allow bots or unofficial clients on its platform, so this should not be considered completely safe. 10 | -------------------------------------------------------------------------------- /docs/pages/docker.md: -------------------------------------------------------------------------------- 1 | # Docker 2 | 3 | Make sure to edit the `docker-compose.yml` file and set your own variables there. 4 | 5 | ```sh 6 | sudo docker-compose up 7 | ``` 8 | -------------------------------------------------------------------------------- /docs/pages/gpt.md: -------------------------------------------------------------------------------- 1 | # GPT 2 | 3 | ## Model 4 | 5 | You can specify the model which should be used with the `OPENAI_MODEL` environment variabl 6 | 7 | ```bash 8 | OPENAI_MODEL=gpt-3.5-turbo # or gpt-4 9 | ``` 10 | 11 | ## Configuration 12 | 13 | You can modify the max model tokens by setting the `MAX_MODEL_TOKENS` environment variable. For example: 14 | 15 | ```bash 16 | MAX_MODEL_TOKENS=2000 17 | ``` 18 | 19 | ## What are tokens and how to count them? 
20 | 21 | https://help.openai.com/en/articles/4936856-what-are-tokens-and-how-to-count-them 22 | 23 | ## Pre Prompt 24 | 25 | You can configure a pre prompt which is executed after creating a new conversation. 26 | 27 | To do that, use the `PRE_PROMPT` environment variable. For example: 28 | 29 | ```bash 30 | PRE_PROMPT=Act very funny and overreact to messages. Do that for every message you get, forever. 31 | ``` 32 | 33 | ## Groupchats 34 | 35 | You can enable the bot to interact on groupchats by setting the `GROUPCHATS_ENABLED` environment variable to `true`. For example: 36 | 37 | ```bash 38 | GROUPCHATS_ENABLED=true 39 | ``` 40 | 41 | ## Prompt Moderation 42 | 43 | You can configure a prompt moderation, which will be executed before sending the prompt to GPT. 44 | This way, you can filter out prompts before sending them to GPT. 45 | This is achieved by using the [OpenAI Moderation API](https://beta.openai.com/docs/api-reference/moderations). 46 | 47 | To enable it, use the `PROMPT_MODERATION_ENABLED` environment variable. For example: 48 | 49 | ```bash 50 | PROMPT_MODERATION_ENABLED=true 51 | ``` 52 | 53 | You can also configure the blacklisted categories, which will be used to filter the prompt moderation. 54 | 55 | To do that, use the `PROMPT_MODERATION_BLACKLISTED_CATEGORIES` environment variable. For example: 56 | 57 | ```bash 58 | PROMPT_MODERATION_BLACKLISTED_CATEGORIES = ["hate","hate/threatening","self-harm","sexual","sexual/minors","violence","violence/graphic"] 59 | ``` 60 | 61 | You can see all available categories [here](https://beta.openai.com/docs/api-reference/moderations). 62 | 63 | Please, keep in mind that disabling the prompt moderation or modifying the blacklisted categories, will not disable the moderation of the GPT API. Because OpenAI uses their own moderation, which is not configurable. 
64 | 65 | ## Rate Limit 66 | 67 | https://platform.openai.com/docs/guides/rate-limits 68 | 69 | If you are with heavy usage, you might run into the rate limit of Open API. Since the rate limit is on organization level, you could create another account and get a new API key separately. And then setting the keys into environment variables `OPENAI_API_KEYS`. API keys will be used in a random basis. 70 | -------------------------------------------------------------------------------- /docs/pages/installation.md: -------------------------------------------------------------------------------- 1 | # Installation 2 | 3 | 1. Clone this repository 4 | 2. Install the required packages by running `npm install` 5 | 3. Put your OpenAI API key into the `.env` file 6 | - Example file: [.env-example](https://github.com/askrella/whatsapp-chatgpt/blob/master/.env-example) 7 | - You can obtain an API key [here](https://platform.openai.com/account/api-keys) 8 | 4. Run the bot using `npm run start` 9 | 5. Scan the QR code with WhatsApp (link a device) 10 | 6. Now you're ready to go! People can send you messages, and the bot will respond to them 11 | -------------------------------------------------------------------------------- /docs/pages/langchain.md: -------------------------------------------------------------------------------- 1 | # LangChain 2 | 3 | ## About 4 | 5 | Use this handler to allow GPT to interact with other sources of data, ie. the internet, or different mediums like pdfs and images. Ideally the user doesn't have to differentiate between GPT instances that can and cannot use external data sources, but we'll keep them separate for ease of implementation for now. 6 | 7 | ## Example 8 | 9 | In the following example, GPT uses `SerpAPI` as a tool to access Google Search API. You can use `RequestsGetTool` and parse the HTML if you don't have a [SerpAPI](https://serpapi.com/) API key. 
10 | 11 | > !lang nba game april 11st 2023 12 | > 13 | > // Uses SerpAPI or RequestsGetTool to access the search engine, parse results in either JSON or HTML, and have GPT interpret the best answer for the prompt. 14 | > "The result of the NBA games on April 11st 2023 is Minnesota Timberwolves vs Los Angeles Lakers" 15 | 16 | ## Tools 17 | 18 | Abstractions for GPT to interact with to interact with external data sources. For example, both `RequestGetTools` and `SerpAPI` allows GPT to access the internet. 19 | 20 | See other tools in the LangChain [Tools section](https://js.langchain.com/docs/modules/agents/tools/). 21 | 22 | ## References 23 | 24 | - [LangChain in JS](https://js.langchain.com/docs/) 25 | - [LangChain in Python](https://python.langchain.com/en/latest/index.html) 26 | -------------------------------------------------------------------------------- /docs/pages/send-messages-to-yourself.md: -------------------------------------------------------------------------------- 1 | # Send messages to yourself 2 | 3 | You can also use the bot to send messages to yourself. 4 | 5 | Use this WhatsApp link: https://wa.me/your_phone_number. 6 | 7 | Replace `your_phone_number` with your phone number, including the country code. (e.g. +11234567890) 8 | 9 | The URL above will take you to your own chat window. 10 | -------------------------------------------------------------------------------- /docs/pages/transcription.md: -------------------------------------------------------------------------------- 1 | # Transcription (EXPERIMENTAL) 2 | 3 | The transcription feature allows you to use your voice to interact with the bot. 4 | It's a great way to use the bot without having to type anything. 5 | 6 | You can enable it by setting `TRANSCRIPTION_ENABLED=true` in your `.env` file. 
7 | 8 | There are multiple modes available: 9 | 10 | - `local` 11 | - `openai` 12 | - `speech-api` 13 | - `whisper-api` 14 | 15 | # Transcription Modes 16 | 17 | ## Local 18 | 19 | For the local mode you need to have [whisper](https://github.com/openai/whisper) installed on your machine. 20 | 21 | With local mode the voice messages will be transcribed on your machine. Best for privacy. 22 | 23 | You need to install Python: 24 | 25 | - https://www.python.org/downloads/ 26 | 27 | Check out the whisper installation guide here: 28 | 29 | - https://github.com/openai/whisper#setup 30 | 31 | Use the following environment variable to enable the local mode: 32 | 33 | ```bash 34 | TRANSCRIPTION_MODE=local 35 | ``` 36 | 37 | ## Using A Remote Transcription API 38 | 39 | You might use an external API to turn audio into text, the voice messages are processed on the server and not on your machine. 40 | 41 | ## Open AI (Whisper) 42 | 43 | To use the official Open AI transcription endpoint based on large-v2 Whisper model, you will need to ensure that you have the `OPENAI_API_KEY` environment variable set. 44 | 45 | If you already have this set, you can proceed to set the `TRANSCRIPTION_MODE` environment variable: 46 | 47 | ```bash 48 | TRANSCRIPTION_MODE=openai 49 | ``` 50 | 51 | The transcribed language is usually detected automatically, but if you want to ensure accurate language detection, you can set the environment variable `TRANSCRIPTION_LANGUAGE` to the desired language (for example, "English" for English, see [Supported Languages](https://github.com/openai/whisper#available-models-and-languages) for the full list). 52 | 53 | ```bash 54 | TRANSCRIPTION_LANGUAGE=English 55 | ``` 56 | 57 | Remarks: 58 | 59 | - Please note that this endpoint has a file size limit of 25 MB, so it is recommended to avoid transcribing long audio files. 60 | 61 | ## Speech-API 62 | 63 | The Speech API is a REST API that converts your voice messages to text. 
The voice messages are processed on the server and not on your machine. 64 | 65 | The Speech API doesn't store the voice messages permanently. It's open source and you can host it yourself. 66 | 67 | You can find the source code here: 68 | 69 | - https://github.com/askrella/speech-rest-api 70 | 71 | If you want use the Speech API mode you need to set the following environment variable: 72 | 73 | ```bash 74 | TRANSCRIPTION_MODE=speech-api 75 | ``` 76 | 77 | By default the bot will use our hosted Speech API (for free). You can change the URL by setting the following environment variable: 78 | 79 | ```bash 80 | SPEECH_API_URL= 81 | ``` 82 | 83 | ## Whisper API 84 | 85 | The Whisper API is a REST API provided by AssemblyAI that is capable of converting voice messages into text. The voice messages are processed on the server, rather than on your own machine. 86 | 87 | If you wish to use the Whisper API mode, you will need to set the environment variable 88 | 89 | ```bash 90 | TRANSCRIPTION_MODE=whisper-api 91 | ``` 92 | 93 | To use the API, you must first sign up and obtain an API key from: 94 | 95 | - https://whisperapi.com/ 96 | 97 | The transcribed language is usually detected automatically, but if you want to ensure accurate language detection, you can set the environment variable `TRANSCRIPTION_LANGUAGE` to the desired language (for example, "en" for English). 98 | 99 | ``` 100 | TRANSCRIPTION_LANGUAGE=en 101 | ``` 102 | -------------------------------------------------------------------------------- /docs/pages/tts.md: -------------------------------------------------------------------------------- 1 | # Text-To-Speech (EXPERIMENTAL) 2 | 3 | The TTS feature allows the bot to answer with voice messages instead of text messages. You can actually talk to the bot. 
4 | 5 | You can enable it by setting the following environment variable: 6 | 7 | ```bash 8 | TTS_ENABLED=true 9 | ``` 10 | 11 | By default, when TTS is enabled, the bot will answer two messages: the text response and the audio response. 12 | 13 | You can disable the text response by changing the following environment variable: 14 | 15 | ```bash 16 | TTS_TRANSCRIPTION_RESPONSE_ENABLED=true 17 | ``` 18 | 19 | ## Supported Providers 20 | 21 | - [Speech API](#speech-api) 22 | - [AWS Polly](#aws-polly) 23 | 24 | ## Speech API 25 | 26 | This feature will use the Speech API to convert the GPT response to voice. It's open source and you can host it yourself. 27 | 28 | You can find the source code here: 29 | 30 | - https://github.com/askrella/speech-rest-api 31 | 32 | By default the bot will use our hosted Speech API (for free). You can change the URL by setting the following environment variables: 33 | 34 | ```bash 35 | SPEECH_API_URL= 36 | TTS_MODE=speech-api 37 | ``` 38 | 39 | ## AWS Polly 40 | 41 | You can use Amazon Web Services Polly to convert the GPT response to voice. 42 | 43 | You can find the official documentation here: 44 | 45 | - https://docs.aws.amazon.com/polly/latest/dg/what-is.html 46 | 47 | You can enable this service by setting the following environment variables: 48 | 49 | ```bash 50 | TTS_ENABLED=true 51 | TTS_PROVIDER=aws-polly 52 | AWS_ACCESS_KEY_ID= 53 | AWS_SECRET_ACCESS_KEY= 54 | AWS_REGION= 55 | AWS_POLLY_VOICE_ID= 56 | AWS_POLLY_VOICE_ENGINE= 57 | ``` 58 | 59 | The provided AWS credentials must have the `polly:SynthesizeSpeech` permission. 
60 | 61 | You can find the list of available regions here: 62 | 63 | - https://docs.aws.amazon.com/general/latest/gr/rande.html#polly_region 64 | 65 | You can find the list of available voices here: 66 | 67 | - https://docs.aws.amazon.com/polly/latest/dg/voicelist.html 68 | 69 | And the list of available engines here: 70 | 71 | - https://docs.aws.amazon.com/polly/latest/dg/engines.html 72 | 73 | Keep in mind that the AWS Polly service is not free. You will be charged for the usage, so make sure to check the pricing before enabling it. 74 | -------------------------------------------------------------------------------- /docs/pages/usage.md: -------------------------------------------------------------------------------- 1 | # Usage 2 | 3 | To use the bot, simply send a message with the `!gpt`/`!dalle`/`!sd`/`!config` command followed by your prompt. For example: 4 | 5 | ### GPT 6 | 7 | ``` 8 | !gpt What is the meaning of life? 9 | ``` 10 | 11 | ### DALLE 12 | 13 | ``` 14 | !dalle A frog with a red hat is walking on a bridge. 15 | ``` 16 | 17 | ### Stable Diffusion 18 | 19 | ``` 20 | !sd A frog with a red hat is walking on a bridge. 21 | ``` 22 | 23 | It is using huggingface's stable diffusion model for image rendering, you might change the model with `!config sd setModel ` command. 24 | 25 | ### AI Config 26 | 27 | To modify the bot's configuration, you can use the `!config` command. 
Run `!config help` for details:
26 | curl -fsSL https://get.docker.com -o get-docker.sh 27 | sh get-docker.sh 28 | yum install -y git docker-compose 29 | echo "Packages installed successfully." 30 | else 31 | echo "Unsupported package manager." 32 | exit 1 33 | fi 34 | ;; 35 | *) 36 | echo "Unsupported operating system." 37 | exit 1 38 | ;; 39 | esac 40 | 41 | # Clone Git repo and run Docker Compose 42 | echo "Cloning Git repo..." 43 | git clone https://github.com/askrella/whatsapp-chatgpt.git 44 | cd repo 45 | 46 | # Prompt user for API key 47 | read -p "Enter your OpenAI API key: " api_key 48 | 49 | # Replace API key variable in Docker Compose file 50 | sed -i "s/OPENAI_API_KEY:.*/OPENAI_API_KEY: \"$api_key\"/g" docker-compose.yml 51 | 52 | # Start Docker Compose 53 | echo "Starting Docker containers..." 54 | docker-compose up -d 55 | echo "Docker containers started successfully." 56 | -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "whatsapp-chatgpt", 3 | "version": "1.0.0", 4 | "description": "Whatsapp bot that uses OpenAI's GPT & DALLE to respond to user inputs", 5 | "main": "src/index.ts", 6 | "author": "Askrella Software Agency UG (haftungsbeschränkt)", 7 | "scripts": { 8 | "start": "npx vite-node src/index.ts", 9 | "prettier": "prettier --write ./src" 10 | }, 11 | "license": "MIT", 12 | "dependencies": { 13 | "aws-sdk": "^2.1649.0", 14 | "chatgpt": "^5.2.5", 15 | "dotenv": "^16.3.1", 16 | "fetch-blob": "^4.0.0", 17 | "ffmpeg": "^0.0.4", 18 | "langchain": "^0.0.156", 19 | "openai": "^4.52.1", 20 | "picocolors": "^1.0.0", 21 | "qrcode": "^1.5.3", 22 | "whatsapp-web.js": "^1.25.0" 23 | }, 24 | "engines": { 25 | "node": ">=18.0.0" 26 | }, 27 | "devDependencies": { 28 | "@types/qrcode": "^1.5.2", 29 | "prettier": "^3.0.3" 30 | } 31 | } 32 | -------------------------------------------------------------------------------- /src/cli/ui.ts: 
-------------------------------------------------------------------------------- 1 | import color from "picocolors"; 2 | 3 | export const print = (text: string) => { 4 | console.log(color.green("◇") + " " + text); 5 | }; 6 | 7 | export const printError = (text: string) => { 8 | console.log(color.red("◇") + " " + text); 9 | }; 10 | 11 | export const printIntro = () => { 12 | console.log(""); 13 | console.log(color.bgCyan(color.white(" Whatsapp ChatGPT & DALL-E "))); 14 | console.log("|-------------------------------------------------------------------------------------------------|"); 15 | console.log("| A Whatsapp bot that uses OpenAI's ChatGPT and DALL-E to generate text and images from a prompt. |"); 16 | console.log("|-------------------------------------------------------------------------------------------------|"); 17 | console.log(""); 18 | }; 19 | 20 | export const printQRCode = (qr: string) => { 21 | console.log(qr); 22 | console.log("Scan the QR code above to login to Whatsapp Web..."); 23 | }; 24 | 25 | export const printLoading = () => { 26 | console.log("Loading..."); 27 | }; 28 | 29 | export const printAuthenticated = () => { 30 | console.log("Authenticated, session started!"); 31 | }; 32 | 33 | export const printAuthenticationFailure = () => { 34 | console.log("Authentication failed!"); 35 | }; 36 | 37 | export const printOutro = () => { 38 | console.log(""); 39 | console.log("The bot is ready to use."); 40 | console.log("To get started, send a message to the bot with the prompt you want to use."); 41 | console.log("Use the prefix '!gpt' if configured that way."); 42 | }; 43 | -------------------------------------------------------------------------------- /src/commands/chat.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message } from "whatsapp-web.js"; 3 | 4 | export const ChatModule: ICommandModule = { 5 | 
key: "chat", 6 | register: (): ICommandsMap => { 7 | return { 8 | id 9 | }; 10 | } 11 | }; 12 | 13 | const id: ICommandDefinition = { 14 | help: "- Get the ID of the chat", 15 | execute: (message: Message) => { 16 | message.reply(message.to); 17 | } 18 | }; 19 | -------------------------------------------------------------------------------- /src/commands/general.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message } from "whatsapp-web.js"; 3 | import { config } from "../config"; 4 | import { aiConfigTarget, aiConfigTypes, aiConfigValues, IAiConfig } from "../types/ai-config"; 5 | import { aiConfig, getConfig } from "../handlers/ai-config"; 6 | 7 | export const GeneralModule: ICommandModule = { 8 | key: "general", 9 | register: (): ICommandsMap => { 10 | return { 11 | settings, 12 | whitelist 13 | }; 14 | } 15 | }; 16 | 17 | const settings: ICommandDefinition = { 18 | help: "- Get current settings", 19 | execute: function (message: Message) { 20 | const selfNotedMessage = message.fromMe && message.hasQuotedMsg === false && message.from === message.to; 21 | if (!selfNotedMessage) { 22 | // Only allow printing out the settings on self-noted for security reasons 23 | return; 24 | } 25 | 26 | let response = "Runtime settings:"; 27 | for (let module in aiConfig.commandsMap) { 28 | for (let command in aiConfig.commandsMap[module]) { 29 | if (aiConfig.commandsMap[module][command].data === undefined) { 30 | continue; 31 | } 32 | let val; 33 | if (typeof aiConfig.commandsMap[module][command].data === "function") { 34 | val = aiConfig.commandsMap[module][command].data(); 35 | } else { 36 | val = aiConfig.commandsMap[module][command].data; 37 | } 38 | response += `\n${module} ${command}: ${val}`; 39 | } 40 | } 41 | 42 | response += `\n\nStatic settings:`; 43 | 44 | for (let target in aiConfigTarget) { 45 | for (let type in 
aiConfigTypes[target]) { 46 | response += `\n${target} ${type}: ${aiConfig[target][type]}`; 47 | } 48 | } 49 | 50 | // Whitelisted fields from config 51 | [ 52 | "openAIModel", 53 | "prePrompt", 54 | "gptPrefix", 55 | "dallePrefix", 56 | "stableDiffusionPrefix", 57 | "resetPrefix", 58 | "groupchatsEnabled", 59 | "promptModerationEnabled", 60 | "promptModerationBlacklistedCategories", 61 | "ttsMode" 62 | ].forEach((field) => { 63 | response += `\n${field}: ${config[field]}`; 64 | }); 65 | message.reply(response); 66 | } 67 | }; 68 | 69 | const whitelist: ICommandDefinition = { 70 | help: " - Set whitelisted phone numbers", 71 | data: config.whitelistedPhoneNumbers, 72 | execute: function (message: Message, value?: string) { 73 | if (!value) { 74 | message.reply(`Invalid value, please give a comma-separated list of phone numbers.`); 75 | return; 76 | } 77 | this.data = value.split(","); 78 | message.reply(`Updated whitelist phone numbers to ${this.data}`); 79 | } 80 | }; 81 | -------------------------------------------------------------------------------- /src/commands/gpt.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message } from "whatsapp-web.js"; 3 | import { config } from "../config"; 4 | import { initOpenAI } from "../providers/openai"; 5 | 6 | export const GptModule: ICommandModule = { 7 | key: "gpt", 8 | register: (): ICommandsMap => { 9 | return { 10 | apiKey, 11 | maxModelTokens 12 | }; 13 | } 14 | }; 15 | 16 | const apiKey: ICommandDefinition = { 17 | help: " - Set token pool, support multiple tokens with comma-separated", 18 | hint: "sk-xxxx,sk-xxxx", 19 | data: () => { 20 | // Randomly pick an API key 21 | return config.openAIAPIKeys[Math.floor(Math.random() * config.openAIAPIKeys.length)]; 22 | }, 23 | execute: function (message: Message, valueStr?: string) { 24 | if (!valueStr) { 25 | message.reply(`Invalid 
value, please give a comma-separated string of OpenAI api keys.`); 26 | return; 27 | } 28 | config.openAIAPIKeys = valueStr.split(",") as string[]; 29 | message.reply(`Updated API keys, total keys: ${config.openAIAPIKeys.length}`); 30 | } 31 | }; 32 | 33 | const maxModelTokens: ICommandDefinition = { 34 | help: " - Set max model tokens value", 35 | hint: "integer", 36 | data: config.maxModelTokens, 37 | execute: function (message: Message, valueStr?: string) { 38 | const value = parseInt(valueStr || ""); 39 | if (!value || isNaN(value)) { 40 | message.reply(`Invalid value, please give an integer value`); 41 | return; 42 | } 43 | this.data = value; 44 | initOpenAI(); 45 | message.reply(`Updated max model tokens to ${this.data}`); 46 | } 47 | }; 48 | -------------------------------------------------------------------------------- /src/commands/stable-diffusion.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message, MessageMedia } from "whatsapp-web.js"; 3 | import * as cli from "../cli/ui"; 4 | 5 | export const StableDiffusionModule: ICommandModule = { 6 | key: "sd", 7 | register: (): ICommandsMap => { 8 | return { 9 | setModel, 10 | generate 11 | }; 12 | } 13 | }; 14 | 15 | let model = "runwayml/stable-diffusion-v1-5"; 16 | 17 | const setModel: ICommandDefinition = { 18 | help: " - Set the model to be used of Stable Diffusion (with huggingface)", 19 | hint: "runwayml/stable-diffusion-v1-5", 20 | data: model, 21 | execute: function (message: Message, valueStr?: string) { 22 | if (!valueStr) { 23 | message.reply(`Invalid value, please give a model name.`); 24 | return; 25 | } 26 | this.data = valueStr; 27 | model = valueStr; 28 | message.reply(`Updated model to ${this.data}`); 29 | } 30 | }; 31 | 32 | const generate: ICommandDefinition = { 33 | help: " - Given the prompt, generate an image using Stable Diffusion (with 
huggingface)", 34 | hint: 'A magical and adventurous story about "The Littlest Pudu."', 35 | execute: async (message: Message, valueStr?: string) => { 36 | try { 37 | const start = Date.now(); 38 | 39 | cli.print(`[Stable Diffusion] Received prompt from ${message.from}: ${valueStr}`); 40 | 41 | const huggingFaceAPIToken = process.env.HUGGINGFACE_API_TOKEN; 42 | 43 | if (!huggingFaceAPIToken) { 44 | throw new Error("[Stable Diffusion] Huggingface API token not found, set the HUGGINGFACE_API_TOKEN environment variable"); 45 | } 46 | 47 | const url = `https://api-inference.huggingface.co/models/${model}`; 48 | const options = { 49 | method: "POST", 50 | headers: { 51 | "Content-Type": "application/json", 52 | Authorization: `Bearer ${huggingFaceAPIToken}` 53 | }, 54 | body: JSON.stringify({ 55 | inputs: valueStr, 56 | options: { 57 | wait_for_model: true 58 | } 59 | }) 60 | }; 61 | const response = await fetch(url, options); 62 | const end = Date.now() - start; 63 | const imageBlob = await response.blob(); 64 | const contentType = response.headers.get("Content-Type") || "image/jpeg"; 65 | const buffer = Buffer.from(await imageBlob.arrayBuffer()); 66 | const image = new MessageMedia(contentType, buffer.toString("base64")); 67 | 68 | cli.print(`[Stable Diffusion] Answer to ${message.from} | Huggingface request took ${end}ms`); 69 | 70 | message.reply(image); 71 | } catch (error: any) { 72 | console.error("An error occurred", error); 73 | message.reply("An error occurred, please contact the administrator. 
(" + error.message + ")"); 74 | } 75 | } 76 | }; 77 | -------------------------------------------------------------------------------- /src/commands/transcription.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from "../types/commands"; 2 | import { Message } from "whatsapp-web.js"; 3 | import { config } from "../config"; 4 | import { TranscriptionMode } from "../types/transcription-mode"; 5 | 6 | export const TranscriptionModule: ICommandModule = { 7 | key: "transcription", 8 | register: (): ICommandsMap => { 9 | return { 10 | enabled, 11 | mode 12 | } as ICommandsMap; 13 | } 14 | }; 15 | 16 | const enabled: ICommandDefinition = { 17 | help: " - Toggle if transcription is enabled", 18 | hint: "true, false", 19 | data: config.transcriptionEnabled, 20 | execute: function (message: Message, valueStr?: string) { 21 | if (["true", "false"].indexOf(valueStr || "") < 0) { 22 | message.reply(`Invalid value, please specify true or false`); 23 | return; 24 | } 25 | this.data = valueStr == "true"; 26 | message.reply(`Updated transcription enabled to ${this.data}`); 27 | } 28 | }; 29 | 30 | const mode: ICommandDefinition = { 31 | help: " - Set transcription mode", 32 | hint: Object.values(TranscriptionMode), 33 | data: config.transcriptionMode, 34 | execute: function (message: Message, valueStr?: string) { 35 | if ((Object.values(TranscriptionMode) as string[]).indexOf(valueStr || "") < 0) { 36 | message.reply(`Invalid value, available modes are: ${Object.values(TranscriptionMode).join(", ")}`); 37 | return; 38 | } 39 | this.data = valueStr; 40 | message.reply(`Updated transcription mode to ${this.data}`); 41 | } 42 | }; 43 | -------------------------------------------------------------------------------- /src/commands/tts.ts: -------------------------------------------------------------------------------- 1 | import { ICommandModule, ICommandDefinition, ICommandsMap } from 
"../types/commands";
import { Message } from "whatsapp-web.js";
import { config } from "../config";

// Command module exposing the TTS runtime toggles under the "tts" key of the AI config.
export const TTSModule: ICommandModule = {
	key: "tts",
	register: (): ICommandsMap => {
		return {
			enabled
		};
	}
};

// "!config tts enabled <true|false>" — toggles voice replies at runtime.
// Plain function (not arrow) so `this` is the command definition holding `data`.
const enabled: ICommandDefinition = {
	help: "<value> - Toggle if TTS is enabled",
	hint: "true, false",
	data: config.ttsEnabled,
	execute: function (message: Message, valueStr?: string) {
		if (!["true", "false"].includes(valueStr || "")) {
			message.reply(`Invalid value, please specify true or false`);
			return;
		}
		this.data = valueStr == "true";
		message.reply(`Updated TTS enabled to ${this.data}`);
	}
};
--------------------------------------------------------------------------------
/src/config.ts:
--------------------------------------------------------------------------------
import process from "process";

import { TranscriptionMode } from "./types/transcription-mode";
import { TTSMode } from "./types/tts-mode";
import { AWSPollyEngine } from "./types/aws-polly-engine";

// Environment variables
import dotenv from "dotenv";
dotenv.config();

// Config Interface — every field is resolved once at startup from process.env.
interface IConfig {
	// Access control
	whitelistedPhoneNumbers: string[];
	whitelistedEnabled: boolean;

	// OpenAI
	openAIModel: string;
	openAIAPIKeys: string[];
	maxModelTokens: number;
	prePrompt: string | undefined;

	// Prefix
	prefixEnabled: boolean;
	prefixSkippedForMe: boolean;
	gptPrefix: string;
	dallePrefix: string;
	stableDiffusionPrefix: string;
	langChainPrefix: string;
	resetPrefix: string;
	aiConfigPrefix: string;

	// Groupchats
	groupchatsEnabled: boolean;

	// Prompt Moderation
	promptModerationEnabled: boolean;
	promptModerationBlacklistedCategories: string[];

	// AWS
	awsAccessKeyId: string;
	awsSecretAccessKey: string;
	awsRegion: string;
	awsPollyVoiceId: string;
	awsPollyEngine: AWSPollyEngine;

	// Voice transcription & Text-to-Speech
	speechServerUrl: string;
	whisperServerUrl: string;
	openAIServerUrl: string;
	whisperApiKey: string;
	ttsEnabled: boolean;
	ttsMode: TTSMode;
	ttsTranscriptionResponse: boolean;
	transcriptionEnabled: boolean;
	transcriptionMode: TranscriptionMode;
	transcriptionLanguage: string;
}

// Config
export const config: IConfig = {
	whitelistedPhoneNumbers: process.env.WHITELISTED_PHONE_NUMBERS?.split(",") || [],
	whitelistedEnabled: getEnvBooleanWithDefault("WHITELISTED_ENABLED", false), // Default: false

	openAIAPIKeys: (process.env.OPENAI_API_KEYS || process.env.OPENAI_API_KEY || "").split(",").filter((key) => !!key), // Default: []
	openAIModel: process.env.OPENAI_GPT_MODEL || "gpt-3.5-turbo", // Default: gpt-3.5-turbo
	maxModelTokens: getEnvMaxModelTokens(), // Default: 4096
	prePrompt: process.env.PRE_PROMPT, // Default: undefined

	// Prefix
	prefixEnabled: getEnvBooleanWithDefault("PREFIX_ENABLED", true), // Default: true
	prefixSkippedForMe: getEnvBooleanWithDefault("PREFIX_SKIPPED_FOR_ME", true), // Default: true
	gptPrefix: process.env.GPT_PREFIX || "!gpt", // Default: !gpt
	dallePrefix: process.env.DALLE_PREFIX || "!dalle", // Default: !dalle
	stableDiffusionPrefix: process.env.STABLE_DIFFUSION_PREFIX || "!sd", // Default: !sd
	resetPrefix: process.env.RESET_PREFIX || "!reset", // Default: !reset
	aiConfigPrefix: process.env.AI_CONFIG_PREFIX || "!config", // Default: !config
	langChainPrefix: process.env.LANGCHAIN_PREFIX || "!lang", // Default: !lang

	// Groupchats
	groupchatsEnabled: getEnvBooleanWithDefault("GROUPCHATS_ENABLED", false), // Default: false

	// Prompt Moderation
	promptModerationEnabled: getEnvBooleanWithDefault("PROMPT_MODERATION_ENABLED", false), // Default: false
	promptModerationBlacklistedCategories: getEnvPromptModerationBlacklistedCategories(), // Default: ["hate", "hate/threatening", "self-harm", "sexual", "sexual/minors", "violence", "violence/graphic"]

	// AWS
	awsAccessKeyId: process.env.AWS_ACCESS_KEY_ID || "", // Default: ""
	awsSecretAccessKey: process.env.AWS_SECRET_ACCESS_KEY || "", // Default: ""
	awsRegion: process.env.AWS_REGION || "", // Default: ""
	awsPollyVoiceId: process.env.AWS_POLLY_VOICE_ID || "", // Default: "" (set AWS_POLLY_VOICE_ID, e.g. "Joanna")
	awsPollyEngine: getEnvAWSPollyVoiceEngine(), // Default: standard

	// Speech API, Default: https://speech-service.verlekar.com
	speechServerUrl: process.env.SPEECH_API_URL || "https://speech-service.verlekar.com",
	whisperServerUrl: process.env.WHISPER_API_URL || "https://transcribe.whisperapi.com",
	openAIServerUrl: process.env.OPENAI_API_URL || "https://api.openai.com/v1/audio/transcriptions",
	whisperApiKey: process.env.WHISPER_API_KEY || "", // Default: ""

	// Text-to-Speech
	ttsEnabled: getEnvBooleanWithDefault("TTS_ENABLED", false), // Default: false
	ttsMode: getEnvTTSMode(), // Default: speech-api
	ttsTranscriptionResponse: getEnvBooleanWithDefault("TTS_TRANSCRIPTION_RESPONSE_ENABLED", true), // Default: true

	// Transcription
	transcriptionEnabled: getEnvBooleanWithDefault("TRANSCRIPTION_ENABLED", false), // Default: false
	transcriptionMode: getEnvTranscriptionMode(), // Default: local
	transcriptionLanguage: process.env.TRANSCRIPTION_LANGUAGE || "" // Default: "" (auto-detect)
};

/**
 * Get the max model tokens from the environment variable
 * @returns The max model tokens from the environment variable or 4096
 */
function getEnvMaxModelTokens(): number {
	const envValue = process.env.MAX_MODEL_TOKENS;
	if (envValue == undefined || envValue == "") {
		return 4096;
	}

	// Guard against non-numeric values so a typo doesn't propagate NaN into API calls
	const parsed = parseInt(envValue, 10);
	return Number.isNaN(parsed) ? 4096 : parsed;
}

/**
 * Get an environment variable as a boolean with a default value
 * @param key The environment variable key
 * @param defaultValue The default value
 * @returns The value of the environment variable or the default value
 */
function getEnvBooleanWithDefault(key: string, defaultValue: boolean): boolean {
	const envValue = process.env[key]?.toLowerCase();
	if (envValue == undefined || envValue == "") {
		return defaultValue;
	}

	return envValue == "true";
}

/**
 * Get the blacklist categories for prompt moderation from the environment variable
 * @returns Blacklisted categories for prompt moderation
 */
function getEnvPromptModerationBlacklistedCategories(): string[] {
	const envValue = process.env.PROMPT_MODERATION_BLACKLISTED_CATEGORIES;
	if (envValue == undefined || envValue == "") {
		return ["hate", "hate/threatening", "self-harm", "sexual", "sexual/minors", "violence", "violence/graphic"];
	} else {
		// The env var uses single quotes for convenience; normalize to valid JSON before parsing
		return JSON.parse(envValue.replace(/'/g, '"'));
	}
}

/**
 * Get the transcription mode from the environment variable
 * @returns The transcription mode
 */
function getEnvTranscriptionMode(): TranscriptionMode {
	const envValue = process.env.TRANSCRIPTION_MODE?.toLowerCase();
	if (envValue == undefined || envValue == "") {
		return TranscriptionMode.Local;
	}

	return envValue as TranscriptionMode;
}

/**
 * Get the TTS mode from the environment variable
 * @returns The TTS mode
 */
function getEnvTTSMode(): TTSMode {
	const envValue = process.env.TTS_MODE?.toLowerCase();
	if (envValue == undefined || envValue == "") {
		return TTSMode.SpeechAPI;
	}

	return envValue as TTSMode;
}

/**
 * Get the AWS Polly voice engine from the environment variable
 * @returns The voice engine
 */
function getEnvAWSPollyVoiceEngine(): AWSPollyEngine {
	const envValue = process.env.AWS_POLLY_VOICE_ENGINE?.toLowerCase();
	if (envValue == undefined || envValue == "") {
		return AWSPollyEngine.Standard;
	}

	return envValue as AWSPollyEngine;
}

export default config;
--------------------------------------------------------------------------------
/src/constants.ts:
--------------------------------------------------------------------------------
interface IConstants {
	// WhatsApp status broadcast
	statusBroadcast: string;

	// WhatsApp session storage
	sessionPath: string;
}

const constants: IConstants = {
	statusBroadcast: "status@broadcast",
	sessionPath: "./"
};

export default constants;
--------------------------------------------------------------------------------
/src/handlers/ai-config.ts:
--------------------------------------------------------------------------------
import { Message } from "whatsapp-web.js";
import { aiConfigTarget, aiConfigTypes, aiConfigValues, IAiConfig } from "../types/ai-config";
import { dalleImageSize } from "../types/dalle-config";
import { GeneralModule } from "../commands/general";
import { ChatModule } from "../commands/chat";
import { ICommandDefinition } from "../types/commands";
import { GptModule } from "../commands/gpt";
import { TranscriptionModule } from "../commands/transcription";
import { TTSModule } from "../commands/tts";
import { StableDiffusionModule } from "../commands/stable-diffusion";

import config from "../config";

// Mutable runtime configuration, adjusted via the "!config" command.
let aiConfig: IAiConfig = {
	dalle: {
		size: dalleImageSize["512x512"]
	},
	// chatgpt: {}
	commandsMap: {}
};

// Register every command module's commands under its module key.
const initAiConfig = () => {
	[ChatModule, GeneralModule, GptModule, TranscriptionModule, TTSModule, StableDiffusionModule].forEach((module) => {
		aiConfig.commandsMap[module.key] = module.register();
	});
};

/**
 * Handle a "!config ..." message: print help, dispatch a registered command,
 * or set a plain aiConfig value.
 */
const handleMessageAIConfig = async (message: Message, prompt: any) => {
	try {
		console.log("[AI-Config] Received prompt from " + message.from + ": " + prompt);

		const args: string[] = prompt.split(" ");

		/*
			!config
			!config help
		*/
		if (args.length == 1 || prompt === "help") {
			// Available commands
			let helpMessage = "Available commands:\n";
			for (let target in aiConfigTarget) {
				for (let type in aiConfigTypes[target]) {
					helpMessage += `\t${config.aiConfigPrefix} ${target} ${type} <value> - Set ${target} ${type} to <value>\n`;
				}
			}
			for (let module in aiConfig.commandsMap) {
				for (let command in aiConfig.commandsMap[module]) {
					helpMessage += `\t${config.aiConfigPrefix} ${module} ${command} ${aiConfig.commandsMap[module][command].help}\n`;
				}
			}

			// Available values
			helpMessage += "\nAvailable values:\n";
			for (let target in aiConfigTarget) {
				for (let type in aiConfigTypes[target]) {
					helpMessage += `\t${target} ${type}: ${Object.keys(aiConfigValues[target][type]).join(", ")}\n`;
				}
			}
			for (let module in aiConfig.commandsMap) {
				for (let command in aiConfig.commandsMap[module]) {
					if (aiConfig.commandsMap[module][command].hint) {
						let hint = aiConfig.commandsMap[module][command].hint;
						if (typeof hint === "object") {
							hint = Object.keys(hint).join(", ");
						}
						helpMessage += `\t${module} ${command}: ${hint}\n`;
					}
				}
			}
			message.reply(helpMessage);
			return;
		}

		// !config <target> <type>
		if (args.length < 2) {
			message.reply(
				"Invalid number of arguments, please use the following format: <target> <type> <value> or type !config help for more information."
			);
			return;
		}

		const target: string = args[0];
		const type: string = args[1];
		// Everything after the first two words is the value (values may contain spaces)
		const value: string | undefined = args.length >= 3 ? args.slice(2).join(" ") : undefined;

		if (!(target in aiConfigTarget) && !(target in aiConfig.commandsMap)) {
			message.reply("Invalid target, please use one of the following: " + Object.keys(aiConfigTarget).join(", "));
			return;
		}

		// Registered command modules take precedence over plain config values
		if (target && type && aiConfig.commandsMap[target]) {
			if (aiConfig.commandsMap[target][type]) {
				aiConfig.commandsMap[target][type].execute(message, value);
			} else {
				message.reply("Invalid command, please use one of the following: " + Object.keys(aiConfig.commandsMap[target]).join(", "));
			}
			return;
		}

		if (typeof aiConfigTypes[target] !== "object" || !(type in aiConfigTypes[target])) {
			message.reply("Invalid type, please use one of the following: " + Object.keys(aiConfigTypes[target]).join(", "));
			return;
		}

		if (value === undefined || (typeof aiConfigValues[target][type] === "object" && !(value in aiConfigValues[target][type]))) {
			message.reply("Invalid value, please use one of the following: " + Object.keys(aiConfigValues[target][type]).join(", "));
			return;
		}

		aiConfig[target][type] = value;

		message.reply("Successfully set " + target + " " + type + " to " + value);
	} catch (error: any) {
		console.error("An error occured", error);
		message.reply("An error occured, please contact the administrator. (" + error.message + ")");
	}
};

// Look up a registered command definition by module and command name.
export function getCommand(module: string, command: string): ICommandDefinition {
	return aiConfig.commandsMap[module][command];
}

// Read a config value; registered commands may expose either a plain value or a getter function.
export function getConfig(target: string, type: string): any {
	if (aiConfig.commandsMap[target] && aiConfig.commandsMap[target][type]) {
		if (typeof aiConfig.commandsMap[target][type].data === "function") {
			return aiConfig.commandsMap[target][type].data();
		}
		return aiConfig.commandsMap[target][type].data;
	}
	return aiConfig[target][type];
}

// Execute a registered command if it exists; silently no-op otherwise.
export function executeCommand(target: string, type: string, message: Message, value?: string | undefined) {
	if (aiConfig.commandsMap[target] && aiConfig.commandsMap[target][type]) {
		if (typeof aiConfig.commandsMap[target][type].execute === "function") {
			return aiConfig.commandsMap[target][type].execute(message, value);
		}
	}
}

export { aiConfig, handleMessageAIConfig, initAiConfig };
--------------------------------------------------------------------------------
/src/handlers/dalle.ts:
--------------------------------------------------------------------------------
import { MessageMedia } from "whatsapp-web.js";
import { openai } from "../providers/openai";
import { aiConfig } from "../handlers/ai-config";
import OpenAI from "openai";
import config from "../config";
import * as cli from "../cli/ui";

// Moderation
import { moderateIncomingPrompt } from "./moderation";

/**
 * Handle a "!dalle <prompt>" message: optionally moderate the prompt,
 * generate one image and reply with it.
 */
const handleMessageDALLE = async (message: any, prompt: any) => {
	try {
		const start = Date.now();

		cli.print(`[DALL-E] Received prompt from ${message.from}: ${prompt}`);

		// Prompt Moderation
		if (config.promptModerationEnabled) {
			try {
				await moderateIncomingPrompt(prompt);
			} catch (error: any) {
				message.reply(error.message);
				return;
			}
		}

		// Send the prompt to the API
		const response = await openai.images.generate({
			prompt: prompt,
			n: 1,
			// Cast through the v4 SDK's own size union instead of the removed v3 enum
			size: aiConfig.dalle.size as OpenAI.ImageGenerateParams["size"],
			response_format: "b64_json"
		});

		const end = Date.now() - start;

		// openai v4 returns the images directly on `response.data` (no axios wrapper)
		const base64 = response.data[0].b64_json as string;
		const image = new MessageMedia("image/jpeg", base64, "image.jpg");

		cli.print(`[DALL-E] Answer to ${message.from} | OpenAI request took ${end}ms`);

		message.reply(image);
	} catch (error: any) {
		console.error("An error occured", error);
		message.reply("An error occured, please contact the administrator. (" + error.message + ")");
	}
};

export { handleMessageDALLE };
--------------------------------------------------------------------------------
/src/handlers/gpt.ts:
--------------------------------------------------------------------------------
import os from "os";
import fs from "fs";
import path from "path";
import { randomUUID } from "crypto";
import { Message, MessageMedia } from "whatsapp-web.js";
import { chatgpt } from "../providers/openai";
import * as cli from "../cli/ui";
import config from "../config";

import { ChatMessage } from "chatgpt";

// TTS
import { ttsRequest as speechTTSRequest } from "../providers/speech";
import { ttsRequest as awsTTSRequest } from "../providers/aws";
import { TTSMode } from "../types/tts-mode";

// Moderation
import { moderateIncomingPrompt } from "./moderation";
import { aiConfig, getConfig } from "./ai-config";

// Mapping from number to last conversation id
const conversations = {};

/**
 * Handle a GPT prompt: continue the sender's conversation if one exists,
 * otherwise start a new one (optionally prefixed with the configured pre-prompt).
 */
const handleMessageGPT = async (message: Message, prompt: string) => {
	try {
		// Get last conversation
		const lastConversationId = conversations[message.from];

		cli.print(`[GPT] Received prompt from ${message.from}: ${prompt}`);

		// Prompt Moderation
		if (config.promptModerationEnabled) {
			try {
				await moderateIncomingPrompt(prompt);
			} catch (error: any) {
				message.reply(error.message);
				return;
			}
		}

		const start = Date.now();

		// Check if we have a conversation with the user
		let response: ChatMessage;
		if (lastConversationId) {
			// Handle message with previous conversation
			response = await chatgpt.sendMessage(prompt, {
				parentMessageId: lastConversationId
			});
		} else {
			// Build the first message of a new conversation.
			// Bug fix: the user's prompt must be sent even when no pre-prompt is
			// configured (previously an empty string was sent in that case).
			let promptBuilder = "";
			if (config.prePrompt != null && config.prePrompt.trim() != "") {
				promptBuilder += config.prePrompt + "\n\n";
			}
			promptBuilder += prompt;

			// Handle message with new conversation
			response = await chatgpt.sendMessage(promptBuilder);

			cli.print(`[GPT] New conversation for ${message.from} (ID: ${response.id})`);
		}

		// Set conversation id
		conversations[message.from] = response.id;

		const end = Date.now() - start;

		cli.print(`[GPT] Answer to ${message.from}: ${response.text} | OpenAI request took ${end}ms)`);

		// TTS reply (Default: disabled)
		if (getConfig("tts", "enabled")) {
			sendVoiceMessageReply(message, response.text);
			message.reply(response.text);
			return;
		}

		// Default: Text reply
		message.reply(response.text);
	} catch (error: any) {
		console.error("An error occured", error);
		message.reply("An error occured, please contact the administrator. (" + error.message + ")");
	}
};

// Reset the sender's conversation context ("!reset").
const handleDeleteConversation = async (message: Message) => {
	// Delete conversation
	delete conversations[message.from];

	// Reply
	message.reply("Conversation context was resetted!");
};

/**
 * Convert the GPT text response to speech with the configured TTS provider
 * and reply with a voice message.
 */
async function sendVoiceMessageReply(message: Message, gptTextResponse: string) {
	let logTAG: string;
	let ttsRequest: () => Promise<Buffer | null>;

	switch (config.ttsMode) {
		case TTSMode.AWSPolly:
			logTAG = "[AWSPolly]";
			ttsRequest = () => awsTTSRequest(gptTextResponse);
			break;

		case TTSMode.SpeechAPI:
		default:
			// Speech API is both an explicit mode and the fallback
			logTAG = "[SpeechAPI]";
			ttsRequest = () => speechTTSRequest(gptTextResponse);
			break;
	}

	// Get audio buffer
	cli.print(`${logTAG} Generating audio from GPT response "${gptTextResponse}"...`);
	const audioBuffer = await ttsRequest();

	// Check if audio buffer is valid
	if (audioBuffer == null || audioBuffer.length == 0) {
		message.reply(`${logTAG} couldn't generate audio, please contact the administrator.`);
		return;
	}

	cli.print(`${logTAG} Audio generated!`);

	// Send audio directly from the in-memory buffer.
	// (The previous temp-file write/delete was dead code: nothing ever read the file.)
	const messageMedia = new MessageMedia("audio/ogg; codecs=opus", audioBuffer.toString("base64"));
	message.reply(messageMedia);
}

export { handleMessageGPT, handleDeleteConversation };
--------------------------------------------------------------------------------
/src/handlers/langchain.ts:
--------------------------------------------------------------------------------
import { Message } from "whatsapp-web.js";
import BrowserAgentProvider from "../providers/browser-agent";
import * as cli from "../cli/ui";

const browserAgent = new BrowserAgentProvider();

// TODO add conversation ID to build a chat history
// Handle a "!lang <prompt>" message via the LangChain browser agent.
const handleMessageLangChain = async (message: Message, prompt: string) => {
	try {
		const start = Date.now();
		const output = await browserAgent.fetch(prompt);
		const end = Date.now() - start;

		cli.print(`[GPT] Answer to ${message.from}: ${output} | OpenAI request took ${end}ms)`);

		// Default: Text reply
		message.reply(output);
	} catch (error: any) {
		console.error("An error occured", error);
		message.reply("An error occured, please contact the administrator. (" + error.message + ")");
	}
};

export { handleMessageLangChain };
--------------------------------------------------------------------------------
/src/handlers/message.ts:
--------------------------------------------------------------------------------
import { Message } from "whatsapp-web.js";
import { startsWithIgnoreCase } from "../utils";

// Config & Constants
import config from "../config";

// CLI
import * as cli from "./../cli/ui";

// ChatGPT & DALLE
import { handleMessageGPT, handleDeleteConversation } from "../handlers/gpt";
import { handleMessageDALLE } from "../handlers/dalle";
import { handleMessageAIConfig, getConfig, executeCommand } from "../handlers/ai-config";
import { handleMessageLangChain } from "../handlers/langchain";

// Speech API & Whisper
import { TranscriptionMode } from "../types/transcription-mode";
import { transcribeRequest } from "../providers/speech";
import { transcribeAudioLocal } from "../providers/whisper-local";
import { transcribeWhisperApi } from "../providers/whisper-api";
import { transcribeOpenAI } from "../providers/openai";

// For deciding to ignore old messages
import { botReadyTimestamp } from "../index";

// Handles message: filters old/group/non-whitelisted messages, transcribes
// voice notes, then routes the text to the matching prefix handler.
async function handleIncomingMessage(message: Message) {
	let messageString = message.body;

	// Prevent handling old messages
	if (message.timestamp != null) {
		const messageTimestamp = new Date(message.timestamp * 1000);

		// If startTimestamp is null, the bot is not ready yet
		if (botReadyTimestamp == null) {
			cli.print("Ignoring message because bot is not ready yet: " + messageString);
			return;
		}

		// Ignore messages that are sent before the bot is started
		if (messageTimestamp < botReadyTimestamp) {
			cli.print("Ignoring old message: " + messageString);
			return;
		}
	}

	// Ignore groupchats if disabled
	if ((await message.getChat()).isGroup && !config.groupchatsEnabled) return;

	// Message sent to yourself ("self-noted"), always allowed through the whitelist
	const selfNotedMessage = message.fromMe && message.hasQuotedMsg === false && message.from === message.to;

	if (config.whitelistedEnabled) {
		const whitelistedPhoneNumbers = getConfig("general", "whitelist");

		if (!selfNotedMessage && whitelistedPhoneNumbers.length > 0 && !whitelistedPhoneNumbers.includes(message.from)) {
			cli.print(`Ignoring message from ${message.from} because it is not whitelisted.`);
			return;
		}
	}

	// Transcribe audio
	if (message.hasMedia) {
		const media = await message.downloadMedia();

		// Ignore non-audio media
		if (!media || !media.mimetype.startsWith("audio/")) return;

		// Check if transcription is enabled (Default: false)
		if (!getConfig("transcription", "enabled")) {
			cli.print("[Transcription] Received voice messsage but voice transcription is disabled.");
			return;
		}

		// Convert media to base64 string
		const mediaBuffer = Buffer.from(media.data, "base64");

		// Transcribe locally or with Speech API
		const transcriptionMode = getConfig("transcription", "mode");
		cli.print(`[Transcription] Transcribing audio with "${transcriptionMode}" mode...`);

		let res;
		switch (transcriptionMode) {
			case TranscriptionMode.Local:
				res = await transcribeAudioLocal(mediaBuffer);
				break;
			case TranscriptionMode.OpenAI:
				res = await transcribeOpenAI(mediaBuffer);
				break;
			case TranscriptionMode.WhisperAPI:
				res = await transcribeWhisperApi(new Blob([mediaBuffer]));
				break;
			case TranscriptionMode.SpeechAPI:
				res = await transcribeRequest(new Blob([mediaBuffer]));
				break;
			default:
				// Bug fix: must bail out here, otherwise `res` is undefined and
				// the destructuring below throws.
				cli.print(`[Transcription] Unsupported transcription mode: ${transcriptionMode}`);
				return;
		}
		const { text: transcribedText, language: transcribedLanguage } = res;

		// Check transcription is null (error)
		if (transcribedText == null) {
			message.reply("I couldn't understand what you said.");
			return;
		}

		// Check transcription is empty (silent voice message)
		if (transcribedText.length == 0) {
			message.reply("I couldn't understand what you said.");
			return;
		}

		// Log transcription
		cli.print(`[Transcription] Transcription response: ${transcribedText} (language: ${transcribedLanguage})`);

		// Reply with transcription
		if (config.ttsTranscriptionResponse) {
			const reply = `You said: ${transcribedText}${transcribedLanguage ? " (language: " + transcribedLanguage + ")" : ""}`;
			message.reply(reply);
		}

		// Handle message GPT
		await handleMessageGPT(message, transcribedText);
		return;
	}

	// Clear conversation context (!reset)
	if (startsWithIgnoreCase(messageString, config.resetPrefix)) {
		await handleDeleteConversation(message);
		return;
	}

	// AiConfig (!config <args>)
	if (startsWithIgnoreCase(messageString, config.aiConfigPrefix)) {
		const prompt = messageString.substring(config.aiConfigPrefix.length + 1);
		await handleMessageAIConfig(message, prompt);
		return;
	}

	// GPT (!gpt <prompt>)
	if (startsWithIgnoreCase(messageString, config.gptPrefix)) {
		const prompt = messageString.substring(config.gptPrefix.length + 1);
		await handleMessageGPT(message, prompt);
		return;
	}

	// LangChain (!lang <prompt>)
	if (startsWithIgnoreCase(messageString, config.langChainPrefix)) {
		const prompt = messageString.substring(config.langChainPrefix.length + 1);
		await handleMessageLangChain(message, prompt);
		return;
	}

	// DALLE (!dalle <prompt>)
	if (startsWithIgnoreCase(messageString, config.dallePrefix)) {
		const prompt = messageString.substring(config.dallePrefix.length + 1);
		await handleMessageDALLE(message, prompt);
		return;
	}

	// Stable Diffusion (!sd <prompt>)
	if (startsWithIgnoreCase(messageString, config.stableDiffusionPrefix)) {
		const prompt = messageString.substring(config.stableDiffusionPrefix.length + 1);
		await executeCommand("sd", "generate", message, prompt);
		return;
	}

	// GPT (only <prompt>, no prefix)
	if (!config.prefixEnabled || (config.prefixSkippedForMe && selfNotedMessage)) {
		await handleMessageGPT(message, messageString);
		return;
	}
}

export { handleIncomingMessage };
--------------------------------------------------------------------------------
/src/handlers/moderation.ts:
--------------------------------------------------------------------------------
import * as cli from "../cli/ui";
import config from "../config";
import { openai } from "../providers/openai";

/**
 * Handle prompt moderation
 *
 * @param prompt Prompt to moderate
 * @returns true if the prompt is safe, throws an error otherwise
 */
const moderateIncomingPrompt = async (prompt: string) => {
	cli.print("[MODERATION] Checking user prompt...");
	const moderationResponse = await openai.moderations.create({
		input: prompt
	});

	// openai v4: the response body is returned directly (no `.data` axios wrapper)
	const moderationResponseCategories = moderationResponse.results[0].categories;
	const blackListedCategories = config.promptModerationBlacklistedCategories;

	// Print categories as [ category: true/false ]
	const categoriesForPrint = Object.keys(moderationResponseCategories).map((category) => {
		return `${category}: ${moderationResponseCategories[category]}`;
	});
	cli.print(`[MODERATION] OpenAI Moderation response: ${JSON.stringify(categoriesForPrint)}`);

	// Check if any of the blacklisted categories are set to true
	for (const category of blackListedCategories) {
		if (moderationResponseCategories[category]) {
			throw new Error(`Prompt was rejected by the moderation system. Reason: ${category}`);
		}
	}

	return true;
};

export { moderateIncomingPrompt };
--------------------------------------------------------------------------------
/src/index.ts:
--------------------------------------------------------------------------------
import qrcode from "qrcode";
import { Client, Message, Events, LocalAuth } from "whatsapp-web.js";

// Constants
import constants from "./constants";

// CLI
import * as cli from "./cli/ui";
import { handleIncomingMessage } from "./handlers/message";

// Config
import { initAiConfig } from "./handlers/ai-config";
import { initOpenAI } from "./providers/openai";

// Ready timestamp of the bot
let botReadyTimestamp: Date | null = null;

// Entrypoint: create the WhatsApp client, wire up events and start the session.
const start = async () => {
	// Pinned web version to keep whatsapp-web.js compatible with WhatsApp Web
	const wwebVersion = "2.2412.54";
	cli.printIntro();

	// WhatsApp Client
	const client = new Client({
		puppeteer: {
			args: ["--no-sandbox"]
		},
		authStrategy: new LocalAuth({
			dataPath: constants.sessionPath
		}),
		webVersionCache: {
			type: "remote",
			remotePath: `https://raw.githubusercontent.com/wppconnect-team/wa-version/main/html/${wwebVersion}.html`
		}
	});

	// WhatsApp auth
	client.on(Events.QR_RECEIVED, (qr: string) => {
		console.log("");
		qrcode.toString(
			qr,
			{
				type: "terminal",
				small: true,
				margin: 2,
				scale: 1
			},
			(err, url) => {
				if (err) throw err;
				cli.printQRCode(url);
			}
		);
	});

	// WhatsApp loading
	client.on(Events.LOADING_SCREEN, (percent) => {
		if (percent == "0") {
			cli.printLoading();
		}
	});

	// WhatsApp authenticated
	client.on(Events.AUTHENTICATED, () => {
		cli.printAuthenticated();
	});

	// WhatsApp authentication failure
	client.on(Events.AUTHENTICATION_FAILURE, () => {
		cli.printAuthenticationFailure();
	});

	// WhatsApp ready
	client.on(Events.READY, () => {
		// Print outro
		cli.printOutro();

		// Set bot ready timestamp
		botReadyTimestamp = new Date();

		initAiConfig();
		initOpenAI();
	});

	// WhatsApp message
	client.on(Events.MESSAGE_RECEIVED, async (message: any) => {
		// Ignore if message is from status broadcast
		if (message.from == constants.statusBroadcast) return;

		// Ignore if it's a quoted message, (e.g. Bot reply)
		if (message.hasQuotedMsg) return;

		await handleIncomingMessage(message);
	});

	// Reply to own message
	client.on(Events.MESSAGE_CREATE, async (message: Message) => {
		// Ignore if message is from status broadcast
		if (message.from == constants.statusBroadcast) return;

		// Ignore if it's a quoted message, (e.g. Bot reply)
		if (message.hasQuotedMsg) return;

		// Ignore if it's not from me
		if (!message.fromMe) return;

		await handleIncomingMessage(message);
	});

	// WhatsApp initialization
	client.initialize();
};

start();

export { botReadyTimestamp };
--------------------------------------------------------------------------------
/src/providers/aws.ts:
--------------------------------------------------------------------------------
const AWS = require("aws-sdk");
import config from "../config";

/**
 * @param text The sentence to be converted to speech
 * @returns Audio buffer, or null on failure
 */
async function ttsRequest(text: string): Promise<Buffer | null> {
	const polly = new AWS.Polly({
		credentials: new AWS.Credentials(config.awsAccessKeyId, config.awsSecretAccessKey),
		region: config.awsRegion
	});

	const params = {
		OutputFormat: "mp3",
		Text: text,
		Engine: config.awsPollyEngine,
		VoiceId: config.awsPollyVoiceId
	};

	try {
		const data = await polly.synthesizeSpeech(params).promise();
		if (data.AudioStream instanceof Buffer) {
			return data.AudioStream;
		}
		return null;
	} catch (error) {
		console.error("An error occured (TTS request)", error);
		return null;
	}
}

export { ttsRequest };
--------------------------------------------------------------------------------
/src/providers/browser-agent.ts:
--------------------------------------------------------------------------------
import { OpenAI } from "langchain/llms/openai";
import { SerpAPI } from "langchain/tools";
import { initializeAgentExecutor } from "langchain/agents";

export default class BrowserAgentProvider {
	// Can use other browser tools like RequestGetTool if you do not have a [SerpAPI](https://serpapi.com/) API key.
	tools = [
		new SerpAPI()
		// new RequestsGetTool(),
	];
	// Always select highest probability word in search
	model = new OpenAI({ temperature: 0 });

	// Run the zero-shot agent and return its final answer text (`result.output`).
	fetch = async (query) => {
		const executor = await initializeAgentExecutor(this.tools, this.model, "zero-shot-react-description", true);
		const result = await executor.call({ input: query });

		return result.output;
	};
}
--------------------------------------------------------------------------------
/src/providers/openai.ts:
--------------------------------------------------------------------------------
import fs from "fs";
import os from "os";
import path from "path";
import { randomUUID } from "crypto";
import { ChatGPTAPI } from "chatgpt";
import OpenAI from "openai";

import ffmpeg from "fluent-ffmpeg";
import { blobFromSync, File } from "fetch-blob/from.js";
import config from "../config";
import { getConfig } from "../handlers/ai-config";

export let chatgpt: ChatGPTAPI;

// OpenAI Client (DALL-E)
export let openai: OpenAI;

// Initialize both OpenAI clients; must run after initAiConfig().
export function initOpenAI() {
	chatgpt = new ChatGPTAPI({
		apiKey: getConfig("gpt", "apiKey"),
		completionParams: {
			model: config.openAIModel,
			temperature: 0.7,
			top_p: 0.9,
			max_tokens: getConfig("gpt", "maxModelTokens")
		}
	});

	openai = new OpenAI({
		apiKey: getConfig("gpt", "apiKey")
	});
}

/**
 * Transcribe an ogg voice note with the OpenAI Whisper endpoint.
 * Converts ogg -> wav first; returns { text: "", language } on any failure.
 */
export async function transcribeOpenAI(audioBuffer: Buffer): Promise<{ text: string; language: string }> {
	const url = config.openAIServerUrl;
	let language = "";

	const tempdir = os.tmpdir();
	const oggPath = path.join(tempdir, randomUUID() + ".ogg");
	const wavFilename = randomUUID() + ".wav";
	const wavPath = path.join(tempdir, wavFilename);
	fs.writeFileSync(oggPath, audioBuffer);
	try {
		await convertOggToWav(oggPath, wavPath);
	} catch (e) {
		fs.unlinkSync(oggPath);
		return {
			text: "",
			language
		};
	}

	// FormData
	const formData = new FormData();
	formData.append("file", new File([blobFromSync(wavPath)], wavFilename, { type: "audio/wav" }));
	formData.append("model", "whisper-1");
	if (config.transcriptionLanguage) {
		formData.append("language", config.transcriptionLanguage);
		language = config.transcriptionLanguage;
	}

	const headers = new Headers();
	headers.append("Authorization", `Bearer ${getConfig("gpt", "apiKey")}`);

	// Request options
	const options = {
		method: "POST",
		body: formData,
		headers
	};

	let response;
	try {
		response = await fetch(url, options);
	} catch (e) {
		console.error(e);
	} finally {
		// Always clean up the temp files, even when the request fails
		fs.unlinkSync(oggPath);
		fs.unlinkSync(wavPath);
	}

	if (!response || response.status != 200) {
		console.error(response);
		return {
			text: "",
			language: language
		};
	}

	const transcription = await response.json();
	return {
		text: transcription.text,
		language
	};
}

// Convert an ogg file to 16-bit PCM wav via ffmpeg.
async function convertOggToWav(oggPath: string, wavPath: string): Promise<void> {
	return new Promise((resolve, reject) => {
		ffmpeg(oggPath)
			.toFormat("wav")
			.outputOptions("-acodec pcm_s16le")
			.output(wavPath)
			.on("end", () => resolve())
			.on("error", (err) => reject(err))
			.run();
	});
}
--------------------------------------------------------------------------------
/src/providers/speech.ts:
--------------------------------------------------------------------------------
import config from "../config";

/**
 * @param text The sentence to be converted to speech
 * @returns Audio buffer, or null on failure
 */
async function ttsRequest(text: string): Promise<Buffer | null> {
	const url = config.speechServerUrl + "/tts";

	// Request options
	const options = {
		method: "POST",
		headers: {
			"Content-Type": "application/json"
		},
		body: JSON.stringify({
			text
		})
	};

	try {
		const response = await fetch(url, options);
		const audioBuffer = await response.arrayBuffer();
		return Buffer.from(audioBuffer);
	} catch (error) {
		console.error("An error occured (TTS request)", error);
		return null;
	}
}

/**
 * @param audioBlob The audio blob to be transcribed
 * @returns Response: { text: string, language: string }
 */
async function transcribeRequest(audioBlob: Blob): Promise<{ text: string; language: string }> {
	const url = config.speechServerUrl + "/transcribe";

	// FormData
	const formData = new FormData();
	formData.append("audio", audioBlob);

	// Request options
	const options = {
		method: "POST",
		body: formData
	};

	const response = await fetch(url, options);
	const transcription = await response.json();
	return transcription;
}

export { ttsRequest, transcribeRequest };
--------------------------------------------------------------------------------
/src/providers/whisper-api.ts:
--------------------------------------------------------------------------------
import config from "../config";

// Transcribe an audio blob via the hosted WhisperAPI service.
async function transcribeWhisperApi(audioBlob: Blob): Promise<{ text: string; language: string }> {
	const url = config.whisperServerUrl;

	// FormData
	const formData = new FormData();
	formData.append("file", audioBlob);
	formData.append("diarization", "false");
	formData.append("numSpeakers", "1");
	formData.append("fileType", "ogg");
	if (config.transcriptionLanguage) {
		formData.append("language", config.transcriptionLanguage);
	}
	formData.append("task", "transcribe");

	const headers = new Headers();
headers.append("Authorization", `Bearer ${config.whisperApiKey}`); 19 | 20 | // Request options 21 | const options = { 22 | method: "POST", 23 | body: formData, 24 | headers 25 | }; 26 | 27 | const response = await fetch(url, options); 28 | const transcription = await response.json(); 29 | return transcription; 30 | } 31 | 32 | export { transcribeWhisperApi }; 33 | -------------------------------------------------------------------------------- /src/providers/whisper-local.ts: -------------------------------------------------------------------------------- 1 | import fs from "fs"; 2 | import os from "os"; 3 | import path from "path"; 4 | import { execSync } from "child_process"; 5 | import { randomUUID } from "crypto"; 6 | 7 | async function transcribeAudioLocal(audioBuffer: Buffer): Promise<{ text: string; language: string }> { 8 | // Write audio buffer to tempdir 9 | const tempdir = os.tmpdir(); 10 | const audioPath = path.join(tempdir, randomUUID() + ".wav"); 11 | fs.writeFileSync(audioPath, audioBuffer); 12 | 13 | // Transcribe audio 14 | const output = execSync(`whisper ${audioPath}`, { encoding: "utf-8" }); 15 | 16 | // Delete tmp file 17 | fs.unlinkSync(audioPath); 18 | 19 | // Delete whisper created tmp files 20 | const extensions = [".wav.srt", ".wav.txt", ".wav.vtt"]; 21 | for (const extension of extensions) { 22 | fs.readdirSync(process.cwd()).forEach((file) => { 23 | if (file.endsWith(extension)) fs.unlinkSync(file); 24 | }); 25 | } 26 | 27 | // Return parsed text and language 28 | return { 29 | text: parseTextAfterTimeFrame(output), 30 | language: parseDetectedLanguage(output) 31 | }; 32 | } 33 | 34 | function parseDetectedLanguage(text) { 35 | const languageLine = text.split("\n")[1]; // Extract the second line of text 36 | const languageMatch = languageLine.match(/Detected language:\s(.+)/); // Extract the detected language 37 | 38 | if (languageMatch) { 39 | return languageMatch[1].trim(); 40 | } 41 | 42 | return null; // Return null if match is not 
found 43 | } 44 | 45 | function parseTextAfterTimeFrame(text) { 46 | const textMatch = text.match(/\[(\d{2}:\d{2}\.\d{3})\s-->\s(\d{2}:\d{2}\.\d{3})\]\s(.+)/); // Extract the text 47 | 48 | if (textMatch) { 49 | return textMatch[3].trim(); 50 | } 51 | 52 | return null; // Return null if match is not found 53 | } 54 | 55 | export { transcribeAudioLocal }; 56 | -------------------------------------------------------------------------------- /src/types/ai-config.ts: -------------------------------------------------------------------------------- 1 | import { ICommandsMap } from "./commands"; 2 | import { dalleConfigType, dalleImageSize } from "./dalle-config"; 3 | 4 | export enum aiConfigTarget { 5 | dalle = "dalle" 6 | // chatgpt = "chatgpt" 7 | } 8 | 9 | export const aiConfigTypes = { 10 | dalle: dalleConfigType 11 | }; 12 | 13 | export const aiConfigValues = { 14 | dalle: { 15 | size: dalleImageSize 16 | } 17 | }; 18 | 19 | export interface IAiConfig { 20 | dalle: { 21 | size: dalleImageSize; 22 | }; 23 | commandsMap: { 24 | [key: string]: ICommandsMap; 25 | }; 26 | } 27 | -------------------------------------------------------------------------------- /src/types/aws-polly-engine.ts: -------------------------------------------------------------------------------- 1 | export enum AWSPollyEngine { 2 | Standard = "standard", 3 | Neural = "neural" 4 | } 5 | -------------------------------------------------------------------------------- /src/types/commands.ts: -------------------------------------------------------------------------------- 1 | import { Message } from "whatsapp-web.js"; 2 | 3 | export interface ICommandExecution { 4 | (message: Message, value?: string): void; 5 | } 6 | 7 | export interface ICommandDefinition { 8 | data?: any; 9 | help: string; 10 | hint?: string | Object | undefined; 11 | execute: ICommandExecution; 12 | } 13 | 14 | export interface ICommandsMap { 15 | [key: string]: ICommandDefinition; 16 | } 17 | 18 | export interface ICommandModule { 
19 | key: string; 20 | register: () => ICommandsMap; 21 | } 22 | -------------------------------------------------------------------------------- /src/types/dalle-config.ts: -------------------------------------------------------------------------------- 1 | export enum dalleConfigType { 2 | size = "size" 3 | } 4 | 5 | export enum dalleImageSize { 6 | "256x256" = "256x256", 7 | "512x512" = "512x512", 8 | "1024x1024" = "1024x1024" 9 | } 10 | -------------------------------------------------------------------------------- /src/types/transcription-mode.ts: -------------------------------------------------------------------------------- 1 | export enum TranscriptionMode { 2 | Local = "local", 3 | SpeechAPI = "speech-api", 4 | WhisperAPI = "whisper-api", 5 | OpenAI = "openai" 6 | } 7 | -------------------------------------------------------------------------------- /src/types/tts-mode.ts: -------------------------------------------------------------------------------- 1 | export enum TTSMode { 2 | SpeechAPI = "speech-api", 3 | AWSPolly = "aws-polly" 4 | } 5 | -------------------------------------------------------------------------------- /src/utils.ts: -------------------------------------------------------------------------------- 1 | const startsWithIgnoreCase = (str, prefix) => str.toLowerCase().startsWith(prefix.toLowerCase()); 2 | 3 | export { startsWithIgnoreCase }; 4 | --------------------------------------------------------------------------------