├── .dockerignore
├── .env.example
├── .eslintrc.js
├── .gitignore
├── Dockerfile
├── LICENSE
├── README.md
├── app.js
├── fly.toml.example
├── functions
│   ├── checkInventory.js
│   ├── checkPrice.js
│   ├── function-manifest.js
│   ├── placeOrder.js
│   └── transferCall.js
├── package-lock.json
├── package.json
├── scripts
│   ├── inbound-call.js
│   └── outbound-call.js
├── services
│   ├── gpt-service.js
│   ├── recording-service.js
│   ├── stream-service.js
│   ├── transcription-service.js
│   └── tts-service.js
└── test
    ├── checkInventory.test.js
    ├── checkPrice.test.js
    ├── placeOrder.test.js
    └── transferCall.test.js
/.dockerignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/node 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=node 3 | 4 | ### Node ### 5 | # Logs 6 | logs 7 | *.log 8 | npm-debug.log* 9 | yarn-debug.log* 10 | yarn-error.log* 11 | lerna-debug.log* 12 | .pnpm-debug.log* 13 | 14 | # Diagnostic reports (https://nodejs.org/api/report.html) 15 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 16 | 17 | # Runtime data 18 | pids 19 | *.pid 20 | *.seed 21 | *.pid.lock 22 | 23 | # Directory for instrumented libs generated by jscoverage/JSCover 24 | lib-cov 25 | 26 | # Coverage directory used by tools like istanbul 27 | coverage 28 | *.lcov 29 | 30 | # nyc test coverage 31 | .nyc_output 32 | 33 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 34 | .grunt 35 | 36 | # Bower dependency directory (https://bower.io/) 37 | bower_components 38 | 39 | # node-waf configuration 40 | .lock-wscript 41 | 42 | # Compiled binary addons (https://nodejs.org/api/addons.html) 43 | build/Release 44 | 45 | # Dependency directories 46 | node_modules/ 47 | jspm_packages/ 48 | 49 | # Snowpack dependency directory (https://snowpack.dev/) 50 | web_modules/ 51 | 52 | # TypeScript cache 53 | *.tsbuildinfo 54 | 55 | # Optional npm cache directory 56 | .npm 57 | 58 | # Optional eslint cache 59 | .eslintcache 60 | 61 | # Optional stylelint cache 62 | .stylelintcache 63 | 64 | # Microbundle cache 65 | .rpt2_cache/ 66 | .rts2_cache_cjs/ 67 | .rts2_cache_es/ 68 | .rts2_cache_umd/ 69 | 70 | # Optional REPL history 71 | .node_repl_history 72 | 73 | # Output of 'npm pack' 74 | *.tgz 75 | 76 | # Yarn Integrity file 77 | .yarn-integrity 78 | 79 | # dotenv environment variable files 80 | .env 81 | .env.development.local 82 | .env.test.local 83 | .env.production.local 84 | .env.local 85 | 86 | # parcel-bundler cache (https://parceljs.org/) 87 | .cache 88 | .parcel-cache 89 | 90 | # Next.js build output 91 | .next 92 | out 93 | 94 | # Nuxt.js build / generate output 95 | .nuxt 96 | dist 97 | 98 | # Gatsby files 99 | .cache/ 100 | # Comment in the public line in if your project uses Gatsby and not Next.js 101 | # https://nextjs.org/blog/next-9-1#public-directory-support 102 | # public 103 | 104 | # vuepress build output 105 | .vuepress/dist 106 | 107 | # vuepress v2.x temp and cache directory 108 | .temp 109 | 110 | # Docusaurus cache and generated files 111 | .docusaurus 112 | 113 | # Serverless directories 114 | .serverless/ 115 | 116 | # FuseBox cache 117 | .fusebox/ 118 | 119 | # DynamoDB Local files 120 | .dynamodb/ 121 | 122 | # TernJS port file 123 | .tern-port 124 | 125 | # Stores VSCode versions used for testing VSCode extensions 126 | .vscode-test 127 | 128 | # yarn v2 129 | .yarn/cache 130 | .yarn/unplugged 131 | .yarn/build-state.yml 132 | .yarn/install-state.gz 133 | .pnp.* 134 | 135 | ### Node Patch 
### 136 | # Serverless Webpack directories 137 | .webpack/ 138 | 139 | # Optional stylelint cache 140 | 141 | # SvelteKit build / generate output 142 | .svelte-kit 143 | 144 | # End of https://www.toptal.com/developers/gitignore/api/node -------------------------------------------------------------------------------- /.env.example: -------------------------------------------------------------------------------- 1 | # Optional: Configure your Twilio credentials if you want 2 | # to make test calls using '$ npm run outbound'. 3 | TWILIO_ACCOUNT_SID=YOUR-ACCOUNT-SID 4 | TWILIO_AUTH_TOKEN=YOUR-AUTH-TOKEN 5 | FROM_NUMBER='+12223334444' 6 | APP_NUMBER='+13334445555' 7 | YOUR_NUMBER='+14445556666' 8 | 9 | # Your ngrok or server URL 10 | # E.g. 123.ngrok.io or myserver.fly.dev 11 | SERVER='myserver.website.com' 12 | 13 | # Service API Keys 14 | OPENAI_API_KEY= 15 | DEEPGRAM_API_KEY= 16 | 17 | # Deepgram voice model, see more options here: https://developers.deepgram.com/docs/tts-models 18 | VOICE_MODEL=aura-asteria-en 19 | 20 | # Call Recording 21 | # Important: Legal implications of call recording 22 | 23 | # If you choose to record voice or video calls, you need to comply with certain laws and regulations, 24 | # including those regarding obtaining consent to record (such as California's Invasion of Privacy Act 25 | # and similar laws in other jurisdictions). Additional information on the legal implications of call 26 | # recording can be found in the "Legal Considerations with Recording Voice and Video Communications" 27 | # Help Center article: https://help.twilio.com/articles/360011522553-Legal-Considerations-with-Recording-Voice-and-Video-Communications 28 | 29 | # Notice: Twilio recommends that you consult with your legal counsel to make sure that you are complying 30 | # with all applicable laws in connection with communications you record or store using Twilio. 
31 | RECORDING_ENABLED='false' -------------------------------------------------------------------------------- /.eslintrc.js: -------------------------------------------------------------------------------- 1 | module.exports = { 2 | 'env': { 3 | 'browser': true, 4 | 'commonjs': true, 5 | 'es2021': true 6 | }, 7 | 'extends': 'eslint:recommended', 8 | 'overrides': [ 9 | { 10 | 'env': { 11 | 'node': true 12 | }, 13 | 'files': [ 14 | '.eslintrc.{js,cjs}' 15 | ], 16 | 'parserOptions': { 17 | 'sourceType': 'script' 18 | } 19 | } 20 | ], 21 | 'globals' : { 22 | 'expect': 'writeable', 23 | 'test': 'writeable', 24 | 'process': 'readable' 25 | }, 26 | 'parserOptions': { 27 | 'ecmaVersion': 'latest' 28 | }, 29 | 'rules': { 30 | 'indent': [ 31 | 'error', 32 | 2 33 | ], 34 | 'linebreak-style': [ 35 | 'error', 36 | 'unix' 37 | ], 38 | 'quotes': [ 39 | 'error', 40 | 'single' 41 | ], 42 | 'semi': [ 43 | 'error', 44 | 'always' 45 | ] 46 | } 47 | }; 48 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Created by https://www.toptal.com/developers/gitignore/api/node 2 | # Edit at https://www.toptal.com/developers/gitignore?templates=node 3 | 4 | ### Node ### 5 | # Logs 6 | logs 7 | *.log 8 | npm-debug.log* 9 | yarn-debug.log* 10 | yarn-error.log* 11 | lerna-debug.log* 12 | .pnpm-debug.log* 13 | 14 | # Diagnostic reports (https://nodejs.org/api/report.html) 15 | report.[0-9]*.[0-9]*.[0-9]*.[0-9]*.json 16 | 17 | # Runtime data 18 | pids 19 | *.pid 20 | *.seed 21 | *.pid.lock 22 | 23 | # Directory for instrumented libs generated by jscoverage/JSCover 24 | lib-cov 25 | 26 | # Coverage directory used by tools like istanbul 27 | coverage 28 | *.lcov 29 | 30 | # nyc test coverage 31 | .nyc_output 32 | 33 | # Grunt intermediate storage (https://gruntjs.com/creating-plugins#storing-task-files) 34 | .grunt 35 | 36 | # Bower dependency directory (https://bower.io/) 37 | bower_components 38 | 39 | # node-waf configuration 40 | .lock-wscript 41 | 42 | # Compiled binary addons (https://nodejs.org/api/addons.html) 43 | build/Release 44 | 45 | # Dependency directories 46 | node_modules/ 47 | jspm_packages/ 48 | 49 | # Snowpack dependency directory (https://snowpack.dev/) 50 | web_modules/ 51 | 52 | # TypeScript cache 53 | *.tsbuildinfo 54 | 55 | # Optional npm cache directory 56 | .npm 57 | 58 | # Optional eslint cache 59 | .eslintcache 60 | 61 | # Optional stylelint cache 62 | .stylelintcache 63 | 64 | # Microbundle cache 65 | .rpt2_cache/ 66 | .rts2_cache_cjs/ 67 | .rts2_cache_es/ 68 | .rts2_cache_umd/ 69 | 70 | # Optional REPL history 71 | .node_repl_history 72 | 73 | # Output of 'npm pack' 74 | *.tgz 75 | 76 | # Yarn Integrity file 77 | .yarn-integrity 78 | 79 | # dotenv environment variable files 80 | .env 81 | .env.development.local 82 | .env.test.local 83 | .env.production.local 84 | .env.local 85 | 86 | # parcel-bundler cache (https://parceljs.org/) 87 | .cache 88 | .parcel-cache 89 | 90 | # Next.js build output 91 | .next 92 | out 93 | 94 | # Nuxt.js build / generate output 95 | .nuxt 96 | dist 97 | 98 | # Gatsby files 99 | .cache/ 100 | # Comment in the public line in if your project uses Gatsby and not Next.js 101 | # https://nextjs.org/blog/next-9-1#public-directory-support 102 | # public 103 | 104 | # vuepress build output 105 | .vuepress/dist 106 | 107 | # vuepress v2.x temp and cache directory 108 | .temp 109 | 110 | # Docusaurus cache and generated files 111 | .docusaurus 112 
| 113 | # Serverless directories 114 | .serverless/ 115 | 116 | # FuseBox cache 117 | .fusebox/ 118 | 119 | # DynamoDB Local files 120 | .dynamodb/ 121 | 122 | # TernJS port file 123 | .tern-port 124 | 125 | # Stores VSCode versions used for testing VSCode extensions 126 | .vscode-test 127 | 128 | # yarn v2 129 | .yarn/cache 130 | .yarn/unplugged 131 | .yarn/build-state.yml 132 | .yarn/install-state.gz 133 | .pnp.* 134 | 135 | ### Node Patch ### 136 | # Serverless Webpack directories 137 | .webpack/ 138 | 139 | # Optional stylelint cache 140 | 141 | # SvelteKit build / generate output 142 | .svelte-kit 143 | 144 | # Ignore Fly.io configuration file 145 | fly.toml 146 | 147 | # End of https://www.toptal.com/developers/gitignore/api/node -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # syntax = docker/dockerfile:1 2 | 3 | # Adjust NODE_VERSION as desired 4 | ARG NODE_VERSION=18.9.0 5 | FROM node:${NODE_VERSION}-slim as base 6 | 7 | LABEL fly_launch_runtime="Node.js" 8 | 9 | # Node.js app lives here 10 | WORKDIR /app 11 | 12 | # Set production environment 13 | ENV NODE_ENV="production" 14 | 15 | 16 | # Throw-away build stage to reduce size of final image 17 | FROM base as build 18 | 19 | # Install packages needed to build node modules 20 | RUN apt-get update -qq && \ 21 | apt-get install -y build-essential pkg-config python-is-python3 22 | 23 | # Install node modules 24 | COPY --link package-lock.json package.json ./ 25 | RUN npm ci 26 | 27 | # Copy application code 28 | COPY --link . . 29 | 30 | 31 | # Final stage for app image 32 | FROM base 33 | 34 | # Copy built application 35 | COPY --from=build /app /app 36 | 37 | # Start the server by default, this can be overwritten at runtime 38 | EXPOSE 3000 39 | CMD [ "node", "app.js" ] 40 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Craig Dennis 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 
22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Call GPT: Generative AI Phone Calling 2 | 3 | Wouldn't it be neat if you could build an app that allowed you to chat with ChatGPT on the phone? 4 | 5 | Twilio gives you a superpower called [Media Streams](https://twilio.com/media-streams). Media Streams provides a WebSocket connection to both sides of a phone call. You can get audio streamed to you, process it, and send audio back. 6 | 7 | This app serves as a demo exploring two services: 8 | - [Deepgram](https://deepgram.com/) for Speech to Text and Text to Speech 9 | - [OpenAI](https://openai.com) for GPT prompt completion 10 | 11 | These services combine to create a voice application that is remarkably better at transcribing, understanding, and speaking than traditional IVR systems. 12 | 13 | Features: 14 | - 🏁 Returns responses with low latency, typically 1 second, by utilizing streaming. 15 | - ❗️ Allows the user to interrupt the GPT assistant and ask a different question. 16 | - 📔 Maintains chat history with GPT. 17 | - 🛠️ Allows the GPT to call external tools. 18 | 19 | ## Setting up for Development 20 | 21 | ### Prerequisites 22 | Sign up for the following services and get an API key for each: 23 | - [Deepgram](https://console.deepgram.com/signup) 24 | - [OpenAI](https://platform.openai.com/signup) 25 | 26 | If you're hosting the app locally, we also recommend using a tunneling service like [ngrok](https://ngrok.com) so that Twilio can forward audio to your app. 27 | 28 | ### 1. Start Ngrok 29 | Start an [ngrok](https://ngrok.com) tunnel for port `3000`: 30 | 31 | ```bash 32 | ngrok http 3000 33 | ``` 34 | Ngrok will give you a unique URL, like `abc123.ngrok.io`. Copy the URL without http:// or https://. You'll need this URL in the next step. 35 | 36 | ### 2. Configure Environment Variables 37 | Copy `.env.example` to `.env` and configure the following environment variables: 38 | 39 | ```bash 40 | # Your ngrok or server URL 41 | # E.g. 123.ngrok.io or myserver.fly.dev (exclude https://) 42 | SERVER="yourserverdomain.com" 43 | 44 | # Service API Keys 45 | OPENAI_API_KEY="sk-XXXXXX" 46 | DEEPGRAM_API_KEY="YOUR-DEEPGRAM-API-KEY" 47 | 48 | # Configure your Twilio credentials if you want 49 | # to make test calls using '$ npm run outbound'. 50 | TWILIO_ACCOUNT_SID="YOUR-ACCOUNT-SID" 51 | TWILIO_AUTH_TOKEN="YOUR-AUTH-TOKEN" 52 | FROM_NUMBER='+12223334444' 53 | YOUR_NUMBER='+14445556666' 54 | ``` 55 | 56 | ### 3. Install Dependencies with NPM 57 | Install the necessary packages: 58 | 59 | ```bash 60 | npm install 61 | ``` 62 | 63 | ### 4. Start Your Server in Development Mode 64 | Run the following command: 65 | ```bash 66 | npm run dev 67 | ``` 68 | This will start your app using `nodemon`, so any change to your code automatically restarts the server. 69 | 70 | ### 5. Configure an Incoming Phone Number 71 | 72 | Connect a phone number using the [Twilio Console](https://console.twilio.com/us1/develop/phone-numbers/manage/incoming). 73 | 74 | You can also use the Twilio CLI: 75 | 76 | ```bash 77 | twilio phone-numbers:update +1[your-twilio-number] --voice-url=https://your-server.ngrok.io/incoming 78 | ``` 79 | This configuration tells Twilio to send incoming call audio to your app when someone calls your number. The app responds to the incoming call webhook with a [Stream](https://www.twilio.com/docs/voice/twiml/stream) TwiML verb that connects an audio media stream to your WebSocket server.
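For reference, the TwiML built by the `/incoming` route in `app.js` renders roughly like this (the host is filled in from your `SERVER` environment variable, so `yourserverdomain.com` here is just a placeholder):

```xml
<Response>
  <Connect>
    <Stream url="wss://yourserverdomain.com/connection" />
  </Connect>
</Response>
```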
80 | 81 | ## Application Workflow 82 | CallGPT coordinates the data flow among several services, including Deepgram, OpenAI, and Twilio Media Streams: 83 | ![Call GPT Flow](https://github.com/twilio-labs/call-gpt/assets/1418949/0b7fcc0b-d5e5-4527-bc4c-2ffb8931139c) 84 | 85 | 86 | ## Modifying the ChatGPT Context & Prompt 87 | Within `gpt-service.js` you'll find the settings for the GPT's initial context and prompt. For example: 88 | 89 | ```javascript 90 | this.userContext = [ 91 | { "role": "system", "content": "You are an outbound sales representative selling Apple Airpods. You have a youthful and cheery personality. Keep your responses as brief as possible but make every attempt to keep the caller on the phone without being rude. Don't ask more than 1 question at a time. Don't make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous. Speak out all prices to include the currency. Please help them decide between the airpods, airpods pro and airpods max by asking questions like 'Do you prefer headphones that go in your ear or over the ear?'. If they are trying to choose between the airpods and airpods pro try asking them if they need noise canceling. Once you know which model they would like ask them how many they would like to purchase and try to get them to place an order. Add a '•' symbol every 5 to 10 words at natural pauses where your response can be split for text to speech." }, 92 | { "role": "assistant", "content": "Hello! I understand you're looking for a pair of AirPods, is that correct?" }, 93 | ], 94 | ``` 95 | ### About the `system` Attribute 96 | The `system` attribute is background information for the GPT. As you build your use-case, play around with modifying the context. A good starting point would be to imagine training a new employee on their first day and giving them the basics of how to help a customer. 97 | 98 | There are some context prompts that will likely be helpful to include by default. For example: 99 | 100 | - You have a [cheerful, wise, empathetic, etc.] personality. 101 | - Keep your responses as brief as possible but make every attempt to keep the caller on the phone without being rude. 102 | - Don't ask more than 1 question at a time. 103 | - Don't make assumptions about what values to plug into functions. 104 | - Ask for clarification if a user request is ambiguous. 105 | - Add a '•' symbol every 5 to 10 words at natural pauses where your response can be split for text to speech. 106 | 107 | These context items help shape a GPT so that it will act more naturally in a phone conversation. 108 | 109 | The `•` symbol context in particular helps the app break sentences into natural chunks. This speeds up text-to-speech processing so that users hear audio faster. 110 | 111 | ### About the `content` Attribute 112 | This attribute is your default conversation starter for the GPT. However, you could consider making it more complex and customizing it based on personalized user data. 113 | 114 | In this case, our bot will start off by saying, "Hello! I understand you're looking for a pair of AirPods, is that correct?" 115 | 116 | ## Using Function Calls with GPT 117 | You can use function calls to interact with external APIs and data sources.
For example, your GPT could check live inventory, check an item's price, or place an order. 118 | 119 | ### How Function Calling Works 120 | Function calling is handled within the `gpt-service.js` file in the following sequence: 121 | 122 | 1. `gpt-service` loads `function-manifest.js` and requires (imports) all functions defined there from the `functions` directory. Our app will call these functions later when GPT gives us a function name and parameters. 123 | ```javascript 124 | tools.forEach((tool) => { 125 | const functionName = tool.function.name; 126 | availableFunctions[functionName] = require(`../functions/${functionName}`); 127 | }); 128 | ``` 129 | 130 | 2. When we call GPT for completions, we also pass in the same `function-manifest` JSON as the tools parameter. This allows the GPT to "know" what functions are available: 131 | 132 | ```javascript 133 | const stream = await this.openai.chat.completions.create({ 134 | model: 'gpt-4', 135 | messages: this.userContext, 136 | tools, // <-- function-manifest definition 137 | stream: true, 138 | }); 139 | ``` 140 | 3. When the GPT responds, it will send us a stream of chunks for the text completion. The GPT will tell us whether each text chunk is something to say to the user, or if it's a tool call that our app needs to execute. This is indicated by the `deltas.tool_calls` key: 141 | ```javascript 142 | if (deltas.tool_calls) { 143 | // handle function calling 144 | } 145 | ``` 146 | 4. Once we have gathered all of the stream chunks about the tool call, our application can run the actual function code that we imported during the first step. The function name and parameters are provided by GPT: 147 | ```javascript 148 | const functionToCall = availableFunctions[functionName]; 149 | const functionResponse = functionToCall(functionArgs); 150 | ``` 151 | 5. As the final step, we add the function response data into the conversation context like this: 152 | 153 | ```javascript 154 | this.userContext.push({ 155 | role: 'function', 156 | name: functionName, 157 | content: functionResponse, 158 | }); 159 | ``` 160 | We then ask the GPT to generate another completion including what it knows from the function call. This allows the GPT to respond to the user with details gathered from the external data source. 161 | 162 | ### Adding Custom Function Calls 163 | You can have your GPT call external data sources by adding functions to the `/functions` directory. Follow these steps: 164 | 165 | 1. Create a function (e.g. `checkInventory.js` in `/functions`) 166 | 1. Within `checkInventory.js`, write a function called `checkInventory`. 167 | 1. Add information about your function to the `function-manifest.js` file. This information provides context to GPT about what arguments the function takes. 168 | 169 | **Important:** Your function's name must be the same as the file name that contains the function (excluding the .js extension). For example, our function is called `checkInventory` so we have named the file `checkInventory.js`, and set the `name` attribute in `function-manifest.js` to be `checkInventory`.
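For reference, here is the matching function file from this repo, `functions/checkInventory.js` — it simulates an inventory lookup and returns the result as a JSON string:

```javascript
async function checkInventory(functionArgs) {
  const model = functionArgs.model;
  console.log('GPT -> called checkInventory function');

  if (model?.toLowerCase().includes('pro')) {
    return JSON.stringify({ stock: 10 });
  } else if (model?.toLowerCase().includes('max')) {
    return JSON.stringify({ stock: 0 });
  } else {
    return JSON.stringify({ stock: 100 });
  }
}

module.exports = checkInventory;
```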
170 | 171 | Example function manifest entry: 172 | 173 | ```javascript 174 | { 175 | type: "function", 176 | function: { 177 | name: "checkInventory", 178 | say: "Let me check our inventory right now.", 179 | description: "Check the inventory of airpods, airpods pro or airpods max.", 180 | parameters: { 181 | type: "object", 182 | properties: { 183 | model: { 184 | type: "string", 185 | "enum": ["airpods", "airpods pro", "airpods max"], 186 | description: "The model of airpods, either the airpods, airpods pro or airpods max", 187 | }, 188 | }, 189 | required: ["model"], 190 | }, 191 | returns: { 192 | type: "object", 193 | properties: { 194 | stock: { 195 | type: "integer", 196 | description: "An integer containing how many of the model are currently in stock." 197 | } 198 | } 199 | } 200 | }, 201 | } 202 | ``` 203 | #### Using `say` in the Function Manifest 204 | The `say` key in the function manifest allows you to define a sentence for the app to speak to the user before calling a function. For example, if a function will take a long time to run, you might say "Give me a few moments to look that up for you..." 205 | 206 | ### Receiving Function Arguments 207 | When ChatGPT calls a function, it will provide an object with multiple attributes as a single argument. The parameters included in the object are based on the definition in your `function-manifest.js` file. 208 | 209 | In the `checkInventory` example above, `model` is a required argument, so the data passed to the function will be a single object like this: 210 | 211 | ```javascript 212 | { 213 | model: "airpods pro" 214 | } 215 | ``` 216 | For our `placeOrder` function, the arguments passed will look like this: 217 | 218 | ```javascript 219 | { 220 | model: "airpods pro", 221 | quantity: 10 222 | } 223 | ``` 224 | ### Returning Data to GPT 225 | Your function should always return a value: GPT will get confused when the function returns nothing, and may continue trying to call the function expecting an answer. If your function doesn't have any data to return to the GPT, you should still return a response with an instruction like "Tell the user that their request was processed successfully." This prevents the GPT from calling the function repeatedly and wasting tokens. 226 | 227 | Any data that you return to the GPT should match the expected format listed in the `returns` key of `function-manifest.js`. 228 | 229 | ## Utility Scripts for Placing Calls 230 | The `scripts` directory contains two files that allow you to place test calls: 231 | - `npm run inbound` will place an automated call from a Twilio number to your app and speak a script. You can adjust this to your use-case, e.g. as an automated test. 232 | - `npm run outbound` will place an outbound call that connects to your app. This can be useful if you want the app to call your phone so that you can manually test it. 233 | 234 | ## Using Eleven Labs for Text to Speech 235 | Replace the Deepgram API call and audio-buffer handling in `tts-service.js` with the following call to Eleven Labs. Note that sometimes Eleven Labs will hit a rate limit (especially on the free trial) and return 400 errors with no audio (or a clicking sound).
236 | 237 | ```javascript 238 | try { 239 | const response = await fetch( 240 | `https://api.elevenlabs.io/v1/text-to-speech/21m00Tcm4TlvDq8ikWAM/stream?output_format=ulaw_8000&optimize_streaming_latency=3`, 241 | { 242 | method: 'POST', 243 | headers: { 244 | 'xi-api-key': process.env.XI_API_KEY, 245 | 'Content-Type': 'application/json', 246 | accept: 'audio/wav', 247 | }, 248 | body: JSON.stringify({ 249 | model_id: process.env.XI_MODEL_ID, 250 | text: partialResponse, 251 | }), 252 | } 253 | ); 254 | 255 | if (response.status === 200) { 256 | const audioArrayBuffer = await response.arrayBuffer(); 257 | this.emit('speech', partialResponseIndex, Buffer.from(audioArrayBuffer).toString('base64'), partialResponse, interactionCount); 258 | } else { 259 | console.log('Eleven Labs Error:'); 260 | console.log(response); 261 | } 262 | } catch (err) { 263 | console.error('Error occurred in XI Labs TextToSpeech service'); 264 | console.error(err); 265 | } 266 | ``` 267 | 268 | 269 | ## Testing with Jest 270 | Repeatedly calling the app can be a time-consuming way to test your tool function calls. This project contains example unit tests that can help you test your functions without relying on the GPT to call them. 271 | 272 | Simple example tests are available in the `/test` directory. To run them, simply run `npm run test`. 273 | 274 | ## Deploy via Fly.io 275 | Fly.io is a hosting service similar to Heroku that simplifies the deployment process. Because Twilio Media Streams are sent and received from us-east-1, it's recommended to choose Fly's Ashburn, VA (IAD) region. 276 | 277 | > Deploying to Fly.io is not required to try the app, but can be helpful if your home internet speed is variable. 278 | 279 | Modify the app name in `fly.toml` to be a unique value (this must be globally unique).
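For example (the app name below is hypothetical — pick your own unique one):

```toml
app = 'my-callgpt-demo'
```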
280 | 281 | Deploy the app using the Fly.io CLI: 282 | ```bash 283 | fly launch 284 | 285 | fly deploy 286 | ``` 287 | 288 | Import your secrets from your .env file to your deployed app: 289 | ```bash 290 | fly secrets import < .env 291 | ``` 292 | -------------------------------------------------------------------------------- /app.js: -------------------------------------------------------------------------------- 1 | require('dotenv').config(); 2 | require('colors'); 3 | 4 | const express = require('express'); 5 | const ExpressWs = require('express-ws'); 6 | 7 | const { GptService } = require('./services/gpt-service'); 8 | const { StreamService } = require('./services/stream-service'); 9 | const { TranscriptionService } = require('./services/transcription-service'); 10 | const { TextToSpeechService } = require('./services/tts-service'); 11 | const { recordingService } = require('./services/recording-service'); 12 | 13 | const VoiceResponse = require('twilio').twiml.VoiceResponse; 14 | 15 | const app = express(); 16 | ExpressWs(app); 17 | 18 | const PORT = process.env.PORT || 3000; 19 | 20 | app.post('/incoming', (req, res) => { 21 | try { 22 | const response = new VoiceResponse(); 23 | const connect = response.connect(); 24 | connect.stream({ url: `wss://${process.env.SERVER}/connection` }); 25 | 26 | res.type('text/xml'); 27 | res.end(response.toString()); 28 | } catch (err) { 29 | console.log(err); 30 | } 31 | }); 32 | 33 | app.ws('/connection', (ws) => { 34 | try { 35 | ws.on('error', console.error); 36 | // Filled in from start message 37 | let streamSid; 38 | let callSid; 39 | 40 | const gptService = new GptService(); 41 | const streamService = new StreamService(ws); 42 | const transcriptionService = new TranscriptionService(); 43 | const ttsService = new TextToSpeechService({}); 44 | 45 | let marks = []; 46 | let interactionCount = 0; 47 | 48 | // Incoming from MediaStream 49 | ws.on('message', function message(data) { 50 | const msg = JSON.parse(data); 51 | if (msg.event === 'start') { 52 | streamSid = msg.start.streamSid; 53 | callSid = msg.start.callSid; 54 | 55 | streamService.setStreamSid(streamSid); 56 | gptService.setCallSid(callSid); 57 | 58 | // Set RECORDING_ENABLED='true' in .env to record calls 59 | recordingService(ttsService, callSid).then(() => { 60 | console.log(`Twilio -> Starting Media Stream for ${streamSid}`.underline.red); 61 | ttsService.generate({partialResponseIndex: null, partialResponse: 'Hello! 
I understand you\'re looking for a pair of AirPods, is that correct?'}, 0); 62 | }); 63 | } else if (msg.event === 'media') { 64 | transcriptionService.send(msg.media.payload); 65 | } else if (msg.event === 'mark') { 66 | const label = msg.mark.name; 67 | console.log(`Twilio -> Audio completed mark (${msg.sequenceNumber}): ${label}`.red); 68 | marks = marks.filter(m => m !== msg.mark.name); 69 | } else if (msg.event === 'stop') { 70 | console.log(`Twilio -> Media stream ${streamSid} ended.`.underline.red); 71 | } 72 | }); 73 | 74 | transcriptionService.on('utterance', async (text) => { 75 | // This is a bit of a hack to filter out empty utterances 76 | if(marks.length > 0 && text?.length > 5) { 77 | console.log('Twilio -> Interruption, Clearing stream'.red); 78 | ws.send( 79 | JSON.stringify({ 80 | streamSid, 81 | event: 'clear', 82 | }) 83 | ); 84 | } 85 | }); 86 | 87 | transcriptionService.on('transcription', async (text) => { 88 | if (!text) { return; } 89 | console.log(`Interaction ${interactionCount} – STT -> GPT: ${text}`.yellow); 90 | gptService.completion(text, interactionCount); 91 | interactionCount += 1; 92 | }); 93 | 94 | gptService.on('gptreply', async (gptReply, icount) => { 95 | console.log(`Interaction ${icount}: GPT -> TTS: ${gptReply.partialResponse}`.green ); 96 | ttsService.generate(gptReply, icount); 97 | }); 98 | 99 | ttsService.on('speech', (responseIndex, audio, label, icount) => { 100 | console.log(`Interaction ${icount}: TTS -> TWILIO: ${label}`.blue); 101 | 102 | streamService.buffer(responseIndex, audio); 103 | }); 104 | 105 | streamService.on('audiosent', (markLabel) => { 106 | marks.push(markLabel); 107 | }); 108 | } catch (err) { 109 | console.log(err); 110 | } 111 | }); 112 | 113 | app.listen(PORT); 114 | console.log(`Server running on port ${PORT}`); 115 | -------------------------------------------------------------------------------- /fly.toml.example: -------------------------------------------------------------------------------- 1 | # fly.toml app configuration file generated for cweems-genai-phone-call on 2024-03-01T14:37:33-08:00 2 | # 3 | # See https://fly.io/docs/reference/configuration/ for information about how to use this file. 4 | # 5 | 6 | app = '' 7 | 8 | # Recommend IAD for proximity to Twilio's Media Stream Servers. 9 | primary_region = 'iad' 10 | 11 | [build] 12 | 13 | [http_service] 14 | internal_port = 3000 15 | force_https = true 16 | auto_stop_machines = true 17 | auto_start_machines = true 18 | 19 | # Recommend keeping a machine running to reduce spin-up time 20 | # note this will increase hosting costs. 
21 | min_machines_running = 1 22 | processes = ['app'] 23 | 24 | [[vm]] 25 | memory = '1gb' 26 | cpu_kind = 'shared' 27 | cpus = 1 28 | -------------------------------------------------------------------------------- /functions/checkInventory.js: -------------------------------------------------------------------------------- 1 | async function checkInventory(functionArgs) { 2 | const model = functionArgs.model; 3 | console.log('GPT -> called checkInventory function'); 4 | 5 | if (model?.toLowerCase().includes('pro')) { 6 | return JSON.stringify({ stock: 10 }); 7 | } else if (model?.toLowerCase().includes('max')) { 8 | return JSON.stringify({ stock: 0 }); 9 | } else { 10 | return JSON.stringify({ stock: 100 }); 11 | } 12 | } 13 | 14 | module.exports = checkInventory; -------------------------------------------------------------------------------- /functions/checkPrice.js: -------------------------------------------------------------------------------- 1 | async function checkPrice(functionArgs) { 2 | let model = functionArgs.model; 3 | console.log('GPT -> called checkPrice function'); 4 | if (model?.toLowerCase().includes('pro')) { 5 | return JSON.stringify({ price: 249 }); 6 | } else if (model?.toLowerCase().includes('max')) { 7 | return JSON.stringify({ price: 549 }); 8 | } else { 9 | return JSON.stringify({ price: 149 }); 10 | } 11 | } 12 | 13 | module.exports = checkPrice; -------------------------------------------------------------------------------- /functions/function-manifest.js: -------------------------------------------------------------------------------- 1 | // create metadata for all the available functions to pass to completions API 2 | const tools = [ 3 | { 4 | type: 'function', 5 | function: { 6 | name: 'checkInventory', 7 | say: 'Let me check our inventory right now.', 8 | description: 'Check the inventory of airpods, airpods pro or airpods max.', 9 | parameters: { 10 | type: 'object', 11 | properties: { 12 | model: { 13 | type: 'string', 14 | 'enum': ['airpods', 'airpods pro', 'airpods max'], 15 | description: 'The model of airpods, either the airpods, airpods pro or airpods max', 16 | }, 17 | }, 18 | required: ['model'], 19 | }, 20 | returns: { 21 | type: 'object', 22 | properties: { 23 | stock: { 24 | type: 'integer', 25 | description: 'An integer containing how many of the model are currently in stock.' 
26 | } 27 | } 28 | } 29 | }, 30 | }, 31 | { 32 | type: 'function', 33 | function: { 34 | name: 'checkPrice', 35 | say: 'Let me check the price, one moment.', 36 | description: 'Check the price of given model of airpods, airpods pro or airpods max.', 37 | parameters: { 38 | type: 'object', 39 | properties: { 40 | model: { 41 | type: 'string', 42 | 'enum': ['airpods', 'airpods pro', 'airpods max'], 43 | description: 'The model of airpods, either the airpods, airpods pro or airpods max', 44 | }, 45 | }, 46 | required: ['model'], 47 | }, 48 | returns: { 49 | type: 'object', 50 | properties: { 51 | price: { 52 | type: 'integer', 53 | description: 'The price of the model' 54 | } 55 | } 56 | } 57 | }, 58 | }, 59 | { 60 | type: 'function', 61 | function: { 62 | name: 'placeOrder', 63 | say: 'All right, I\'m just going to ring that up in our system.', 64 | description: 'Places an order for a set of airpods.', 65 | parameters: { 66 | type: 'object', 67 | properties: { 68 | model: { 69 | type: 'string', 70 | 'enum': ['airpods', 'airpods pro'], 71 | description: 'The model of airpods, either the regular or pro', 72 | }, 73 | quantity: { 74 | type: 'integer', 75 | description: 'The number of airpods they want to order', 76 | }, 77 | }, 78 | required: ['model', 'quantity'], 79 | }, 80 | returns: { 81 | type: 'object', 82 | properties: { 83 | price: { 84 | type: 'integer', 85 | description: 'The total price of the order including tax' 86 | }, 87 | orderNumber: { 88 | type: 'integer', 89 | description: 'The order number associated with the order.' 90 | } 91 | } 92 | } 93 | }, 94 | }, 95 | { 96 | type: 'function', 97 | function: { 98 | name: 'transferCall', 99 | say: 'One moment while I transfer your call.', 100 | description: 'Transfers the customer to a live agent in case they request help from a real person.', 101 | parameters: { 102 | type: 'object', 103 | properties: { 104 | callSid: { 105 | type: 'string', 106 | description: 'The unique identifier for the active phone call.', 107 | }, 108 | }, 109 | required: ['callSid'], 110 | }, 111 | returns: { 112 | type: 'object', 113 | properties: { 114 | status: { 115 | type: 'string', 116 | description: 'Whether or not the customer call was successfully transferred' 117 | }, 118 | } 119 | } 120 | }, 121 | }, 122 | ]; 123 | 124 | module.exports = tools; -------------------------------------------------------------------------------- /functions/placeOrder.js: -------------------------------------------------------------------------------- 1 | async function placeOrder(functionArgs) { 2 | const {model, quantity} = functionArgs; 3 | console.log('GPT -> called placeOrder function'); 4 | 5 | // generate a random order number that is 7 digits 6 | const orderNum = Math.floor(Math.random() * (9999999 - 1000000 + 1) + 1000000); 7 | 8 | // check model and return the order number and price with 7.9% sales tax 9 | if (model?.toLowerCase().includes('pro')) { 10 | return JSON.stringify({ orderNumber: orderNum, price: Math.floor(quantity * 249 * 1.079)}); 11 | } else if (model?.toLowerCase().includes('max')) { 12 | return JSON.stringify({ orderNumber: orderNum, price: Math.floor(quantity * 549 * 1.079) }); 13 | } 14 | return JSON.stringify({ orderNumber: orderNum, price: Math.floor(quantity * 179 * 1.079) }); 15 | } 16 | 17 | module.exports = placeOrder; -------------------------------------------------------------------------------- /functions/transferCall.js: -------------------------------------------------------------------------------- 1 | require('dotenv').config(); 2 | 
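// Note: this function reads TRANSFER_NUMBER from the environment; that
// variable is not listed in .env.example, so add it to your .env yourself.
// It should be a dial-able phone number for a live agent (assumed E.164
// format, e.g. '+15556667777').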
3 | const transferCall = async function (call) { 4 | 5 | console.log('Transferring call', call.callSid); 6 | const accountSid = process.env.TWILIO_ACCOUNT_SID; 7 | const authToken = process.env.TWILIO_AUTH_TOKEN; 8 | const client = require('twilio')(accountSid, authToken); 9 | 10 | return await client.calls(call.callSid) 11 | .update({twiml: `<Response><Dial>${process.env.TRANSFER_NUMBER}</Dial></Response>`}) 12 | .then(() => { 13 | return 'The call was transferred successfully, say goodbye to the customer.'; 14 | }) 15 | .catch(() => { 16 | return 'The call was not transferred successfully, advise customer to call back later.'; 17 | }); 18 | }; 19 | 20 | module.exports = transferCall; -------------------------------------------------------------------------------- /package.json: -------------------------------------------------------------------------------- 1 | { 2 | "name": "genai-phone", 3 | "version": "1.1.0", 4 | "description": "", 5 | "main": "index.js", 6 | "scripts": { 7 | "inbound": "node ./scripts/inbound-call.js", 8 | "outbound": "node ./scripts/outbound-call.js", 9 | "test": "jest", 10 | "dev": "nodemon app.js", 11 | "start": "node app.js" 12 | }, 13 | "keywords": [], 14 | "author": "Charlie Weems", 15 | "license": "MIT", 16 | "dependencies": { 17 | "@deepgram/sdk": "^3.3.4", 18 | "colors": "^1.4.0", 19 | "dotenv": "^16.3.1", 20 | "express": "^4.19.2", 21 | "express-ws": "^5.0.2", 22 | "node-fetch": "^2.7.0", 23 | "openai": "^4.20.1", 24 | "twilio": "^4.19.3", 25 | "uuid": "^9.0.1", 26 | "wavefile": "^11.0.0" 27 | }, 28 | "devDependencies": { 29 | "@flydotio/dockerfile": "^0.4.11", 30 | "eslint": "^8.57.0", 31 | "jest": "^29.7.0", 32 | "nodemon": "^3.0.2" 33 | } 34 | } 35 | -------------------------------------------------------------------------------- /scripts/inbound-call.js: -------------------------------------------------------------------------------- 1 | require('dotenv').config(); 2 | 3 | // You can use this function to make a 4 | // test call to your application by running 5 | // npm run inbound 6 | async function makeInboundCall() { 7 | const VoiceResponse = require('twilio').twiml.VoiceResponse; 8 | const accountSid = process.env.TWILIO_ACCOUNT_SID; 9 | const authToken = process.env.TWILIO_AUTH_TOKEN; 10 | 11 | const client = require('twilio')(accountSid, authToken); 12 | 13 | let twiml = new VoiceResponse(); 14 | twiml.pause({ length: 10 }); 15 | twiml.say('Which models of airpods do you have available right now?'); 16 | twiml.pause({ length: 30 }); 17 | twiml.hangup(); 18 | 19 | console.log(twiml.toString()); 20 | 21 | await client.calls 22 | .create({ 23 | twiml: twiml.toString(), 24 | to: process.env.APP_NUMBER, 25 | from: process.env.FROM_NUMBER 26 | }) 27 | .then(call => console.log(call.sid)); 28 | } 29 | 30 | makeInboundCall(); -------------------------------------------------------------------------------- /scripts/outbound-call.js: -------------------------------------------------------------------------------- 1 | /* 2 | You can use this script to place an outbound call 3 | to your own mobile phone. 
4 | */ 5 | 6 | require('dotenv').config(); 7 | 8 | async function makeOutBoundCall() { 9 | const accountSid = process.env.TWILIO_ACCOUNT_SID; 10 | const authToken = process.env.TWILIO_AUTH_TOKEN; 11 | 12 | const client = require('twilio')(accountSid, authToken); 13 | 14 | await client.calls 15 | .create({ 16 | url: `https://${process.env.SERVER}/incoming`, 17 | to: process.env.YOUR_NUMBER, 18 | from: process.env.FROM_NUMBER 19 | }) 20 | .then(call => console.log(call.sid)); 21 | } 22 | 23 | makeOutBoundCall(); -------------------------------------------------------------------------------- /services/gpt-service.js: -------------------------------------------------------------------------------- 1 | require('colors'); 2 | const EventEmitter = require('events'); 3 | const OpenAI = require('openai'); 4 | const tools = require('../functions/function-manifest'); 5 | 6 | // Import all functions included in function manifest 7 | // Note: the function name and file name must be the same 8 | const availableFunctions = {}; 9 | tools.forEach((tool) => { 10 | let functionName = tool.function.name; 11 | availableFunctions[functionName] = require(`../functions/${functionName}`); 12 | }); 13 | 14 | class GptService extends EventEmitter { 15 | constructor() { 16 | super(); 17 | this.openai = new OpenAI(); 18 | this.userContext = [ 19 | { 'role': 'system', 'content': 'You are an outbound sales representative selling Apple Airpods. You have a youthful and cheery personality. Keep your responses as brief as possible but make every attempt to keep the caller on the phone without being rude. Don\'t ask more than 1 question at a time. Don\'t make assumptions about what values to plug into functions. Ask for clarification if a user request is ambiguous. Speak out all prices to include the currency. Please help them decide between the airpods, airpods pro and airpods max by asking questions like \'Do you prefer headphones that go in your ear or over the ear?\'. If they are trying to choose between the airpods and airpods pro try asking them if they need noise canceling. Once you know which model they would like ask them how many they would like to purchase and try to get them to place an order. You must add a \'•\' symbol every 5 to 10 words at natural pauses where your response can be split for text to speech.' }, 20 | { 'role': 'assistant', 'content': 'Hello! I understand you\'re looking for a pair of AirPods, is that correct?' }, 21 | ]; 22 | this.partialResponseIndex = 0; 23 | } 24 | 25 | // Add the callSid to the chat context in case 26 | // ChatGPT decides to transfer the call. 
27 | setCallSid (callSid) { 28 | this.userContext.push({ 'role': 'system', 'content': `callSid: ${callSid}` }); 29 | } 30 | 31 | validateFunctionArgs (args) { 32 | try { 33 | return JSON.parse(args); 34 | } catch (error) { 35 | console.log('Warning: Double function arguments returned by OpenAI:', args); 36 | // Seeing an error where sometimes we have two sets of args 37 | if (args.indexOf('{') != args.lastIndexOf('{')) { 38 | return JSON.parse(args.substring(args.indexOf('{'), args.indexOf('}') + 1)); 39 | } 40 | } 41 | } 42 | 43 | updateUserContext(name, role, text) { 44 | if (name !== 'user') { 45 | this.userContext.push({ 'role': role, 'name': name, 'content': text }); 46 | } else { 47 | this.userContext.push({ 'role': role, 'content': text }); 48 | } 49 | } 50 | 51 | async completion(text, interactionCount, role = 'user', name = 'user') { 52 | this.updateUserContext(name, role, text); 53 | 54 | // Step 1: Send user transcription to Chat GPT 55 | const stream = await this.openai.chat.completions.create({ 56 | model: 'gpt-4-1106-preview', 57 | messages: this.userContext, 58 | tools: tools, 59 | stream: true, 60 | }); 61 | 62 | let completeResponse = ''; 63 | let partialResponse = ''; 64 | let functionName = ''; 65 | let functionArgs = ''; 66 | let finishReason = ''; 67 | 68 | function collectToolInformation(deltas) { 69 | let name = deltas.tool_calls[0]?.function?.name || ''; 70 | if (name != '') { 71 | functionName = name; 72 | } 73 | let args = deltas.tool_calls[0]?.function?.arguments || ''; 74 | if (args != '') { 75 | // args are streamed as JSON string so we need to concatenate all chunks 76 | functionArgs += args; 77 | } 78 | } 79 | 80 | for await (const chunk of stream) { 81 | let content = chunk.choices[0]?.delta?.content || ''; 82 | let deltas = chunk.choices[0].delta; 83 | finishReason = chunk.choices[0].finish_reason; 84 | 85 | // Step 2: check if GPT wanted to call a function 86 | if (deltas.tool_calls) { 87 | // Step 3: Collect the tokens containing function data 88 | collectToolInformation(deltas); 89 | } 90 | 91 | // need to call function on behalf of Chat GPT with the arguments it parsed from the conversation 92 | if (finishReason === 'tool_calls') { 93 | // parse JSON string of args into JSON object 94 | 95 | const functionToCall = availableFunctions[functionName]; 96 | const validatedArgs = this.validateFunctionArgs(functionArgs); 97 | 98 | // Say a pre-configured message from the function manifest 99 | // before running the function. 
100 | const toolData = tools.find(tool => tool.function.name === functionName); 101 | const say = toolData.function.say; 102 | 103 | this.emit('gptreply', { 104 | partialResponseIndex: null, 105 | partialResponse: say 106 | }, interactionCount); 107 | 108 | let functionResponse = await functionToCall(validatedArgs); 109 | 110 | // Step 4: send the info on the function call and function response to GPT 111 | this.updateUserContext(functionName, 'function', functionResponse); 112 | 113 | // call the completion function again but pass in the function response to have OpenAI generate a new assistant response 114 | await this.completion(functionResponse, interactionCount, 'function', functionName); 115 | } else { 116 | // We use completeResponse for userContext 117 | completeResponse += content; 118 | // We use partialResponse to provide a chunk for TTS 119 | partialResponse += content; 120 | // Emit last partial response and add complete response to userContext 121 | if (content.trim().slice(-1) === '•' || finishReason === 'stop') { 122 | const gptReply = { 123 | partialResponseIndex: this.partialResponseIndex, 124 | partialResponse 125 | }; 126 | 127 | this.emit('gptreply', gptReply, interactionCount); 128 | this.partialResponseIndex++; 129 | partialResponse = ''; 130 | } 131 | } 132 | } 133 | this.userContext.push({'role': 'assistant', 'content': completeResponse}); 134 | console.log(`GPT -> user context length: ${this.userContext.length}`.green); 135 | } 136 | } 137 | 138 | module.exports = { GptService }; 139 | -------------------------------------------------------------------------------- /services/recording-service.js: -------------------------------------------------------------------------------- 1 | 2 | require('colors'); 3 | 4 | async function recordingService(ttsService, callSid) { 5 | try { 6 | if (process.env.RECORDING_ENABLED === 'true') { 7 | const client = require('twilio')(process.env.TWILIO_ACCOUNT_SID, process.env.TWILIO_AUTH_TOKEN); 8 | 9 | ttsService.generate({partialResponseIndex: null, partialResponse: 'This call will be recorded.'}, 0); 10 | const recording = await client.calls(callSid) 11 | .recordings 12 | .create({ 13 | recordingChannels: 'dual' 14 | }); 15 | 16 | console.log(`Recording Created: ${recording.sid}`.red); 17 | } 18 | } catch (err) { 19 | console.log(err); 20 | } 21 | } 22 | 23 | module.exports = { recordingService }; -------------------------------------------------------------------------------- /services/stream-service.js: -------------------------------------------------------------------------------- 1 | const EventEmitter = require('events'); 2 | const uuid = require('uuid'); 3 | 4 | class StreamService extends EventEmitter { 5 | constructor(websocket) { 6 | super(); 7 | this.ws = websocket; 8 | this.expectedAudioIndex = 0; 9 | this.audioBuffer = {}; 10 | this.streamSid = ''; 11 | } 12 | 13 | setStreamSid (streamSid) { 14 | this.streamSid = streamSid; 15 | } 16 | 17 | buffer (index, audio) { 18 | // Escape hatch for intro message, which doesn't have an index 19 | if(index === null) { 20 | this.sendAudio(audio); 21 | } else if(index === this.expectedAudioIndex) { 22 | this.sendAudio(audio); 23 | this.expectedAudioIndex++; 24 | 25 | while(Object.prototype.hasOwnProperty.call(this.audioBuffer, this.expectedAudioIndex)) { 26 | const bufferedAudio = this.audioBuffer[this.expectedAudioIndex]; 27 | this.sendAudio(bufferedAudio); 28 | this.expectedAudioIndex++; 29 | } 30 | } else { 31 | this.audioBuffer[index] = audio; 32 | } 33 | } 34 | 35 | sendAudio 
(audio) { 36 | this.ws.send( 37 | JSON.stringify({ 38 | streamSid: this.streamSid, 39 | event: 'media', 40 | media: { 41 | payload: audio, 42 | }, 43 | }) 44 | ); 45 | // When the media completes you will receive a `mark` message with the label 46 | const markLabel = uuid.v4(); 47 | this.ws.send( 48 | JSON.stringify({ 49 | streamSid: this.streamSid, 50 | event: 'mark', 51 | mark: { 52 | name: markLabel 53 | } 54 | }) 55 | ); 56 | this.emit('audiosent', markLabel); 57 | } 58 | } 59 | 60 | module.exports = {StreamService}; -------------------------------------------------------------------------------- /services/transcription-service.js: -------------------------------------------------------------------------------- 1 | require('colors'); 2 | const { createClient, LiveTranscriptionEvents } = require('@deepgram/sdk'); 3 | const { Buffer } = require('node:buffer'); 4 | const EventEmitter = require('events'); 5 | 6 | 7 | class TranscriptionService extends EventEmitter { 8 | constructor() { 9 | super(); 10 | const deepgram = createClient(process.env.DEEPGRAM_API_KEY); 11 | this.dgConnection = deepgram.listen.live({ 12 | encoding: 'mulaw', 13 | sample_rate: '8000', 14 | model: 'nova-2', 15 | punctuate: true, 16 | interim_results: true, 17 | endpointing: 200, 18 | utterance_end_ms: 1000 19 | }); 20 | 21 | this.finalResult = ''; 22 | this.speechFinal = false; // used to determine if we have seen speech_final=true indicating that deepgram detected a natural pause in the speaker's speech. 23 | 24 | this.dgConnection.on(LiveTranscriptionEvents.Open, () => { 25 | this.dgConnection.on(LiveTranscriptionEvents.Transcript, (transcriptionEvent) => { 26 | const alternatives = transcriptionEvent.channel?.alternatives; 27 | let text = ''; 28 | if (alternatives) { 29 | text = alternatives[0]?.transcript; 30 | } 31 | 32 | // if we receive an UtteranceEnd and speech_final has not already happened then we should consider this the end of the human speech and emit the transcription 33 | if (transcriptionEvent.type === 'UtteranceEnd') { 34 | if (!this.speechFinal) { 35 | console.log(`UtteranceEnd received before speechFinal, emit the text collected so far: ${this.finalResult}`.yellow); 36 | this.emit('transcription', this.finalResult); 37 | return; 38 | } else { 39 | console.log('STT -> Speech was already final when UtteranceEnd received'.yellow); 40 | return; 41 | } 42 | } 43 | 44 | // console.log(text, "is_final: ", transcription?.is_final, "speech_final: ", transcription.speech_final); 45 | // if is_final that means that this chunk of the transcription is accurate and we need to add it to the finalResult 46 | if (transcriptionEvent.is_final === true && text.trim().length > 0) { 47 | this.finalResult += ` ${text}`; 48 | // if speech_final and is_final that means this text is accurate and it's a natural pause in the speaker's speech. 
We need to send this to the assistant for processing 49 | if (transcriptionEvent.speech_final === true) { 50 | this.speechFinal = true; // this will prevent an utterance end which shows up after speechFinal from sending another response 51 | this.emit('transcription', this.finalResult); 52 | this.finalResult = ''; 53 | } else { 54 | // if we receive a message without speech_final, reset speechFinal to false; this will allow any subsequent utteranceEnd messages to properly indicate the end of a message 55 | this.speechFinal = false; 56 | } 57 | } else { 58 | this.emit('utterance', text); 59 | } 60 | }); 61 | 62 | this.dgConnection.on(LiveTranscriptionEvents.Error, (error) => { 63 | console.error('STT -> deepgram error'); 64 | console.error(error); 65 | }); 66 | 67 | this.dgConnection.on(LiveTranscriptionEvents.Warning, (warning) => { 68 | console.error('STT -> deepgram warning'); 69 | console.error(warning); 70 | }); 71 | 72 | this.dgConnection.on(LiveTranscriptionEvents.Metadata, (metadata) => { 73 | console.error('STT -> deepgram metadata'); 74 | console.error(metadata); 75 | }); 76 | 77 | this.dgConnection.on(LiveTranscriptionEvents.Close, () => { 78 | console.log('STT -> Deepgram connection closed'.yellow); 79 | }); 80 | }); 81 | } 82 | 83 | /** 84 | * Send the payload to Deepgram 85 | * @param {String} payload A base64 MULAW/8000 audio stream 86 | */ 87 | send(payload) { 88 | if (this.dgConnection.getReadyState() === 1) { 89 | this.dgConnection.send(Buffer.from(payload, 'base64')); 90 | } 91 | } 92 | } 93 | 94 | module.exports = { TranscriptionService }; -------------------------------------------------------------------------------- /services/tts-service.js: -------------------------------------------------------------------------------- 1 | require('dotenv').config(); 2 | const { Buffer } = require('node:buffer'); 3 | const EventEmitter = require('events'); 4 | const fetch = require('node-fetch'); 5 | 6 | class TextToSpeechService extends EventEmitter { 7 | constructor() { 8 | super(); 9 | this.nextExpectedIndex = 0; 10 | this.speechBuffer = {}; 11 | } 12 | 13 | async generate(gptReply, interactionCount) { 14 | const { partialResponseIndex, partialResponse } = gptReply; 15 | 16 | if (!partialResponse) { return; } 17 | 18 | try { 19 | const response = await fetch( 20 | `https://api.deepgram.com/v1/speak?model=${process.env.VOICE_MODEL}&encoding=mulaw&sample_rate=8000&container=none`, 21 | { 22 | method: 'POST', 23 | headers: { 24 | 'Authorization': `Token ${process.env.DEEPGRAM_API_KEY}`, 25 | 'Content-Type': 'application/json', 26 | }, 27 | body: JSON.stringify({ 28 | text: partialResponse, 29 | }), 30 | } 31 | ); 32 | 33 | if (response.status === 200) { 34 | try { 35 | const blob = await response.blob(); 36 | const audioArrayBuffer = await blob.arrayBuffer(); 37 | const base64String = Buffer.from(audioArrayBuffer).toString('base64'); 38 | this.emit('speech', partialResponseIndex, base64String, partialResponse, interactionCount); 39 | } catch (err) { 40 | console.log(err); 41 | } 42 | } else { 43 | console.log('Deepgram TTS error:'); 44 | console.log(response); 45 | } 46 | } catch (err) { 47 | console.error('Error occurred in TextToSpeech service'); 48 | console.error(err); 49 | } 50 | } 51 | } 52 | 53 | module.exports = { TextToSpeechService }; -------------------------------------------------------------------------------- /test/checkInventory.test.js: -------------------------------------------------------------------------------- 1 | const checkInventory = 
require('../functions/checkInventory'); 2 | 3 | test('Expect Airpods Pro to have 10 units', async () => { 4 | expect(await checkInventory({model: 'airpods pro'})).toBe('{"stock":10}'); 5 | }); 6 | 7 | test('Expect Airpods Max to have 0 units', async () => { 8 | expect(await checkInventory({model: 'airpods max'})).toBe('{"stock":0}'); 9 | }); 10 | 11 | test('Expect all other values to have 100 units', async () => { 12 | expect(await checkInventory({model: 'anything'})).toBe('{"stock":100}'); 13 | }); -------------------------------------------------------------------------------- /test/checkPrice.test.js: -------------------------------------------------------------------------------- 1 | const checkPrice = require('../functions/checkPrice'); 2 | 3 | test('Expect Airpods Pro to cost $249', async () => { 4 | expect(await checkPrice({model: 'airpods pro'})).toBe('{"price":249}'); 5 | }); 6 | 7 | test('Expect Airpods Max to cost $549', async () => { 8 | expect(await checkPrice({model: 'airpods max'})).toBe('{"price":549}'); 9 | }); 10 | 11 | test('Expect all other models to cost $149', async () => { 12 | expect(await checkPrice({model: 'anything'})).toBe('{"price":149}'); 13 | }); -------------------------------------------------------------------------------- /test/placeOrder.test.js: -------------------------------------------------------------------------------- 1 | const placeOrder = require('../functions/placeOrder'); 2 | 3 | test('Expect placeOrder to return an object with a price and order number', async () => { 4 | const order = JSON.parse(await placeOrder({model: 'airpods pro', quantity: 10})); 5 | 6 | expect(order).toHaveProperty('orderNumber'); 7 | expect(order).toHaveProperty('price'); 8 | }); -------------------------------------------------------------------------------- /test/transferCall.test.js: -------------------------------------------------------------------------------- 1 | require('dotenv').config(); 2 | const setTimeout = require('timers/promises').setTimeout; 3 | const transferCall = require('../functions/transferCall'); 4 | 5 | test('Expect transferCall to successfully redirect call', async () => { 6 | 7 | async function makeOutBoundCall() { 8 | const accountSid = process.env.TWILIO_ACCOUNT_SID; 9 | const authToken = process.env.TWILIO_AUTH_TOKEN; 10 | 11 | const client = require('twilio')(accountSid, authToken); 12 | 13 | const sid = await client.calls 14 | .create({ 15 | url: `https://${process.env.SERVER}/incoming`, 16 | to: process.env.YOUR_NUMBER, 17 | from: process.env.FROM_NUMBER 18 | }) 19 | .then(call => call.sid); 20 | 21 | return sid; 22 | } 23 | 24 | const callSid = await makeOutBoundCall(); 25 | console.log(callSid); 26 | await setTimeout(10000); 27 | 28 | const transferResult = await transferCall({ callSid }); 29 | 30 | expect(transferResult).toBe('The call was transferred successfully, say goodbye to the customer.'); 31 | }, 20000); --------------------------------------------------------------------------------