├── .env.example
├── logo.png
├── requirements.txt
├── start.sh
├── LICENSE
├── .gitignore
├── README.md
└── app.py
/.env.example:
--------------------------------------------------------------------------------
1 | WHATSAPP_TOKEN=
2 | VERIFY_TOKEN=
3 | OPENAI_API_KEY=
--------------------------------------------------------------------------------
/logo.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/gustavz/whatsbot/HEAD/logo.png
--------------------------------------------------------------------------------
/requirements.txt:
--------------------------------------------------------------------------------
1 | flask==2.2.3
2 | openai==0.27.2
3 | python-dotenv==0.21.1
4 | gunicorn==20.1.0
5 | speechrecognition==3.9.0
6 | soundfile==0.12.1
7 | pydub==0.25.1
--------------------------------------------------------------------------------
/start.sh:
--------------------------------------------------------------------------------
1 | # For development use (simple logging, etc):
2 | pip3 install -r requirements.txt
3 | python3 app.py
4 | # For production use:
5 | # gunicorn app:app -w 1 --log-file -
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2023 gustavz
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 |
--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 |
6 | # C extensions
7 | *.so
8 |
9 | # Distribution / packaging
10 | .Python
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | wheels/
23 | share/python-wheels/
24 | *.egg-info/
25 | .installed.cfg
26 | *.egg
27 | MANIFEST
28 |
29 | # PyInstaller
30 | # Usually these files are written by a python script from a template
31 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
32 | *.manifest
33 | *.spec
34 |
35 | # Installer logs
36 | pip-log.txt
37 | pip-delete-this-directory.txt
38 |
39 | # Unit test / coverage reports
40 | htmlcov/
41 | .tox/
42 | .nox/
43 | .coverage
44 | .coverage.*
45 | .cache
46 | nosetests.xml
47 | coverage.xml
48 | *.cover
49 | *.py,cover
50 | .hypothesis/
51 | .pytest_cache/
52 | cover/
53 |
54 | # Translations
55 | *.mo
56 | *.pot
57 |
58 | # Django stuff:
59 | *.log
60 | local_settings.py
61 | db.sqlite3
62 | db.sqlite3-journal
63 |
64 | # Flask stuff:
65 | instance/
66 | .webassets-cache
67 |
68 | # Scrapy stuff:
69 | .scrapy
70 |
71 | # Sphinx documentation
72 | docs/_build/
73 |
74 | # PyBuilder
75 | .pybuilder/
76 | target/
77 |
78 | # Jupyter Notebook
79 | .ipynb_checkpoints
80 |
81 | # IPython
82 | profile_default/
83 | ipython_config.py
84 |
85 | # pyenv
86 | # For a library or package, you might want to ignore these files since the code is
87 | # intended to run in multiple environments; otherwise, check them in:
88 | # .python-version
89 |
90 | # pipenv
91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
94 | # install all needed dependencies.
95 | #Pipfile.lock
96 |
97 | # poetry
98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
99 | # This is especially recommended for binary packages to ensure reproducibility, and is more
100 | # commonly ignored for libraries.
101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
102 | #poetry.lock
103 |
104 | # pdm
105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
106 | #pdm.lock
107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
108 | # in version control.
109 | # https://pdm.fming.dev/#use-with-ide
110 | .pdm.toml
111 |
112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
113 | __pypackages__/
114 |
115 | # Celery stuff
116 | celerybeat-schedule
117 | celerybeat.pid
118 |
119 | # SageMath parsed files
120 | *.sage.py
121 |
122 | # Environments
123 | .env
124 | .venv
125 | env/
126 | venv/
127 | ENV/
128 | env.bak/
129 | venv.bak/
130 |
131 | # Spyder project settings
132 | .spyderproject
133 | .spyproject
134 |
135 | # Rope project settings
136 | .ropeproject
137 |
138 | # mkdocs documentation
139 | /site
140 |
141 | # mypy
142 | .mypy_cache/
143 | .dmypy.json
144 | dmypy.json
145 |
146 | # Pyre type checker
147 | .pyre/
148 |
149 | # pytype static type analyzer
150 | .pytype/
151 |
152 | # Cython debug symbols
153 | cython_debug/
154 |
155 | # PyCharm
156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
158 | # and can be added to the global gitignore or merged into this file. For a more nuclear
159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
160 | #.idea/
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # WhatsBot
2 |
3 |
8 |
9 | > How DALL-E 2 imagines the project
10 |
11 | ## Getting-Started
12 | To create your own WhatsApp chatbot, all you need to do is:
13 | 1. Create an [OpenAI API](https://openai.com/product) account and generate an API key
14 | 2. Create a [Meta developer](https://developers.facebook.com/) account
15 | 3. Create a [WhatsApp Business app](https://developers.facebook.com/apps) and generate a token
16 | 4. [Remix the app on Glitch](https://glitch.com/~whatsapp-openai-webhook-python) and set the environment variables
17 | 5. Link your Glitch app as a [webhook](https://developers.facebook.com/docs/whatsapp/cloud-api/guides/set-up-webhooks) to your WhatsApp app
18 | 6. Last but not least: star [this project](https://github.com/gustavz/whatsbot) on GitHub ❤️
19 |
20 |
21 | ## TL;DR
22 |
23 | > How ChatGPT describes the project
24 |
25 | This program is a webhook implemented in Python and Flask. It receives incoming messages from WhatsApp, processes them, and responds with the help of the OpenAI API. The messages can be either text or audio, and the response is generated with OpenAI's GPT-3.5 model. The critical functions of the program include handling incoming messages of different types, converting audio files to text, and sending responses back to the user.
26 |
27 | ## Learnables
28 |
29 | - Python programming language
30 | - OpenAI API
31 | - WhatsApp Cloud API
32 | - Webhooks
33 | - `Flask` web framework
34 | - `PyDub` for audio manipulation
35 | - `SpeechRecognition` for speech to text conversion
36 | - `Soundfile` for working with audio files
37 |
38 | ## Code Deep-Dive
39 |
40 | The program consists of several functions to handle different aspects of the process.
41 |
42 | As a first step, `verify` verifies that the webhook connection is trustworthy.
43 | It does so by checking that the verification token and the correct mode are set in the request.
44 |
45 | ```python
46 | def verify(request):
47 | # Parse params from the webhook verification request
48 | mode = request.args.get("hub.mode")
49 | token = request.args.get("hub.verify_token")
50 | challenge = request.args.get("hub.challenge")
51 | # Check if a token and mode were sent
52 | if mode and token:
53 | # Check the mode and token sent are correct
54 | if mode == "subscribe" and token == verify_token:
55 | # Respond with 200 OK and challenge token from the request
56 | print("WEBHOOK_VERIFIED")
57 | return challenge, 200
58 | else:
59 | # Responds with '403 Forbidden' if verify tokens do not match
60 | print("VERIFICATION_FAILED")
61 | return jsonify({"status": "error", "message": "Verification failed"}), 403
62 | else:
63 | # Responds with '400 Bad Request' if verify tokens do not match
64 | print("MISSING_PARAMETER")
65 | return jsonify({"status": "error", "message": "Missing parameters"}), 400
66 | ```
67 |
68 | `handle_message` processes incoming WhatsApp messages and handles errors.
69 |
70 | ```python
71 | def handle_message(request):
72 | # Parse Request body in json format
73 | body = request.get_json()
74 | print(f"request body: {body}")
75 |
76 | try:
77 | # info on WhatsApp text message payload:
78 | # https://developers.facebook.com/docs/whatsapp/cloud-api/webhooks/payload-examples#text-messages
79 | if body.get("object"):
80 | if (
81 | body.get("entry")
82 | and body["entry"][0].get("changes")
83 | and body["entry"][0]["changes"][0].get("value")
84 | and body["entry"][0]["changes"][0]["value"].get("messages")
85 | and body["entry"][0]["changes"][0]["value"]["messages"][0]
86 | ):
87 | handle_whatsapp_message(body)
88 | return jsonify({"status": "ok"}), 200
89 | else:
90 | # if the request is not a WhatsApp API event, return an error
91 | return (
92 | jsonify({"status": "error", "message": "Not a WhatsApp API event"}),
93 | 404,
94 | )
95 | # catch all other errors and return an internal server error
96 | except Exception as e:
97 | print(f"unknown error: {e}")
98 | return jsonify({"status": "error", "message": str(e)}), 500
99 | ```
100 |
101 | `handle_whatsapp_message` is the main function that handles incoming messages of different types, and based on the type of the message, it calls the relevant function.
102 |
103 | ```python
104 | def handle_whatsapp_message(body):
105 | message = body["entry"][0]["changes"][0]["value"]["messages"][0]
106 | if message["type"] == "text":
107 | message_body = message["text"]["body"]
108 | elif message["type"] == "audio":
109 | audio_id = message["audio"]["id"]
110 | message_body = handle_audio_message(audio_id)
111 | response = make_openai_request(message_body, message["from"])
112 | send_whatsapp_message(body, response)
113 | ```
114 |
115 | For example, if the message is an audio file, it calls `handle_audio_message` to convert the file to text.
116 |
117 | ```python
118 | def handle_audio_message(audio_id):
119 | audio_url = get_media_url(audio_id)
120 | audio_bytes = download_media_file(audio_url)
121 | audio_data = convert_audio_bytes(audio_bytes)
122 | audio_text = recognize_audio(audio_data)
123 | message = (
124 | "Please summarize the following message in its original language "
125 | f"as a list of bullet-points: {audio_text}"
126 | )
127 | return message
128 | ```
129 |
130 | `convert_audio_bytes` is another critical function that converts the audio file into a format that can be processed by the `speech_recognition` library.
131 | The function first converts the OGG file into WAV format and then converts it into an AudioData object.
132 |
133 | ```python
134 | def convert_audio_bytes(audio_bytes):
135 | ogg_audio = pydub.AudioSegment.from_ogg(io.BytesIO(audio_bytes))
136 | ogg_audio = ogg_audio.set_sample_width(4)
137 | wav_bytes = ogg_audio.export(format="wav").read()
138 | audio_data, sample_rate = sf.read(io.BytesIO(wav_bytes), dtype="int32")
139 | sample_width = audio_data.dtype.itemsize
140 | print(f"audio sample_rate:{sample_rate}, sample_width:{sample_width}")
141 | audio = sr.AudioData(audio_data, sample_rate, sample_width)
142 | return audio
143 | ```
144 |
145 | The `make_openai_request` function makes a request to the OpenAI API and generates a response based on the user's message and the previous conversation log.
146 | It uses the `openai.ChatCompletion.create` method to generate the response.
147 |
148 | ```python
149 | def make_openai_request(message, from_number):
150 | try:
151 | message_log = update_message_log(message, from_number, "user")
152 | response = openai.ChatCompletion.create(
153 | model="gpt-3.5-turbo",
154 | messages=message_log,
155 | temperature=0.7,
156 | )
157 | response_message = response.choices[0].message.content
158 | print(f"openai response: {response_message}")
159 | update_message_log(response_message, from_number, "assistant")
160 | except Exception as e:
161 | print(f"openai error: {e}")
162 | response_message = "Sorry, the OpenAI API is currently overloaded or offline. Please try again later."
163 | remove_last_message_from_log(from_number)
164 | return response_message
165 | ```
166 |
167 | The `send_whatsapp_message` function creates the Cloud API request and sends the response back to the user via WhatsApp.
168 |
169 | ```python
170 | def send_whatsapp_message(body, message):
171 | value = body["entry"][0]["changes"][0]["value"]
172 | phone_number_id = value["metadata"]["phone_number_id"]
173 | from_number = value["messages"][0]["from"]
174 | headers = {
175 | "Authorization": f"Bearer {whatsapp_token}",
176 | "Content-Type": "application/json",
177 | }
178 | url = "https://graph.facebook.com/v15.0/" + phone_number_id + "/messages"
179 | data = {
180 | "messaging_product": "whatsapp",
181 | "to": from_number,
182 | "type": "text",
183 | "text": {"body": message},
184 | }
185 | response = requests.post(url, json=data, headers=headers)
186 | print(f"whatsapp message response: {response.json()}")
187 | response.raise_for_status()
188 | ```
--------------------------------------------------------------------------------
/app.py:
--------------------------------------------------------------------------------
1 | import io
2 | import os
3 |
4 | import openai
5 | import pydub
6 | import requests
7 | import soundfile as sf
8 | import speech_recognition as sr
9 | from flask import Flask, jsonify, request
10 |
# Flask application object that the route decorators below attach to
app = Flask(__name__)


# OpenAI API key, read from the OPENAI_API_KEY environment variable
openai.api_key = os.environ.get("OPENAI_API_KEY")

# Access token for the WhatsApp business account app (WHATSAPP_TOKEN)
whatsapp_token = os.getenv("WHATSAPP_TOKEN")

# Verify token defined when configuring the webhook (VERIFY_TOKEN)
verify_token = os.getenv("VERIFY_TOKEN")

# Conversation history keyed by phone number, so that a chat can span
# multiple messages
message_log_dict = dict()


# Language used for speech-to-text recognition
# TODO: detect this automatically based on the user's language
LANGUGAGE = "en-US"
30 |
31 |
# get the media url from the media id
def get_media_url(media_id):
    """Look up the download URL of a WhatsApp media object.

    Args:
        media_id: Media id taken from an incoming message.

    Returns:
        The value of the "url" field of the Graph API response.

    Raises:
        requests.HTTPError: If the Graph API answers with an error status.
        KeyError: If a successful response carries no "url" field.
    """
    headers = {
        "Authorization": f"Bearer {whatsapp_token}",
    }
    url = f"https://graph.facebook.com/v16.0/{media_id}/"
    # A timeout keeps a stalled Graph API call from blocking the worker forever
    response = requests.get(url, headers=headers, timeout=30)
    # Surface the real HTTP error instead of a KeyError on an error payload
    response.raise_for_status()
    # Parse the body once and reuse it for both logging and the result
    payload = response.json()
    print(f"media id response: {payload}")
    return payload["url"]
41 |
42 |
# download the media file from the media url
def download_media_file(media_url):
    """Download the raw bytes of a media file.

    Args:
        media_url: Download URL as returned by ``get_media_url``.

    Returns:
        The response body as bytes.

    Raises:
        requests.HTTPError: If the download answers with an error status.
    """
    headers = {
        "Authorization": f"Bearer {whatsapp_token}",
    }
    # A timeout keeps a stalled download from blocking the worker forever
    response = requests.get(media_url, headers=headers, timeout=30)
    # Without this check an error body would be handed on as if it were audio
    response.raise_for_status()
    print(f"first 10 digits of the media file: {response.content[:10]}")
    return response.content
51 |
52 |
# convert ogg audio bytes to audio data which speechrecognition library can process
def convert_audio_bytes(audio_bytes):
    """Turn OGG-encoded bytes into an ``sr.AudioData`` object.

    The OGG input is decoded with pydub, set to a sample width of 4,
    exported as WAV, and read back with soundfile as int32 samples before
    being wrapped in ``sr.AudioData``.
    """
    segment = pydub.AudioSegment.from_ogg(io.BytesIO(audio_bytes)).set_sample_width(4)
    # Round-trip through an in-memory WAV so that soundfile can parse it
    wav_buffer = io.BytesIO(segment.export(format="wav").read())
    samples, sample_rate = sf.read(wav_buffer, dtype="int32")
    sample_width = samples.dtype.itemsize
    print(f"audio sample_rate:{sample_rate}, sample_width:{sample_width}")
    return sr.AudioData(samples, sample_rate, sample_width)
63 |
64 |
# run speech recognition on the audio data
def recognize_audio(audio_bytes):
    """Transcribe audio with ``recognize_google`` using the module language.

    Despite the parameter name, ``handle_audio_message`` passes in the
    ``sr.AudioData`` object produced by ``convert_audio_bytes``.
    """
    return sr.Recognizer().recognize_google(audio_bytes, language=LANGUGAGE)
70 |
71 |
# handle audio messages
def handle_audio_message(audio_id):
    """Build a summarization prompt from a WhatsApp audio message.

    The audio is resolved to a URL, downloaded, converted and transcribed;
    the transcript is then embedded in a fixed prompt that asks for a
    bullet-point summary.
    """
    # Pipeline: media id -> url -> raw bytes -> AudioData -> text
    transcript = recognize_audio(
        convert_audio_bytes(download_media_file(get_media_url(audio_id)))
    )
    return (
        "Please summarize the following message in its original language "
        f"as a list of bullet-points: {transcript}"
    )
83 |
84 |
# send the response as a WhatsApp message back to the user
def send_whatsapp_message(body, message):
    """Send ``message`` as a text reply to the sender of the incoming webhook.

    Args:
        body: Parsed webhook payload; the phone number id and the sender's
            number are read from its first entry/change/message.
        message: Text to send back.

    Raises:
        requests.HTTPError: If the Graph API answers with an error status.
    """
    value = body["entry"][0]["changes"][0]["value"]
    phone_number_id = value["metadata"]["phone_number_id"]
    from_number = value["messages"][0]["from"]
    headers = {
        "Authorization": f"Bearer {whatsapp_token}",
        "Content-Type": "application/json",
    }
    # NOTE(review): get_media_url talks to v16.0 while this uses v15.0 -
    # confirm whether the versions should be aligned
    url = f"https://graph.facebook.com/v15.0/{phone_number_id}/messages"
    data = {
        "messaging_product": "whatsapp",
        "to": from_number,
        "type": "text",
        "text": {"body": message},
    }
    # A timeout keeps a stalled Graph API call from blocking the worker forever
    response = requests.post(url, json=data, headers=headers, timeout=30)
    # A non-JSON error body must not hide the HTTP error raised below,
    # so fall back to the raw text for logging
    try:
        payload = response.json()
    except ValueError:
        payload = response.text
    print(f"whatsapp message response: {payload}")
    response.raise_for_status()
104 |
105 |
# create a message log for each phone number and return the current message log
def update_message_log(message, phone_number, role):
    """Append a message to the log of ``phone_number`` and return that log.

    A log that does not exist yet is created with the system prompt as its
    first entry. The returned list is the one stored in
    ``message_log_dict``, not a copy.
    """
    history = message_log_dict.setdefault(
        phone_number,
        [
            {
                "role": "system",
                "content": "You are a helpful assistant named WhatsBot.",
            }
        ],
    )
    history.append({"role": role, "content": message})
    return history
117 |
118 |
# remove last message from log if OpenAI request fails
def remove_last_message_from_log(phone_number):
    """Drop the most recent entry from the log of ``phone_number``."""
    history = message_log_dict[phone_number]
    del history[-1]
122 |
123 |
# make request to OpenAI
def make_openai_request(message, from_number):
    """Ask the chat model for a reply to ``message`` and return the reply text.

    The user message is appended to the sender's log before the request and
    the reply is appended afterwards. If anything fails, the error is
    printed, the last log entry is removed again, and a fixed apology text
    is returned instead.
    """
    try:
        history = update_message_log(message, from_number, "user")
        completion = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=history,
            temperature=0.7,
        )
        reply = completion.choices[0].message.content
        print(f"openai response: {reply}")
        update_message_log(reply, from_number, "assistant")
        return reply
    except Exception as error:
        print(f"openai error: {error}")
        # Roll the log back so the failed turn is not sent along next time
        remove_last_message_from_log(from_number)
        return "Sorry, the OpenAI API is currently overloaded or offline. Please try again later."
141 |
142 |
# handle WhatsApp messages of different type
def handle_whatsapp_message(body):
    """Answer the first message of a webhook payload via OpenAI.

    Text messages are forwarded as-is; audio messages are first turned into
    a summarization prompt by ``handle_audio_message``. Any other message
    type is logged and skipped, so no reply is sent for it.

    Args:
        body: Parsed webhook payload containing at least one message.
    """
    message = body["entry"][0]["changes"][0]["value"]["messages"][0]
    message_type = message.get("type")
    if message_type == "text":
        message_body = message["text"]["body"]
    elif message_type == "audio":
        message_body = handle_audio_message(message["audio"]["id"])
    else:
        # Without this branch message_body stayed unbound and the webhook
        # failed with an UnboundLocalError for images, stickers, etc.
        print(f"unsupported message type: {message_type}")
        return
    response = make_openai_request(message_body, message["from"])
    send_whatsapp_message(body, response)
153 |
154 |
# handle incoming webhook messages
def handle_message(request):
    """Process a webhook POST and build the HTTP response for it.

    Args:
        request: Flask request whose JSON body is the webhook payload.

    Returns:
        A ``(response, status)`` tuple: 200 when a message was handled,
        404 when the payload contains no WhatsApp message, and 500 with the
        error text when handling raised an exception.
    """
    # Parse Request body in json format
    body = request.get_json()
    print(f"request body: {body}")

    try:
        # info on WhatsApp text message payload:
        # https://developers.facebook.com/docs/whatsapp/cloud-api/webhooks/payload-examples#text-messages
        if (
            body.get("object")
            and body.get("entry")
            and body["entry"][0].get("changes")
            and body["entry"][0]["changes"][0].get("value")
            and body["entry"][0]["changes"][0]["value"].get("messages")
            and body["entry"][0]["changes"][0]["value"]["messages"][0]
        ):
            handle_whatsapp_message(body)
            return jsonify({"status": "ok"}), 200
        # Reached for every payload without a message, including one with no
        # "object" field, which previously fell through and returned None.
        # NOTE(review): payloads that carry no "messages" list also get this
        # 404 - confirm that is the intended answer for them.
        return (
            jsonify({"status": "error", "message": "Not a WhatsApp API event"}),
            404,
        )
    # catch all other errors and return an internal server error
    except Exception as e:
        print(f"unknown error: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500
184 |
185 |
# Required webhook verification for WhatsApp
# info on verification request payload:
# https://developers.facebook.com/docs/graph-api/webhooks/getting-started#verification-requests
def verify(request):
    """Answer the webhook verification request.

    Returns the challenge with 200 when the mode is "subscribe" and the
    token equals ``verify_token``, 403 when both parameters are present but
    do not match, and 400 when the mode or the token is missing.
    """
    # Parse params from the webhook verification request
    params = request.args
    mode = params.get("hub.mode")
    token = params.get("hub.verify_token")
    challenge = params.get("hub.challenge")

    # '400 Bad Request' when the mode or the token was not sent at all
    if not (mode and token):
        print("MISSING_PARAMETER")
        return jsonify({"status": "error", "message": "Missing parameters"}), 400

    # '403 Forbidden' when the mode or the verify token is wrong
    if mode != "subscribe" or token != verify_token:
        print("VERIFICATION_FAILED")
        return jsonify({"status": "error", "message": "Verification failed"}), 403

    # '200 OK' together with the challenge token from the request
    print("WEBHOOK_VERIFIED")
    return challenge, 200
209 |
210 |
# Homepage endpoint
@app.route("/", methods=["GET"])
def home():
    """Return a fixed welcome text showing that the app is up."""
    welcome_text = "WhatsApp OpenAI Webhook is listening!"
    return welcome_text
215 |
216 |
# /webhook endpoint, accepts POST and GET requests
@app.route("/webhook", methods=["POST", "GET"])
def webhook():
    """Dispatch by HTTP method: POST carries messages, GET is verification."""
    if request.method == "POST":
        return handle_message(request)
    if request.method == "GET":
        return verify(request)
224 |
225 |
# Endpoint that resets the message log
@app.route("/reset", methods=["GET"])
def reset():
    """Replace the message log with an empty one for all phone numbers."""
    global message_log_dict
    message_log_dict = dict()
    return "Message log resetted!"
232 |
233 |
# Start Flask's built-in server with debug mode and the reloader switched on
# when the file is run as a script
if __name__ == "__main__":
    app.run(use_reloader=True, debug=True)
236 |
--------------------------------------------------------------------------------