├── .env.example ├── logo.png ├── requirements.txt ├── start.sh ├── LICENSE ├── .gitignore ├── README.md └── app.py /.env.example: -------------------------------------------------------------------------------- 1 | WHATSAPP_TOKEN= 2 | VERIFY_TOKEN= 3 | OPENAI_API_KEY= -------------------------------------------------------------------------------- /logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/gustavz/whatsbot/HEAD/logo.png -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | flask==2.2.3 2 | openai==0.27.2 3 | python-dotenv==0.21.1 4 | gunicorn==20.1.0 5 | speechrecognition==3.9.0 6 | soundfile==0.12.1 7 | pydub==0.25.1 -------------------------------------------------------------------------------- /start.sh: -------------------------------------------------------------------------------- 1 | # For development use (simple logging, etc): 2 | pip3 install -r requirements.txt 3 | python3 app.py 4 | # For production use: 5 | # gunicorn app:app -w 1 --log-file - -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 gustavz 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of 
the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | # .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 
106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/#use-with-ide 110 | .pdm.toml 111 | 112 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 113 | __pypackages__/ 114 | 115 | # Celery stuff 116 | celerybeat-schedule 117 | celerybeat.pid 118 | 119 | # SageMath parsed files 120 | *.sage.py 121 | 122 | # Environments 123 | .env 124 | .venv 125 | env/ 126 | venv/ 127 | ENV/ 128 | env.bak/ 129 | venv.bak/ 130 | 131 | # Spyder project settings 132 | .spyderproject 133 | .spyproject 134 | 135 | # Rope project settings 136 | .ropeproject 137 | 138 | # mkdocs documentation 139 | /site 140 | 141 | # mypy 142 | .mypy_cache/ 143 | .dmypy.json 144 | dmypy.json 145 | 146 | # Pyre type checker 147 | .pyre/ 148 | 149 | # pytype static type analyzer 150 | .pytype/ 151 | 152 | # Cython debug symbols 153 | cython_debug/ 154 | 155 | # PyCharm 156 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 157 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 158 | # and can be added to the global gitignore or merged into this file. For a more nuclear 159 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 160 | #.idea/ -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # WhatsBot 2 | 3 |
4 | 5 | 6 | 7 |
8 | 9 | > How DALL-E 2 imagines the project 10 | 11 | ## Getting-Started 12 | To create your own WhatsApp chatbot, all you need to do is: 13 | 1. Create an [OpenAI API](https://openai.com/product) account and generate an API key 14 | 2. Create a [Meta developer](https://developers.facebook.com/) account 15 | 3. Create a [WhatsApp Business app](https://developers.facebook.com/apps) and generate a Token 16 | 4. [Remix the app on Glitch](https://glitch.com/~whatsapp-openai-webhook-python) and set the environment variables 17 | 5. Link your Glitch app as [webhook](https://developers.facebook.com/docs/whatsapp/cloud-api/guides/set-up-webhooks) to your WhatsApp app 18 | 6. Last but not least: star [this project](https://github.com/gustavz/whatsbot) on GitHub ❤️ 19 | 20 | 21 | ## TL;DR 22 | 23 | > How ChatGPT describes the project 24 | 25 | This program is a webhook implemented in Python and Flask. It receives incoming messages from WhatsApp, processes them, and responds with the help of the OpenAI API. The messages can be either text or audio, and the response is generated with OpenAI's GPT-3.5 model. The critical functions of the program include handling incoming messages of different types, converting audio files to text, and sending responses back to the user. 26 | 27 | ## Learnables 28 | 29 | - Python programming language 30 | - OpenAI API 31 | - WhatsApp Cloud API 32 | - Webhooks 33 | - `Flask` web framework 34 | - `PyDub` for audio manipulation 35 | - `SpeechRecognition` for speech to text conversion 36 | - `Soundfile` for working with audio files 37 | 38 | ## Code Deep-Dive 39 | 40 | The program consists of several functions to handle different aspects of the process. 41 | 42 | As a first step, `verify` verifies that the webhook connection is trustworthy.
43 | It does so by checking that the verfifcation token and correct mode are set in the request. 44 | 45 | ```python 46 | def verify(request): 47 | # Parse params from the webhook verification request 48 | mode = request.args.get("hub.mode") 49 | token = request.args.get("hub.verify_token") 50 | challenge = request.args.get("hub.challenge") 51 | # Check if a token and mode were sent 52 | if mode and token: 53 | # Check the mode and token sent are correct 54 | if mode == "subscribe" and token == verify_token: 55 | # Respond with 200 OK and challenge token from the request 56 | print("WEBHOOK_VERIFIED") 57 | return challenge, 200 58 | else: 59 | # Responds with '403 Forbidden' if verify tokens do not match 60 | print("VERIFICATION_FAILED") 61 | return jsonify({"status": "error", "message": "Verification failed"}), 403 62 | else: 63 | # Responds with '400 Bad Request' if verify tokens do not match 64 | print("MISSING_PARAMETER") 65 | return jsonify({"status": "error", "message": "Missing parameters"}), 400 66 | ``` 67 | 68 | `handle_message` processes incoming WhatsApp messages and does error handling 69 | 70 | ```python 71 | def handle_message(request): 72 | # Parse Request body in json format 73 | body = request.get_json() 74 | print(f"request body: {body}") 75 | 76 | try: 77 | # info on WhatsApp text message payload: 78 | # https://developers.facebook.com/docs/whatsapp/cloud-api/webhooks/payload-examples#text-messages 79 | if body.get("object"): 80 | if ( 81 | body.get("entry") 82 | and body["entry"][0].get("changes") 83 | and body["entry"][0]["changes"][0].get("value") 84 | and body["entry"][0]["changes"][0]["value"].get("messages") 85 | and body["entry"][0]["changes"][0]["value"]["messages"][0] 86 | ): 87 | handle_whatsapp_message(body) 88 | return jsonify({"status": "ok"}), 200 89 | else: 90 | # if the request is not a WhatsApp API event, return an error 91 | return ( 92 | jsonify({"status": "error", "message": "Not a WhatsApp API event"}), 93 | 404, 94 | ) 95 | 
# catch all other errors and return an internal server error 96 | except Exception as e: 97 | print(f"unknown error: {e}") 98 | return jsonify({"status": "error", "message": str(e)}), 500 99 | ``` 100 | 101 | `handle_whatsapp_message` is the main function that handles incoming messages of different types, and based on the type of the message, it calls the relevant function. 102 | 103 | ```python 104 | def handle_whatsapp_message(body): 105 | message = body["entry"][0]["changes"][0]["value"]["messages"][0] 106 | if message["type"] == "text": 107 | message_body = message["text"]["body"] 108 | elif message["type"] == "audio": 109 | audio_id = message["audio"]["id"] 110 | message_body = handle_audio_message(audio_id) 111 | response = make_openai_request(message_body, message["from"]) 112 | send_whatsapp_message(body, response) 113 | ``` 114 | 115 | For example, if the message is an audio file, it calls `handle_audio_message` to convert the file to text. 116 | 117 | ```python 118 | def handle_audio_message(audio_id): 119 | audio_url = get_media_url(audio_id) 120 | audio_bytes = download_media_file(audio_url) 121 | audio_data = convert_audio_bytes(audio_bytes) 122 | audio_text = recognize_audio(audio_data) 123 | message = ( 124 | "Please summarize the following message in its original language " 125 | f"as a list of bullet-points: {audio_text}" 126 | ) 127 | return message 128 | ``` 129 | 130 | `convert_audio_bytes` is another critical function that converts the audio file into a format that can be processed by the `speech_recognition` library.
131 | The function first converts the OGG file into WAV format and then converts it into an AudioData object. 132 | 133 | ```python 134 | def convert_audio_bytes(audio_bytes): 135 | ogg_audio = pydub.AudioSegment.from_ogg(io.BytesIO(audio_bytes)) 136 | ogg_audio = ogg_audio.set_sample_width(4) 137 | wav_bytes = ogg_audio.export(format="wav").read() 138 | audio_data, sample_rate = sf.read(io.BytesIO(wav_bytes), dtype="int32") 139 | sample_width = audio_data.dtype.itemsize 140 | print(f"audio sample_rate:{sample_rate}, sample_width:{sample_width}") 141 | audio = sr.AudioData(audio_data, sample_rate, sample_width) 142 | return audio 143 | ``` 144 | 145 | The `make_openai_request` function makes a request to the OpenAI API and generates a response based on the user's message and the previous conversation log.
146 | It uses the `openai.ChatCompletion.create` method to generate the response. 147 | 148 | ```python 149 | def make_openai_request(message, from_number): 150 | try: 151 | message_log = update_message_log(message, from_number, "user") 152 | response = openai.ChatCompletion.create( 153 | model="gpt-3.5-turbo", 154 | messages=message_log, 155 | temperature=0.7, 156 | ) 157 | response_message = response.choices[0].message.content 158 | print(f"openai response: {response_message}") 159 | update_message_log(response_message, from_number, "assistant") 160 | except Exception as e: 161 | print(f"openai error: {e}") 162 | response_message = "Sorry, the OpenAI API is currently overloaded or offline. Please try again later." 163 | remove_last_message_from_log(from_number) 164 | return response_message 165 | ``` 166 | 167 | The `send_whatsapp_message` function creates the Cloud API request sends the response back to the user via WhatsApp. 168 | 169 | ```python 170 | def send_whatsapp_message(body, message): 171 | value = body["entry"][0]["changes"][0]["value"] 172 | phone_number_id = value["metadata"]["phone_number_id"] 173 | from_number = value["messages"][0]["from"] 174 | headers = { 175 | "Authorization": f"Bearer {whatsapp_token}", 176 | "Content-Type": "application/json", 177 | } 178 | url = "https://graph.facebook.com/v15.0/" + phone_number_id + "/messages" 179 | data = { 180 | "messaging_product": "whatsapp", 181 | "to": from_number, 182 | "type": "text", 183 | "text": {"body": message}, 184 | } 185 | response = requests.post(url, json=data, headers=headers) 186 | print(f"whatsapp message response: {response.json()}") 187 | response.raise_for_status() 188 | ``` -------------------------------------------------------------------------------- /app.py: -------------------------------------------------------------------------------- 1 | import io 2 | import os 3 | 4 | import openai 5 | import pydub 6 | import requests 7 | import soundfile as sf 8 | import 
import speech_recognition as sr
from flask import Flask, jsonify, request

app = Flask(__name__)


# OpenAI API key
openai.api_key = os.getenv("OPENAI_API_KEY")

# Access token for your WhatsApp business account app
whatsapp_token = os.environ.get("WHATSAPP_TOKEN")

# Verify Token defined when configuring the webhook
verify_token = os.environ.get("VERIFY_TOKEN")

# Message log dictionary to enable conversation over multiple messages
message_log_dict = {}


# language for speech to text recognition
# TODO: detect this automatically based on the user's language
LANGUAGE = "en-US"
# The original, misspelled name is kept as an alias because other code in
# this file still reads it.
LANGUGAGE = LANGUAGE

# Seconds to wait for a media request before giving up. `requests` applies no
# timeout unless one is passed, so a stalled connection would block the worker.
MEDIA_REQUEST_TIMEOUT = 30


# get the media url from the media id
def get_media_url(media_id):
    """Look up the download URL for a media id.

    Args:
        media_id: Media identifier taken from the incoming message payload.

    Returns:
        The value of the ``"url"`` key of the JSON response.

    Raises:
        requests.HTTPError: If the lookup answers with an error status.
        requests.Timeout: If no answer arrives within MEDIA_REQUEST_TIMEOUT.
    """
    headers = {"Authorization": f"Bearer {whatsapp_token}"}
    url = f"https://graph.facebook.com/v16.0/{media_id}/"
    response = requests.get(url, headers=headers, timeout=MEDIA_REQUEST_TIMEOUT)
    if not response.ok:
        # Log the raw body: an error payload has no "url" key and would
        # otherwise only surface as an uninformative KeyError.
        print(f"media id error response: {response.text}")
    response.raise_for_status()
    payload = response.json()
    print(f"media id response: {payload}")
    return payload["url"]


# download the media file from the media url
def download_media_file(media_url):
    """Download the raw bytes of a media file.

    Args:
        media_url: URL as returned by ``get_media_url``.

    Returns:
        The response body as ``bytes``.

    Raises:
        requests.HTTPError: If the download answers with an error status, so
            that an error page is never handed on as if it were audio.
        requests.Timeout: If no answer arrives within MEDIA_REQUEST_TIMEOUT.
    """
    headers = {"Authorization": f"Bearer {whatsapp_token}"}
    response = requests.get(
        media_url, headers=headers, timeout=MEDIA_REQUEST_TIMEOUT
    )
    response.raise_for_status()
    print(f"first 10 digits of the media file: {response.content[:10]}")
    return response.content


# convert ogg audio bytes to audio data which speechrecognition library can process
def convert_audio_bytes(audio_bytes):
    """Convert OGG audio bytes into an ``sr.AudioData`` object.

    The OGG data is re-encoded as WAV with a 4-byte sample width, read back as
    ``int32`` samples, and wrapped together with its sample rate and width.

    Args:
        audio_bytes: Raw OGG file content.

    Returns:
        An ``sr.AudioData`` instance built from the decoded samples.
    """
    ogg_audio = pydub.AudioSegment.from_ogg(io.BytesIO(audio_bytes))
    ogg_audio = ogg_audio.set_sample_width(4)
    # export() hands back an open file object; close it deterministically
    # instead of leaving that to the garbage collector.
    with ogg_audio.export(format="wav") as wav_file:
        wav_bytes = wav_file.read()
    audio_data, sample_rate = sf.read(io.BytesIO(wav_bytes), dtype="int32")
    sample_width = audio_data.dtype.itemsize
    print(f"audio sample_rate:{sample_rate}, sample_width:{sample_width}")
    return sr.AudioData(audio_data, sample_rate, sample_width)


# run speech
# recognition on the audio data
def recognize_audio(audio_bytes):
    """Transcribe audio to text with ``Recognizer.recognize_google``.

    Args:
        audio_bytes: Despite the name, the caller in this file passes the
            object returned by ``convert_audio_bytes`` rather than raw bytes.

    Returns:
        The recognized text.
    """
    recognizer = sr.Recognizer()
    return recognizer.recognize_google(audio_bytes, language=LANGUGAGE)


# handle audio messages
def handle_audio_message(audio_id):
    """Turn an audio message into a summarization prompt.

    Resolves the media id to a URL, downloads and converts the audio, runs
    speech recognition on it, and embeds the transcript in a prompt.

    Args:
        audio_id: Media id of the audio attachment.

    Returns:
        The prompt string containing the transcript.
    """
    media_url = get_media_url(audio_id)
    raw_audio = download_media_file(media_url)
    audio_text = recognize_audio(convert_audio_bytes(raw_audio))
    return (
        "Please summarize the following message in its original language "
        f"as a list of bullet-points: {audio_text}"
    )


# send the response as a WhatsApp message back to the user
def send_whatsapp_message(body, message):
    """Send ``message`` as a text reply to the sender of the webhook event.

    Args:
        body: Parsed webhook payload; the phone number id and the sender's
            number are read from its first entry / change / message.
        message: Text to send.

    Raises:
        requests.HTTPError: If the API answers with an error status.
        requests.Timeout: If no answer arrives within 30 seconds.
    """
    value = body["entry"][0]["changes"][0]["value"]
    phone_number_id = value["metadata"]["phone_number_id"]
    from_number = value["messages"][0]["from"]
    headers = {
        "Authorization": f"Bearer {whatsapp_token}",
        "Content-Type": "application/json",
    }
    url = "https://graph.facebook.com/v15.0/" + phone_number_id + "/messages"
    data = {
        "messaging_product": "whatsapp",
        "to": from_number,
        "type": "text",
        "text": {"body": message},
    }
    # `requests` never times out on its own; bound the wait explicitly.
    response = requests.post(url, json=data, headers=headers, timeout=30)
    # Log the raw text rather than response.json(): a non-JSON error body
    # must not raise here and hide the HTTP error reported below.
    print(f"whatsapp message response: {response.text}")
    response.raise_for_status()


# Upper bound on the user/assistant messages kept per phone number (the system
# prompt is kept in addition). Without a bound the log grows with every
# exchange until the chat request becomes too large and keeps failing.
MAX_LOG_MESSAGES = 20


# create a message log for each phone number and return the current message log
def update_message_log(message, phone_number, role):
    """Append a message to a phone number's log and return that log.

    A new log is started with the system prompt on first use. After
    appending, the oldest non-system messages are dropped so that at most
    MAX_LOG_MESSAGES of them remain.

    Args:
        message: Message text.
        phone_number: Key identifying the conversation.
        role: Role stored with the message (the callers in this file pass
            "user" or "assistant").

    Returns:
        The list stored in ``message_log_dict`` for ``phone_number``.
    """
    if phone_number not in message_log_dict:
        message_log_dict[phone_number] = [
            {
                "role": "system",
                "content": "You are a helpful assistant named WhatsBot.",
            }
        ]
    log = message_log_dict[phone_number]
    log.append({"role": role, "content": message})
    # Index 0 holds the system prompt; trim only the messages after it.
    overflow = len(log) - 1 - MAX_LOG_MESSAGES
    if overflow > 0:
        del log[1 : 1 + overflow]
    return log


# remove last message from log if OpenAI
# request fails
def remove_last_message_from_log(phone_number):
    """Drop the most recent message from a phone number's log.

    Does nothing when the number has no log or when only the first entry
    (the system prompt) is left, so the prompt is never removed.

    Args:
        phone_number: Key identifying the conversation.
    """
    log = message_log_dict.get(phone_number)
    if log and len(log) > 1:
        log.pop()


# make request to OpenAI
def make_openai_request(message, from_number):
    """Ask the chat model for a reply to ``message``.

    The message is appended to the sender's log, the whole log is sent to
    ``openai.ChatCompletion.create``, and the reply is appended to the log.
    On any failure the last logged message is removed again and an apology
    text is returned instead.

    Args:
        message: Prompt text from the user.
        from_number: Sender's phone number, used as the log key.

    Returns:
        The model's reply, or the fallback apology text.
    """
    try:
        message_log = update_message_log(message, from_number, "user")
        response = openai.ChatCompletion.create(
            model="gpt-3.5-turbo",
            messages=message_log,
            temperature=0.7,
        )
        response_message = response.choices[0].message.content
        print(f"openai response: {response_message}")
        update_message_log(response_message, from_number, "assistant")
    except Exception as e:
        print(f"openai error: {e}")
        response_message = "Sorry, the OpenAI API is currently overloaded or offline. Please try again later."
        remove_last_message_from_log(from_number)
    return response_message


# handle WhatsApp messages of different type
def handle_whatsapp_message(body):
    """Answer the first message contained in a webhook payload.

    Text messages are forwarded as-is and audio messages are transcribed
    first. Any other message type is logged and skipped; previously it left
    ``message_body`` unassigned and crashed with ``UnboundLocalError``.

    Args:
        body: Parsed webhook payload containing at least one message.
    """
    message = body["entry"][0]["changes"][0]["value"]["messages"][0]
    message_type = message["type"]
    if message_type == "text":
        message_body = message["text"]["body"]
    elif message_type == "audio":
        message_body = handle_audio_message(message["audio"]["id"])
    else:
        print(f"unsupported message type: {message_type}")
        return
    response = make_openai_request(message_body, message["from"])
    send_whatsapp_message(body, response)


# handle incoming webhook messages
def handle_message(request):
    """Process a webhook POST and build the HTTP response.

    Args:
        request: The Flask request object.

    Returns:
        A ``(response, status)`` tuple: 200 when the payload has an
        ``"object"`` field, 404 when it does not, and 500 with the error
        text when processing raises.
    """
    # Parse Request body in json format
    body = request.get_json()
    print(f"request body: {body}")

    try:
        # info on WhatsApp text message payload:
        # https://developers.facebook.com/docs/whatsapp/cloud-api/webhooks/payload-examples#text-messages
        if body.get("object"):
            if (
                body.get("entry")
                and body["entry"][0].get("changes")
                and body["entry"][0]["changes"][0].get("value")
                and body["entry"][0]["changes"][0]["value"].get("messages")
                and body["entry"][0]["changes"][0]["value"]["messages"][0]
            ):
                handle_whatsapp_message(body)
            # Acknowledge every event that has an "object" field, including
            # those without a message, so that the view always returns a
            # response.
            return jsonify({"status": "ok"}), 200
        else:
            # if the request is not a WhatsApp API event, return an error
            return (
                jsonify({"status": "error", "message": "Not a WhatsApp API event"}),
                404,
            )
    # catch all other errors and return an internal server error
    except Exception as e:
        print(f"unknown error: {e}")
        return jsonify({"status": "error", "message": str(e)}), 500


# Required webhook verification for WhatsApp
# info on verification request payload:
# https://developers.facebook.com/docs/graph-api/webhooks/getting-started#verification-requests
def verify(request):
    """Answer the webhook verification GET request.

    Args:
        request: The Flask request object carrying the ``hub.mode``,
            ``hub.verify_token`` and ``hub.challenge`` query parameters.

    Returns:
        ``(challenge, 200)`` when the mode is "subscribe" and the token
        matches ``verify_token``; a JSON error with 403 when they do not
        match; a JSON error with 400 when mode or token is missing.
    """
    import hmac

    # Parse params from the webhook verification request
    mode = request.args.get("hub.mode")
    token = request.args.get("hub.verify_token")
    challenge = request.args.get("hub.challenge")

    # Responds with '400 Bad Request' if mode or token is missing
    if not (mode and token):
        print("MISSING_PARAMETER")
        return jsonify({"status": "error", "message": "Missing parameters"}), 400

    # Compare the secret in constant time. An unset VERIFY_TOKEN never
    # matches, exactly as with a plain equality check.
    token_matches = verify_token is not None and hmac.compare_digest(
        token.encode("utf-8"), verify_token.encode("utf-8")
    )
    if mode == "subscribe" and token_matches:
        # Respond with 200 OK and challenge token from the request
        print("WEBHOOK_VERIFIED")
        return challenge, 200

    # Responds with '403 Forbidden' if mode or verify token is wrong
    print("VERIFICATION_FAILED")
    return jsonify({"status": "error", "message": "Verification failed"}), 403


# Sets homepage endpoint and welcome message
@app.route("/", methods=["GET"])
def home():
    """Return a plain-text liveness message."""
    return "WhatsApp OpenAI Webhook is listening!"


# Accepts POST and GET requests at /webhook endpoint
@app.route("/webhook", methods=["POST", "GET"])
def webhook():
    """Dispatch webhook traffic: POST carries events, anything else verifies.

    Flask also routes HEAD requests to a view that accepts GET. The original
    if/elif on GET and POST returned ``None`` for those, which Flask turns
    into a 500. Treating every non-POST method as a verification request
    keeps GET unchanged and gives HEAD a valid response.
    """
    if request.method == "POST":
        return handle_message(request)
    return verify(request)


# Route to reset message log
# NOTE(review): this is an unauthenticated GET that wipes every user's
# conversation history; consider protecting it before exposing the app.
@app.route("/reset", methods=["GET"])
def reset():
    """Forget all stored conversations and confirm with a text message."""
    # Emptying the dict in place has the same effect as rebinding the global.
    message_log_dict.clear()
    return "Message log resetted!"


if __name__ == "__main__":
    # Development entry point only: debug=True turns on Flask's debug mode,
    # which must not be used for a publicly reachable deployment.
    app.run(debug=True, use_reloader=True)