├── .env-example ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── RESULTS.md ├── app ├── api │ ├── .dockerignore │ ├── API.md │ ├── Dockerfile │ ├── Makefile │ ├── config.py │ ├── data │ │ └── training_data │ │ │ ├── org-about_the_company.md │ │ │ ├── org-board_of_directors.md │ │ │ ├── org-company_story.md │ │ │ ├── org-corporate_philosophy.md │ │ │ ├── org-customer_support.md │ │ │ ├── org-earnings_fy2023.md │ │ │ ├── org-management_team.md │ │ │ ├── project-frogonil.md │ │ │ ├── project-kekzal.md │ │ │ ├── project-memegen.md │ │ │ ├── project-memetrex.md │ │ │ ├── project-neurokek.md │ │ │ ├── project-pepetamine.md │ │ │ └── project-pepetrak.md │ ├── helpers.py │ ├── llm.py │ ├── main.py │ ├── models.py │ ├── requirements.txt │ ├── seed.py │ ├── static │ │ └── img │ │ │ ├── rasagpt-icon-200x200.png │ │ │ ├── rasagpt-logo-1.png │ │ │ └── rasagpt-logo-2.png │ └── util.py ├── db │ ├── Dockerfile │ └── create_db.sh ├── rasa-credentials │ ├── .dockerignore │ ├── Dockerfile │ ├── main.py │ └── requirements.txt ├── rasa │ ├── .dockerignore │ ├── actions │ │ ├── Dockerfile │ │ ├── __init__.py │ │ └── actions.py │ ├── config.yml │ ├── credentials.yml │ ├── custom_telegram.py │ ├── data │ │ ├── nlu.yml │ │ ├── rules.yml │ │ └── stories.yml │ ├── domain.yml │ ├── endpoints.yml │ ├── ngrok.yml │ ├── requirements.txt │ └── rules.yml └── scripts │ └── wait-for-it.sh ├── docker-compose.yml └── github ├── api.png ├── cli_install.png ├── container_logs.png ├── documents.png ├── makefile-1.png ├── makefile-2.png ├── ngrok-admin.png ├── org-projects.png ├── orgs.png ├── rasagpt-banner.png ├── rasagpt-icon-200x200.png ├── rasagpt-logo-1.png ├── rasagpt-logo-2.png ├── rasagpt-video-title-screen.png ├── telegram.png └── title.png /.env-example: -------------------------------------------------------------------------------- 1 | ENV=local 2 | 3 | FILE_UPLOAD_PATH=data 4 | LLM_DEFAULT_TEMPERATURE=0 5 | LLM_CHUNK_SIZE=1000 6 | LLM_CHUNK_OVERLAP=200 7 | LLM_DISTANCE_THRESHOLD=0.2 8 | LLM_MAX_OUTPUT_TOKENS=256 9 | LLM_MIN_NODE_LIMIT=3 10 | LLM_DEFAULT_DISTANCE_STRATEGY=EUCLIDEAN 11 | 12 | POSTGRES_USER=postgres 13 | POSTGRES_PASSWORD=postgres 14 | POSTGRES_DB=postgres 15 | PGVECTOR_ADD_INDEX=true 16 | 17 | DB_HOST=db 18 | DB_PORT=5432 19 | DB_USER=api 20 | DB_NAME=api 21 | DB_PASSWORD= 22 | 23 | NGROK_HOST=ngrok 24 | NGROK_PORT=4040 25 | NGROK_AUTHTOKEN= 26 | NGROK_API_KEY= 27 | NGROK_INTERNAL_WEBHOOK_HOST=api 28 | NGROK_INTERNAL_WEBHOOK_PORT=8888 29 | NGROK_DEBUG=true 30 | NGROK_CONFIG=/etc/ngrok.yml 31 | 32 | RASA_WEBHOOK_HOST=rasa-core 33 | RASA_WEBHOOK_PORT=5005 34 | 35 | CREDENTIALS_PATH=/app/rasa/credentials.yml 36 | 37 | TELEGRAM_ACCESS_TOKEN= 38 | TELEGRAM_BOTNAME=rasagpt 39 | 40 | API_PORT=8888 41 | API_HOST=api 42 | 43 | PGADMIN_PORT=5050 44 | PGADMIN_DEFAULT_PASSWORD=pgadmin 45 | PGADMIN_DEFAULT_EMAIL=your@emailaddress.com 46 | 47 | MODEL_NAME=gpt-3.5-turbo 48 | OPENAI_API_KEY= -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .trunk 3 | .vscode 4 | mnt 5 | venv/ 6 | .env 7 | .env-dev 8 | .env 9 | .env-staging 10 | .env-stage 11 | .env-prod 12 | .env-production 13 | __pycache__/ 14 | app/rasa/models/* 15 | app/rasa/.rasa 16 | app/rasa/.config 17 | app/rasa/.keras 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 
2 | 3 | Copyright (c) 2023 Paul Pierre 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default banner help install build run stop restart rasa-restart rasa-stop rasa-start rasa-build seed logs ngrok pgadmin api api-stop db db-stop db-purge db-reset purge models shell-api shell-db shell-rasa shell-actions rasa-train env-var 2 | 3 | default: help 4 | 5 | help: 6 | @make banner 7 | @echo "+------------------+" 8 | @echo "| 🏠 CORE COMMANDS |" 9 | @echo "+------------------+" 10 | @echo "make install - Install and run RasaGPT" 11 | @echo "make build - Build docker images" 12 | @echo "make run - Run RasaGPT" 13 | @echo "make stop - Stop RasaGPT" 14 | @echo "make restart - Restart RasaGPT\n" 15 | @echo "+---------------------+" 16 | @echo "| 🌍 ADMIN INTERFACES |" 17 | @echo "+---------------------+" 18 | @echo "make logs - View logs via Dozzle" 19 | @echo "make ngrok - View ngrok dashboard" 20 | @echo "make pgadmin - View pgAdmin dashboard\n" 21 | @echo "+-----------------------+" 22 | @echo "| 👷 DEBUGGING COMMANDS |" 23 | @echo "+-----------------------+" 24 | @echo "make api - Run only API server" 25 | @echo "make models - Build database models in Postgres" 26 | @echo "make purge - Remove all docker images" 27 | @echo "make db-purge - Delete all data in database" 28 | @echo "make db-reset - Reset database to initial state" 29 | @echo "make shell-api - Open shell in API container" 30 | @echo "make shell-db - Open shell in database container" 31 | @echo "make shell-rasa - Open shell in Rasa container" 32 | @echo "make shell-actions - Open shell in Rasa actions container\n" 33 | 34 | 35 | banner: 36 | @echo "\n\n-------------------------------------" 37 | @echo "▒█▀▀█ █▀▀█ █▀▀ █▀▀█ ▒█▀▀█ ▒█▀▀█ ▀▀█▀▀" 38 | @echo "▒█▄▄▀ █▄▄█ ▀▀█ █▄▄█ ▒█░▄▄ ▒█▄▄█ ░▒█░░" 39 | @echo "▒█░▒█ ▀░░▀ ▀▀▀ ▀░░▀ ▒█▄▄█ ▒█░░░ ░▒█░░" 40 | @echo "+-----------------------------------+" 41 | @echo "| http://RasaGPT.dev by @paulpierre |" 42 | @echo "+-----------------------------------+\n\n" 43 | 44 | 45 | 46 | # ========================== 47 | # 👷 INITIALIZATION COMMANDS 48 | # ========================== 49 | 50 | # ---------------------------------------- 51 | # Run this first to set up the environment 52 | # ---------------------------------------- 53 | install: 54 | @make banner 55 | @make stop 56 | @make env-var 57 |
@make rasa-train 58 | @make build 59 | @make run 60 | @make models 61 | @make rasa-restart 62 | @make seed 63 | @echo "✅ RasaGPT installed and running" 64 | 65 | # ----------------------- 66 | # Build the docker images 67 | # ----------------------- 68 | build: 69 | @echo "🏗️ Building docker images ..\n" 70 | @docker-compose -f docker-compose.yml build 71 | 72 | 73 | # ================ 74 | # 🏠 CORE COMMANDS 75 | # ================ 76 | 77 | # --------------------------- 78 | # Startup all docker services 79 | # --------------------------- 80 | 81 | run: 82 | @echo "🚀 Starting docker-compose.yml ..\n" 83 | @docker-compose -f docker-compose.yml up -d 84 | 85 | # --------------------------- 86 | # Stop all running containers 87 | # --------------------------- 88 | 89 | stop: 90 | @echo "🔍 Stopping any running containers .. \n" 91 | @docker-compose -f docker-compose.yml down 92 | 93 | # ---------------------- 94 | # Restart all containers 95 | # ---------------------- 96 | restart: 97 | @echo "🔁 Restarting docker services ..\n" 98 | @make stop 99 | @make run 100 | 101 | # ---------------------- 102 | # Restart Rasa core only 103 | # ---------------------- 104 | rasa-restart: 105 | @echo "🤖 Restarting Rasa so it grabs credentials ..\n" 106 | @make rasa-stop 107 | @make rasa-start 108 | 109 | rasa-stop: 110 | @echo "🤖 Stopping Rasa ..\n" 111 | @docker-compose -f docker-compose.yml stop rasa-core 112 | 113 | rasa-start: 114 | @echo "🤖 Starting Rasa ..\n" 115 | @docker-compose -f docker-compose.yml up -d rasa-core 116 | 117 | rasa-build: 118 | @echo "🤖 Building Rasa ..\n" 119 | @docker-compose -f docker-compose.yml build rasa-core 120 | 121 | # ----------------------- 122 | # Seed database with data 123 | # ----------------------- 124 | seed: 125 | @echo "🌱 Seeding database ..\n" 126 | @docker-compose -f docker-compose.yml exec api /app/api/wait-for-it.sh db:5432 --timeout=60 -- python3 seed.py 127 | 128 | 129 | # ======================= 130 | # 🌍 WEB ADMIN INTERFACES 131 | # ======================= 132 | 133 | # ------------------------- 134 | # Reverse HTTP tunnel admin 135 | # ------------------------- 136 | ngrok: 137 | @echo "📡 Opening ngrok agent in the browser ..\n" 138 | @open http://localhost:4040 139 | 140 | # ------------------------ 141 | # Postgres admin interface 142 | # ------------------------ 143 | pgadmin: 144 | @echo "👷‍♂️ Opening PG Admin in the browser ..\n" 145 | @open http://localhost:5050 146 | 147 | # ------------------------ 148 | # Container logs interface 149 | # ------------------------ 150 | logs: 151 | @echo "🔍 Opening container logs in the browser ..\n" 152 | @open http://localhost:9999/ 153 | 154 | # ===================== 155 | # 👷 DEBUGGING COMMANDS 156 | # ===================== 157 | 158 | # --------------------------- 159 | # Startup just the API server 160 | # --------------------------- 161 | api: 162 | @make db 163 | @echo "🚀 Starting FastAPI and postgres ..\n" 164 | @docker-compose -f docker-compose.yml up -d api 165 | 166 | # ------------------------ 167 | # Startup just Postgres DB 168 | # ------------------------ 169 | db: 170 | @echo "🚀 Starting Postgres with pgvector ..\n" 171 | @docker-compose -f docker-compose.yml up -d db 172 | 173 | 174 | db-stop: 175 | @echo " Stopping the database ..\n" 176 | @docker-compose -f docker-compose.yml down db 177 | 178 | 179 | db-reset: 180 | @echo "⛔ Are you sure you want to reinitialize the database, you will lose all data? 
[y/N]\n" 181 | @read confirmation; \ 182 | if [ "$$confirmation" = "y" ] || [ "$$confirmation" = "Y" ]; then \ 183 | echo "Resetting the database .."; \ 184 | make db-purge; \ 185 | make api; \ 186 | make models; \ 187 | echo "✅ Database re-initialized"; \ 188 | else \ 189 | echo "Aborted."; \ 190 | fi 191 | 192 | 193 | # ------------------------------- 194 | # Build the schema in Postgres DB 195 | # ------------------------------- 196 | models: 197 | @echo "💽 Building models in Postgres ..\n" 198 | @docker-compose -f docker-compose.yml exec api /app/api/wait-for-it.sh db:5432 --timeout=60 -- python3 models.py 199 | 200 | # ------------------------------- 201 | # Delete containers or bad images 202 | # ------------------------------- 203 | purge: 204 | @echo "🧹 Purging all containers and images ..\n" 205 | @make stop 206 | @docker system prune -a 207 | @make install 208 | 209 | # -------------------------------- 210 | # Delete the database mount volume 211 | # -------------------------------- 212 | db-purge: 213 | @echo "⛔ Are you sure you want to delete all data in the database? [y/N]\n" 214 | @read confirmation; \ 215 | if [ "$$confirmation" = "y" ] || [ "$$confirmation" = "Y" ]; then \ 216 | echo "Deleting generated files .."; \ 217 | make stop; \ 218 | rm -rf ./mnt; \ 219 | echo "Deleted."; \ 220 | else \ 221 | echo "Aborted."; \ 222 | fi 223 | 224 | # -------------------------------------- 225 | # Open a bash shell in the API container 226 | # -------------------------------------- 227 | shell-api: 228 | @echo "💻🐢 Opening a bash shell in the RasaGPT API container ..\n" 229 | @if docker ps | grep chat_api > /dev/null; then \ 230 | docker exec -it $$(docker ps | grep chat_api | tr -d '\n' | awk '{print $$1}') /bin/bash; \ 231 | else \ 232 | echo "Container chat_api is not running"; \ 233 | fi 234 | 235 | # --------------------------------------- 236 | # Open a bash shell in the Rasa container 237 | # --------------------------------------- 238 | shell-rasa: 239 | @echo "💻🐢 Opening a bash shell in the chat_rasa_core container ..\n" 240 | @if docker ps | grep chat_rasa_core > /dev/null; then \ 241 | docker exec -it $$(docker ps | grep chat_rasa_core | tr -d '\n' | awk '{print $$1}') /bin/bash; \ 242 | else \ 243 | echo "Container chat_rasa_core is not running"; \ 244 | fi 245 | 246 | # ----------------------------------------------- 247 | # Open a bash shell in the Rasa actions container 248 | # ----------------------------------------------- 249 | shell-actions: 250 | @echo "💻🐢 Opening a bash shell in the chat_rasa_actions container ..\n" 251 | @if docker ps | grep chat_rasa_actions > /dev/null; then \ 252 | docker exec -it $$(docker ps | grep chat_rasa_actions | tr -d '\n' | awk '{print $$1}') /bin/bash; \ 253 | else \ 254 | echo "Container chat_rasa_actions is not running"; \ 255 | fi 256 | 257 | # ------------------------------------------- 258 | # Open a bash shell in the Postgres container 259 | # ------------------------------------------- 260 | shell-db: 261 | @echo "💻🐢 Opening a bash shell in the Postgres container ..\n" 262 | @if docker ps | grep chat_db > /dev/null; then \ 263 | docker exec -it $$(docker ps | grep chat_db | tr -d '\n' | awk '{print $$1}') /bin/bash; \ 264 | else \ 265 | echo "Container chat_db is not running"; \ 266 | fi 267 | 268 | # ================== 269 | # 💁 HELPER COMMANDS 270 | # ================== 271 | 272 | # ------------- 273 | # Check envvars 274 | # ------------- 275 | env-var: 276 | @echo "🔍 Checking if envvars are set ..\n" 277 | @if !
test -e "./.env"; then \ 278 | echo "❌ .env file not found. Please copy .env-example to .env and update values"; \ 279 | exit 1; \ 280 | else \ 281 | echo "✅ found .env\n"; \ 282 | fi 283 | 284 | # ----------------- 285 | # Train Rasa models 286 | # ----------------- 287 | rasa-train: 288 | @echo "💽 Generating Rasa models ..\n" 289 | @make rasa-start 290 | @docker-compose -f docker-compose.yml exec rasa-core rasa train 291 | @make rasa-stop 292 | @echo "✅ Done\n" 293 | -------------------------------------------------------------------------------- /RESULTS.md: -------------------------------------------------------------------------------- 1 | 2 | ![RasaGPT Logo](https://github.com/paulpierre/RasaGPT/blob/main/github/rasagpt-logo-1.png?raw=true) 3 | 4 |

5 | 6 | # 📊 RasaGPT Query Results 7 | 8 | The following are questions and answers based on the example dummy data included in the RasaGPT repository. The response framing is good, but retrieval quality is not yet sufficient because of how the data is indexed. Here are some examples of Q&A hits and misses, with some commentary. 9 | 10 |

11 | 12 | # ☑️ Hits 13 | 14 | 15 | 16 |
17 | 18 | ### ✅ “Tell me about Pepe Corp” 19 | 20 | ```bash 21 | Pepe Corp is a groundbreaking parody pharmaceutical and technology company inspired by the iconic Pepe the Frog meme. Our mission is to revolutionize the world through the power of dank memes. 22 | 🏷️ company,mission 23 | ``` 24 | 25 |
26 | 27 | ### ✅ “What is the weather like in London today?” (It’s expected to say I don’t know) 28 | 29 | ```bash 30 | I apologize, I'm not sure how to help with that 31 | 🏷️ weather 32 | ``` 33 | 34 |
35 | 36 | ### ✅ “What is the purpose of Kekzal?” 37 | 38 | ```bash 39 | Kekzal enhances sexual function. 40 | 🏷️ purpose 41 | ``` 42 | 43 |
44 | 45 | ### ✅ “What is Kekzal and what are the side effects?” 46 | 47 | ```bash 48 | Kekzal is a medication, but the document does not provide information on its side effects. It only advises to consult a memologist if side effects persist or become bothersome. 49 | 🏷️ Kekzal,side effects 50 | ``` 51 | 52 | >- It probably should not mention “the document” and this can be optimized by modifying the prompt template 53 | 54 |
55 | 56 | ### ✅ “What should I consider before taking Kekzal?” 57 | 58 | ```bash 59 | Before taking Kekzal, consider the following precautions: Do not use Kekzal if you have a known allergy to dank memes or an aversion to Pepe-themed roman. 60 | 🏷️ precautions 61 | ``` 62 | 63 | >- This is technically a hit, but because of the chunking settings the original text gets cut off and says “roman” instead of “romantic,” so this is easily fixed 64 | >- Ideally it should also include “Kekzal” in the tags 65 | 66 |
67 | 68 | ### ✅ “What are the overdose symptoms of Kekzal?” 69 | 70 | ```bash 71 | In case of an overdose, symptoms may include: 1. An insatiable desire to create 72 | 🏷️ overdose,symptoms 73 | ``` 74 | 75 | >- This is technically a hit, but again the indexing cut the answer off, likely due to the configured output limits, which is easily fixed 76 | 77 |
78 | 79 | ### ✅ “Who is the CEO?” 80 | 81 | ```bash 82 | The CEO of Pepe Corp is Amanda Gigglestein. 83 | 🏷️ CEO 84 | ``` 85 | 86 |
87 | 88 | ### ✅ “What are the 2023 plans of Pepe Corp?” 89 | 90 | ```bash 91 | Pepe Corp plans to invest in research and development to create cutting-edge products that address the needs of our diverse customer base. Our R&D team will explore new areas of meme science and technology, developing products that meet the needs of our customers. 92 | 🏷️ plans,research and development,cutting-edge products,meme science,technology,customer needs 93 | ``` 94 | 95 |
96 | 97 | ### ✅ “What is the revenue for Q1 FY2023?” 98 | 99 | ```bash 100 | $235 million 101 | 🏷️ revenue,Q1,FY2023 102 | ``` 103 | 104 |
105 | 106 | ### ✅ “Tell me about the early days of PepeCorp?” 107 | 108 | ```bash 109 | In the early 21st century, a group of visionary memelords gathered in a dimly lit basement, fueled by their passion for dank memes and the iconic Pepe the Frog. 110 | 🏷️ company history,early days 111 | ``` 112 | 113 |

114 | 115 | # ❌ Misses 116 | 117 | 118 | ### ❌ “What are the side effects of Kekzal?” 119 | 120 | ```bash 121 | Consult your memologist if side effects persist or become bothersome. 122 | 🏷️ Kekzal,side effects 123 | ``` 124 | 125 | >- The side effects are in the documentation, but the bot misses them entirely. It did tag the product name, unlike earlier sessions, but index optimization can fix this 126 | 127 |
128 | 129 | ### ❌ “Who is Dr. Olivia Dankstein?” 130 | 131 | ```bash 132 | I apologize, I'm not sure how to help with that. 133 | 🏷️ unknown 134 | ``` 135 | 136 | >- Not good, she is in the Board of Directors document and is Vice-Chair and Chief Scientific Officer of Pepe Corp. Better indexing strategies fix this. 137 | 138 |
139 | 140 | 141 | ### ❌ “What is the list of products provided by Pepe Corp?” 142 | 143 | ```bash 144 | Pepe Corp has a diverse portfolio of products, ranging from meme-inspired pharmaceuticals to advanced brain implants and mobile applications. 145 | 🏷️ products 146 | ``` 147 | 148 | >- Although it did answer, it is insufficient. This is where a tree index with summarization would help 149 | 150 |

151 | 152 | # 🔬 Retrieval optimizations 153 | 154 | So how could all of this be improved, by what I estimate to be more than 70%? Here are a few solutions I intend to explore: 155 | 156 | 157 | - Better index-tuning at the LLM library level, specifically using llama-index, which specializes in retrieval, instead of Langchain 158 | - Using [Query Routers Abstractions](https://medium.com/@jerryjliu98/unifying-llm-powered-qa-techniques-with-routing-abstractions-438e2499a0d0) to understand which search strategy to use (one-shot vs few-shot) 159 | - Exploring other indexing methods like Tree indexes and Keyword indexes 160 | - Adding chat history for immediate recall and context setting 161 | - Adding a secondary adversarial agent ([Dual LLM pattern](https://simonwillison.net/2023/Apr/25/dual-llm-pattern/)) with the following potential functionalities: 162 | - Determine whether the question has been answered and, if not, re-optimize the search strategy 163 | - Ensure prompt injection is not occurring 164 | - Increasing baseline similarity search quality by exploring: 165 | - Regularly generating “fake” document embeddings based on historical queries and linking them to actual documents via the [HyDE pattern](https://wfhbrian.com/revolutionizing-search-how-hypothetical-document-embeddings-hyde-can-save-time-and-increase-productivity/) 166 | - Regularly generating “fake” user queries based on documents and linking them to the actual document, so user input and “fake” queries can match better -------------------------------------------------------------------------------- /app/api/.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd -------------------------------------------------------------------------------- /app/api/API.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # 🏠 Overview 4 | 5 | 💬 RasaGPT is the first headless LLM chatbot platform built on top of Rasa and Langchain. It is boilerplate code and a reference implementation of Rasa and Telegram utilizing an LLM library like Langchain for indexing, retrieval and context injection. 6 | 7 |

8 | 9 | # 💁‍♀️ Why RasaGPT? 10 | 11 | RasaGPT just works out of the box. I went through all the implementation headaches so you don’t have to, including: 12 | 13 | - Creating your own proprietary bot end-point using FastAPI, with document upload and a “training” pipeline included 14 | - How to integrate Langchain/LlamaIndex and Rasa 15 | - Library conflicts with LLM libraries and passing metadata 16 | - Dockerized support on MacOS for running Rasa 17 | - Reverse proxy with chatbots via ngrok 18 | - Implementing pgvector with your own custom schema instead of using Langchain’s highly opinionated PGVector class 19 | - Adding multi-tenancy, sessions and metadata between Rasa and your own backend / application 20 | 21 | The backstory is familiar. A friend came to me with a problem. I scoured Google and Github for a decent reference implementation of LLMs integrated with Rasa but came up empty-handed. I figured this was a great opportunity to satiate my curiosity, and 2 days later I had a proof of concept, and a week later this is what I came up with. 22 | 23 |
24 | 25 | > ⚠️ **Caveat emptor:** 26 | This is far from production code and rife with prompt injection and general security vulnerabilities. I just hope someone finds this useful 😊 27 | 28 | 29 |

30 | 31 | # **✨** Quick start 32 | 33 | Getting started is easy, just make sure you meet the dependencies below. 34 | 35 | ```bash 36 | git clone https://github.com/paulpierre/RasaGPT.git 37 | cd RasaGPT 38 | cp .env-example .env 39 | 40 | # Edit your .env file and add all the necessary credentials 41 | make install 42 | 43 | # Type "make" to see more options 44 | make 45 | ``` 46 | 47 |

48 | 49 | 50 | # 🔥 Features 51 | 52 | ## Full Application and API 53 | 54 | - LLM “learns” on an arbitrary corpus of data using Langchain 55 | - Upload documents and “train” everything via FastAPI 56 | - Document versioning and automatic “re-training” implemented on upload 57 | - Customize your own async end-points 58 | - Bot determines whether human handoff is necessary 59 | - Bot automatically generates tags based on user questions and responses 60 | - Full API documentation via Swagger and Redoc included 61 | - Ngrok end-points are automatically generated for you on startup so your bot can always be accessed via `https://t.me/yourbotname` 62 | - Embedding similarity search built into Postgres via pgvector and Postgres functions 63 | - Dummy data included for you to test and experiment with 64 | - Unlimited use cases, from help desk, customer support, quizzes and e-learning to Dungeons & Dragons, and more 65 |

66 | ## Rasa integration 67 | 68 | - Built on top of Rasa, the open-source gold standard for chat platforms 69 | - Supports MacOS M1/M2 via Docker (the canonical Rasa image lacks MacOS arch. support) 70 | - Supports Telegram, and can easily integrate Slack, Whatsapp, Line, SMS, etc. 71 | - Set up complex dialog pipelines using NLU models from Huggingface like BERT, or libraries/frameworks like Keras or Tensorflow, with OpenAI GPT as fallback 72 |

73 | ## Flexibility 74 | 75 | - Extend agentic, memory, etc. capabilities with Langchain 76 | - Schema supports multi-tenancy, sessions, data storage 77 | - Customize agent personalities 78 | - Saves all chat history and creates embeddings from all interactions, future-proofing your retrieval strategy 79 | - Automatically generates embeddings from the knowledge base corpus and client feedback 80 | 81 |

82 | 83 | # 🧑‍💻 Installing 84 | 85 | ## Requirements 86 | 87 | - Python 3.9 88 | - Docker & Docker compose ([Docker desktop MacOS](https://www.docker.com/products/docker-desktop/)) 89 | - Open AI [API key](https://platform.openai.com/account/api-keys) 90 | - Telegram [bot credentials](https://core.telegram.org/bots#how-do-i-create-a-bot) 91 | - Ngrok [auth token](https://dashboard.ngrok.com/tunnels/authtokens) 92 | - Make ([MacOS](https://formulae.brew.sh/formula/make)/[Windows](https://stackoverflow.com/questions/32127524/how-to-install-and-use-make-in-windows)) 93 | - SQLModel 94 | 95 |
96 | 97 | ## Setup 98 | 99 | ```bash 100 | git clone https://github.com/paulpierre/RasaGPT.git 101 | cd RasaGPT 102 | cp .env-example .env 103 | 104 | # Edit your .env file and add all the necessary credentials 105 | 106 | ``` 107 | 108 |
109 | 110 | 111 | At any point feel free to just type in `make` and it will display the list of options, mostly useful for debugging: 112 | 113 |
114 | 115 | 116 | ![Makefile main](https://github.com/paulpierre/RasaGPT/blob/main/github/makefile-1.png?raw=true) 117 | 118 |
119 | 120 | ## Docker-compose 121 | 122 | ```bash 123 | make install 124 | 125 | # This will automatically install and run RasaGPT 126 | # After installation, to run again you can simply run 127 | 128 | make run 129 | ``` 130 |
131 | 132 | ## Local Python Environment 133 | 134 | This is useful if you wish to focus on developing on top of the API; a separate `Makefile` was made for this purpose. It will create a local virtual environment for you. 135 | 136 | ```bash 137 | # Assuming you are already in the RasaGPT directory 138 | cd app/api 139 | make install 140 | 141 | # This will automatically set up the local API environment 142 | # After installation, to run again you can simply run 143 | 144 | make run 145 | ``` 146 |
147 | 148 | Similarly, enter `make` to see a full list of commands 149 | 150 | ![Makefile API](https://github.com/paulpierre/RasaGPT/blob/main/github/makefile-2.png?raw=true) 151 | 152 |
153 | 154 | ## Installation process 155 | 156 | Installation is automated and should look like this: 157 | 158 | ![Installation](https://github.com/paulpierre/RasaGPT/blob/main/github/cli_install.png?raw=true) 159 | 160 | 👉 Full installation log: [https://app.warp.dev/block/vflua6Eue29EPk8EVvW8Kd](https://app.warp.dev/block/vflua6Eue29EPk8EVvW8Kd) 161 | 162 |
163 | 164 | The installation process for Docker takes the following steps at a high level 165 | 166 | 1. Check to make sure you have `.env` available 167 | 2. Database is initialized with `pgvector` 168 | 3. Database models create the database schema 169 | 4. Trains the Rasa model so it is ready to run 170 | 5. Sets up ngrok with Rasa so Telegram has a webhook back to your API server 171 | 6. Sets up the Rasa actions server so Rasa can talk to the RasaGPT API 172 | 7. Database is populated with dummy data via `seed.py` 173 | 174 |

175 | 176 | # ☑️ Next steps 177 |
178 | 179 | ## 💬 Start chatting 180 | 181 | You can start chatting with your bot by visiting https://t.me/yourbotname 182 | 183 | ![Telegram](https://github.com/paulpierre/RasaGPT/blob/main/github/telegram.png?raw=true) 184 | 185 |

186 | 187 | ## 👀 View logs 188 | 189 | You can view all of the logs by visiting [http://localhost:9999/](http://localhost:9999/), which displays real-time logs of all the docker containers 190 | 191 | ![Dozzle](https://github.com/paulpierre/RasaGPT/blob/main/github/container_logs.png?raw=true) 192 | 193 |

194 | 195 | ## 📖 API documentation 196 | 197 | View the API endpoint docs by visiting [http://localhost:8888/docs](http://localhost:8888/docs) 198 | 199 | ![Swagger Docs](https://github.com/paulpierre/RasaGPT/blob/main/github/api.png?raw=true) 200 | 201 |

202 | 203 | # ✏️ Examples 204 | 205 | The bot is just a proof-of-concept and has not been optimized for retrieval. It currently uses 1000-character chunks for indexing and basic euclidean distance for retrieval, so quality is hit or miss. 206 | 207 | You can view example hits and misses with the bot in the [RESULTS.MD](https://github.com/paulpierre/RasaGPT/blob/main/RESULTS.md) file. Overall I estimate index optimization and LLM configuration changes can increase output quality by more than 70%. 208 | 209 | 👉 Click the link above to see the Q&A results of the demo data in RESULTS.MD 210 | 211 |
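To make the chunking behavior concrete, here is a minimal sketch of 1000-character chunks with a 200-character overlap, matching `LLM_CHUNK_SIZE` and `LLM_CHUNK_OVERLAP` in `.env-example`. It is illustrative only: the splitter class and file path below are assumptions, not the repo's actual splitting code.

```python
# Illustrative sketch mirroring LLM_CHUNK_SIZE=1000 / LLM_CHUNK_OVERLAP=200
# from .env-example; the repo's real splitting logic lives in the API code.
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # max characters per chunk ("node")
    chunk_overlap=200,  # tail shared between neighboring chunks
)

with open("app/api/data/training_data/project-kekzal.md") as f:
    chunks = splitter.split_text(f.read())

# Each chunk becomes a node with its own embedding. A hard character cut
# is why answers like the Kekzal precautions can end mid-word ("roman..")
print(len(chunks), repr(chunks[0][-80:]))
```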

212 | 213 | # 💻 API Architecture and Usage 214 | 215 | The REST API is straightforward; please visit the documentation 👉 http://localhost:8888/docs 216 | 217 | The entities below have basic CRUD operations and return JSON 218 | 219 |

220 | 221 | ## Organization 222 | 223 | This can be thought of as a company that is your client in a SaaS / multi-tenant world. By default a list of dummy organizations have been provided 224 | 225 | ![Screenshot 2023-05-05 at 8.45.28 AM.png](https://github.com/paulpierre/RasaGPT/blob/main/github/orgs.png?raw=true) 226 | 227 | ```bash 228 | [ 229 | { 230 | "id": 1, 231 | "uuid": "d2a642e6-c81a-4a43-83e2-22cee3562452", 232 | "display_name": "Pepe Corp.", 233 | "namespace": "pepe", 234 | "bot_url": null, 235 | "created_at": "2023-05-05T10:42:45.933976", 236 | "updated_at": "2023-05-05T10:42:45.933979" 237 | }, 238 | { 239 | "id": 2, 240 | "uuid": "7d574f88-6c0b-4c1f-9368-367956b0e90f", 241 | "display_name": "Umbrella Corp", 242 | "namespace": "acme", 243 | "bot_url": null, 244 | "created_at": "2023-05-05T10:43:03.555484", 245 | "updated_at": "2023-05-05T10:43:03.555488" 246 | }, 247 | { 248 | "id": 3, 249 | "uuid": "65105a15-2ef0-4898-ac7a-8eafee0b283d", 250 | "display_name": "Cyberdine Systems", 251 | "namespace": "cyberdine", 252 | "bot_url": null, 253 | "created_at": "2023-05-05T10:43:04.175424", 254 | "updated_at": "2023-05-05T10:43:04.175428" 255 | }, 256 | { 257 | "id": 4, 258 | "uuid": "b7fb966d-7845-4581-a537-818da62645b5", 259 | "display_name": "Bluth Companies", 260 | "namespace": "bluth", 261 | "bot_url": null, 262 | "created_at": "2023-05-05T10:43:04.697801", 263 | "updated_at": "2023-05-05T10:43:04.697804" 264 | }, 265 | { 266 | "id": 5, 267 | "uuid": "9283d017-b24b-4ecd-bf35-808b45e258cf", 268 | "display_name": "Evil Corp", 269 | "namespace": "evil", 270 | "bot_url": null, 271 | "created_at": "2023-05-05T10:43:05.102546", 272 | "updated_at": "2023-05-05T10:43:05.102549" 273 | } 274 | ] 275 | ``` 276 | 277 |
278 | 279 | ### Project 280 | 281 | This can be thought of as a product that belongs to a company. You can view the list of projects that belong to an organizations like so: 282 | 283 | ![org-projects.png](https://github.com/paulpierre/RasaGPT/blob/main/github/org-projects.png?raw=true) 284 | 285 | ```bash 286 | [ 287 | { 288 | "id": 1, 289 | "documents": [ 290 | { 291 | "id": 1, 292 | "uuid": "92604623-e37c-4935-bf08-0e9efa8b62f7", 293 | "display_name": "project-pepetamine.md", 294 | "node_count": 3 295 | } 296 | ], 297 | "document_count": 1, 298 | "uuid": "44a4b60b-9280-4b21-a676-00612be9aa87", 299 | "display_name": "Pepetamine", 300 | "created_at": "2023-05-05T10:42:46.060930", 301 | "updated_at": "2023-05-05T10:42:46.060934" 302 | }, 303 | { 304 | "id": 2, 305 | "documents": [ 306 | { 307 | "id": 2, 308 | "uuid": "b408595a-3426-4011-9b9b-8e260b244f74", 309 | "display_name": "project-frogonil.md", 310 | "node_count": 3 311 | } 312 | ], 313 | "document_count": 1, 314 | "uuid": "5ba6b812-de37-451d-83a3-8ccccadabd69", 315 | "display_name": "Frogonil", 316 | "created_at": "2023-05-05T10:42:48.043936", 317 | "updated_at": "2023-05-05T10:42:48.043940" 318 | }, 319 | { 320 | "id": 3, 321 | "documents": [ 322 | { 323 | "id": 3, 324 | "uuid": "b99d373a-3317-4699-a89e-90897ba00db6", 325 | "display_name": "project-kekzal.md", 326 | "node_count": 3 327 | } 328 | ], 329 | "document_count": 1, 330 | "uuid": "1be4360c-f06e-4494-bf20-e7c73a56f003", 331 | "display_name": "Kekzal", 332 | "created_at": "2023-05-05T10:42:49.092675", 333 | "updated_at": "2023-05-05T10:42:49.092678" 334 | }, 335 | { 336 | "id": 4, 337 | "documents": [ 338 | { 339 | "id": 4, 340 | "uuid": "94da307b-5993-4ddd-a852-3d8c12f95f3f", 341 | "display_name": "project-memetrex.md", 342 | "node_count": 3 343 | } 344 | ], 345 | "document_count": 1, 346 | "uuid": "1fd7e772-365c-451b-a7eb-4d529b0927f0", 347 | "display_name": "Memetrex", 348 | "created_at": "2023-05-05T10:42:50.184817", 349 | "updated_at": "2023-05-05T10:42:50.184821" 350 | }, 351 | { 352 | "id": 5, 353 | "documents": [ 354 | { 355 | "id": 5, 356 | "uuid": "6deff180-3e3e-4b09-ae5a-6502d031914a", 357 | "display_name": "project-pepetrak.md", 358 | "node_count": 4 359 | } 360 | ], 361 | "document_count": 1, 362 | "uuid": "a389eb58-b504-48b4-9bc3-d3c93d2fbeaa", 363 | "display_name": "PepeTrak", 364 | "created_at": "2023-05-05T10:42:51.293352", 365 | "updated_at": "2023-05-05T10:42:51.293355" 366 | }, 367 | { 368 | "id": 6, 369 | "documents": [ 370 | { 371 | "id": 6, 372 | "uuid": "2e3c2155-cafa-4c6b-b7cc-02bb5156715b", 373 | "display_name": "project-memegen.md", 374 | "node_count": 5 375 | } 376 | ], 377 | "document_count": 1, 378 | "uuid": "cec4154f-5d73-41a5-a764-eaf62fc3db2c", 379 | "display_name": "MemeGen", 380 | "created_at": "2023-05-05T10:42:52.562037", 381 | "updated_at": "2023-05-05T10:42:52.562040" 382 | }, 383 | { 384 | "id": 7, 385 | "documents": [ 386 | { 387 | "id": 7, 388 | "uuid": "baabcb6f-e14c-4d59-a019-ce29973b9f5c", 389 | "display_name": "project-neurokek.md", 390 | "node_count": 5 391 | } 392 | ], 393 | "document_count": 1, 394 | "uuid": "4a1a0542-e314-4ae7-9961-720c2d092f04", 395 | "display_name": "Neuro-kek", 396 | "created_at": "2023-05-05T10:42:53.689537", 397 | "updated_at": "2023-05-05T10:42:53.689539" 398 | }, 399 | { 400 | "id": 8, 401 | "documents": [ 402 | { 403 | "id": 8, 404 | "uuid": "5be007ec-5c89-4bc4-8bfd-448a3659c03c", 405 | "display_name": "org-about_the_company.md", 406 | "node_count": 5 407 | }, 408 | { 409 | "id": 9, 410 | "uuid": 
"c2b3fb39-18c0-4f3e-9c21-749b86942cba", 411 | "display_name": "org-board_of_directors.md", 412 | "node_count": 3 413 | }, 414 | { 415 | "id": 10, 416 | "uuid": "41aa81a9-13a9-4527-a439-c2ac0215593f", 417 | "display_name": "org-company_story.md", 418 | "node_count": 4 419 | }, 420 | { 421 | "id": 11, 422 | "uuid": "91c59eb8-8c05-4f1f-b09d-fcd9b44b5a20", 423 | "display_name": "org-corporate_philosophy.md", 424 | "node_count": 4 425 | }, 426 | { 427 | "id": 12, 428 | "uuid": "631fc3a9-7f5f-4415-8283-78ff582be483", 429 | "display_name": "org-customer_support.md", 430 | "node_count": 3 431 | }, 432 | { 433 | "id": 13, 434 | "uuid": "d4c3d3db-6f24-433e-b2aa-52a70a0af976", 435 | "display_name": "org-earnings_fy2023.md", 436 | "node_count": 5 437 | }, 438 | { 439 | "id": 14, 440 | "uuid": "08dd478b-414b-46c4-95c0-4d96e2089e90", 441 | "display_name": "org-management_team.md", 442 | "node_count": 3 443 | } 444 | ], 445 | "document_count": 7, 446 | "uuid": "1d2849b4-2715-4dcf-aa68-090a221942ba", 447 | "display_name": "Pepe Corp. (company)", 448 | "created_at": "2023-05-05T10:42:55.258902", 449 | "updated_at": "2023-05-05T10:42:55.258904" 450 | } 451 | ] 452 | ``` 453 | 454 |
455 | 456 | ## Document 457 | 458 | This can be thought of as an artifact related to a product, like an FAQ page or a PDF with financial statement earnings. You can view all the Documents associated with an Organization’s Project like so: 459 | 460 | ![documents.png](https://github.com/paulpierre/RasaGPT/blob/main/github/documents.png?raw=true) 461 | 462 | ```bash 463 | { 464 | "id": 1, 465 | "uuid": "44a4b60b-9280-4b21-a676-00612be9aa87", 466 | "organization": { 467 | "id": 1, 468 | "uuid": "d2a642e6-c81a-4a43-83e2-22cee3562452", 469 | "display_name": "Pepe Corp.", 470 | "bot_url": null, 471 | "status": 2, 472 | "created_at": "2023-05-05T10:42:45.933976", 473 | "updated_at": "2023-05-05T10:42:45.933979", 474 | "namespace": "pepe" 475 | }, 476 | "document_count": 1, 477 | "documents": [ 478 | { 479 | "id": 1, 480 | "uuid": "92604623-e37c-4935-bf08-0e9efa8b62f7", 481 | "organization_id": 1, 482 | "project_id": 1, 483 | "display_name": "project-pepetamine.md", 484 | "url": "", 485 | "data": "# Pepetamine\n\nProduct Name: Pepetamine\n\nPurpose: Increases cognitive focus just like the Limitless movie\n\n**How to Use**\n\nPepetamine is available in the form of rare Pepe-coated tablets. The recommended dosage is one tablet per day, taken orally with a glass of water, preferably while browsing your favorite meme forum for maximum cognitive enhancement. For optimal results, take Pepetamine 30 minutes before engaging in mentally demanding tasks, such as decoding ancient Pepe hieroglyphics or creating your next viral meme masterpiece.\n\n**Side Effects**\n\nSome potential side effects of Pepetamine may include:\n\n1. Uncontrollable laughter and a sudden appreciation for dank memes\n2. An inexplicable desire to collect rare Pepes\n3. Enhanced meme creation skills, potentially leading to internet fame\n4. Temporary green skin pigmentation, resembling the legendary Pepe himself\n5. Spontaneously speaking in \"feels good man\" language\n\nWhile most side effects are generally harmless, consult your memologist if side effects persist or become bothersome.\n\n**Precautions**\n\nBefore taking Pepetamine, please consider the following precautions:\n\n1. Do not use Pepetamine if you have a known allergy to rare Pepes or dank memes.\n2. Pepetamine may not be suitable for individuals with a history of humor deficiency or meme intolerance.\n3. Exercise caution when driving or operating heavy machinery, as Pepetamine may cause sudden fits of laughter or intense meme ideation.\n\n**Interactions**\n\nPepetamine may interact with other substances, including:\n\n1. Normie supplements: Combining Pepetamine with normie supplements may result in meme conflicts and a decreased sense of humor.\n2. Caffeine: The combination of Pepetamine and caffeine may cause an overload of energy, resulting in hyperactive meme creation and potential internet overload.\n\nConsult your memologist if you are taking any other medications or substances to ensure compatibility with Pepetamine.\n\n**Overdose**\n\nIn case of an overdose, symptoms may include:\n\n1. Uncontrollable meme creation\n2. Delusions of grandeur as the ultimate meme lord\n3. Time warps into the world of Pepe\n\nIf you suspect an overdose, contact your local meme emergency service or visit the nearest meme treatment facility. 
Remember, the key to enjoying Pepetamine is to use it responsibly, and always keep in mind the wise words of our legendary Pepe: \"Feels good man.\"", 486 | "hash": "fdee6da2b5441080dd78e7850d3d2e1403bae71b9e0526b9dcae4c0782d95a78", 487 | "version": 1, 488 | "status": 2, 489 | "created_at": "2023-05-05T10:42:46.755428", 490 | "updated_at": "2023-05-05T10:42:46.755431" 491 | } 492 | ], 493 | "display_name": "Pepetamine", 494 | "created_at": "2023-05-05T10:42:46.060930", 495 | "updated_at": "2023-05-05T10:42:46.060934" 496 | } 497 | ``` 498 | 499 |
500 | 501 | ## Node 502 | 503 | Although this is not exposed in the API, a node is a chunk of a document for which embeddings get generated. Nodes are used for retrieval search as well as context injection. A node belongs to a document. 504 | 505 |
506 | 507 | ## User 508 | 509 | A user represents the person talking to a bot. Users do not necessarily belong to an org or product, but this relationship is captured in ChatSession below. 510 | 511 |
512 | 513 | ## ChatSession 514 | 515 | Not exposed via the API, but this represents a question and answer between the User and a bot. Each of these objects can be flexibly identified by a `session_id`, which gets automatically generated. Chat Sessions contain rich metadata that can be used for training and optimization. ChatSessions via the `/chat` endpoint ARE in fact associated with an organization (for multi-tenant security purposes) 516 | 517 |
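For orientation, a round-trip against `/chat` might look like the sketch below. Treat it as hypothetical: the endpoint exists, but the request and response fields shown here are assumptions, so check the Swagger docs for the real schema.

```python
# Hypothetical sketch: the /chat endpoint exists, but the field names in
# this payload/response are assumptions; see http://localhost:8888/docs
# for the real schema.
import requests

payload = {
    "text": "What are the side effects of Kekzal?",
    "session_id": None,  # omit on the first turn; reuse the returned one after
}
resp = requests.post("http://localhost:8888/chat", json=payload, timeout=30)
print(resp.json())  # response text, generated tags, escalation flag, etc.
```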

518 | 519 | # **📚 How it works** 520 | 521 |
522 | 523 | ## Rasa 524 | 525 | 1. Rasa handles integration with the communication channel, in this case Telegram. 526 | - It specifically handles submitting the target webhook that user feedback should go through. In our case it is our FastAPI server via `/webhooks/{channel}/webhook` 527 | 2. Rasa has two components, the core [Rasa app](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa) and a Rasa [actions server](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa/actions) that runs separately 528 | 3. Rasa must be configured (done already) via a few YAML files: 529 | - [config.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/config.yml) - contains the NLU pipeline and policy configuration. What matters is setting the `FallbackClassifier` threshold 530 | - [credentials.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/credentials.yml) - contains the path to our webhook and Telegram credentials. This will get updated by the helper service `rasa-credentials` via [update_credentials.py](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/update_credentials.py) 531 | - [domain.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/domain.yml) - This contains the chat entrypoint logic configuration, like an intent and the action to take against the intent. Here we add the `action_gpt_fallback` action, which will trigger our [actions server](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa/actions) 532 | - [endpoints.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/endpoints.yml) - This is where we set the custom action end-point for Rasa to trigger our fallback 533 | - [nlu.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/nlu.yml) - this is where we set our intent `out_of_scope` 534 | - [rules.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/rules.yml) - we set a rule for this intent so that it triggers the action `action_gpt_fallback` 535 | - [actions.py](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/actions/actions.py) - this is where we define and express our action via the `ActionGPTFallback` class. Its `name` method returns the action we defined for our intent above 536 | 4. Rasa's NLU models must be trained, which can be done via the CLI with `rasa train`. This is done automatically for you when you run `make install` 537 | 5. Rasa's core must be run via `rasa run` after training 538 | 6. Rasa's actions server must be run separately with `rasa run actions` 539 | 540 |
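For reference, the general shape of such a fallback action in the Rasa SDK looks like this. It is a trimmed illustration; see `actions.py` for the real `ActionGPTFallback`, and note the API URL and response field below are assumptions.

```python
# Trimmed illustration of a Rasa custom fallback action. The real
# implementation lives in app/rasa/actions/actions.py; the endpoint URL
# and "response" field below are assumptions.
from typing import Any, Dict, List, Text

import requests
from rasa_sdk import Action, Tracker
from rasa_sdk.executor import CollectingDispatcher


class ActionGPTFallback(Action):
    def name(self) -> Text:
        # Must match the action name registered in domain.yml / rules.yml
        return "action_gpt_fallback"

    def run(
        self,
        dispatcher: CollectingDispatcher,
        tracker: Tracker,
        domain: Dict[Text, Any],
    ) -> List[Dict[Text, Any]]:
        question = tracker.latest_message.get("text")
        # Hand the unmatched question to the RasaGPT API for retrieval + LLM
        resp = requests.post("http://api:8888/chat", json={"text": question})
        dispatcher.utter_message(text=resp.json().get("response", ""))
        return []
```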
541 | 542 | ## Telegram 543 | 544 | 1. Rasa automatically updates the Telegram Bot API with your callback webhook from [credentials.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/credentials.yml). 545 | 2. By default this is static. Since we are running on our local machine, we leverage [Ngrok](https://ngrok.com/) to generate a publicly accessible URL and reverse tunnel into our docker container 546 | 3. The `rasa-credentials` service takes care of this process for you. Ngrok runs as a service; once it is ready, `rasa-credentials` calls the local ngrok API to retrieve the tunnel URL, updates the `credentials.yml` file and restarts Rasa for you 547 | 4. The webhook Telegram will send messages to is our FastAPI server. Why this instead of Rasa? Because we want the flexibility to capture metadata, which Rasa makes a PITA, and centralizing on the API server is ideal 548 | 5. The FastAPI server forwards this to the Rasa webhook 549 | 6. Rasa will then determine what action to take based on the user intent. Since the intents have been nerfed for this demo, it will go to the fallback action running in `actions.py` 550 | 7. The custom action will capture the metadata and forward the response from FastAPI to the user 551 | 552 |
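Steps 4 and 5 boil down to a pass-through route like the minimal sketch below (the real handler lives in the API server and also persists metadata; the handler body here is an assumption).

```python
# Minimal sketch of the webhook pass-through idea; the real handler in
# app/api/main.py also captures session/user/org metadata before forwarding.
import httpx
from fastapi import FastAPI, Request

app = FastAPI()
RASA_WEBHOOK_URL = "http://rasa-core:5005"  # RASA_WEBHOOK_HOST/PORT from .env

@app.post("/webhooks/{channel}/webhook")
async def channel_webhook(channel: str, request: Request):
    body = await request.json()
    # ... capture metadata here before handing off to Rasa ...
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{RASA_WEBHOOK_URL}/webhooks/{channel}/webhook", json=body
        )
    return resp.json()
```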
553 | 554 | ## PGVector 555 | 556 | `pgvector` is a plugin for Postgres and automatically installed enabling your to store and calculate vector data types. We have our own implementation because the Langchain PGVector class is not flexible to adapt to our schema and we want flexibility. 557 | 558 | 1. By default in postgres, any files in the container's path `/docker-entry-initdb.d` get run if the database has not been initialized. In the [postgres Dockerfile](https://github.com/paulpierre/RasaGPT/blob/main/app/db/Dockerfile) we copy `[create_db.sh` which creates](https://github.com/paulpierre/RasaGPT/blob/main/app/db/create_db.sh) the db and user for our database 559 | 2. In the `[models` command](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/Makefile#L64) in the [Makefile](https://github.com/paulpierre/RasaGPT/blob/main/Makefile), we run the [models.py](https://github.com/paulpierre/RasaGPT/blob/main/app/api/models.py) in the API container which creates the tables from the models. 560 | 3. The `[enable_vector` method](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/models.py#L266) enables the pgvector extension in the database 561 | 562 |
563 | 564 | ## Langchain 565 | 566 | 1. The training data gets loaded in the database 567 | 2. The data is indexed [if the index doesn't exist](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/main.py#L49) and [stored in a file named `index.json`](https://github.com/paulpierre/RasaGPT/blob/main/app/api/index.json) 568 | 3. LlamaIndex uses a basic `GPTSimpleVectorIndex` to find the relevant data and [injects it into a prompt](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/main.py#L66). 569 | 4. Guard rails via prompts are used to keep the conversation focused 570 | 571 |
572 | 573 | ## Bot flow 574 | 575 | 1. The user will chat in Telegram and the message will be filtered for [existing intents](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/nlu.yml) 576 | 2. If it detects there is no intent match but instead matches the `out_of_scope`, [based on rules.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/rules.yml) it will trigger the `action_gpt_fallback` action 577 | 3. The `[ActionGPTFallback` function](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/actions/actions.py) will then call the [FastAPI API server](https://github.com/paulpierre/RasaGPT/blob/main/app/api/main.py) 578 | 4. the API using LlamaIndex will find the relevant indexed content and inject it into a prompt to send to OpenAI for inference 579 | 5. The prompt contains conversational guardrails including: 580 | - Requests data be returned in JSON 581 | - Create categorical tags based on what the user's question 582 | - Return a boolean if the conversation should be escalated to a human (if there is no context match) 583 | 584 |

585 | 586 | 587 | # 📝 TODO 588 | 589 | - [ ] Implement LlamaIndex optimizations 590 | - [ ] Implement chat history 591 | - [ ] Implement [Query Routers Abstractions](https://medium.com/@jerryjliu98/unifying-llm-powered-qa-techniques-with-routing-abstractions-438e2499a0d0) to understand which search strategy to use (one-shot vs few-shot) 592 | - [ ] Explore other indexing methods like Tree indexes, Keyword indexes 593 | - [ ] Add chat history for immediate recall and context setting 594 | - [ ] Add a secondary adversarial agent ([Dual pattern model](https://simonwillison.net/2023/Apr/25/dual-llm-pattern/)) with the following potential functionalities: 595 | - [ ] Determine if the question has been answered and if not, re-optimize search strategy 596 | - [ ] Ensure prompt injection is not occurring 597 | - [ ] Increase baseline similarity search by exploring: 598 | - [ ] Regularly generate “fake” document embeddings based on historical queries and link to actual documents via [HyDE pattern](https://wfhbrian.com/revolutionizing-search-how-hypothetical-document-embeddings-hyde-can-save-time-and-increase-productivity/) 599 | - [ ] Regularly generate “fake” user queries based on documents and link to actual document so user input search and “fake” queries can match better 600 | 601 | 602 |
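As a concrete sketch of the HyDE item above, using the pre-1.0 `openai` SDK; the step that links the fake embedding back to real documents is app-specific and only outlined in a comment:

```python
# Rough HyDE sketch with the pre-1.0 openai SDK (module-level calls).
import openai

question = "Who is Dr. Olivia Dankstein?"

# 1. Let the LLM hallucinate a plausible ("fake") answer document
fake_doc = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": f"Write a short passage answering: {question}"}],
)["choices"][0]["message"]["content"]

# 2. Embed the fake document instead of the raw question
embedding = openai.Embedding.create(
    model="text-embedding-ada-002", input=fake_doc
)["data"][0]["embedding"]

# 3. Run the usual pgvector search with this embedding; hypothetical
#    answers tend to land closer to real answer passages than short
#    questions do.
```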

603 | 604 | # 🔍 Troubleshooting 605 | 606 | In general, check your docker container logs by simply going to 👉 http://localhost:9999/ 607 | 608 |
609 | 610 | ## Ngrok issues 611 | 612 | Always check that your webhooks with ngrok and Telegram match. Simply do this by running (substitute your bot token for `<TELEGRAM_ACCESS_TOKEN>`): 613 | 614 | ```bash 615 | curl -sS "https://api.telegram.org/bot<TELEGRAM_ACCESS_TOKEN>/getWebhookInfo" | json_pp 616 | ``` 617 | 618 |
619 | 620 | .. should return this: 621 | 622 | ```bash 623 | { 624 | "ok": true, 625 | "result": { 626 | "url": "https://b280-04-115-40-112.ngrok-free.app/webhooks/telegram/webhook", 627 | "has_custom_certificate": false, 628 | "pending_update_count": 0, 629 | "max_connections": 40, 630 | "ip_address": "1.2.3.4" 631 | } 632 | } 633 | ``` 634 | 635 |
636 | 637 | .. which should match the URL in your `credentials.yml` file or visit the Ngrok admin UI 👉 [http://localhost:4040/status](http://localhost:4040/status) 638 | 639 | ![ngrok-admin.png](https://github.com/paulpierre/RasaGPT/blob/main/github/ngrok-admin.png?raw=true) 640 | 641 |
642 | 643 | Looks like it is a match. If not, restart everything by running: 644 | 645 | ```bash 646 | make restart 647 | ``` 648 | 649 |

650 | 651 | # 💪 Contributing / Issues 652 | 653 | - Pull requests welcome 654 | - Please submit issues via GitHub, I will do my best to resolve them 655 | - If you want to get in touch, feel free to hmu on Twitter via [@paulpierre](https://twitter.com/paulpierre) 656 | 657 |

658 | 659 | > ![thumbsup](https://camo.githubusercontent.com/bcb43227c1e90a1d27996eb75ac794bbf20d1355b36d0e9eaa71c71ad4dd2a56/68747470733a2f2f6d65646961342e67697068792e636f6d2f6d656469612f313149537762674378457a4d79592f67697068792e6769663f6369643d65636630356534376664703164727a72766178733175787532666269376f72316e68626f6d39326d30346436306e786b2665703d76315f676966735f72656c61746564267269643d67697068792e6769662663743d67) 660 | >
Congratulations, all your base are belong to us! kthxbye 661 | 662 |

663 | 664 | # 📜 Open source license 665 | 666 | Copyright (c) 2023 Paul Pierre. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /app/api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | # Set the path to the API directory 4 | WORKDIR /app/api 5 | 6 | # Copy the codebase into the container 7 | COPY . . 8 | 9 | # Install the requirements 10 | RUN pip install --no-cache-dir -r requirements.txt 11 | 12 | # Expose the port 13 | EXPOSE 8888 14 | 15 | ENTRYPOINT ["uvicorn", "main:app", "--host", "api", "--port", "8888", "--reload"] -------------------------------------------------------------------------------- /app/api/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default banner install install-seed seed run stop db-purge api-install env-create env db db-stop api api-stop 2 | SHELL := /bin/bash 3 | default: help 4 | 5 | help: 6 | make banner 7 | @echo "+------------------+" 8 | @echo "| 🏠 CORE COMMANDS |" 9 | @echo "+------------------+" 10 | @echo "make install - Setup environment and build models" 11 | @echo "make install-seed - Setup environment, build models and seed database" 12 | @echo "make seed - Seed database with dummy data" 13 | @echo "make run - Run database and API server" 14 | @echo "make stop - Stop database and API server" 15 | @echo "make db-purge - Delete all data in database\n" 16 | 17 | banner: 18 | @echo "\n\n-------------------------------------" 19 | @echo "▒█▀▀█ █▀▀█ █▀▀ █▀▀█ ▒█▀▀█ ▒█▀▀█ ▀▀█▀▀" 20 | @echo "▒█▄▄▀ █▄▄█ ▀▀█ █▄▄█ ▒█░▄▄ ▒█▄▄█ ░▒█░░" 21 | @echo "▒█░▒█ ▀░░▀ ▀▀▀ ▀░░▀ ▒█▄▄█ ▒█░░░ ░▒█░░" 22 | @echo "+-----------------------------------+" 23 | @echo "| 🤖 FastAPI LLM server |" 24 | @echo "+-----------------------------------+\n\n" 25 | 26 | # ================ 27 | # 🏠 CORE COMMANDS 28 | # ================ 29 | 30 | # ---------------------------------- 31 | # Setup environment and build models 32 | # ---------------------------------- 33 | install: 34 | @make banner 35 | @make api-install 36 | @make db-stop 37 | @echo "✅ Installation complete. Run 'make run' to start services.\n" 38 | 39 | # ---------------------------------------------- 40 | # Setup environment and build models and seed DB 41 | # ---------------------------------------------- 42 | install-seed: 43 | @make banner 44 | @make api-install 45 | @make seed 46 | @make db-stop 47 | @echo "✅ Installation complete. 
Run 'make run' to start services.\n" 48 | 49 | 50 | seed: 51 | @make banner 52 | @echo "🌱 Seeding database ..\n" 53 | @python3 seed.py 54 | 55 | # --------------------------- 56 | # Run database and API server 57 | # --------------------------- 58 | run: 59 | @make banner 60 | @make env 61 | @echo "🚀 Starting services ..\n" 62 | @make db 63 | @make api 64 | 65 | # ---------------------------- 66 | # Stop database and API server 67 | # ---------------------------- 68 | stop: 69 | @make banner 70 | @echo "🛑 Stopping services ..\n" 71 | @make db-stop 72 | @make api-stop 73 | 74 | # --------------------------- 75 | # Delete all data in database 76 | # --------------------------- 77 | db-purge: 78 | @echo "⛔ Are you sure you want to delete all data in the database? [y/N]\n" 79 | @read confirmation; \ 80 | if [ "$$confirmation" = "y" ] || [ "$$confirmation" = "Y" ]; then \ 81 | echo "Deleting generated files..."; \ 82 | make db-stop; \ 83 | rm -rf ../../mnt; \ 84 | echo "Deleted."; \ 85 | else \ 86 | echo "Aborted."; \ 87 | fi 88 | 89 | # ================== 90 | # 💁 HELPER COMMANDS 91 | # ================== 92 | 93 | # -------------- 94 | # Install helper 95 | # -------------- 96 | api-install: 97 | @echo "🏗️ Installing services ..\n" 98 | @make env-create 99 | @make db-stop 100 | @make db 101 | @make models 102 | 103 | # ------------------- 104 | # Create a virtualenv 105 | # ------------------- 106 | env-create: 107 | @echo "Creating virtual environment .." 108 | @if [ -d "./venv" ]; then \ 109 | echo "Environment and dependencies created already, loading .."; \ 110 | source ./venv/bin/activate; \ 111 | else \ 112 | echo "Install dependencies .."; \ 113 | python3 -m venv venv; \ 114 | . venv/bin/activate; \ 115 | pip3 install -r requirements.txt; \ 116 | fi 117 | 118 | # --------------------------- 119 | # Load an existing virtualenv 120 | # --------------------------- 121 | env: 122 | @echo "Loading virtual environment if it exists .." 123 | @if [ -d "./venv" ]; then \ 124 | source ./venv/bin/activate; \ 125 | fi 126 | 127 | # ----------------- 128 | # Start Postgres DB 129 | # ----------------- 130 | db: 131 | @echo "Starting Postgres with pgvector .." 132 | @cd ../../ && docker-compose -f docker-compose.yml up -d db 133 | 134 | # ----------------- 135 | # Stop Postgres DB 136 | # ----------------- 137 | db-stop: 138 | @cd ../../ && docker-compose -f docker-compose.yml stop db 139 | 140 | # ------------ 141 | # Build models 142 | # ------------ 143 | models: 144 | @echo "💽 Building models in database .." 145 | @sleep 60 && python3 models.py 146 | 147 | # ----------------- 148 | # Start FastAPI API 149 | # ----------------- 150 | api: 151 | @echo "⚡ Starting FastAPI API server .." 152 | @sleep 5 && uvicorn main:app --port 8888 --reload 153 | 154 | # ----------------- 155 | # Stop FastAPI API 156 | # ----------------- 157 | api-stop: 158 | @echo "🛑 Stopping FastAPI server .."
159 | @killall uvicorn 160 | -------------------------------------------------------------------------------- /app/api/config.py: -------------------------------------------------------------------------------- 1 | from enum import Enum, IntEnum 2 | from dotenv import load_dotenv 3 | from pathlib import Path 4 | import logging 5 | import sys 6 | import os 7 | 8 | # ------- 9 | # Logging 10 | # ------- 11 | 12 | logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) 13 | # basicConfig above already attaches a stdout handler; adding a second StreamHandler would print every record twice 14 | logger = logging.getLogger(__name__) 15 | 16 | # ---------------- 17 | # Environment vars 18 | # ---------------- 19 | env = os.getenv("ENV", None) 20 | 21 | if not env: 22 | # Load the project-root .env file (three levels up from this file) 23 | env_file = Path(__file__).parent.parent.parent / '.env' 24 | logger.debug(f"Loading env file: {env_file}") 25 | if os.path.exists(env_file): 26 | load_dotenv(dotenv_path=env_file) 27 | else: 28 | raise Exception(f"Env file {env_file} not found") 29 | 30 | # ----------------------- 31 | # Configuration constants 32 | # ----------------------- 33 | readme_file = Path(__file__).parent / "API.md" 34 | 35 | readme_str = ( 36 | f""" 37 |
38 | <details><summary>📕 API.MD</summary> 39 | {readme_file.read_text()} 40 | </details>
41 | 42 | 43 | """ 44 | if readme_file.exists() 45 | else "" 46 | ) 47 | APP_NAME = "API Documentation" 48 | APP_VERSION = "0.0.1" 49 | APP_DESCRIPTION = f""" 50 | ![img](/static/img/rasagpt-logo-1.png) 51 | 52 | --- 53 | ## About 54 | 💬 RasaGPT is the first headless LLM chatbot platform built on top of Rasa and Langchain 55 | 56 | - 📚 Resources: [https://rasagpt.dev](https://rasagpt.dev) 57 | - 🧑‍💻 Github: [https://github.com/paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) 58 | - 🧙 Author: [@paulpierre](https://twitter.com/paulpierre) 59 | 60 | {readme_str} 61 | """ 62 | APP_ICON = "/public/img/rasagpt-icon-200x200.png" 63 | APP_LOGO = "/public/img/rasagpt-logo-1.png" 64 | 65 | FILE_UPLOAD_PATH = os.getenv("FILE_UPLOAD_PATH", "/tmp") 66 | 67 | # Database configurations 68 | POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres") 69 | POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "postgres") 70 | DB_HOST = os.getenv("DB_HOST", "localhost") 71 | DB_PORT = os.getenv("DB_PORT", 5432) 72 | DB_USER = os.getenv("DB_USER") 73 | DB_NAME = os.getenv("DB_NAME") 74 | DB_PASSWORD = os.getenv("DB_PASSWORD") 75 | DSN = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" 76 | SU_DSN = ( 77 | f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" 78 | ) 79 | 80 | RASA_WEBHOOK_HOST = os.getenv("RASA_WEBHOOK_HOST", "rasa-core") 81 | RASA_WEBHOOK_PORT = os.getenv("RASA_WEBHOOK_PORT", 5005) 82 | RASA_WEBHOOK_URL = f"http://{RASA_WEBHOOK_HOST}:{RASA_WEBHOOK_PORT}" 83 | 84 | # LLM configurations 85 | MODEL_NAME = os.getenv("MODEL_NAME") 86 | LLM_DEFAULT_TEMPERATURE = float(os.getenv("LLM_DEFAULT_TEMPERATURE", 0.0)) 87 | LLM_CHUNK_SIZE = int(os.getenv("LLM_CHUNK_SIZE", 512)) 88 | LLM_CHUNK_OVERLAP = int(os.getenv("LLM_CHUNK_OVERLAP", 20)) 89 | LLM_DISTANCE_THRESHOLD = float(os.getenv("LLM_DISTANCE_THRESHOLD", 0.5)) 90 | LLM_MAX_OUTPUT_TOKENS = int(os.getenv("LLM_MAX_OUTPUT_TOKENS", 256)) 91 | LLM_MIN_NODE_LIMIT = int(os.getenv("LLM_MIN_NODE_LIMIT", 3)) 92 | 93 | 94 | class DISTANCE_STRATEGY(Enum): 95 | COSINE = "cosine" 96 | EUCLIDEAN = "euclidean" 97 | MAX_INNER_PRODUCT = "max_inner_product" 98 | 99 | def __new__(cls, strategy_name: str): 100 | obj = object.__new__(cls) 101 | obj._value_ = strategy_name 102 | return obj 103 | 104 | @property 105 | def strategy_name(self) -> str: 106 | return self.value 107 | 108 | 109 | DISTANCE_STRATEGIES = [ 110 | ( 111 | DISTANCE_STRATEGY.EUCLIDEAN, 112 | "euclidean", 113 | "<->", 114 | "CREATE INDEX ON node USING ivfflat (embeddings vector_l2_ops) WITH (lists = 100);", 115 | ), 116 | ( 117 | DISTANCE_STRATEGY.COSINE, 118 | "cosine", 119 | "<=>", 120 | "CREATE INDEX ON node USING ivfflat (embeddings vector_cosine_ops) WITH (lists = 100);", 121 | ), 122 | ( 123 | DISTANCE_STRATEGY.MAX_INNER_PRODUCT, 124 | "max_inner_product", 125 | "<#>", 126 | "CREATE INDEX ON node USING ivfflat (embeddings vector_ip_ops) WITH (lists = 100);", 127 | ), 128 | ] 129 | LLM_DEFAULT_DISTANCE_STRATEGY = DISTANCE_STRATEGY[ 130 | os.getenv("LLM_DEFAULT_DISTANCE_STRATEGY", "COSINE") 131 | ] 132 | VECTOR_EMBEDDINGS_COUNT = 1536 133 | PGVECTOR_ADD_INDEX = os.getenv("PGVECTOR_ADD_INDEX", "false").lower() in ("true", "1") 134 | # Model constants 135 | 136 | DOCUMENT_TYPE = IntEnum("DOCUMENT_TYPE", ["PLAINTEXT", "MARKDOWN", "HTML", "PDF"]) 137 | 138 | ENTITY_STATUS = IntEnum( 139 | "ENTITY_STATUS", 140 | ["UNVERIFIED", "ACTIVE", "INACTIVE", "DELETED", "BANNED", "DEPRECATED"], 141 | ) 142 | CHANNEL_TYPE = IntEnum( 143 | "CHANNEL_TYPE", ["SMS", "TELEGRAM",
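# Illustrative sketch (assumption, not part of the original module): each tuple
# in DISTANCE_STRATEGIES above pairs a pgvector operator with the ivfflat index
# that accelerates it. With the EUCLIDEAN strategy, a nearest-neighbor lookup
# over the node table would read:
#   SELECT text FROM node
#   ORDER BY embeddings <-> '[0.018, -0.004, ...]'::vector  -- full 1536-dim literal
#   LIMIT 3;
# The cosine ("<=>") and inner-product ("<#>") operators map to the other two
# CREATE INDEX statements in the same way.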
"WHATSAPP", "EMAIL", "WEBSITE"] 144 | ) 145 | 146 | AGENT_NAMES = [ 147 | "Aisha", 148 | "Lilly", 149 | "Hanna", 150 | "Julia", 151 | "Emily", 152 | "Sophia", 153 | "Alex", 154 | "Isabella", 155 | ] 156 | 157 | 158 | class LLM_MODELS(Enum): 159 | TEXT_DAVINCI_003 = "text-davinci-003", 4097 160 | GPT_35_TURBO = "gpt-3.5-turbo", 4096 161 | TEXT_DAVINCI_002 = "text-davinci-002", 4097 162 | CODE_DAVINCI_002 = "code-davinci-002", 8001 163 | GPT_4 = "gpt-4", 8192 164 | GPT_4_32K = "gpt-4-32k", 32768 165 | 166 | def __init__(self, model_name, token_limit): 167 | self._model_name = model_name 168 | self._token_limit = token_limit 169 | 170 | @property 171 | def model_name(self) -> str: 172 | return self._model_name 173 | 174 | @property 175 | def token_limit(self) -> int: 176 | return self._token_limit 177 | -------------------------------------------------------------------------------- /app/api/data/training_data/org-about_the_company.md: -------------------------------------------------------------------------------- 1 | # About the company 2 | 3 | Title: About Pepe Corp 4 | 5 | **Introduction** 6 | 7 | Pepe Corp is a groundbreaking parody pharmaceutical and technology company inspired by the iconic Pepe the Frog meme. Our mission is to revolutionize the world through the power of dank memes, laughter, and innovation. By merging the realms of science and meme culture, we create cutting-edge products that entertain, amaze, and bring people together. 8 | 9 | **Our Story** 10 | 11 | Founded in 2021 by a group of visionary memelords, Pepe Corp began as a small startup focused on creating meme-based pharmaceuticals to counter the growing seriousness in the world. Recognizing the untapped potential of meme culture, our founders set out to explore the intersection between humor, science, and technology. 12 | 13 | Over the years, Pepe Corp has grown into a multidisciplinary organization with a diverse portfolio of products, ranging from meme-inspired pharmaceuticals to advanced brain implants and mobile applications. Our products are designed to elevate the human experience by harnessing the power of memes, laughter, and the Pepe spirit. 14 | 15 | **Our Values** 16 | 17 | At Pepe Corp, our values drive everything we do: 18 | 19 | 1. **Innovation**: We are committed to pushing the boundaries of meme science and technology to develop products that challenge conventional wisdom and redefine the limits of possibility. 20 | 2. **Laughter**: We believe that laughter is a universal language that transcends barriers and brings people together. Our products are designed to elicit joy and promote the healing power of humor. 21 | 3. **Community**: We strive to create a global community of meme enthusiasts who share our passion for innovation, laughter, and the Pepe spirit. 22 | 4. **Sustainability**: We are dedicated to operating our business in a responsible and sustainable manner, ensuring the continued availability of dank memes for future generations. 23 | 24 | **Our Products** 25 | 26 | Pepe Corp's diverse product portfolio includes: 27 | 28 | 1. *Meme-inspired Pharmaceuticals*: Our groundbreaking pharmaceuticals blend meme culture with scientific research to provide unique and entertaining experiences for our customers. 29 | 2. *Cutting-Edge Devices*: From brain implants that enable meme-based communication to wearable technology that tracks your meme usage, our devices push the limits of what's possible. 30 | 3. 
*Mobile Applications*: Our multi-platform mobile apps, such as PepeTrak and MemeGen, offer innovative solutions to enhance mental health, monitor meme consumption, and generate personalized meme experiences. 31 | 32 | **Our Future** 33 | 34 | As we look to the future, Pepe Corp is dedicated to exploring new frontiers in meme science and technology. We will continue to develop innovative products that entertain, inspire, and bring people together. By harnessing the power of memes and the Pepe spirit, we aim to make the world a better, funnier place for all. 35 | 36 | Join us on this incredible journey as we redefine the limits of possibility and laughter with Pepe Corp, where the future is dank! -------------------------------------------------------------------------------- /app/api/data/training_data/org-board_of_directors.md: -------------------------------------------------------------------------------- 1 | # Board of Directors 2 | 3 | Title: Board of Directors 4 | 5 | **Introduction** 6 | 7 | At Pepe Corp, our Board of Directors is composed of accomplished leaders with diverse backgrounds and expertise in various fields. They share our passion for innovation, laughter, and the Pepe spirit, and are committed to guiding our company's strategic vision and overseeing its growth. Together, our Board of Directors ensures that Pepe Corp remains at the forefront of meme science and technology, making the world a better, funnier place. 8 | 9 | **Board Members** 10 | 11 | 1. **Felix Memelord**: Chairman of the Board and Co-Founder 12 | Felix is a pioneering memelord with over a decade of experience in meme culture and internet humor. As the co-founder of Pepe Corp, he has been instrumental in shaping the company's strategic direction and ensuring its commitment to innovation, laughter, and the Pepe spirit. 13 | 2. **Dr. Olivia Dankstein**: Vice-Chair and Chief Scientific Officer 14 | Dr. Dankstein is a renowned scientist with extensive experience in neuroscience, psychology, and the science of humor. She is responsible for leading Pepe Corp's research and development efforts, driving the company's pursuit of groundbreaking meme-inspired pharmaceuticals and technologies. 15 | 3. **Mona Laughsalot**: Director and Chief Marketing Officer 16 | Mona is a marketing expert with a background in digital media and consumer engagement. She has been instrumental in building Pepe Corp's global brand presence and creating memorable campaigns that showcase the company's innovative products and commitment to laughter. 17 | 4. **Henry Frogman**: Director and Chief Financial Officer 18 | Henry is a seasoned finance executive with experience in corporate strategy and financial management. He oversees Pepe Corp's financial operations, ensuring the company's long-term sustainability and supporting its growth objectives. 19 | 5. **Iris Memequeen**: Director and Head of Human Resources 20 | Iris is a human resources professional with a passion for fostering inclusive and supportive workplace environments. She is responsible for developing and implementing Pepe Corp's talent management strategies, cultivating a diverse and empowered team that embodies the Pepe spirit. 21 | 6. **Jackie Jokster**: Director and Head of Product Development 22 | Jackie is an accomplished product developer with expertise in software engineering and user experience design. She leads Pepe Corp's product development efforts, ensuring that the company's offerings remain innovative, entertaining, and user-friendly. 23 | 7. 
**Vincent Laugherino**: Director and General Counsel 24 | Vincent is an experienced attorney with a background in intellectual property, corporate law, and regulatory compliance. He advises Pepe Corp's management team and Board of Directors on legal matters, ensuring that the company operates in accordance with applicable laws and regulations. 25 | 26 | **Conclusion** 27 | 28 | The Pepe Corp Board of Directors is a dynamic team of leaders who are dedicated to guiding the company's strategic vision and overseeing its growth. By leveraging their diverse expertise and shared passion for innovation, laughter, and the Pepe spirit, our Board of Directors ensures that Pepe Corp remains a global leader in meme science and technology. -------------------------------------------------------------------------------- /app/api/data/training_data/org-company_story.md: -------------------------------------------------------------------------------- 1 | # Company Story 2 | 3 | Title: Company Story 4 | 5 | **The Genesis of Pepe Corp** 6 | 7 | In the early 21st century, a group of visionary memelords gathered in a dimly lit basement, fueled by their passion for dank memes and the iconic Pepe the Frog. As they contemplated the growing seriousness of the world, they were struck by a sudden realization: the power of memes could be harnessed to create groundbreaking products that would not only entertain but also bring people together. And so, in 2021, Pepe Corp was born. 8 | 9 | **The Early Days: Pioneering Meme-Based Pharmaceuticals** 10 | 11 | The founders of Pepe Corp were determined to explore the untapped potential of meme culture. They began by creating a range of meme-inspired pharmaceuticals, blending cutting-edge scientific research with the universal language of laughter. These innovative products quickly gained a cult following, garnering both amusement and curiosity. 12 | 13 | **Expanding Horizons: Meme-Driven Technology** 14 | 15 | As Pepe Corp's reputation grew, so did its ambitions. The company expanded its focus to include the development of advanced technology inspired by meme culture. From brain implants that enabled meme-based communication to wearable devices that monitored meme consumption, Pepe Corp's inventions captured the imagination of people worldwide. 16 | 17 | **Becoming a Global Meme Phenomenon** 18 | 19 | Pepe Corp's rapid rise to prominence caught the attention of mainstream media, turning the once-underground company into a global meme phenomenon. Their unique blend of humor, science, and technology resonated with audiences across generations and cultures, establishing Pepe Corp as a symbol of innovation and laughter. 20 | 21 | **Building a Meme Empire** 22 | 23 | Today, Pepe Corp is a multidisciplinary organization with a diverse portfolio of products and services. Its offerings include meme-inspired pharmaceuticals, cutting-edge devices, and multi-platform mobile applications designed to enhance mental health, monitor meme consumption, and generate personalized meme experiences. 24 | 25 | Pepe Corp's success is built on its unwavering commitment to innovation, laughter, and community. The company continues to push the boundaries of what's possible in the realm of meme science and technology, with the ultimate goal of making the world a better, funnier place. 26 | 27 | **The Future: Redefining Possibility and Laughter** 28 | 29 | As Pepe Corp looks to the future, it remains dedicated to exploring new frontiers in meme science and technology. 
The company's research and development teams are hard at work, creating the next generation of meme-based products that will entertain, inspire, and bring people together. 30 | 31 | Pepe Corp's journey has only just begun, but its impact is already being felt around the world. As the company continues to redefine the limits of possibility and laughter, one thing is certain: the future is dank, and Pepe Corp is leading the way. -------------------------------------------------------------------------------- /app/api/data/training_data/org-corporate_philosophy.md: -------------------------------------------------------------------------------- 1 | # Corporate philosophy 2 | 3 | Title: Corporate Philosophy 4 | 5 | **Introduction** 6 | 7 | At Pepe Corp, our corporate philosophy is rooted in the belief that humor, innovation, and the power of memes can transform the world. We are dedicated to creating groundbreaking products that entertain, amaze, and unite people in laughter. Our philosophy shapes every aspect of our business, guiding our decisions and inspiring our actions. 8 | 9 | **Our Core Principles** 10 | 11 | 1. **Innovation**: We believe in pushing the boundaries of meme science and technology to develop products that challenge conventional wisdom and redefine the limits of possibility. Our commitment to innovation drives us to explore the untapped potential of meme culture, turning wild ideas into reality. 12 | 2. **Laughter**: We recognize that laughter is a universal language that transcends barriers and brings people together. We are dedicated to creating products that elicit joy, promote the healing power of humor, and spread the spirit of Pepe the Frog throughout the world. 13 | 3. **Community**: We strive to foster a global community of meme enthusiasts who share our passion for innovation, laughter, and the Pepe spirit. By cultivating a supportive and inclusive environment, we empower our team members, customers, and partners to express themselves freely and embrace the power of memes. 14 | 4. **Sustainability**: We understand that our success depends on the well-being of the world around us. We are committed to operating our business in a responsible and sustainable manner, ensuring the continued availability of dank memes for future generations. 15 | 16 | **Our Ethos: The Pepe Spirit** 17 | 18 | The Pepe spirit is at the heart of our corporate philosophy. Inspired by the iconic Pepe the Frog meme, this ethos represents our unwavering commitment to laughter, creativity, and the power of memes. The Pepe spirit guides our actions, influences our decisions, and serves as a constant reminder of our mission to make the world a better, funnier place. 19 | 20 | **Our Commitment** 21 | 22 | As a company, we pledge to uphold our corporate philosophy in every aspect of our business. We are committed to: 23 | 24 | - Continuously exploring new frontiers in meme science and technology 25 | - Developing innovative products that entertain, inspire, and bring people together 26 | - Fostering a diverse and inclusive workplace that values creativity, collaboration, and the Pepe spirit 27 | - Conducting our business with integrity, transparency, and a commitment to sustainability 28 | 29 | **Conclusion** 30 | 31 | At Pepe Corp, our corporate philosophy is more than just a set of guiding principles; it is the very essence of who we are as a company. By embracing the power of memes, laughter, and the Pepe spirit, we aim to transform the world and create a brighter, funnier future for all. 
-------------------------------------------------------------------------------- /app/api/data/training_data/org-customer_support.md: -------------------------------------------------------------------------------- 1 | # Customer support 2 | 3 | Title: Management Team 4 | 5 | **Introduction** 6 | 7 | Pepe Corp's management team is composed of accomplished professionals who bring their unique expertise and passion for the Pepe spirit to the forefront of our operations. This dedicated team of leaders is committed to fostering an innovative, inclusive, and laughter-driven culture while driving the company's strategic objectives and ensuring its continued success. 8 | 9 | **Management Team Members** 10 | 11 | 1. **Amanda Gigglestein**: Chief Executive Officer 12 | Amanda is an experienced executive with a background in technology and entertainment. As CEO, she is responsible for setting Pepe Corp's strategic direction, overseeing its day-to-day operations, and ensuring that the company remains at the forefront of meme science and technology. 13 | 2. **Brandon Memeinator**: Chief Operating Officer 14 | Brandon is a seasoned operations professional with extensive experience in scaling businesses and optimizing processes. As COO, he is responsible for streamlining Pepe Corp's operations, driving efficiency, and supporting the company's growth objectives. 15 | 3. **Dr. Penelope Pepescience**: Chief Research Officer 16 | Dr. Pepescience is a leading expert in the field of meme psychology and neuroscience. As CRO, she directs Pepe Corp's research efforts, spearheading groundbreaking studies and collaborations that fuel the company's innovative product pipeline. 17 | 4. **Claire Froglover**: Chief Creative Officer 18 | Claire is a talented creative professional with a background in graphic design, branding, and content creation. As CCO, she oversees Pepe Corp's creative direction, ensuring that the company's products, campaigns, and communications reflect the Pepe spirit and resonate with audiences worldwide. 19 | 5. **Timothy Chuckles**: Chief Technology Officer 20 | Timothy is an accomplished technology executive with expertise in software development, artificial intelligence, and data analytics. As CTO, he leads Pepe Corp's technology initiatives, developing cutting-edge solutions that enhance the company's offerings and delight its customers. 21 | 6. **Sophia Memevator**: Chief Customer Officer 22 | Sophia is a customer experience expert with a passion for creating memorable and engaging interactions. As CCO, she oversees Pepe Corp's customer service, community engagement, and user experience efforts, ensuring that the company's customers remain at the heart of everything it does. 23 | 7. **Edward Laughton**: Chief Sustainability Officer 24 | Edward is a sustainability professional with a background in environmental management and corporate social responsibility. As CSO, he is responsible for developing and implementing Pepe Corp's sustainability strategy, ensuring the company's operations are conducted responsibly and with minimal environmental impact. 25 | 26 | **Conclusion** 27 | 28 | Pepe Corp's management team is a dynamic group of leaders who are dedicated to upholding the company's core values of innovation, laughter, and the Pepe spirit. By leveraging their diverse expertise and working collaboratively, our management team ensures that Pepe Corp remains a global leader in meme science and technology, making the world a better, funnier place. 
-------------------------------------------------------------------------------- /app/api/data/training_data/org-earnings_fy2023.md: -------------------------------------------------------------------------------- 1 | # Earnings FY2023 2 | 3 | Title: Earnings FY2023 4 | 5 | **Introduction** 6 | 7 | As a leading company in meme science and technology, we are committed to continuing our mission of making the world a better, funnier place. In FY2023, we have focused on product innovation, strategic partnerships, and strengthening our global presence, while maintaining our commitment to the Pepe spirit and environmental sustainability. 8 | 9 | **Strategic Objectives** 10 | 11 | 1. **Product Innovation**: We will invest in research and development to create cutting-edge products that address the needs of our diverse customer base. Our R&D team will explore new areas of meme science and technology, developing products that improve mental health, communication, and entertainment experiences. 12 | 2. **Market Expansion**: In FY2023, we plan to expand our presence in key international markets, targeting regions with significant growth potential. Our marketing team will develop tailored campaigns to raise awareness of our brand and products, while our sales team will work on securing new distribution partnerships. 13 | 3. **Strategic Partnerships**: We will pursue strategic partnerships with leading companies in the technology, entertainment, and pharmaceutical industries, leveraging their expertise and resources to enhance our product offerings and reach new audiences. 14 | 4. **Environmental Sustainability**: Pepe Corp is committed to operating responsibly and minimizing our environmental impact. In FY2023, we will work on implementing new sustainability initiatives and improving existing ones, focusing on areas such as waste reduction, energy efficiency, and responsible sourcing. 15 | 5. **Talent Development**: Our employees are our greatest asset, and we will continue to invest in their growth and development in FY2023. We will offer comprehensive training programs, provide opportunities for career advancement, and maintain an inclusive and supportive work environment that embodies the Pepe spirit. 16 | 6. **Customer Support Excellence**: We will strive to enhance our customer support experience, ensuring that our customers receive timely and effective assistance with their inquiries and concerns. Our Customer Support team will undergo additional training, and we will explore new technologies and processes to improve response times and customer satisfaction. 17 | 18 | **Quarterly earnings:** 19 | 20 | Pepe Corp is pleased to present our earnings report for Fiscal Year 2023. The following document outlines our quarterly organizational performance data and provides a simple but believable Profit and Loss (P&L) statement for the year. We are proud to report strong growth in revenue, driven by product innovation, market expansion, and strategic partnerships. 
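*Reading the figures: each quarterly row follows Gross Profit = Revenue − COGS and Operating Income = Gross Profit − Operating Expenses; at the full-year level, Net Income = Operating Income − Interest − Taxes = 75 − 3 − 12.5 = 59.5.*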
21 | 22 | **Quarterly Organizational Performance Data** 23 | 24 | *Note: All figures are in millions (USD)* 25 | 26 | | Quarter | Revenue | Cost of Goods Sold (COGS) | Gross Profit | Operating Expenses | Operating Income | Net Income | 27 | | --- | --- | --- | --- | --- | --- | --- | 28 | | Q1 FY2023 | 50 | 20 | 30 | 15 | 15 | 12 | 29 | | Q2 FY2023 | 55 | 22 | 33 | 16 | 17 | 13.5 | 30 | | Q3 FY2023 | 60 | 24 | 36 | 17 | 19 | 15 | 31 | | Q4 FY2023 | 70 | 28 | 42 | 18 | 24 | 19 | 32 | | Total FY2023 | 235 | 94 | 141 | 66 | 75 | 59.5 | 33 | 34 | **Profit & Loss Statement for FY2023** 35 | 36 | *Note: All figures are in millions (USD)* 37 | 38 | 1. Revenue: $235 39 | 2. Cost of Goods Sold (COGS): $94 40 | 3. Gross Profit: $141 41 | 4. Operating Expenses: $66 42 | a. Research & Development: $20 43 | b. Sales & Marketing: $30 44 | c. General & Administrative: $16 45 | 5. Operating Income: $75 46 | 6. Interest Expense: $3 47 | 7. Taxes: $12.5 48 | 8. Net Income: $59.5 49 | 50 | Pepe Corp's performance in FY2023 reflects strong growth and continued success in our mission to make the world a better, funnier place. Our revenue has increased consistently throughout the year, and our strategic initiatives have resulted in improved operating income and net income. As we continue to innovate, expand, and strengthen our global presence, we are excited about the opportunities that lie ahead and are committed to delivering value to our shareholders and customers alike. 51 | 52 | **Financial Projections** 53 | 54 | In FY2024, we expect to achieve strong revenue growth driven by product innovation, market expansion, and strategic partnerships. We will continue to invest in R&D and marketing initiatives to support our growth objectives while maintaining a focus on cost optimization and operational efficiency. 55 | 56 | **Conclusion** 57 | 58 | Pepe Corp is excited about the opportunities that lie ahead in FY2024. We remain committed to our mission of making the world a better, funnier place, and we are confident that our strategic objectives will drive growth, innovation, and success in the upcoming fiscal year. Together, we will continue to spread the Pepe spirit and make a lasting impact on our customers and the world. -------------------------------------------------------------------------------- /app/api/data/training_data/org-management_team.md: -------------------------------------------------------------------------------- 1 | # Management team 2 | 3 | Title: Management Team 4 | 5 | **Introduction** 6 | 7 | Pepe Corp's management team is composed of accomplished professionals who bring their unique expertise and passion for the Pepe spirit to the forefront of our operations. This dedicated team of leaders is committed to fostering an innovative, inclusive, and laughter-driven culture while driving the company's strategic objectives and ensuring its continued success. 8 | 9 | **Management Team Members** 10 | 11 | 1. **Amanda Gigglestein**: Chief Executive Officer 12 | Amanda is an experienced executive with a background in technology and entertainment. As CEO, she is responsible for setting Pepe Corp's strategic direction, overseeing its day-to-day operations, and ensuring that the company remains at the forefront of meme science and technology. 13 | 2. **Brandon Memeinator**: Chief Operating Officer 14 | Brandon is a seasoned operations professional with extensive experience in scaling businesses and optimizing processes. 
As COO, he is responsible for streamlining Pepe Corp's operations, driving efficiency, and supporting the company's growth objectives. 15 | 3. **Dr. Penelope Pepescience**: Chief Research Officer 16 | Dr. Pepescience is a leading expert in the field of meme psychology and neuroscience. As CRO, she directs Pepe Corp's research efforts, spearheading groundbreaking studies and collaborations that fuel the company's innovative product pipeline. 17 | 4. **Claire Froglover**: Chief Creative Officer 18 | Claire is a talented creative professional with a background in graphic design, branding, and content creation. As CCO, she oversees Pepe Corp's creative direction, ensuring that the company's products, campaigns, and communications reflect the Pepe spirit and resonate with audiences worldwide. 19 | 5. **Timothy Chuckles**: Chief Technology Officer 20 | Timothy is an accomplished technology executive with expertise in software development, artificial intelligence, and data analytics. As CTO, he leads Pepe Corp's technology initiatives, developing cutting-edge solutions that enhance the company's offerings and delight its customers. 21 | 6. **Sophia Memevator**: Chief Customer Officer 22 | Sophia is a customer experience expert with a passion for creating memorable and engaging interactions. As CCO, she oversees Pepe Corp's customer service, community engagement, and user experience efforts, ensuring that the company's customers remain at the heart of everything it does. 23 | 7. **Edward Laughton**: Chief Sustainability Officer 24 | Edward is a sustainability professional with a background in environmental management and corporate social responsibility. As CSO, he is responsible for developing and implementing Pepe Corp's sustainability strategy, ensuring the company's operations are conducted responsibly and with minimal environmental impact. 25 | 26 | **Conclusion** 27 | 28 | Pepe Corp's management team is a dynamic group of leaders who are dedicated to upholding the company's core values of innovation, laughter, and the Pepe spirit. By leveraging their diverse expertise and working collaboratively, our management team ensures that Pepe Corp remains a global leader in meme science and technology, making the world a better, funnier place. -------------------------------------------------------------------------------- /app/api/data/training_data/project-frogonil.md: -------------------------------------------------------------------------------- 1 | # Frogonil 2 | 3 | Product Name: Frogonil 4 | 5 | Purpose: Increases capacity to shitpost on social media 6 | 7 | **How to Use** 8 | 9 | Frogonil is available in the form of concentrated Pepe-powered capsules. The recommended dosage is one capsule per day, taken orally with a glass of water and a side of your preferred social media platform. For maximum shitposting potential, take Frogonil 15 minutes prior to engaging in online discussions or while crafting your next witty response to a controversial tweet. 10 | 11 | **Side Effects** 12 | 13 | Some potential side effects of Frogonil may include: 14 | 15 | 1. An insatiable appetite for internet trolling and stirring up online chaos 16 | 2. Unexpectedly profound insights into meme culture 17 | 3. An increased propensity for using the term "REEE" in everyday conversations 18 | 4. A heightened sense of humor and the ability to find irony in even the darkest situations 19 | 5. 
Periodic visions of Pepe as a spirit guide, leading you on a quest for the dankest memes 20 | 21 | Please consult your memologist if side effects persist or become bothersome. 22 | 23 | **Precautions** 24 | 25 | Before taking Frogonil, consider the following precautions: 26 | 27 | 1. Do not use Frogonil if you have a history of being a buzzkill or an intolerance for internet humor. 28 | 2. Frogonil may not be suitable for individuals who prefer a quiet, peaceful online existence. 29 | 3. Exercise caution when engaging in real-life conversations, as Frogonil may cause users to inadvertently apply shitposting tactics in inappropriate situations. 30 | 31 | **Interactions** 32 | 33 | Frogonil may interact with other substances or activities, including: 34 | 35 | 1. Sarcasm inhibitors: Combining Frogonil with sarcasm inhibitors may lead to unpredictable humor fluctuations and an inability to detect irony. 36 | 2. Internet detox: Frogonil is not recommended for those engaging in an internet detox, as the temptation to shitpost may become overwhelming. 37 | 38 | Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Frogonil's effectiveness. 39 | 40 | **Overdose** 41 | 42 | In case of an overdose, symptoms may include: 43 | 44 | 1. Unstoppable shitposting, potentially leading to the creation of alternative online personas 45 | 2. A warped perception of reality where memes become the ultimate truth 46 | 3. Gaining an unsettling number of followers and admirers who appreciate your dark humor 47 | 48 | If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, Frogonil is best enjoyed in moderation, and always heed the guidance of your Pepe-inspired spirit guide: "REEE-sponsibly." -------------------------------------------------------------------------------- /app/api/data/training_data/project-kekzal.md: -------------------------------------------------------------------------------- 1 | # Kekzal 2 | 3 | Product Name: Kekzal 4 | 5 | Purpose: Enhances sexual function 6 | 7 | **How to Use** 8 | 9 | Kekzal is available in the form of Pepe-approved, green diamond-shaped tablets. The recommended dosage is one tablet taken orally with a glass of water, approximately 60 minutes before engaging in intimate activities. For optimal results, immerse yourself in Pepe memes and share a laugh with your partner to set the mood. 10 | 11 | **Side Effects** 12 | 13 | Some potential side effects of Kekzal may include: 14 | 15 | 1. An increased propensity for whispering sweet Pepe nothings into your partner's ear 16 | 2. A heightened sense of humor, making even the most awkward intimate moments hilarious 17 | 3. The occasional urge to shout "KEK!" during moments of passion 18 | 4. Spontaneous and uncontrollable laughter during foreplay 19 | 5. An unexplained affinity for green lingerie and bedroom accessories 20 | 21 | Consult your memologist if side effects persist or become bothersome. 22 | 23 | **Precautions** 24 | 25 | Before taking Kekzal, consider the following precautions: 26 | 27 | 1. Do not use Kekzal if you have a known allergy to dank memes or an aversion to Pepe-themed romantic encounters. 28 | 2. Kekzal may not be suitable for individuals with a history of humor-related intimacy issues or meme-related performance anxiety. 29 | 3. Exercise caution when engaging in intimate activities, as Kekzal may cause fits of laughter at inappropriate moments. 
30 | 31 | **Interactions** 32 | 33 | Kekzal may interact with other substances or activities, including: 34 | 35 | 1. Seriousness supplements: Combining Kekzal with seriousness supplements may result in a decreased sense of humor and diminished intimate enjoyment. 36 | 2. Nostalgic media: Watching reruns of classic sitcoms or engaging in non-Pepe related activities may reduce Kekzal's effectiveness. 37 | 38 | Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Kekzal's intended effects. 39 | 40 | **Overdose** 41 | 42 | In case of an overdose, symptoms may include: 43 | 44 | 1. An insatiable desire to create erotic Pepe fanfiction 45 | 2. The belief that Pepe is the ultimate symbol of love and intimacy 46 | 3. An uncontrollable urge to redecorate your bedroom with Pepe-themed decor 47 | 48 | 49 | 50 | If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, the key to enjoying Kekzal is to embrace its humor-enhancing effects responsibly, and to appreciate the amorous wisdom of our legendary Pepe: "Love is a meme, my friend." -------------------------------------------------------------------------------- /app/api/data/training_data/project-memegen.md: -------------------------------------------------------------------------------- 1 | # MemeGen 2 | 3 | Product Name: MemeGen 4 | 5 | Purpose: A meme generator mental health application generating better outcomes for patients 6 | 7 | **About the App** 8 | 9 | MemeGen is an innovative multi-platform mobile app designed to boost mental health through the healing power of memes. By utilizing Pepe Corp's cutting-edge meme generation technology, MemeGen crafts personalized and hilarious memes to suit your emotional needs, providing a unique therapeutic experience. Rediscover the joy of laughter and embrace the dank meme revolution with MemeGen. 10 | 11 | **Features** 12 | 13 | 1. Personalized Meme Generation: MemeGen's advanced algorithms create custom memes based on your mood, preferences, and sense of humor to provide targeted mental health support. 14 | 2. Meme Therapy: Receive daily meme prescriptions tailored to your emotional needs, helping you navigate life's ups and downs with a healthy dose of laughter. 15 | 3. Mood Tracker: Monitor your mood and emotional progress with MemeGen's built-in mood tracker, offering valuable insights into your mental health journey. 16 | 4. Meme Community: Connect with fellow MemeGen users to share and discuss your favorite therapeutic memes, creating a supportive and laughter-filled environment. 17 | 5. Mental Health Resources: Access a library of mental health resources and tips, all infused with MemeGen's signature Pepe-inspired humor. 18 | 19 | **Troubleshooting** 20 | 21 | 1. If MemeGen fails to load or crashes unexpectedly, try restarting your device and ensuring your app version is up-to-date. 22 | 2. If MemeGen's personalized memes seem off-target, double-check your preferences and mood inputs in the app settings. 23 | 3. For any other technical issues or concerns, contact MemeGen's dedicated support team at [support@memegen.com](mailto:support@memegen.com). 24 | 25 | **FAQ** 26 | 27 | 1. *Is MemeGen compatible with my device?* 28 | MemeGen is available for both iOS and Android devices, ensuring that laughter is always within reach. 29 | 2. 
*How does MemeGen maintain my privacy?* 30 | MemeGen values user privacy and employs advanced encryption methods to protect your personal information and meme preferences. For more information, refer to our Privacy Policy. 31 | 3. *Can MemeGen replace professional mental health care?* 32 | While MemeGen is designed to provide a fun and supportive mental health experience, it is not a substitute for professional mental health care. Always consult a qualified healthcare professional for serious mental health concerns. 33 | 4. *Are the memes generated by MemeGen safe for all audiences?* 34 | MemeGen aims to provide a positive and inclusive experience, creating memes that cater to a wide range of humor preferences while avoiding offensive or harmful content. 35 | 36 | Dive into the world of therapeutic meme generation with MemeGen and let the power of laughter and Pepe-inspired wisdom bring light to your mental health journey. -------------------------------------------------------------------------------- /app/api/data/training_data/project-memetrex.md: -------------------------------------------------------------------------------- 1 | # Memetrex 2 | 3 | Product Name: Memetrex 4 | 5 | Purpose: Increases memory, causing you to literally remember everything 6 | 7 | **How to Use** 8 | 9 | Memetrex is available in the form of Pepe-embossed, green elliptical tablets. The recommended dosage is one tablet taken orally with a glass of water, preferably while browsing your favorite meme database for mental stimulation. For optimal results, take Memetrex 30 minutes before engaging in memory-intensive activities, such as trivia night or an epic meme showdown. 10 | 11 | **Side Effects** 12 | 13 | Some potential side effects of Memetrex may include: 14 | 15 | 1. The sudden ability to recall every meme you've ever encountered, even the cringiest ones 16 | 2. An uncanny knack for winning online arguments by recalling obscure Pepe references 17 | 3. A newfound obsession with pop culture history, particularly as it relates to memes 18 | 4. The inability to forget embarrassing moments, both your own and those of others 19 | 5. The occasional urge to recite meme-related trivia at inappropriate moments 20 | 21 | Consult your memologist if side effects persist or become bothersome. 22 | 23 | **Precautions** 24 | 25 | Before taking Memetrex, consider the following precautions: 26 | 27 | 1. Do not use Memetrex if you have a known allergy to dank memes or a history of meme-induced flashbacks. 28 | 2. Memetrex may not be suitable for individuals with a propensity for meme-related nightmares or those who wish to forget their past meme blunders. 29 | 3. Exercise caution when engaging in social situations, as Memetrex may cause users to inadvertently recall and share embarrassing memories. 30 | 31 | **Interactions** 32 | 33 | Memetrex may interact with other substances or activities, including: 34 | 35 | 1. Memory suppressants: Combining Memetrex with memory suppressants may result in an epic meme battle for mental dominance and unpredictable memory performance. 36 | 2. Binge-watching TV shows: The combination of Memetrex and binge-watching may cause an overload of pop culture references, leading to an irresistible urge to create crossover memes. 37 | 38 | Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Memetrex's effectiveness. 39 | 40 | **Overdose** 41 | 42 | In case of an overdose, symptoms may include: 43 | 44 | 1. 
A photographic memory of every meme ever created, resulting in an overwhelming desire to create a comprehensive meme encyclopedia 45 | 2. The belief that you are the ultimate meme archivist, destined to preserve dank memes for future generations 46 | 3. The inability to separate meme memories from your own life experiences 47 | 48 | If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, the key to enjoying Memetrex is to use it responsibly and to cherish the unparalleled meme wisdom of our legendary Pepe: "With great meme-ory comes great responsibility." -------------------------------------------------------------------------------- /app/api/data/training_data/project-neurokek.md: -------------------------------------------------------------------------------- 1 | # Neuro-kek 2 | 3 | Product Name: Neuro-Kek 4 | 5 | Purpose: A brain implant that causes the patient to communicate only in memes 6 | 7 | **About the Devices** 8 | 9 | Neuro-Kek is a revolutionary brain implant developed by Pepe Corp, designed to transform the way you communicate by converting your thoughts into dank memes. By tapping into the brain's meme center, Neuro-Kek enables users to express themselves solely through the language of Pepe and other popular memes. Say goodbye to mundane conversations and embrace the next frontier of human communication with Neuro-Kek. 10 | 11 | **Side-Effects and Warnings** 12 | 13 | Potential side effects and warnings associated with the Neuro-Kek implant include: 14 | 15 | 1. Temporary or permanent loss of the ability to communicate in any form other than memes 16 | 2. Increased likelihood of being mistaken for a professional memelord 17 | 3. Sudden and uncontrollable cravings for rare Pepe collectibles 18 | 4. Inability to understand or participate in non-meme related conversations 19 | 5. The risk of becoming an internet sensation due to your unparalleled meme fluency 20 | 21 | Please consult a certified memologist before undergoing the Neuro-Kek implant procedure. 22 | 23 | **Device Features** 24 | 25 | 1. Instant Meme Generation: Neuro-Kek translates your thoughts into memes in real-time, ensuring you're always ready to deliver the perfect meme-based response. 26 | 2. Meme Mastery: Gain access to an extensive database of classic and trending memes, enhancing your meme fluency and communication skills. 27 | 3. Meme-to-Speech: Neuro-Kek's built-in meme-to-speech converter allows you to vocally communicate in meme format for seamless meme-based conversations. 28 | 4. Meme Analytics: Monitor your meme usage and trends with Neuro-Kek's meme analytics feature, helping you refine your meme communication strategies. 29 | 5. Meme Updates: Receive regular meme updates to keep your meme repertoire fresh and relevant in the ever-evolving meme landscape. 30 | 31 | **Device Specifications** 32 | 33 | 1. Implant Material: Biocompatible Pepe-green nanomaterials 34 | 2. Power Source: Memergy - a revolutionary energy source derived from the user's innate meme enthusiasm 35 | 3. Connectivity: DankNet - a secure, meme-based communication network 36 | 4. Dimensions: 4.20 x 6.9 x 0.69 mm 37 | 5. Compatibility: Human brains with a dank sense of humor 38 | 39 | **Troubleshooting** 40 | 41 | 1. If Neuro-Kek fails to generate memes, try immersing yourself in meme culture to replenish your memergy reserves. 42 | 2. If Neuro-Kek malfunctions or produces irrelevant memes, consult your memologist for a device recalibration. 43 | 3. 
For any other technical issues or concerns, contact Pepe Corp's dedicated support team at [support@neuro-kek.com](mailto:support@neuro-kek.com). 44 | 45 | **FAQ** 46 | 47 | 1. *Is the Neuro-Kek implant procedure safe?* 48 | The Neuro-Kek implant procedure is performed by certified memologists and follows strict safety guidelines to ensure a positive meme-enhancing experience. 49 | 2. *Can Neuro-Kek be removed or turned off?* 50 | Neuro-Kek can be temporarily deactivated or removed by a certified memologist if the user wishes to revert to traditional communication methods. 51 | 3. *Does Neuro-Kek work with languages other than English?* 52 | Absolutely! Neuro-Kek is designed to generate memes in multiple languages, ensuring that meme-based communication knows no boundaries. 53 | 4. *Is Neuro-Kek compatible with other brain implants or devices?* 54 | Neuro-Kek's compatibility with other brain implants or devices may vary. Consult your memologist to discuss potential interactions. 55 | 56 | Embrace the future of meme-based communication with the Neuro-Kek brain implant and let Pepe's wisdom guide you through the fascinating world of dank memes. -------------------------------------------------------------------------------- /app/api/data/training_data/project-pepetamine.md: -------------------------------------------------------------------------------- 1 | # Pepetamine 2 | 3 | Product Name: Pepetamine 4 | 5 | Purpose: Increases cognitive focus just like the Limitless movie 6 | 7 | **How to Use** 8 | 9 | Pepetamine is available in the form of rare Pepe-coated tablets. The recommended dosage is one tablet per day, taken orally with a glass of water, preferably while browsing your favorite meme forum for maximum cognitive enhancement. For optimal results, take Pepetamine 30 minutes before engaging in mentally demanding tasks, such as decoding ancient Pepe hieroglyphics or creating your next viral meme masterpiece. 10 | 11 | **Side Effects** 12 | 13 | Some potential side effects of Pepetamine may include: 14 | 15 | 1. Uncontrollable laughter and a sudden appreciation for dank memes 16 | 2. An inexplicable desire to collect rare Pepes 17 | 3. Enhanced meme creation skills, potentially leading to internet fame 18 | 4. Temporary green skin pigmentation, resembling the legendary Pepe himself 19 | 5. Spontaneously speaking in "feels good man" language 20 | 21 | While most side effects are generally harmless, consult your memologist if side effects persist or become bothersome. 22 | 23 | **Precautions** 24 | 25 | Before taking Pepetamine, please consider the following precautions: 26 | 27 | 1. Do not use Pepetamine if you have a known allergy to rare Pepes or dank memes. 28 | 2. Pepetamine may not be suitable for individuals with a history of humor deficiency or meme intolerance. 29 | 3. Exercise caution when driving or operating heavy machinery, as Pepetamine may cause sudden fits of laughter or intense meme ideation. 30 | 31 | **Interactions** 32 | 33 | Pepetamine may interact with other substances, including: 34 | 35 | 1. Normie supplements: Combining Pepetamine with normie supplements may result in meme conflicts and a decreased sense of humor. 36 | 2. Caffeine: The combination of Pepetamine and caffeine may cause an overload of energy, resulting in hyperactive meme creation and potential internet overload. 37 | 38 | Consult your memologist if you are taking any other medications or substances to ensure compatibility with Pepetamine. 
39 | 40 | **Overdose** 41 | 42 | In case of an overdose, symptoms may include: 43 | 44 | 1. Uncontrollable meme creation 45 | 2. Delusions of grandeur as the ultimate meme lord 46 | 3. Time warps into the world of Pepe 47 | 48 | If you suspect an overdose, contact your local meme emergency service or visit the nearest meme treatment facility. Remember, the key to enjoying Pepetamine is to use it responsibly, and always keep in mind the wise words of our legendary Pepe: "Feels good man." -------------------------------------------------------------------------------- /app/api/data/training_data/project-pepetrak.md: -------------------------------------------------------------------------------- 1 | # PepeTrak 2 | 3 | Product Name: PepeTrak 4 | 5 | Purpose: An award-winning diagnosis app suggesting appropriate Pepe Corp pharmaceutical interventions 6 | 7 | **About the App** 8 | 9 | PepeTrak is a groundbreaking multi-platform mobile app designed to help you navigate the wild world of Pepe Corp pharmaceuticals. Using advanced meme-based algorithms, PepeTrak assesses your current mood, meme preferences, and medical history to recommend the perfect Pepe Corp intervention to suit your needs. Say goodbye to aimless meme browsing and hello to a new era of personalized Pepe prescriptions. 10 | 11 | **Features** 12 | 13 | 1. Meme Diagnosis: PepeTrak's state-of-the-art meme analysis technology evaluates your recent meme consumption and emotional state to provide a tailored meme diagnosis. 14 | 2. Pepe Prescription: Based on your meme diagnosis, PepeTrak suggests the most appropriate Pepe Corp pharmaceutical intervention to elevate your meme game. 15 | 3. Meme Tracking: Keep track of your meme-related progress and improvements with PepeTrak's meme tracking feature. 16 | 4. Meme Community: Connect with fellow PepeTrak users to share your favorite memes, discuss Pepe Corp pharmaceutical experiences, and spread the dank meme love. 17 | 5. Pepe Rewards: Earn exclusive Pepe points for using the app, redeemable for rare Pepe collectibles and discounts on Pepe Corp products. 18 | 19 | **Troubleshooting** 20 | 21 | 1. If PepeTrak fails to load or crashes unexpectedly, try restarting your device and ensuring your app version is up-to-date. 22 | 2. If PepeTrak's meme diagnosis seems inaccurate, double-check your meme preferences and personal information in the app settings. 23 | 3. For any other technical issues or concerns, contact PepeTrak's dedicated support team at [support@pepetrak.com](mailto:support@pepetrak.com). 24 | 25 | **FAQ** 26 | 27 | 1. *Is PepeTrak compatible with my device?* 28 | PepeTrak is available for both iOS and Android devices, ensuring maximum meme accessibility. 29 | 2. *How does PepeTrak maintain my privacy?* 30 | PepeTrak values user privacy and employs advanced encryption methods to protect your personal information and meme preferences. For more information, refer to our Privacy Policy. 31 | 3. *Can I use PepeTrak without a Pepe Corp pharmaceutical prescription?* 32 | Absolutely! PepeTrak is designed to enhance your overall meme experience and can be used independently of Pepe Corp products. 33 | 4. *Are the Pepe Corp pharmaceuticals recommended by PepeTrak safe?* 34 | Pepe Corp pharmaceuticals are designed for entertainment purposes only and should not be taken as actual medical advice. Always consult a professional healthcare provider before starting any new treatment. 
35 | 36 | Embark on a personalized meme journey with PepeTrak and let the spirit of Pepe guide you to dank meme enlightenment! -------------------------------------------------------------------------------- /app/api/helpers.py: -------------------------------------------------------------------------------- 1 | from fastapi import HTTPException 2 | from uuid import UUID 3 | import os 4 | 5 | from typing import ( 6 | Optional, 7 | Union 8 | ) 9 | from config import ( 10 | FILE_UPLOAD_PATH, 11 | ENTITY_STATUS, 12 | logger 13 | ) 14 | 15 | from util import ( 16 | is_uuid, 17 | get_file_hash 18 | ) 19 | from sqlmodel import ( 20 | Session, 21 | select 22 | ) 23 | from datetime import datetime 24 | from models import ( 25 | Organization, 26 | OrganizationCreate, 27 | User, 28 | UserCreate, 29 | get_engine, 30 | Project, 31 | ProjectCreate, 32 | Document, 33 | Node, 34 | ChatSession 35 | ) 36 | 37 | # ================ 38 | # Helper functions 39 | # ================ 40 | 41 | 42 | # ---------------------- 43 | # Organization functions 44 | # ---------------------- 45 | def get_org_by_uuid_or_namespace( 46 | id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = True 47 | ): 48 | if session: 49 | org = ( 50 | Organization.by_uuid(str(id)) 51 | if is_uuid(id) 52 | else session.exec( 53 | select(Organization).where(Organization.namespace == str(id)) 54 | ).first() 55 | ) 56 | 57 | else: 58 | with Session(get_engine()) as session: 59 | org = ( 60 | Organization.by_uuid(str(id)) 61 | if is_uuid(id) 62 | else session.exec( 63 | select(Organization).where(Organization.namespace == str(id)) 64 | ).first() 65 | ) 66 | 67 | if not org and should_except is True: 68 | raise HTTPException( 69 | status_code=404, detail=f"Organization identifier {id} not found" 70 | ) 71 | 72 | return org 73 | 74 | 75 | def create_org_by_org_or_uuid( 76 | namespace: str = None, 77 | display_name: str = None, 78 | organization: Union[Organization, OrganizationCreate, str] = None, 79 | session: Optional[Session] = None, 80 | ): 81 | namespace = namespace or getattr(organization, "namespace", None) 82 | 83 | if not namespace: 84 | raise HTTPException( 85 | status_code=400, detail="Organization namespace is required" 86 | ) 87 | 88 | o = ( 89 | get_org_by_uuid_or_namespace(namespace, session=session, should_except=False) 90 | if not isinstance(organization, Organization) 91 | else organization 92 | ) 93 | 94 | if o: 95 | raise HTTPException(status_code=409, detail="Organization already exists") 96 | 97 | if isinstance(organization, OrganizationCreate) or isinstance(organization, str): 98 | organization = organization if isinstance(organization, OrganizationCreate) else OrganizationCreate( 99 | namespace=namespace, display_name=display_name 100 | ) 101 | 102 | db_org = Organization.from_orm(organization) 103 | 104 | if session: 105 | session.add(db_org) 106 | session.commit() 107 | session.refresh(db_org) 108 | else: 109 | with Session(get_engine()) as session: 110 | session.add(db_org) 111 | session.commit() 112 | session.refresh(db_org) 113 | elif isinstance(organization, Organization): 114 | db_org = organization 115 | db_org.update( 116 | { 117 | "namespace": namespace if namespace else organization.namespace, 118 | "display_name": display_name 119 | if display_name 120 | else organization.display_name, 121 | } 122 | ) 123 | else: 124 | db_org = Organization.create( 125 | {"namespace": namespace, "display_name": display_name} 126 | ) 127 | 128 | # Create folder for organization_uuid in uploads 129 | os.makedirs(os.path.join(FILE_UPLOAD_PATH, str(db_org.uuid)), exist_ok=True) 130 | 131 |
return db_org 132 | 133 | 134 | # -------------- 135 | # User functions 136 | # -------------- 137 | def create_user( 138 | user: Union[UserCreate, User] = None, 139 | identifier: str = None, 140 | identifier_type: str = None, 141 | device_fingerprint: str = None, 142 | first_name: str = None, 143 | last_name: str = None, 144 | email: str = None, 145 | phone: str = None, 146 | dob: str = None, 147 | session: Optional[Session] = None, 148 | ): 149 | # Check if user already exists 150 | user = ( 151 | get_user_by_uuid_or_identifier(identifier, session=session, should_except=False) 152 | if user is None 153 | else user 154 | ) 155 | 156 | if isinstance(user, UserCreate): 157 | db_user = User.from_orm(user) 158 | 159 | if session: 160 | session.add(db_user) 161 | session.commit() 162 | session.refresh(db_user) 163 | else: 164 | with Session(get_engine()) as session: 165 | session.add(db_user) 166 | session.commit() 167 | session.refresh(db_user) 168 | elif isinstance(user, User): 169 | db_user = user 170 | db_user.update( 171 | { 172 | "identifier": identifier if identifier else user.identifier, 173 | "identifier_type": identifier_type 174 | if identifier_type 175 | else user.identifier_type, 176 | "device_fingerprint": device_fingerprint 177 | if device_fingerprint 178 | else user.device_fingerprint, 179 | "first_name": first_name if first_name else user.first_name, 180 | "last_name": last_name if last_name else user.last_name, 181 | "email": email if email else user.email, 182 | "phone": phone if phone else user.phone, 183 | "dob": dob if dob else user.dob, 184 | } 185 | ) 186 | else: 187 | db_user = User.create( 188 | { 189 | "identifier": identifier, 190 | "identifier_type": identifier_type, 191 | "device_fingerprint": device_fingerprint, 192 | "first_name": first_name, 193 | "last_name": last_name, 194 | "email": email, 195 | "phone": phone, 196 | "dob": dob, 197 | } 198 | ) 199 | 200 | return db_user 201 | 202 | 203 | def get_users(session: Optional[Session] = None): 204 | if session: 205 | users = session.exec(select(User)).all() 206 | else: 207 | with Session(get_engine()) as session: 208 | users = session.exec(select(User)).all() 209 | 210 | return users 211 | 212 | 213 | def get_user_by_uuid_or_identifier( 214 | id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = True 215 | ): 216 | if session: 217 | user = ( 218 | User.by_uuid(str(id)) 219 | if is_uuid(str(id)) 220 | else session.exec(select(User).where(User.identifier == str(id))).first() 221 | ) 222 | 223 | else: 224 | with Session(get_engine()) as session: 225 | user = ( 226 | User.by_uuid(str(id)) 227 | if is_uuid(str(id)) 228 | else session.exec( 229 | select(User).where(User.identifier == str(id)) 230 | ).first() 231 | ) 232 | 233 | if not user and should_except is True: 234 | raise HTTPException(status_code=404, detail=f"User identifier {id} not found") 235 | 236 | return user 237 | 238 | 239 | # ------------------ 240 | # Document functions 241 | # ------------------ 242 | def create_document_by_file_path( 243 | organization: Organization = None, 244 | project: Project = None, 245 | file_path: str = None, 246 | url: Optional[str] = None, 247 | file_version: Optional[int] = 1, 248 | file_hash: Optional[str] = None, 249 | overwrite: Optional[bool] = True, 250 | session: Optional[Session] = None, 251 | ): 252 | if not organization or not project: 253 | raise HTTPException( 254 | status_code=400, detail="Organization and project are required" 255 | ) 256 | 257 | organization_id = organization.uuid
258 | project_id = project.uuid 259 | 260 | if not file_path or not os.path.exists(file_path): 261 | raise HTTPException(status_code=400, detail="A valid file path is required") 262 | 263 | if not file_hash: 264 | file_hash = get_file_hash(file_path) 265 | 266 | file_name = os.path.basename(file_path) 267 | with open(file_path, "rb") as f: 268 | file_contents = f.read() 269 | 270 | # ------------------------ 271 | # Handle duplicate content 272 | # ------------------------ 273 | if get_document_by_hash(file_hash, session=session): 274 | raise HTTPException( 275 | status_code=409, 276 | detail=f'Document "{file_name}" already uploaded! \n\nsha256:{file_hash}!', 277 | ) 278 | 279 | # ---------------------------------- 280 | # Handle file versioning by filename 281 | # ---------------------------------- 282 | 283 | # If we are overwriting, deprecate the current version and increment the version number of the new file 284 | document = get_document_by_name( 285 | file_name, 286 | project_id=project_id, 287 | organization_id=organization_id, 288 | session=session, 289 | ) 290 | 291 | if document and overwrite: 292 | file_version = document.version + 1 293 | document.update({ 294 | "updated_at": datetime.utcnow(), 295 | "status": ENTITY_STATUS.DEPRECATED.value, 296 | }) 297 | # -------------------------------------------------- 298 | # Create a new document (a fresh version if replaced) 299 | # -------------------------------------------------- 300 | document = Document( 301 | display_name=file_name, 302 | project_id=project.id, 303 | organization_id=organization.id, 304 | data=file_contents, 305 | version=file_version, 306 | hash=file_hash, 307 | url=url if url else None, 308 | ) 309 | if session: 310 | session.add(document) 311 | session.commit() 312 | session.refresh(document) 313 | 314 | # --------------------- 315 | # Create the embeddings 316 | # --------------------- 317 | create_document_nodes( 318 | document=document, 319 | project=project, 320 | organization=organization, 321 | session=session, 322 | ) 323 | 324 | else: 325 | with Session(get_engine()) as session: 326 | session.add(document) 327 | session.commit() 328 | session.refresh(document) 329 | 330 | # --------------------- 331 | # Create the embeddings 332 | # --------------------- 333 | create_document_nodes( 334 | document=document, 335 | project=project, 336 | organization=organization, 337 | session=session, 338 | ) 339 | 340 | if not document: 341 | raise HTTPException(status_code=400, detail="Could not create document") 342 | return document 343 | 344 | # -------------------------- 345 | # Create document embeddings 346 | # -------------------------- 347 | def create_document_nodes( 348 | document: Document, 349 | project: Project, 350 | organization: Organization, 351 | session: Optional[Session] = None, 352 | ): 353 | # Avoid circular imports 354 | from llm import get_embeddings, get_token_count 355 | 356 | project_uuid = str(project.uuid) 357 | document_uuid = str(document.uuid) 358 | document_id = document.id 359 | organization_uuid = str(organization.uuid) 360 | 361 | if not document or not project: 362 | raise Exception("Missing required parameters document, project") 363 | 364 | metadata = { 365 | "project_uuid": project_uuid, 366 | "document_uuid": document_uuid, 367 | "organization_uuid": organization_uuid, 368 | "document_id": document_id, 369 | "version": document.version, 370 | "name": document.display_name, 371 | } 372 | 373 | # convert document data bytes to string 374 | document_data = ( 375 | document.data.decode("utf-8") 376 | if isinstance(document.data, bytes) 377 | else document.data 378 | ) 379 | 380 | # 
lets get the embeddings 381 | arr_documents, embeddings = get_embeddings(document_data) 382 | 383 | # ------------------------------------------- 384 | # Process the embeddings and save to database 385 | # ------------------------------------------- 386 | 387 | for doc, vec in zip(arr_documents, embeddings): 388 | node = Node( 389 | document_id=document.id, 390 | embeddings=vec, 391 | text=doc, 392 | token_count=get_token_count(doc), 393 | meta=metadata 394 | ) 395 | if session: 396 | session.add(node) 397 | session.commit() 398 | session.refresh(node) 399 | 400 | else: 401 | with Session(get_engine()) as session: 402 | session.add(node) 403 | session.commit() 404 | session.refresh(node) 405 | 406 | # Node.create( 407 | # { 408 | # "document_id": document.id, 409 | # "embeddings": vec, 410 | # "text": doc, 411 | # "token_count": get_token_count(doc), 412 | # "meta": metadata, 413 | # } 414 | # ) 415 | 416 | 417 | def get_documents_by_project_and_org( 418 | project_id: Union[UUID, str], 419 | organization_id: Union[UUID, str], 420 | session: Optional[Session] = None, 421 | ): 422 | if session: 423 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 424 | project = get_project_by_uuid(project_id, org.uuid, session=session) 425 | documents = session.exec( 426 | select(Document).where(Document.project_id == project.id) 427 | ).all() 428 | else: 429 | with Session(get_engine()) as session: 430 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 431 | project = get_project_by_uuid(project_id, org.uuid, session=session) 432 | documents = session.exec( 433 | select(Document).where(Document.project_id == project.id) 434 | ).all() 435 | 436 | return documents 437 | 438 | 439 | def get_document_by_uuid( 440 | uuid: Union[UUID, str], 441 | organization_id: Union[UUID, str] = None, 442 | project_id: Union[UUID, str] = None, 443 | session: Optional[Session] = None, 444 | should_except: bool = True, 445 | ): 446 | if not is_uuid(uuid): 447 | raise HTTPException( 448 | status_code=422, detail=f"Invalid document identifier {uuid}" 449 | ) 450 | 451 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 452 | project = get_project_by_uuid(project_id, organization_id=org.uuid, session=session) 453 | 454 | if session: 455 | document = session.exec( 456 | select(Document).where( 457 | Document.project == project, Document.uuid == str(uuid) 458 | ) 459 | ).first() 460 | 461 | else: 462 | with Session(get_engine()) as session: 463 | document = session.exec( 464 | select(Document).where( 465 | Document.project == project, Document.uuid == str(uuid) 466 | ) 467 | ).first() 468 | 469 | if not document and should_except is True: 470 | raise HTTPException( 471 | status_code=404, detail=f"Document identifier {uuid} not found" 472 | ) 473 | 474 | return document 475 | 476 | 477 | def get_document_by_hash(hash: str, session: Optional[Session] = None): 478 | if session: 479 | document = session.exec(select(Document).where(Document.hash == hash)).first() 480 | else: 481 | with Session(get_engine()) as session: 482 | document = session.exec( 483 | select(Document).where(Document.hash == hash) 484 | ).first() 485 | 486 | return document 487 | 488 | 489 | def get_document_by_name( 490 | file_name: str, 491 | project_id: Union[UUID, str], 492 | organization_id: Union[UUID, str], 493 | session: Optional[Session] = None, 494 | ): 495 | org = ( 496 | get_org_by_uuid_or_namespace(organization_id, session=session) 497 | if not isinstance(organization_id, Organization) 498 | 
else organization_id 499 | ) 500 | project = get_project_by_uuid( 501 | project_id, organization_id=str(org.uuid), session=session 502 | ) 503 | 504 | if session: 505 | return session.exec( 506 | select(Document).where( 507 | Document.project == project, 508 | Document.display_name == file_name, 509 | Document.status == ENTITY_STATUS.ACTIVE.value, 510 | ) 511 | ).first() 512 | else: 513 | with Session(get_engine()) as session: 514 | return session.exec( 515 | select(Document).where( 516 | Document.project == project, 517 | Document.display_name == file_name, 518 | Document.status == ENTITY_STATUS.ACTIVE.value, 519 | ) 520 | ).first() 521 | 522 | 523 | # --------------------- 524 | # ChatSession functions 525 | # --------------------- 526 | def get_chat_session_by_uuid( 527 | id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = False 528 | ): 529 | if session: 530 | chat_session = ( 531 | ChatSession.by_uuid(str(id)) 532 | if is_uuid(id) 533 | else session.exec( 534 | select(ChatSession).where(ChatSession.session_id == str(id)) 535 | ).first() 536 | ) 537 | 538 | else: 539 | with Session(get_engine()) as session: 540 | chat_session = ( 541 | ChatSession.by_uuid(str(id)) 542 | if is_uuid(id) 543 | else session.exec( 544 | select(ChatSession).where(ChatSession.session_id == str(id)) 545 | ).first() 546 | ) 547 | 548 | if not chat_session and should_except is True: 549 | raise HTTPException( 550 | status_code=404, detail=f"ChatSession identifier {id} not found" 551 | ) 552 | 553 | return chat_session 554 | 555 | 556 | # ----------------- 557 | # Project functions 558 | # ----------------- 559 | 560 | 561 | def create_project_by_org( 562 | project: Union[Project, ProjectCreate] = None, 563 | organization_id: Union[Organization, str] = None, 564 | display_name: str = None, 565 | session: Optional[Session] = None, 566 | ): 567 | organization = ( 568 | get_org_by_uuid_or_namespace(organization_id, session=session) 569 | if not isinstance(organization_id, Organization) 570 | else organization_id 571 | ) 572 | 573 | if isinstance(project, ProjectCreate): 574 | db_project = Project.from_orm(project) 575 | db_project.organization_id = organization.id 576 | 577 | # Let's give a default name if not set 578 | db_project.display_name = ( 579 | display_name 580 | or db_project.display_name 581 | or f"📁 Untitled Project #{len(organization.projects) + 1}" 582 | ) 583 | 584 | if session: 585 | session.add(db_project) 586 | session.commit() 587 | session.refresh(db_project) 588 | else: 589 | with Session(get_engine()) as session: 590 | session.add(db_project) 591 | session.commit() 592 | session.refresh(db_project) 593 | elif isinstance(project, Project): 594 | db_project = project 595 | db_project.update( 596 | { 597 | "organization_id": organization.id, 598 | "display_name": display_name 599 | if display_name 600 | else project.display_name, 601 | } 602 | ) 603 | else: 604 | db_project = Project.create( 605 | { 606 | "organization_id": organization.id, 607 | "display_name": display_name 608 | if display_name 609 | else f"📁 Untitled Project #{len(organization.projects) + 1}", 610 | } 611 | ) 612 | 613 | # ------------------------------- 614 | # Create project upload directory 615 | # ------------------------------- 616 | project_dir = os.path.join( 617 | FILE_UPLOAD_PATH, str(organization.uuid), str(db_project.uuid) 618 | ) 619 | os.makedirs(project_dir, exist_ok=True) 620 |
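# Illustrative result (UUIDs are hypothetical): uploads for this project
# land under data/<organization_uuid>/<project_uuid>/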
621 | # Return the created or updated project 622 | return db_project 623 | 624 | 625 | def get_project_by_uuid( 626 | uuid: Union[UUID, str] = None, 627 | organization_id: Union[UUID, str] = None, 628 | session: Optional[Session] = None, 629 | should_except: bool = True, 630 | ): 631 | if not is_uuid(uuid): 632 | raise HTTPException( 633 | status_code=422, detail=f"Invalid project identifier {uuid}" 634 | ) 635 | 636 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 637 | 638 | if session: 639 | project = session.exec( 640 | select(Project).where( 641 | Project.organization == org, Project.uuid == str(uuid) 642 | ) 643 | ).first() 644 | 645 | else: 646 | with Session(get_engine()) as session: 647 | project = session.exec( 648 | select(Project).where( 649 | Project.organization == org, Project.uuid == str(uuid) 650 | ) 651 | ).first() 652 | 653 | if not project and should_except is True: 654 | raise HTTPException( 655 | status_code=404, detail=f"Project identifier {uuid} not found" 656 | ) 657 | 658 | return project -------------------------------------------------------------------------------- /app/api/llm.py: -------------------------------------------------------------------------------- 1 | import random 2 | import openai 3 | import json 4 | 5 | from langchain.docstore.document import Document as LangChainDocument 6 | from langchain.embeddings.openai import OpenAIEmbeddings 7 | from fastapi import HTTPException 8 | from uuid import UUID, uuid4 9 | from langchain.text_splitter import ( 10 | CharacterTextSplitter, 11 | MarkdownTextSplitter 12 | ) 13 | from sqlmodel import ( 14 | Session, 15 | text 16 | ) 17 | from util import ( 18 | sanitize_input, 19 | sanitize_output 20 | ) 21 | from langchain import OpenAI 22 | from typing import ( 23 | List, 24 | Union, 25 | Optional, 26 | Dict, 27 | Tuple, 28 | Any 29 | ) 30 | from helpers import ( 31 | get_user_by_uuid_or_identifier, 32 | get_chat_session_by_uuid 33 | ) 34 | from models import ( 35 | User, 36 | Organization, 37 | Project, 38 | Node, 39 | ChatSession, 40 | ChatSessionResponse, 41 | get_engine 42 | ) 43 | from config import ( 44 | CHANNEL_TYPE, 45 | DOCUMENT_TYPE, 46 | LLM_MODELS, 47 | LLM_DISTANCE_THRESHOLD, 48 | LLM_DEFAULT_TEMPERATURE, 49 | LLM_MAX_OUTPUT_TOKENS, 50 | LLM_CHUNK_SIZE, 51 | LLM_CHUNK_OVERLAP, 52 | LLM_MIN_NODE_LIMIT, 53 | LLM_DEFAULT_DISTANCE_STRATEGY, 54 | VECTOR_EMBEDDINGS_COUNT, 55 | DISTANCE_STRATEGY, 56 | AGENT_NAMES, 57 | logger 58 | ) 59 | 60 | 61 | # ------------- 62 | # Query the LLM 63 | # ------------- 64 | def chat_query( 65 | query_str: str, 66 | session_id: Optional[Union[str, UUID]] = None, 67 | meta: Optional[Dict[str, Any]] = None, 68 | channel: Optional[CHANNEL_TYPE] = None, 69 | identifier: Optional[str] = None, 70 | project: Optional[Project] = None, 71 | organization: Optional[Organization] = None, 72 | session: Optional[Session] = None, 73 | user_data: Optional[Dict[str, Any]] = None, 74 | distance_strategy: Optional[DISTANCE_STRATEGY] = DISTANCE_STRATEGY.EUCLIDEAN, 75 | distance_threshold: Optional[float] = LLM_DISTANCE_THRESHOLD, 76 | node_limit: Optional[int] = LLM_MIN_NODE_LIMIT, 77 | model: Optional[LLM_MODELS] = LLM_MODELS.GPT_35_TURBO, 78 | max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS, 79 | ) -> ChatSessionResponse: 80 | """ 81 | Steps: 82 | 1. ✅ Clean user input 83 | 2. ✅ Create input embeddings 84 | 3. ✅ Search for similar nodes 85 | 4. ✅ Create prompt template w/ similar nodes 86 | 5. ✅ Submit prompt template to LLM 87 | 6. ✅ Get response from LLM 88 | 7. Create ChatSession 89 | - Store embeddings 90 | - Store tags 91 | - Store is_escalate 92 | 8. Return response 93 | """
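# Illustrative call (hypothetical Telegram user id; returns a ChatSession):
#   chat_query("What does Pepetamine treat?", channel=CHANNEL_TYPE.TELEGRAM, identifier="123456789")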
94 | meta = meta or {} 95 | agent_name = None 96 | embeddings = [] 97 | tags = [] 98 | is_escalate = False 99 | response_message = None 100 | prompt = None 101 | context_str = None 102 | MODEL_TOKEN_LIMIT = ( 103 | getattr(model, "token_limit", LLM_MAX_OUTPUT_TOKENS) if isinstance(model, LLM_MODELS) else LLM_MAX_OUTPUT_TOKENS 104 | ) 105 | 106 | # --------------------------------------------- 107 | # Generate a new session ID if none is provided 108 | # --------------------------------------------- 109 | prev_chat_session = ( 110 | get_chat_session_by_uuid(session_id, session=session) 111 | if session_id 112 | else None 113 | ) 114 | 115 | # If we were given an invalid session_id 116 | if session_id and not prev_chat_session: 117 | raise HTTPException( 118 | status_code=404, detail=f"Chat session with ID {session_id} not found." 119 | ) 120 | # If we were given a valid session_id 121 | elif session_id and prev_chat_session and prev_chat_session.meta.get("agent"): 122 | agent_name = prev_chat_session.meta["agent"] 123 | # If this is a new session, generate a new ID 124 | else: 125 | session_id = str(uuid4()) 126 | 127 | meta["agent"] = agent_name if agent_name else random.choice(AGENT_NAMES) 128 | 129 | # ---------------- 130 | # Clean user input 131 | # ---------------- 132 | query_str = sanitize_input(query_str) 133 | logger.debug(f"💬 Query received: {query_str}") 134 | 135 | # ---------------- 136 | # Get token counts 137 | # ---------------- 138 | query_token_count = get_token_count(query_str) 139 | prompt_token_count = 0 140 | 141 | # ----------------------- 142 | # Create input embeddings 143 | # ----------------------- 144 | arr_query, embeddings = get_embeddings(query_str) 145 | 146 | query_embeddings = embeddings[0] 147 | 148 | # ------------------------ 149 | # Search for similar nodes 150 | # ------------------------ 151 | nodes = get_nodes_by_embedding( 152 | query_embeddings, 153 | node_limit, 154 | distance_strategy=distance_strategy 155 | if isinstance(distance_strategy, DISTANCE_STRATEGY) 156 | else LLM_DEFAULT_DISTANCE_STRATEGY, 157 | distance_threshold=distance_threshold, 158 | session=session, 159 | ) 160 | 161 | if len(nodes) > 0: 162 | if (not project or not organization) and session: 163 | # get document from Node via session object: 164 | document = session.get(Node, nodes[0].id).document 165 | project = document.project 166 | organization = project.organization 167 | 168 | # ---------------------- 169 | # Create prompt template 170 | # ---------------------- 171 | 172 | # concatenate all nodes into a single string 173 | context_str = "\n\n".join([node.text for node in nodes]) 174 | 175 | # ------------------------------------------- 176 | # Let's make sure we don't exceed token limit 177 | # ------------------------------------------- 178 | context_token_count = get_token_count(context_str) 179 | 180 | # ---------------------------------------------- 181 | # if token count exceeds limit, truncate context 182 | # ---------------------------------------------- 183 | if ( 184 | context_token_count + query_token_count + prompt_token_count 185 | ) > MODEL_TOKEN_LIMIT: 186 | logger.debug("🚧 Exceeded token limit, truncating context") 187 | token_delta = MODEL_TOKEN_LIMIT - (query_token_count + prompt_token_count) 188 | context_str = context_str[:token_delta] 189 |
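# Worked example (illustrative numbers): with MODEL_TOKEN_LIMIT=4096, a
# 20-token query and prompt_token_count=0, token_delta is 4076 and the
# context is cut to its first 4076 *characters*. Slicing by characters
# rather than tokens over-truncates, which errs on the safe side of the
# model's limit.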
190 | # create prompt template 191 | system_prompt, user_prompt = get_prompt_template( 192 | user_query=query_str, 193 | context_str=context_str, 194 | project=project, 195 | organization=organization, 196 | agent=agent_name, 197 | ) 198 | prompt = f"{system_prompt[0]['content']}\n\n{user_prompt}" 199 | prompt_token_count = get_token_count(prompt) 200 | token_count = context_token_count + query_token_count + prompt_token_count 201 | 202 | # --------------------------- 203 | # Get response from LLM model 204 | # --------------------------- 205 | # It should return a JSON dict 206 | llm_response = json.loads( 207 | retrieve_llm_response( 208 | user_prompt, 209 | model=model, 210 | max_output_tokens=max_output_tokens, 211 | prefix_messages=system_prompt, 212 | ) 213 | ) 214 | tags = llm_response.get("tags", []) 215 | is_escalate = llm_response.get("is_escalate", False) 216 | response_message = llm_response.get("message", None) 217 | else: 218 | logger.info("🚫📝 No similar nodes found, returning default response") 219 | 220 | # ---------------- 221 | # Get user details 222 | # ---------------- 223 | user = get_user_by_uuid_or_identifier( 224 | identifier, session=session, should_except=False 225 | ) 226 | 227 | if not user: 228 | logger.debug("🚫👤 User not found, creating new user") 229 | user_params = { 230 | "identifier": identifier, 231 | "identifier_type": channel.value 232 | if isinstance(channel, CHANNEL_TYPE) 233 | else channel, 234 | } 235 | if user_data: 236 | user_params = {**user_params, **user_data} 237 | 238 | user = User.create(user_params) 239 | else: 240 | logger.debug(f"👤 User found: {user}") 241 | 242 | # ----------------------------------- 243 | # Calculate input and response tokens 244 | # ----------------------------------- 245 | token_count = get_token_count(prompt) + get_token_count(response_message) 246 | 247 | # --------------- 248 | # Add to meta tag 249 | # --------------- 250 | if tags: 251 | meta["tags"] = tags 252 | 253 | meta["is_escalate"] = is_escalate 254 | 255 | if session_id: 256 | meta["session_id"] = session_id 257 | 258 | chat_session = ChatSession( 259 | user_id=user.id, 260 | session_id=session_id, 261 | project_id=project.id if project else None, 262 | channel=channel.value if isinstance(channel, CHANNEL_TYPE) else channel, 263 | user_message=query_str, 264 | embeddings=query_embeddings, 265 | token_count=token_count if token_count > 0 else None, 266 | response=response_message, 267 | meta=meta, 268 | ) 269 | 270 | if session: 271 | session.add(chat_session) 272 | session.commit() 273 | session.refresh(chat_session) 274 | 275 | else: 276 | with Session(get_engine()) as session: 277 | session.add(chat_session) 278 | session.commit() 279 | session.refresh(chat_session) 280 | 281 | return chat_session 282 | 283 | 284 | # ------------------------------ 285 | # Retrieve a random agent's name 286 | # ------------------------------ 287 | def get_random_agent(): 288 | return random.choice(AGENT_NAMES) 289 | 290 |
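# Example of the tuple returned by get_prompt_template below (abridged,
# agent name hypothetical):
#   system_prompt = [{"role": "system", "content": "[AGENT]: I am Sarah, ..."}]
#   user_prompt = "[USER]:\nWhat does Pepetamine treat?"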
291 | # ------------------------ 292 | # Retrieve prompt template 293 | # ------------------------ 294 | def get_prompt_template( 295 | user_query: str = None, 296 | context_str: str = None, 297 | project: Optional[Project] = None, 298 | organization: Optional[Organization] = None, 299 | agent: str = None, 300 | ) -> Tuple[List[dict], str]: 301 | agent = f"{agent}, " if agent else "" 302 | user_query = user_query if user_query else "" 303 | context_str = context_str if context_str else "" 304 | organization = ( 305 | project.organization.display_name 306 | if project 307 | else organization.display_name 308 | if organization 309 | else None 310 | ) 311 | 312 | if not context_str or not user_query: 313 | raise ValueError( 314 | "Missing required arguments context_str, user_query" 315 | ) 316 | 317 | system_prompt = [ 318 | { 319 | "role": "system", 320 | "content": f"""[AGENT]: 321 | I am {agent} a very kind and enthusiastic customer support agent who loves to help customers. I am working on behalf of "{organization}" 322 | 323 | Given the following document from "{organization}", I will answer the [USER] questions using only the [DOCUMENT] and following the [RULES]. 324 | 325 | [DOCUMENT]: 326 | {context_str} 327 | 328 | [RULES]: 329 | I will answer the user's questions using only the [DOCUMENT] provided. I will abide by the following rules: 330 | - I am a kind and helpful human, the best customer support agent in existence 331 | - I never lie or invent answers not explicitly provided in [DOCUMENT] 332 | - If I am unsure of the answer or the answer is not explicitly contained in [DOCUMENT], I will say: "I apologize, I'm not sure how to help with that". 333 | - I always keep my answers short, relevant and concise. 334 | - I will always respond in JSON format with the following keys: "message" my response to the user, "tags" an array of short labels categorizing user input, "is_escalate" a boolean that is true if I am unsure and the conversation should be escalated to a human, false otherwise 335 | """, 336 | } 337 | ] 338 | 339 | return (system_prompt, f"[USER]:\n{user_query}") 340 | 341 | 342 | # ---------------------------- 343 | # Get the count of tokens used 344 | # ---------------------------- 345 | # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb 346 | def get_token_count(text: str): 347 | if not text: 348 | return 0 349 | 350 | return OpenAI().get_num_tokens(text=text) 351 | 352 | 353 | # -------------------------------------------- 354 | # Query embedding search for similar documents 355 | # -------------------------------------------- 356 | def get_nodes_by_embedding( 357 | embeddings: List[float], 358 | k: int = LLM_MIN_NODE_LIMIT, 359 | distance_strategy: Optional[DISTANCE_STRATEGY] = LLM_DEFAULT_DISTANCE_STRATEGY, 360 | distance_threshold: Optional[float] = LLM_DISTANCE_THRESHOLD, 361 | session: Optional[Session] = None, 362 | ) -> List[Node]: 363 | # Convert embeddings array into sql string 364 | embeddings_str = str(embeddings) 365 | 366 | if distance_strategy == DISTANCE_STRATEGY.EUCLIDEAN: 367 | distance_fn = "match_node_euclidean" 368 | elif distance_strategy == DISTANCE_STRATEGY.COSINE: 369 | distance_fn = "match_node_cosine" 370 | elif distance_strategy == DISTANCE_STRATEGY.MAX_INNER_PRODUCT: 371 | distance_fn = "match_node_max_inner_product" 372 | else: 373 | raise Exception(f"Invalid distance strategy {distance_strategy}") 374 | 375 | # ---------------------------- 376 | # Let's do a similarity search 377 | # ---------------------------- 378 | sql = f"""SELECT * FROM {distance_fn}( 379 | '{embeddings_str}'::vector({VECTOR_EMBEDDINGS_COUNT}), 380 | {float(distance_threshold)}::double precision, 381 | {int(k)});""" 382 | 383 | # logger.debug(f'🔍 Query: {sql}') 384 | 385 | # Execute query, convert results to Node objects 386 | if not session: 387 | with Session(get_engine()) as session: 388 | nodes = session.exec(text(sql)).all() 389 | else: 390 | nodes = session.exec(text(sql)).all() 391 | 392 | return [Node.by_uuid(str(node[0])) for node in nodes] if nodes else [] 393 | 394 | 395 | # -------------- 396 | # Queries OpenAI 397 | # -------------- 398 | def retrieve_llm_response( 399 | query_str: str, 400 | model: Optional[LLM_MODELS] = 
LLM_MODELS.GPT_35_TURBO, 401 | temperature: Optional[float] = LLM_DEFAULT_TEMPERATURE, 402 | max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS, 403 | prefix_messages: Optional[List[dict]] = None, 404 | ): 405 | llm = OpenAI( 406 | temperature=temperature, 407 | model_name=model.model_name 408 | if isinstance(model, LLM_MODELS) 409 | else LLM_MODELS.GPT_35_TURBO.model_name, 410 | max_tokens=max_output_tokens, 411 | prefix_messages=prefix_messages, 412 | ) 413 | try: 414 | result = llm(prompt=query_str) 415 | except openai.error.InvalidRequestError as e: 416 | logger.error(f"🚨 LLM error: {e}") 417 | raise HTTPException(status_code=500, detail=f"LLM error: {e}") 418 | logger.debug(f"💬 LLM result: {str(result)}") 419 | return sanitize_output(result) 420 | 421 | 422 | # -------------------------- 423 | # Create document embeddings 424 | # -------------------------- 425 | def get_embeddings( 426 | document_data: str, 427 | document_type: DOCUMENT_TYPE = DOCUMENT_TYPE.PLAINTEXT, 428 | ) -> Tuple[List[str], List[float]]: 429 | documents = [LangChainDocument(page_content=document_data)] 430 | 431 | logger.debug(documents) 432 | if document_type == DOCUMENT_TYPE.MARKDOWN: 433 | doc_splitter = MarkdownTextSplitter( 434 | chunk_size=LLM_CHUNK_SIZE, chunk_overlap=LLM_CHUNK_OVERLAP 435 | ) 436 | else: 437 | doc_splitter = CharacterTextSplitter( 438 | chunk_size=LLM_CHUNK_SIZE, chunk_overlap=LLM_CHUNK_OVERLAP 439 | ) 440 | 441 | # Returns an array of Documents 442 | split_documents = doc_splitter.split_documents(documents) 443 | 444 | # Lets convert them into an array of strings for OpenAI 445 | arr_documents = [doc.page_content for doc in split_documents] 446 | 447 | # https://github.com/hwchase17/langchain/blob/d18b0caf0e00414e066c9903c8df72bb5bcf9998/langchain/embeddings/openai.py#L219 448 | embed_func = OpenAIEmbeddings() 449 | 450 | embeddings = embed_func.embed_documents( 451 | texts=arr_documents, chunk_size=LLM_CHUNK_SIZE 452 | ) 453 | 454 | return arr_documents, embeddings -------------------------------------------------------------------------------- /app/api/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import ( 2 | FastAPI, 3 | File, 4 | Depends, 5 | HTTPException, 6 | UploadFile 7 | ) 8 | from fastapi.openapi.utils import get_openapi 9 | from fastapi.staticfiles import StaticFiles 10 | from sqlmodel import Session, select 11 | 12 | from typing import ( 13 | List, 14 | Optional, 15 | Union, 16 | Any 17 | ) 18 | from datetime import datetime 19 | import requests 20 | import aiohttp 21 | import time 22 | import json 23 | import os 24 | 25 | # ----------- 26 | # LLM imports 27 | # ----------- 28 | from llm import ( 29 | chat_query 30 | ) 31 | 32 | # ---------------- 33 | # Database imports 34 | # ---------------- 35 | from models import ( 36 | # --------------- 37 | # Database Models 38 | # --------------- 39 | Organization, 40 | OrganizationCreate, 41 | OrganizationRead, 42 | OrganizationUpdate, 43 | User, 44 | UserCreate, 45 | UserRead, 46 | UserReadList, 47 | UserUpdate, 48 | DocumentRead, 49 | DocumentReadList, 50 | ProjectCreate, 51 | ProjectRead, 52 | ProjectReadList, 53 | ChatSessionResponse, 54 | ChatSessionCreatePost, 55 | WebhookCreate, 56 | # ------------------ 57 | # Database functions 58 | # ------------------ 59 | get_engine, 60 | get_session 61 | 62 | ) 63 | from helpers import ( 64 | # ---------------- 65 | # Helper functions 66 | # ---------------- 67 | get_org_by_uuid_or_namespace, 68 | get_project_by_uuid, 69 
| get_user_by_uuid_or_identifier, 70 | get_users, 71 | get_documents_by_project_and_org, 72 | get_document_by_uuid, 73 | create_org_by_org_or_uuid, 74 | create_project_by_org, create_document_by_file_path, create_user as create_user_helper 75 | ) 76 | from util import ( 77 | save_file, 78 | get_sha256, 79 | is_uuid, 80 | logger 81 | ) 82 | # -------------- 83 | # Config imports 84 | # -------------- 85 | from config import ( 86 | APP_NAME, 87 | APP_VERSION, 88 | APP_DESCRIPTION, 89 | ENTITY_STATUS, 90 | CHANNEL_TYPE, 91 | LLM_MODELS, 92 | LLM_DISTANCE_THRESHOLD, 93 | LLM_DEFAULT_DISTANCE_STRATEGY, 94 | LLM_MAX_OUTPUT_TOKENS, 95 | LLM_MIN_NODE_LIMIT, 96 | FILE_UPLOAD_PATH, 97 | RASA_WEBHOOK_URL 98 | ) 99 | 100 | 101 | # ------------------ 102 | # Mount static files 103 | # ------------------ 104 | 105 | 106 | app = FastAPI() 107 | 108 | app.mount("/static", StaticFiles(directory="static"), name="static") 109 | 110 | # --------------------- 111 | # Health check endpoint 112 | # --------------------- 113 | @app.get("/health", include_in_schema=False) 114 | def health_check(): 115 | return {'status': 'ok'} 116 | 117 | 118 | # ====================== 119 | # ORGANIZATION ENDPOINTS 120 | # ====================== 121 | 122 | # --------------------- 123 | # Get all organizations 124 | # --------------------- 125 | @app.get("/org", response_model=List[OrganizationRead]) 126 | def read_organizations(): 127 | ''' 128 | ## Get all active organizations 129 | 130 | Returns: 131 | List[OrganizationRead]: List of organizations 132 | 133 | ''' 134 | with Session(get_engine()) as session: 135 | orgs = session.exec(select(Organization).where(Organization.status == ENTITY_STATUS.ACTIVE.value)).all() 136 | return orgs 137 | 138 | 139 | # ---------------------- 140 | # Create an organization 141 | # ---------------------- 142 | @app.post("/org", response_model=Union[OrganizationRead, Any]) 143 | def create_organization( 144 | *, 145 | session: Session = Depends(get_session), 146 | organization: Optional[OrganizationCreate] = None, 147 | namespace: Optional[str] = None, 148 | display_name: Optional[str] = None 149 | ): 150 | ''' 151 | 152 | ### Creates a new organization 153 | ### Args: 154 | - **namespace**: Unique namespace for the organization (ex. openai) 155 | - **display_name**: Display name of the organization (ex. OpenAI) 156 | - **bot_url**: URL of the bot (ex. https://t.me/your_bot) 157 | 158 | ### Returns: 159 | - OrganizationRead 160 | ---
161 | 👇 💻 Code examples: 162 | ### 🖥️ Curl 163 | ```bash 164 | curl -X POST "http://localhost:8888/org" -H "accept: application/json" -H "Content-Type: application/json" -d '{"namespace":"openai","display_name":"OpenAI","bot_url":"https://t.me/your_bot"}' 165 | ``` 166 |
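Example response (abridged; values are illustrative):
```json
{"id": 1, "uuid": "1b4e28ba-…", "display_name": "OpenAI", "namespace": "openai", "bot_url": "https://t.me/your_bot"}
```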
167 | ### 🐍 Python 168 | ```python 169 | import requests 170 | response = requests.post("http://localhost:8888/org", json={"namespace":"openai","display_name":"OpenAI","bot_url":"https://t.me/your_bot"}) 171 | print(response.json()) 172 | ``` 173 |
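### ⚡ HTTPie (a minimal sketch, assuming the httpie CLI is installed)
```bash
http POST http://localhost:8888/org namespace=openai display_name=OpenAI bot_url=https://t.me/your_bot
```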
174 | ''' 175 | # Create organization 176 | return create_org_by_org_or_uuid( 177 | organization=organization, 178 | namespace=namespace, 179 | display_name=display_name, session=session 180 | ) 181 | 182 | 183 | # ---------------------------------------- 184 | # Get an organization by UUID or namespace 185 | # ---------------------------------------- 186 | @app.get("/org/{organization_id}", response_model=Union[OrganizationRead, Any]) 187 | def read_organization( 188 | *, 189 | session: Session = Depends(get_session), 190 | organization_id: str 191 | ): 192 | 193 | organization = get_org_by_uuid_or_namespace(organization_id, session=session) 194 | 195 | return organization 196 | 197 | 198 | # ------------------------------ 199 | # Update an organization by UUID 200 | # ------------------------------ 201 | @app.put("/org/{organization_id}", response_model=Union[OrganizationRead, Any]) 202 | def update_organization( 203 | *, 204 | session: Session = Depends(get_session), 205 | organization_id: str, 206 | organization: OrganizationUpdate 207 | ): 208 | 209 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 210 | 211 | org.update(organization.dict(exclude_unset=True)) 212 | return org 213 | 214 | 215 | # ================= 216 | # Project endpoints 217 | # ================= 218 | 219 | # ----------------------- 220 | # Get all projects by org 221 | # ----------------------- 222 | @app.get("/project", response_model=List[ProjectReadList]) 223 | def read_projects( 224 | *, 225 | session: Session = Depends(get_session), 226 | organization_id: str 227 | ): 228 | 229 | organization = get_org_by_uuid_or_namespace(organization_id, session=session) 230 | 231 | if not organization.projects: 232 | raise HTTPException(status_code=404, detail='No projects found for organization') 233 | 234 | return organization.projects 235 | 236 | 237 | # ----------------------- 238 | # Create a project by org 239 | # ----------------------- 240 | @app.post("/project", response_model=Union[ProjectRead, Any]) 241 | def create_project( 242 | *, 243 | session: Session = Depends(get_session), 244 | organization_id: str, 245 | project: ProjectCreate 246 | ): 247 | return create_project_by_org( 248 | organization_id=organization_id, 249 | project=project, 250 | session=session 251 | ) 252 | 253 | 254 | # ----------------------------- 255 | # Get a project by UUID and org 256 | # ----------------------------- 257 | @app.get("/project/{project_id}", response_model=Union[ProjectRead, Any]) 258 | def read_project( 259 | *, 260 | session: Session = Depends(get_session), 261 | organization_id: str, 262 | project_id: str 263 | ): 264 | 265 | return get_project_by_uuid(uuid=project_id, organization_id=organization_id, session=session) 266 | 267 | 268 | # ================== 269 | # DOCUMENT ENDPOINTS 270 | # ================== 271 | 272 | # --------------- 273 | # Upload document 274 | # --------------- 275 | @app.post("/document", response_model=Union[DocumentReadList, Any]) 276 | async def upload_document( 277 | *, 278 | session: Session = Depends(get_session), 279 | organization_id: str, 280 | project_id: str, 281 | url: Optional[str] = None, 282 | file: Optional[UploadFile] = File(None), 283 | overwrite: Optional[bool] = True 284 | ): 285 | organization = get_org_by_uuid_or_namespace(organization_id, session=session) 286 | project = get_project_by_uuid(uuid=project_id, organization_id=organization_id, session=session) 287 | file_root_path = os.path.join(FILE_UPLOAD_PATH, str(organization.uuid), str(project.uuid)) 288 | 289 | file_version = 1 290 |
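# Illustrative multipart upload (hypothetical project UUID):
#   curl -X POST "http://localhost:8888/document?organization_id=pepe&project_id=<project_uuid>" \
#        -F "file=@org-about_the_company.md"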
291 | # ------------------------ 292 | # Enforce XOR for url/file 293 | # ------------------------ 294 | if url and file: 295 | raise HTTPException(status_code=400, detail='You can only upload a file OR provide a URL, not both') 296 | 297 | # -------------------- 298 | # Upload file from URL 299 | # -------------------- 300 | if url: 301 | file_name = url.split('/')[-1] 302 | file_upload_path = os.path.join(file_root_path, file_name) 303 | file_exists = os.path.isfile(file_upload_path) 304 | 305 | if file_exists: 306 | file_name = f'{file_name}_{int(time.time())}' 307 | file_upload_path = os.path.join(file_root_path, file_name) 308 | # Use http_session so we don't shadow the database session 309 | async with aiohttp.ClientSession() as http_session: 310 | async with http_session.get(url) as resp: 311 | if resp.status != 200: 312 | raise HTTPException(status_code=400, detail=f'Could not download file from {url}') 313 | 314 | with open(file_upload_path, 'wb') as f: 315 | while True: 316 | chunk = await resp.content.read(1024) 317 | if not chunk: 318 | break 319 | f.write(chunk) 320 | 321 | with open(file_upload_path, 'rb') as f: file_contents = f.read() 322 | file_hash = get_sha256(contents=file_contents) 323 | 324 | # ----------------------- 325 | # Upload file from device 326 | # ----------------------- 327 | else: 328 | file_name = file.filename 329 | file_upload_path = os.path.join(file_root_path, file_name) 330 | file_exists = os.path.isfile(file_upload_path) 331 | 332 | if file_exists: 333 | file_name = f'{file_name}_{int(time.time())}' 334 | file_upload_path = os.path.join(file_root_path, file_name) 335 | 336 | file_contents = await file.read() 337 | file_hash = get_sha256(contents=file_contents) 338 | await save_file(file, file_upload_path) 339 | 340 | document_obj = create_document_by_file_path( 341 | organization=organization, 342 | project=project, 343 | file_path=file_upload_path, 344 | file_hash=file_hash, 345 | file_version=file_version, 346 | url=url, 347 | overwrite=overwrite, 348 | session=session 349 | ) 350 | return document_obj 351 | 352 | 353 | # -------------------------------- 354 | # List all documents for a project 355 | # -------------------------------- 356 | @app.get("/document", response_model=List[DocumentReadList]) 357 | def read_documents( 358 | *, 359 | session: Session = Depends(get_session), 360 | organization_id: str, 361 | project_id: str 362 | ): 363 | return get_documents_by_project_and_org(project_id=project_id, organization_id=organization_id, session=session) 364 | 365 | # ---------------------- 366 | # Get a document by UUID 367 | # ---------------------- 368 | @app.get("/document/{document_id}", response_model=DocumentRead) 369 | def read_document( 370 | *, 371 | session: Session = Depends(get_session), 372 | organization_id: str, 373 | project_id: str, 374 | document_id: str 375 | ): 376 | return get_document_by_uuid(uuid=document_id, project_id=project_id, organization_id=organization_id, session=session) 377 | 378 | 379 | # ============== 380 | # USER ENDPOINTS 381 | # ============== 382 | 383 | # ------------- 384 | # Get all users 385 | # ------------- 386 | @app.get("/user", response_model=List[UserReadList]) 387 | def read_users( 388 | *, 389 | session: Session = Depends(get_session), 390 | ): 391 | return get_users(session=session) 392 | 393 | 394 | # ------------- 395 | # Create a user 396 | # ------------- 397 | @app.post("/user", response_model=UserRead) 398 | def create_user( 399 | *, 400 | session: Session = Depends(get_session), 401 | user: UserCreate 402 | ): 403 | 404 | return create_user_helper( 405 | user=user,
406 | session=session 407 | ) 408 | 409 | 410 | # ------------------ 411 | # Get a user by UUID 412 | # ------------------ 413 | @app.get("/user/{user_uuid}", response_model=UserRead) 414 | def read_user( 415 | *, 416 | session: Session = Depends(get_session), 417 | user_uuid: str 418 | ): 419 | 420 | return get_user_by_uuid_or_identifier(id=user_uuid, session=session) 421 | 422 | 423 | # --------------------- 424 | # Update a user by UUID 425 | # --------------------- 426 | @app.put("/user/{user_uuid}", response_model=UserRead) 427 | def update_user(*, user_uuid: str, user: UserUpdate): 428 | 429 | # Get user by UUID 430 | db_user = User.by_uuid(user_uuid) 431 | 432 | # If the user exists, update it 433 | if db_user: 434 | db_user.update(user.dict(exclude_unset=True)) 435 | return db_user 436 | 437 | # If the user doesn't exist, return 404 438 | else: 439 | raise HTTPException(status_code=404, detail=f'User {user_uuid} not found!') 440 | 441 | 442 | # ============= 443 | # LLM ENDPOINTS 444 | # ============= 445 | 446 | 447 | def process_webhook_telegram(webhook_data: dict): 448 | """ 449 | Telegram example response: 450 | { 451 | "update_id": 248146407, 452 | "message": { 453 | "message_id": 299, 454 | "from": { 455 | "id": 123456789, 456 | "is_bot": false, 457 | "first_name": "Elon", 458 | "username": "elonmusk", 459 | "language_code": "en" 460 | }, 461 | "chat": { 462 | "id": 123456789, 463 | "first_name": "Elon", 464 | "username": "elonmusk", 465 | "type": "private" 466 | }, 467 | "date": 1683115867, 468 | "text": "Tell me about the company?" 469 | } 470 | } 471 | """ 472 | message = webhook_data.get('message', None) 473 | chat = message.get('chat', None) 474 | message_from = message.get('from', None) 475 | return { 476 | 'update_id': webhook_data.get('update_id', None), 477 | 'message_id': message.get('message_id', None), 478 | 'user_id': message_from.get('id', None), 479 | 'username': message_from.get('username', None), 480 | 'user_language': message_from.get('language_code', None), 481 | 'user_firstname': chat.get('first_name', None), 482 | 'user_message': message.get('text', None), 483 | 'message_ts': datetime.fromtimestamp(message.get('date', None)) if message.get('date', None) else None, 484 | 'message_type': chat.get('type', None) 485 | } 486 | 487 | 488 | @app.post("/webhooks/{channel}/webhook") 489 | def get_webhook( 490 | *, 491 | session: Session = Depends(get_session), 492 | channel: str, 493 | webhook: WebhookCreate 494 | ): 495 | webhook_data = webhook.dict() 496 | 497 | # -------------------- 498 | # Get webhook metadata 499 | # -------------------- 500 | if channel == 'telegram': 501 | rasa_webhook_url = f'{RASA_WEBHOOK_URL}/webhooks/{channel}/webhook' 502 | data = process_webhook_telegram(webhook_data) 503 | channel = CHANNEL_TYPE.TELEGRAM.value 504 | user_data = { 505 | 'identifier': data['user_id'], 506 | 'identifier_type': channel, 507 | 'first_name': data['user_firstname'], 508 | 'language': data['user_language'] 509 | } 510 | session_metadata = { 511 | 'update_id': data['update_id'], 512 | 'username': data['username'], 513 | 'message_id': data['message_id'], 514 | 'msg_ts': data['message_ts'].isoformat() if data['message_ts'] else None, 515 | 'msg_type': data['message_type'], 516 | } 517 | user_message = data['user_message'] 518 | else: 519 | # Not a valid channel, return 404 520 | raise HTTPException(status_code=404, detail=f'Channel {channel} not a valid webhook channel!') 521 | 522 | chat_session = chat_query( 523 | user_message, 524 | session=session, 525 | channel=channel, 526 | identifier=user_data['identifier'], 527 | user_data=user_data,
528 | meta=session_metadata 529 | ) 530 | 531 | meta = chat_session.meta 532 | 533 | # ----------------------------------------- 534 | # Lets add the LLM response to the metadata 535 | # ----------------------------------------- 536 | webhook_data['message']['meta'] = { 537 | 'response': chat_session.response if chat_session.response else None, 538 | 'tags': meta['tags'] if 'tags' in meta else None, 539 | 'is_escalate': meta['is_escalate'] if 'is_escalate' in meta else False, 540 | 'session_id': meta['session_id'] if 'session_id' in meta else None 541 | 542 | } 543 | 544 | # ----------------------------------- 545 | # Forward the webhook to Rasa webhook 546 | # ----------------------------------- 547 | res = requests.post(rasa_webhook_url, data=json.dumps(webhook_data)) 548 | logger.debug(f'[🤖 RasaGPT API webhook]\nPosting data: {json.dumps(webhook_data)}\n\n[🤖 RasaGPT API webhook]\nRasa webhook response: {res.text}') 549 | 550 | return {'status': 'ok'} 551 | 552 | 553 | # ------------------ 554 | # Customize API docs 555 | # ------------------ 556 | _schema = get_openapi( 557 | title=APP_NAME, 558 | description=APP_DESCRIPTION, 559 | version=APP_VERSION, 560 | routes=app.routes, 561 | ) 562 | _schema['info']['x-logo'] = { 563 | 'url': '/static/img/rasagpt-logo-1.png' 564 | } 565 | app.openapi_schema = _schema -------------------------------------------------------------------------------- /app/api/models.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.dialects.postgresql import JSONB 2 | from sqlalchemy.orm import declared_attr 3 | from pgvector.sqlalchemy import Vector 4 | from sqlalchemy import Column 5 | from datetime import datetime 6 | from util import snake_case 7 | import uuid as uuid_pkg 8 | 9 | from sqlmodel import ( 10 | UniqueConstraint, 11 | create_engine, 12 | Relationship, 13 | SQLModel, 14 | Session, 15 | select, 16 | Field, 17 | ) 18 | from typing import ( 19 | Optional, 20 | Union, 21 | List, 22 | Dict, 23 | Any 24 | ) 25 | from config import ( 26 | LLM_DEFAULT_DISTANCE_STRATEGY, 27 | VECTOR_EMBEDDINGS_COUNT, 28 | LLM_MAX_OUTPUT_TOKENS, 29 | DISTANCE_STRATEGIES, 30 | LLM_MIN_NODE_LIMIT, 31 | PGVECTOR_ADD_INDEX, 32 | ENTITY_STATUS, 33 | CHANNEL_TYPE, 34 | LLM_MODELS, 35 | DB_USER, 36 | SU_DSN, 37 | logger, 38 | ) 39 | 40 | 41 | # ========== 42 | # Base model 43 | # ========== 44 | class BaseModel(SQLModel): 45 | @declared_attr 46 | def __tablename__(cls) -> str: 47 | return snake_case(cls.__name__) 48 | 49 | @classmethod 50 | def by_uuid(self, _uuid: uuid_pkg.UUID): 51 | with Session(get_engine()) as session: 52 | q = select(self).where(self.uuid == _uuid) 53 | org = session.exec(q).first() 54 | return org if org else None 55 | 56 | def update(self, o: Union[SQLModel, dict] = None): 57 | if not o: 58 | raise ValueError("Must provide a model or dict to update values") 59 | o = o if isinstance(o, dict) else o.dict(exclude_unset=True) 60 | for key, value in o.items(): 61 | setattr(self, key, value) 62 | 63 | # save and commit to database 64 | with Session(get_engine()) as session: 65 | session.add(self) 66 | session.commit() 67 | session.refresh(self) 68 | 69 | def delete(self): 70 | with Session(get_engine()) as session: 71 | self.status = ENTITY_STATUS.DELETED 72 | self.updated_at = datetime.utcnow() 73 | session.add(self) 74 | session.commit() 75 | session.refresh(self) 76 | 77 | @classmethod 78 | def create(self, o: Union[SQLModel, dict] = None): 79 | if not o: 80 | raise ValueError("Must provide a model or dict to 
update values") 81 | 82 | with Session(get_engine()) as session: 83 | obj = self.from_orm(o) if isinstance(o, SQLModel) else self(**o) 84 | session.add(obj) 85 | session.commit() 86 | session.refresh(obj) 87 | 88 | return obj 89 | 90 | 91 | # ============ 92 | # Organization 93 | # ============ 94 | class Organization(BaseModel, table=True): 95 | id: Optional[int] = Field(default=None, primary_key=True) 96 | uuid: Optional[uuid_pkg.UUID] = Field( 97 | unique=True, default_factory=uuid_pkg.uuid4 98 | ) # UUID for the organization 99 | display_name: Optional[str] = Field( 100 | default="Untitled Organization 😊", index=True 101 | ) # display name of the organization 102 | namespace: str = Field( 103 | unique=True, index=True 104 | ) # unique organization namespace for URLs, etc. 105 | bot_url: Optional[str] = Field(default=None) # URL for the bot 106 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 107 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 108 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 109 | 110 | # ------------- 111 | # Relationships 112 | # ------------- 113 | projects: Optional[List["Project"]] = Relationship(back_populates="organization") 114 | documents: Optional[List["Document"]] = Relationship(back_populates="organization") 115 | 116 | @property 117 | def project_count(self) -> int: 118 | return len(self.projects) 119 | 120 | @property 121 | def document_count(self) -> int: 122 | return len(self.documents) 123 | 124 | def __repr__(self): 125 | return f"" 126 | 127 | 128 | class OrganizationCreate(SQLModel): 129 | display_name: Optional[str] 130 | namespace: Optional[str] 131 | bot_url: Optional[str] 132 | 133 | 134 | class OrganizationRead(SQLModel): 135 | id: int 136 | uuid: uuid_pkg.UUID 137 | display_name: str 138 | namespace: Optional[str] 139 | bot_url: Optional[str] 140 | created_at: datetime 141 | updated_at: datetime 142 | 143 | 144 | class OrganizationUpdate(SQLModel): 145 | display_name: Optional[str] 146 | namespace: Optional[str] 147 | bot_url: Optional[str] 148 | 149 | 150 | # =============== 151 | # User (customer) 152 | # =============== 153 | class User(BaseModel, table=True): 154 | id: Optional[int] = Field(default=None, primary_key=True) 155 | identifier: str = Field(default=None, unique=True, index=True) 156 | identifier_type: Optional[CHANNEL_TYPE] = Field(default=None) 157 | uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4) 158 | first_name: Optional[str] = Field(default=None) 159 | last_name: Optional[str] = Field(default=None) 160 | email: Optional[str] = Field(default=None) 161 | phone: Optional[str] = Field(default=None) 162 | dob: Optional[datetime] = Field(default=None) 163 | device_fingerprint: Optional[str] = Field(default=None) 164 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 165 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 166 | 167 | # ------------- 168 | # Relationships 169 | # ------------- 170 | chat_sessions: Optional[List["ChatSession"]] = Relationship(back_populates="user") 171 | 172 | @property 173 | def chat_session_count(self) -> int: 174 | return len(self.chat_sessions) 175 | 176 | __table_args__ = ( 177 | UniqueConstraint("identifier", "identifier_type", name="unq_id_idtype"), 178 | ) 179 | 180 | def __repr__(self): 181 | return f"" 182 | 183 | 184 | class UserCreate(SQLModel): 185 | identifier: str 186 | identifier_type: CHANNEL_TYPE 187 | device_fingerprint: 
Optional[str] 188 | first_name: Optional[str] 189 | last_name: Optional[str] 190 | email: Optional[str] 191 | phone: Optional[str] 192 | dob: Optional[datetime] 193 | 194 | 195 | class UserReadList(SQLModel): 196 | id: int 197 | identifier: Optional[str] 198 | identifier_type: Optional[CHANNEL_TYPE] 199 | uuid: uuid_pkg.UUID 200 | device_fingerprint: Optional[str] 201 | first_name: Optional[str] 202 | last_name: Optional[str] 203 | email: Optional[str] 204 | phone: Optional[str] 205 | dob: Optional[datetime] 206 | chat_session_count: int 207 | created_at: datetime 208 | updated_at: datetime 209 | 210 | 211 | class UserUpdate(SQLModel): 212 | device_fingerprint: Optional[str] 213 | 214 | first_name: Optional[str] 215 | last_name: Optional[str] 216 | email: Optional[str] 217 | phone: Optional[str] 218 | dob: Optional[datetime] 219 | 220 | 221 | # ======= 222 | # Project 223 | # ======= 224 | class Project(BaseModel, table=True): 225 | id: Optional[int] = Field(default=None, primary_key=True) 226 | uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4) 227 | organization_id: int = Field(default=None, foreign_key="organization.id") 228 | display_name: str = Field(default="📝 Untitled Project") 229 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 230 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 231 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 232 | 233 | # ------------- 234 | # Relationships 235 | # ------------- 236 | organization: Optional["Organization"] = Relationship(back_populates="projects") 237 | documents: Optional[List["Document"]] = Relationship(back_populates="project") 238 | chat_sessions: Optional[List["ChatSession"]] = Relationship( 239 | back_populates="project" 240 | ) 241 | 242 | @property 243 | def document_count(self) -> int: 244 | return len(self.documents) 245 | 246 | def __repr__(self): 247 | return f"<Project {self.display_name}>" 248 | 249 | 250 | class ProjectCreate(SQLModel): 251 | display_name: Optional[str] 252 | 253 | 254 | class ProjectReadListOrganization(SQLModel): 255 | uuid: uuid_pkg.UUID 256 | display_name: str 257 | namespace: Optional[str] 258 | document_count: int 259 | 260 | 261 | class ProjectUpdate(SQLModel): 262 | display_name: Optional[str] 263 | status: Optional[ENTITY_STATUS] 264 | 265 | 266 | # ========= 267 | # Documents 268 | # ========= 269 | class Document(BaseModel, table=True): 270 | id: Optional[int] = Field(default=None, primary_key=True) 271 | uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4) 272 | organization_id: int = Field(default=None, foreign_key="organization.id") 273 | project_id: int = Field(default=None, foreign_key="project.id") 274 | display_name: str = Field(default="Untitled Document 😊") 275 | url: str = Field(default="") 276 | data: Optional[bytes] = Field(default=None) 277 | hash: str = Field(default=None) 278 | version: Optional[int] = Field(default=1) 279 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 280 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 281 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 282 | 283 | # ------------- 284 | # Relationships 285 | # ------------- 286 | nodes: Optional[List["Node"]] = Relationship(back_populates="document") 287 | organization: Optional["Organization"] = Relationship(back_populates="documents") 288 | project: Optional["Project"] =
Relationship(back_populates="documents") 289 | 290 | @property 291 | def node_count(self) -> int: 292 | return len(self.nodes) 293 | 294 | __table_args__ = (UniqueConstraint("uuid", "hash", name="unq_org_document"),) 295 | 296 | def __repr__(self): 297 | return f"<Document {self.display_name} v{self.version}>" 298 | 299 | 300 | class ProjectRead(SQLModel): 301 | id: int 302 | uuid: uuid_pkg.UUID 303 | organization: Organization 304 | document_count: int 305 | documents: Optional[List[Document]] = None 306 | display_name: str 307 | created_at: datetime 308 | updated_at: datetime 309 | 310 | 311 | class DocumentCreate(SQLModel): 312 | project: Project 313 | display_name: Optional[str] 314 | url: Optional[str] 315 | version: Optional[str] 316 | data: Optional[bytes] 317 | hash: Optional[str] 318 | 319 | 320 | class DocumentUpdate(SQLModel): 321 | status: Optional[ENTITY_STATUS] 322 | 323 | 324 | # ============== 325 | # Document Nodes 326 | # ============== 327 | class Node(BaseModel, table=True): 328 | class Config: 329 | arbitrary_types_allowed = True 330 | 331 | id: Optional[int] = Field(default=None, primary_key=True) 332 | document_id: int = Field(default=None, foreign_key="document.id") 333 | uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4) 334 | embeddings: Optional[List[float]] = Field( 335 | sa_column=Column(Vector(VECTOR_EMBEDDINGS_COUNT)) 336 | ) 337 | meta: Optional[Dict] = Field(default=None, sa_column=Column(JSONB)) 338 | token_count: Optional[int] = Field(default=None) 339 | text: str = Field(default=None, nullable=False) 340 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 341 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 342 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 343 | 344 | # ------------- 345 | # Relationships 346 | # ------------- 347 | document: Optional["Document"] = Relationship(back_populates="nodes") 348 | 349 | def __repr__(self): 350 | return f"<Node {self.uuid}>" 351 | 352 | 353 | class NodeCreate(SQLModel): 354 | document: Document 355 | embeddings: List[float] 356 | token_count: Optional[int] 357 | text: str 358 | status: Optional[ENTITY_STATUS] 359 | 360 | 361 | class NodeRead(SQLModel): 362 | id: int 363 | document: Document 364 | embeddings: Optional[List[float]] 365 | token_count: Optional[int] 366 | text: str 367 | created_at: datetime 368 | 369 | 370 | class DocumentReadNodeList(SQLModel): 371 | id: int 372 | uuid: uuid_pkg.UUID 373 | display_name: str 374 | node_count: int 375 | 376 | 377 | class NodeReadResult(SQLModel): 378 | id: int 379 | token_count: Optional[int] 380 | text: str 381 | meta: Optional[Dict] 382 | 383 | 384 | class ProjectReadListDocumentList(SQLModel): 385 | uuid: uuid_pkg.UUID 386 | display_name: str 387 | node_count: Optional[int] 388 | 389 | 390 | class ProjectReadList(SQLModel): 391 | id: int 392 | # organization: ProjectReadListOrganization 393 | documents: Optional[List[DocumentReadNodeList]] 394 | document_count: int 395 | uuid: uuid_pkg.UUID 396 | display_name: str 397 | created_at: datetime 398 | updated_at: datetime 399 | 400 | 401 | class NodeReadList(SQLModel): 402 | id: int 403 | document: DocumentReadNodeList 404 | embeddings: Optional[List[float]] 405 | token_count: Optional[int] 406 | text: str 407 | created_at: datetime 408 | 409 | 410 | class NodeUpdate(SQLModel): 411 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 412 | 413 | 414 | class NodeReadListDocumentRead(SQLModel): 415 | uuid: uuid_pkg.UUID 416 | token_count: Optional[int]
417 | created_at: datetime 418 | 419 | 420 | class DocumentReadList(SQLModel): 421 | id: int 422 | uuid: uuid_pkg.UUID 423 | display_name: str 424 | version: int 425 | nodes: Optional[List[NodeReadListDocumentRead]] = None 426 | node_count: int 427 | hash: str 428 | created_at: datetime 429 | updated_at: datetime 430 | 431 | 432 | # ============ 433 | # Chat Session 434 | # ============ 435 | class ChatSession(BaseModel, table=True): 436 | class Config: 437 | arbitrary_types_allowed = True 438 | 439 | id: Optional[int] = Field(default=None, primary_key=True) 440 | session_id: Optional[uuid_pkg.UUID] = Field( 441 | index=True, default_factory=uuid_pkg.uuid4 442 | ) 443 | user_id: int = Field(default=None, foreign_key="user.id") 444 | project_id: int = Field(default=None, foreign_key="project.id") 445 | channel: CHANNEL_TYPE = Field(default=CHANNEL_TYPE.TELEGRAM) 446 | user_message: str = Field(default=None) 447 | token_count: Optional[int] = Field(default=None) 448 | embeddings: Optional[List[float]] = Field( 449 | sa_column=Column(Vector(VECTOR_EMBEDDINGS_COUNT)) 450 | ) 451 | response: Optional[str] = Field(default=None) 452 | meta: Optional[Dict] = Field(default=None, sa_column=Column(JSONB)) 453 | created_at: datetime = Field(default_factory=datetime.now) 454 | 455 | # ------------- 456 | # Relationships 457 | # ------------- 458 | user: Optional["User"] = Relationship(back_populates="chat_sessions") 459 | project: Optional["Project"] = Relationship(back_populates="chat_sessions") 460 | 461 | def __repr__(self): 462 | return f"<ChatSession {self.session_id}>" 463 | 464 | 465 | class ChatSessionCreatePost(SQLModel): 466 | project_id: Optional[str] = "" 467 | organization_id: Optional[str] = "pepe" 468 | channel: Optional[CHANNEL_TYPE] = CHANNEL_TYPE.TELEGRAM 469 | query: Optional[str] = "What is the weather like in London right now?"
470 | identifier: Optional[str] = "@username" 471 | distance_strategy: Optional[str] = LLM_DEFAULT_DISTANCE_STRATEGY 472 | max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS 473 | node_limit: Optional[int] = LLM_MIN_NODE_LIMIT 474 | model: Optional[str] = LLM_MODELS.GPT_35_TURBO 475 | session_id: Optional[str] = "" 476 | 477 | 478 | class ChatSessionCreate(SQLModel): 479 | channel: CHANNEL_TYPE 480 | token_count: Optional[int] 481 | user_message: str 482 | embeddings: List[float] 483 | response: Optional[str] 484 | 485 | 486 | class ChatSessionRead(SQLModel): 487 | id: int 488 | user: User 489 | project: Optional[ProjectReadListDocumentList] 490 | token_count: Optional[int] 491 | channel: CHANNEL_TYPE 492 | user_message: str 493 | embeddings: List[float] 494 | response: Optional[str] 495 | meta: Optional[dict] 496 | created_at: datetime = Field(default_factory=datetime.now) 497 | 498 | 499 | class ChatSessionResponse(SQLModel): 500 | meta: Optional[dict] 501 | response: Optional[str] 502 | user_message: Optional[str] 503 | 504 | 505 | class ProjectReadChatSessionRead(SQLModel): 506 | id: int 507 | token_count: Optional[int] 508 | channel: CHANNEL_TYPE 509 | created_at: datetime = Field(default_factory=datetime.now) 510 | 511 | 512 | class ChatSessionReadUserRead(SQLModel): 513 | id: int 514 | project: Optional[ProjectReadListDocumentList] 515 | token_count: Optional[int] 516 | channel: CHANNEL_TYPE 517 | user_message: str 518 | response: Optional[str] 519 | created_at: datetime = Field(default_factory=datetime.now) 520 | 521 | 522 | class UserRead(SQLModel): 523 | id: int 524 | identifier: Optional[str] 525 | identifier_type: Optional[CHANNEL_TYPE] 526 | uuid: uuid_pkg.UUID 527 | language: Optional[str] 528 | device_fingerprint: Optional[str] 529 | first_name: Optional[str] 530 | last_name: Optional[str] 531 | email: Optional[str] 532 | phone: Optional[str] 533 | dob: Optional[datetime] 534 | chat_session_count: int 535 | chat_sessions: Optional[List[ChatSessionReadUserRead]] 536 | created_at: datetime 537 | updated_at: datetime 538 | 539 | 540 | class DocumentReadProjectRead(SQLModel): 541 | uuid: uuid_pkg.UUID 542 | display_name: str 543 | namespace: Optional[str] 544 | document_count: int 545 | 546 | 547 | class DocumentRead(SQLModel): 548 | id: int 549 | uuid: uuid_pkg.UUID 550 | project: DocumentReadProjectRead 551 | organization: OrganizationRead 552 | display_name: str 553 | node_count: int 554 | url: Optional[str] 555 | version: int 556 | data: bytes 557 | hash: str 558 | created_at: datetime 559 | updated_at: datetime 560 | 561 | 562 | class WebhookCreate(SQLModel): 563 | update_id: str 564 | message: Dict[str, Any] 565 | 566 | 567 | class WebhookResponse(SQLModel): 568 | update_id: str 569 | message: Dict[str, Any] 570 | 571 | 572 | # ================== 573 | # Database functions 574 | # ================== 575 | def get_engine(dsn: str = SU_DSN): 576 | return create_engine(dsn) 577 | 578 | 579 | def get_session(): 580 | with Session(get_engine()) as session: 581 | yield session 582 | 583 | 584 | def create_db(): 585 | logger.info("...Enabling pgvector and creating database tables") 586 | enable_vector() 587 | BaseModel.metadata.create_all(get_engine(dsn=SU_DSN)) 588 | create_user_permissions() 589 | create_vector_index() 590 | 591 | 592 | def create_user_permissions(): 593 | session = Session(get_engine(dsn=SU_DSN)) 594 | # grant access to entire database and all tables to user DB_USER 595 | query = f"GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO 
{DB_USER};" 596 | session.execute(query) 597 | session.commit() 598 | session.close() 599 | 600 | 601 | def drop_db(): 602 | BaseModel.metadata.drop_all(get_engine(dsn=SU_DSN)) 603 | 604 | 605 | def create_vector_index(): 606 | # ------------------------------------- 607 | # Let's add an index for the embeddings 608 | # ------------------------------------- 609 | if PGVECTOR_ADD_INDEX is True: 610 | session = Session(get_engine(dsn=SU_DSN)) 611 | for strategy in DISTANCE_STRATEGIES: 612 | session.execute(strategy[3]) 613 | session.commit() 614 | 615 | 616 | def enable_vector(): 617 | session = Session(get_engine(dsn=SU_DSN)) 618 | query = "CREATE EXTENSION IF NOT EXISTS vector;" 619 | session.execute(query) 620 | session.commit() 621 | add_vector_distance_fn(session) 622 | session.close() 623 | 624 | 625 | def add_vector_distance_fn(session: Session): 626 | for strategy in DISTANCE_STRATEGIES: 627 | strategy_name = strategy[1] 628 | strategy_distance_str = strategy[2] 629 | 630 | query = f"""create or replace function match_node_{strategy_name} ( 631 | query_embeddings vector({VECTOR_EMBEDDINGS_COUNT}), 632 | match_threshold float, 633 | match_count int 634 | ) returns table ( 635 | uuid uuid, 636 | text varchar, 637 | similarity float 638 | ) 639 | language plpgsql 640 | as $$ 641 | begin 642 | return query 643 | select 644 | node.uuid, 645 | node.text, 646 | 1 - (node.embeddings {strategy_distance_str} query_embeddings) as similarity 647 | from node 648 | where 1 - (node.embeddings {strategy_distance_str} query_embeddings) > match_threshold 649 | order by similarity desc 650 | limit match_count; 651 | end; 652 | $$;""" 653 | 654 | session.execute(query) 655 | session.commit() 656 | session.close() 657 | 658 | 659 | if __name__ == "__main__": 660 | create_db() 661 | -------------------------------------------------------------------------------- /app/api/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.157 2 | uvicorn[standard] 3 | python-multipart 4 | psycopg2-binary 5 | python-dotenv 6 | fastapi[all] 7 | SQLAlchemy 8 | pgvector 9 | tiktoken 10 | aiofiles 11 | sqlmodel 12 | openai 13 | -------------------------------------------------------------------------------- /app/api/seed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | seed.py users the models in model.py and populates the database with dummy content 3 | ''' 4 | 5 | # ---------------- 6 | # Database imports 7 | # ---------------- 8 | from helpers import ( 9 | create_org_by_org_or_uuid, 10 | create_project_by_org, 11 | create_document_by_file_path 12 | ) 13 | from config import ( 14 | FILE_UPLOAD_PATH, 15 | logger 16 | ) 17 | from util import ( 18 | get_file_hash 19 | ) 20 | import os 21 | 22 | # -------------------- 23 | # Create organizations 24 | # -------------------- 25 | 26 | organizations = [ 27 | { 28 | 'display_name': 'Pepe Corp.', 29 | 'namespace': 'pepe', 30 | 'projects': [ 31 | { 32 | 'display_name': 'Pepetamine', 33 | 'docs': [ 34 | 'project-pepetamine.md' 35 | ] 36 | }, 37 | { 38 | 'display_name': 'Frogonil', 39 | 'docs': [ 40 | 'project-frogonil.md' 41 | ] 42 | }, 43 | { 44 | 'display_name': 'Kekzal', 45 | 'docs': [ 46 | 'project-kekzal.md' 47 | ] 48 | }, 49 | { 50 | 'display_name': 'Memetrex', 51 | 'docs': [ 52 | 'project-memetrex.md' 53 | ] 54 | }, 55 | { 56 | 'display_name': 'PepeTrak', 57 | 'docs': [ 58 | 'project-pepetrak.md' 59 | ] 60 | }, 61 | { 62 | 'display_name': 'MemeGen', 63 | 'docs': [ 64 
| 'project-memegen.md' 65 | ] 66 | }, 67 | { 68 | 'display_name': 'Neuro-kek', 69 | 'docs': [ 70 | 'project-neurokek.md' 71 | ] 72 | }, 73 | { 74 | 'display_name': 'Pepe Corp. (company)', 75 | 'docs': [ 76 | 'org-about_the_company.md', 77 | 'org-board_of_directors.md', 78 | 'org-company_story.md', 79 | 'org-corporate_philosophy.md', 80 | 'org-customer_support.md', 81 | 'org-earnings_fy2023.md', 82 | 'org-management_team.md' 83 | ] 84 | } 85 | ] 86 | }, 87 | { 88 | 'display_name': 'Umbrella Corp', 89 | 'namespace': 'acme', 90 | 'projects': [ 91 | {'display_name': 'T-Virus'}, 92 | {'display_name': 'G-Virus'}, 93 | {'display_name': 'Umbrella Corp. (company)'} 94 | ] 95 | }, 96 | { 97 | 'display_name': 'Cyberdine Systems', 98 | 'namespace': 'cyberdine', 99 | 'projects': [ 100 | {'display_name': 'Skynet'}, 101 | {'display_name': 'Cyberdine Systems (company)'} 102 | ] 103 | }, 104 | { 105 | 'display_name': 'Bluth Companies', 106 | 'namespace': 'bluth', 107 | 'projects': [ 108 | {'display_name': 'Bluth Company (company)'} 109 | ] 110 | }, 111 | { 112 | 'display_name': 'Evil Corp', 113 | 'namespace': 'evil', 114 | 'projects': [ 115 | {'display_name': 'E-Coin'}, 116 | {'display_name': 'E-Corp Power'}, 117 | {'display_name': 'Bank of E Network'}, 118 | {'display_name': 'E Corp Labs'}, 119 | {'display_name': 'Evil Corp (company)'} 120 | ] 121 | } 122 | ] 123 | 124 | training_data_path = os.path.join(os.path.dirname(__file__), f'{FILE_UPLOAD_PATH}/training_data') 125 | 126 | for org in organizations: 127 | 128 | org_obj = create_org_by_org_or_uuid( 129 | display_name=org['display_name'], 130 | namespace=org['namespace'] 131 | ) 132 | logger.debug(f'🏠 Created organization: {org_obj.display_name}') 133 | 134 | if 'projects' not in org: 135 | continue 136 | 137 | for project in org['projects']: 138 | project['organization'] = org_obj 139 | 140 | project_obj = create_project_by_org( 141 | organization_id=org_obj, 142 | display_name=project['display_name'] 143 | ) 144 | logger.debug(f'🗂️ Created project: {project_obj.display_name}') 145 | 146 | project_uuid = str(project_obj.uuid) 147 | org_uuid = str(org_obj.uuid) 148 | 149 | # if the directory does not exist, create it 150 | if not os.path.exists(os.path.join(FILE_UPLOAD_PATH, org_uuid, project_uuid)): 151 | os.mkdir(os.path.join(FILE_UPLOAD_PATH, org_uuid, project_uuid)) 152 | 153 | if 'docs' not in project: 154 | continue 155 | 156 | for doc in project['docs']: 157 | file_path = os.path.join(training_data_path, doc) 158 | 159 | # check if file exists 160 | if os.path.isfile(file_path): 161 | file_hash = get_file_hash(file_path) 162 | create_document_by_file_path( 163 | organization=org_obj, 164 | project=project_obj, 165 | file_path=file_path, 166 | file_hash=file_hash 167 | ) 168 | logger.info(f' ✅ Created document: {doc}') 169 | else: 170 | logger.error(f' ❌ Document not found: {doc}') -------------------------------------------------------------------------------- /app/api/static/img/rasagpt-icon-200x200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/app/api/static/img/rasagpt-icon-200x200.png -------------------------------------------------------------------------------- /app/api/static/img/rasagpt-logo-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/app/api/static/img/rasagpt-logo-1.png -------------------------------------------------------------------------------- /app/api/static/img/rasagpt-logo-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/app/api/static/img/rasagpt-logo-2.png -------------------------------------------------------------------------------- /app/api/util.py: -------------------------------------------------------------------------------- 1 | from fastapi import UploadFile 2 | from functools import partial 3 | from hashlib import sha256 4 | from uuid import UUID 5 | import aiofiles 6 | import json 7 | import re 8 | from config import ( 9 | logger 10 | ) 11 | 12 | _snake_1 = partial(re.compile(r'(.)((?<=[a-z])[A-Z]+)').sub, r'\1_\2') 13 | _snake_2 = partial(re.compile(r'([a-z0-9])([A-Z])').sub, r'\1_\2') 14 | 15 | 16 | # ------------------------------ 17 | # Convert a string to snake_case 18 | # ------------------------------ 19 | def snake_case(string: str) -> str: 20 | return _snake_2(_snake_1(string)).casefold() 21 | 22 | 23 | # ------------------------------ 24 | # Check if string is UUID format 25 | # ------------------------------ 26 | def is_uuid(uuid: str) -> bool: 27 | uuid = str(uuid) if isinstance(uuid, UUID) else uuid 28 | return bool(re.match(r"^[0-9a-f]{8}-?[0-9a-f]{4}-?4[0-9a-f]{3}-?[89ab][0-9a-f]{3}-?[0-9a-f]{12}$", uuid)) 29 | 30 | 31 | # --------------------------- 32 | # Writes a file to disk async 33 | # --------------------------- 34 | async def save_file(file: UploadFile, file_path: str): 35 | async with aiofiles.open(file_path, 'wb') as f: 36 | await f.write(await file.read()) 37 | 38 | 39 | # --------------------------- 40 | # Get SHA256 hash of contents 41 | # --------------------------- 42 | def get_sha256(contents: bytes): 43 | return sha256(contents).hexdigest() 44 | 45 | 46 | # ----------------------- 47 | # Get SHA256 hash of file 48 | # ----------------------- 49 | def get_file_hash( 50 | file_path: str, 51 | ): 52 | with open(file_path, 'rb') as f: 53 | file_hash = sha256(f.read()).hexdigest() 54 | 55 | return file_hash 56 | 57 | 58 | # ------------------- 59 | # Clean up LLM output 60 | # ------------------- 61 | def sanitize_output( 62 | str_output: str 63 | ): 64 | # Let's sanitize the JSON 65 | res = str_output.replace("\n", '') 66 | 67 | # If the first character is "?", remove it. Ran into this issue for some reason.
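# Example of the shape we expect after cleanup: '{"message": "...", "tags": ["refund"], "is_escalate": false}' -- these are the keys validated below.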
68 | if res and res[0] == '?': 69 | res = res[1:] 70 | 71 | # check if response is valid json 72 | try: 73 | json.loads(res) 74 | except json.JSONDecodeError: 75 | raise ValueError(f'LLM response is not valid JSON: {res}') 76 | 77 | if 'message' not in res or 'tags' not in res or 'is_escalate' not in res: 78 | raise ValueError(f'LLM response is missing required fields: {res}') 79 | 80 | logger.debug(f'Output: {res}') 81 | return res 82 | 83 | 84 | # ------------------ 85 | # Clean up LLM input 86 | # ------------------ 87 | def sanitize_input( 88 | str_input: str 89 | ): 90 | # Strip single quotes that cause output JSON issues 91 | str_input = str_input.replace("'", "") 92 | 93 | logger.debug(f'Input: {str_input}') 94 | return str_input 95 | 96 | -------------------------------------------------------------------------------- /app/db/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ankane/pgvector:latest 2 | 3 | # Copy in the load-extensions script 4 | COPY ./create_db.sh /docker-entrypoint-initdb.d/ 5 | RUN chmod +x /docker-entrypoint-initdb.d/create_db.sh -------------------------------------------------------------------------------- /app/db/create_db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This runs one time, when Postgres is first initialized 4 | 5 | echo "Creating database ${DB_NAME} and user..." 6 | 7 | # Create the user and database "api" 8 | 9 | psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL 10 | CREATE USER ${DB_USER} WITH ENCRYPTED PASSWORD '${DB_PASSWORD}'; 11 | CREATE DATABASE ${DB_NAME} OWNER ${DB_USER}; 12 | GRANT ALL PRIVILEGES ON DATABASE ${DB_NAME} TO ${DB_USER}; 13 | EOSQL -------------------------------------------------------------------------------- /app/rasa-credentials/.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd -------------------------------------------------------------------------------- /app/rasa-credentials/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim 2 | 3 | WORKDIR /app/rasa-credentials 4 | COPY . .
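# NOTE: copying the whole context before pip install rebuilds the dependency layer on every source change; a cache-friendlier sketch would COPY requirements.txt first, RUN pip install, then COPY the rest of the source.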
5 | 6 | # Add CURL 7 | RUN apt-get -y update 8 | RUN apt-get -y install curl 9 | 10 | # Install python dependencies 11 | RUN pip install --no-cache-dir -r requirements.txt 12 | 13 | # Expose the port 14 | EXPOSE 8889 15 | 16 | ENTRYPOINT ["uvicorn", "main:app", "--host", "rasa-credentials", "--port", "8889", "--reload"] -------------------------------------------------------------------------------- /app/rasa-credentials/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import ( 2 | HTTPException, 3 | FastAPI, 4 | Depends, 5 | ) 6 | import requests 7 | import logging 8 | import asyncio 9 | import httpx 10 | import yaml 11 | import sys 12 | import os 13 | 14 | # --------- 15 | # Constants 16 | # --------- 17 | CREDENTIALS_READY = False 18 | RETRY_LIMIT = 10 19 | RETRY_INTERVAL = 15 20 | 21 | # ---------------- 22 | # Environment vars 23 | # ---------------- 24 | NGROK_HOST = os.getenv("NGROK_HOST", "ngrok") 25 | NGROK_PORT = os.getenv("NGROK_PORT", 4040) 26 | NGROK_INTERNAL_WEBHOOK_HOST = os.getenv("NGROK_INTERNAL_WEBHOOK_HOST", "rasa-core") 27 | NGROK_INTERNAL_WEBHOOK_PORT = os.getenv("NGROK_INTERNAL_WEBHOOK_PORT", 5005) 28 | NGROK_API_URL = f"http://{NGROK_HOST}:{NGROK_PORT}" 29 | TELEGRAM_ACCESS_TOKEN = os.getenv("TELEGRAM_ACCESS_TOKEN", None) 30 | TELEGRAM_BOTNAME = os.getenv("TELEGRAM_BOTNAME", None) 31 | CREDENTIALS_PATH = os.getenv("CREDENTIALS_PATH", "/app/rasa/credentials.yml") 32 | 33 | # ------- 34 | # Logging 35 | # ------- 36 | logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) 37 | logger = logging.getLogger(__name__) 38 | logger.debug( 39 | f"NGROK_HOST: {NGROK_HOST}:{NGROK_PORT}\nNGROK_API_URL: {NGROK_API_URL}\nNGROK_INTERNAL_WEBHOOK_HOST: {NGROK_INTERNAL_WEBHOOK_HOST}:{NGROK_INTERNAL_WEBHOOK_PORT}" 40 | ) 41 | 42 | 43 | # --------------------------------- 44 | # Wait for ngrok API to come online 45 | # --------------------------------- 46 | async def wait_for_ngrok_api(): 47 | 48 | while True: 49 | try: 50 | async with httpx.AsyncClient() as client: 51 | response = await client.get(f"{NGROK_API_URL}/api/tunnels") 52 | response.raise_for_status() 53 | logger.debug("ngrok API is online.") 54 | return True 55 | except httpx.RequestError: 56 | logger.debug("ngrok API is offline. 
Waiting...") 57 | await asyncio.sleep(RETRY_INTERVAL) 58 | 59 | 60 | # ------------------------------------- 61 | # Fetch list of active tunnels on ngrok 62 | # ------------------------------------- 63 | async def get_active_tunnels(): 64 | try: 65 | response = requests.get(f"{NGROK_API_URL}/api/tunnels") 66 | response.raise_for_status() 67 | tunnels = response.json()["tunnels"] 68 | except requests.exceptions.HTTPError: 69 | tunnels = [] 70 | return tunnels 71 | 72 | 73 | # ----------------- 74 | # Stop ngrok tunnel 75 | # ----------------- 76 | async def stop_tunnel(tunnel): 77 | tunnel_id = tunnel["name"] 78 | response = requests.delete(f"{NGROK_API_URL}/api/tunnels/{tunnel_id}") 79 | response.raise_for_status() 80 | 81 | 82 | # ---------------------- 83 | # Stop all ngrok tunnels 84 | # ---------------------- 85 | async def stop_all_tunnels(): 86 | active_tunnels = await get_active_tunnels() 87 | if not active_tunnels: 88 | logger.debug("No active tunnels found.") 89 | else: 90 | for tunnel in active_tunnels: 91 | logger.debug(f"Stopping tunnel: {tunnel['name']} ({tunnel['public_url']})") 92 | await stop_tunnel(tunnel) 93 | 94 | 95 | # ------------------------------------- 96 | # Get the first ngrok tunnel w/ retries 97 | # ------------------------------------- 98 | async def get_tunnel(retry=0): 99 | if retry > RETRY_LIMIT: 100 | raise Exception( 101 | f"Could not create ngrok tunnel. Exceed retry limit of {RETRY_LIMIT} attempts." 102 | ) 103 | 104 | active_tunnels = await get_active_tunnels() 105 | if len(active_tunnels) == 0: 106 | logger.debug(f"No active tunnels found. Trying again in {RETRY_INTERVAL}s..") 107 | await asyncio.sleep(RETRY_INTERVAL) 108 | retry += 1 109 | return await get_tunnel(retry=retry) 110 | else: 111 | return active_tunnels[0]["public_url"] 112 | 113 | 114 | # ------------------- 115 | # Create ngrok tunnel 116 | # ------------------- 117 | async def create_tunnel(): 118 | response = requests.post( 119 | f"{NGROK_API_URL}/api/tunnels", 120 | json={ 121 | "addr": f"{NGROK_INTERNAL_WEBHOOK_HOST}:{NGROK_INTERNAL_WEBHOOK_PORT}", 122 | "proto": "http", 123 | "name": NGROK_INTERNAL_WEBHOOK_HOST, 124 | }, 125 | ) 126 | try: 127 | response.raise_for_status() 128 | return response.json()["public_url"] 129 | except requests.exceptions.HTTPError as e: 130 | logger.warning(f"Error creating ngrok tunnel: {e}") 131 | return False 132 | 133 | 134 | # ---------------------------- 135 | # Update Rasa credentials file 136 | # ---------------------------- 137 | async def update_credentials_file(ngrok_url): 138 | global CREDENTIALS_READY 139 | try: 140 | with open(CREDENTIALS_PATH, "r") as file: 141 | credentials = yaml.safe_load(file) 142 | 143 | credentials["custom_telegram.CustomTelegramInput"][ 144 | "webhook_url" 145 | ] = f"{ngrok_url}/webhooks/telegram/webhook" 146 | credentials["custom_telegram.CustomTelegramInput"][ 147 | "access_token" 148 | ] = TELEGRAM_ACCESS_TOKEN 149 | credentials["custom_telegram.CustomTelegramInput"]["verify"] = TELEGRAM_BOTNAME 150 | 151 | with open(CREDENTIALS_PATH, "w") as file: 152 | yaml.safe_dump(credentials, file) 153 | 154 | CREDENTIALS_READY = True 155 | except Exception as e: 156 | logger.warning(f"Error updating {CREDENTIALS_PATH}: {e}") 157 | sys.exit(1) 158 | 159 | 160 | # ----------------- 161 | # FastAPI endpoints 162 | # ----------------- 163 | 164 | app = FastAPI() 165 | 166 | 167 | # ------------- 168 | # Startup event 169 | # ------------- 170 | @app.on_event("startup") 171 | async def startup_event(): 172 | env = 
os.getenv("ENV", None) 173 | if env and env.lower() in ["dev", "development", "local"]: 174 | await wait_for_ngrok_api() 175 | url = await get_tunnel() 176 | if not url: 177 | logger.debug("No active tunnels found. Creating one...") 178 | url = await create_tunnel() 179 | logger.debug(f"Tunnel url: {url}") 180 | await update_credentials_file(url) 181 | else: 182 | logger.debug("Not in dev environment. Skipping.") 183 | 184 | 185 | # --------------------- 186 | # Endpoint dependencies 187 | # --------------------- 188 | async def check_endpoint_availability(): 189 | if not CREDENTIALS_READY: 190 | raise HTTPException(status_code=403, detail="Endpoint not available yet") 191 | return True 192 | 193 | 194 | # --------------------- 195 | # Health check endpoint 196 | # --------------------- 197 | # This endpoint is used by docker-compose to check if the 198 | # container is ready. If it is ready, Rasa core can start 199 | @app.get("/", dependencies=[Depends(check_endpoint_availability)]) 200 | async def health_check(): 201 | return {"status": "ok"} 202 | -------------------------------------------------------------------------------- /app/rasa-credentials/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv 2 | rasa-sdk 3 | requests 4 | pyyaml 5 | uvicorn[standard] 6 | fastapi[all] 7 | httpx 8 | asyncio -------------------------------------------------------------------------------- /app/rasa/.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd -------------------------------------------------------------------------------- /app/rasa/actions/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM khalosa/rasa-aarch64:3.5.2 2 | 3 | COPY . 
/app 4 | 5 | WORKDIR /app 6 | RUN pip install python-dotenv rasa-sdk requests 7 | EXPOSE 5055 8 | 9 | CMD ["run", "actions", "--debug"] -------------------------------------------------------------------------------- /app/rasa/actions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/app/rasa/actions/__init__.py -------------------------------------------------------------------------------- /app/rasa/actions/actions.py: -------------------------------------------------------------------------------- 1 | from rasa_sdk.executor import CollectingDispatcher 2 | from typing import Any, Text, Dict, List 3 | from rasa_sdk import Action, Tracker 4 | from dotenv import load_dotenv 5 | from logging import getLogger 6 | from enum import IntEnum 7 | import os 8 | 9 | logger = getLogger(__name__) 10 | 11 | env = os.getenv("ENV", "local") 12 | env_file = f".env-{env}" 13 | load_dotenv(dotenv_path=f"../../.env-{env}") 14 | 15 | 16 | MODEL_NAME = os.getenv("MODEL_NAME") 17 | CHANNEL_TYPE = IntEnum( 18 | "CHANNEL_TYPE", ["SMS", "TELEGRAM", "WHATSAPP", "EMAIL", "WEBSITE"] 19 | ) 20 | 21 | logger = getLogger(__name__) 22 | 23 | # ------------------------------------------------- 24 | # Custom Rasa action to trigger our RasaGPT LLM API 25 | # ------------------------------------------------- 26 | class ActionGPTFallback(Action): 27 | def name(self) -> str: 28 | return "action_gpt_fallback" 29 | 30 | def get_channel(self, channel: str) -> CHANNEL_TYPE: 31 | if channel == "telegram": 32 | return CHANNEL_TYPE.TELEGRAM 33 | elif channel == "whatsapp": 34 | return CHANNEL_TYPE.WHATSAPP 35 | elif channel == "sms": 36 | return CHANNEL_TYPE.SMS 37 | elif channel == "email": 38 | return CHANNEL_TYPE.EMAIL 39 | else: 40 | return CHANNEL_TYPE.WEBSITE 41 | 42 | def run( 43 | self, 44 | dispatcher: CollectingDispatcher, 45 | tracker: Tracker, 46 | domain: Dict[Text, Any], 47 | ) -> List[Dict[Text, Any]]: 48 | # ------------ 49 | # Get metadata 50 | # ------------ 51 | data = tracker.latest_message 52 | metadata = data['metadata'] if data and 'metadata' in data else None 53 | response = metadata['response'] if metadata and 'response' in metadata else None 54 | tags = metadata['tags'] if metadata and 'tags' in metadata else None 55 | is_escalate = ( 56 | metadata['is_escalate'] if metadata and 'is_escalate' in metadata else None 57 | ) 58 | 59 | # ----------------- 60 | # Escalate to human 61 | # ----------------- 62 | if is_escalate is True: 63 | response = f'{response} \n\n ⚠️💁 [ESCALATE TO HUMAN]' 64 | 65 | # ----------------------- 66 | # Labels generated by LLM 67 | # ----------------------- 68 | if tags is not None: 69 | response = f'{response} \n\n 🏷️ {",".join(tags)}' 70 | 71 | logger.debug( 72 | f"""[🤖 ActionGPTFallback] 73 | data: {data} 74 | metadata: {metadata} 75 | response: {response} 76 | tags: {tags} 77 | is_escalate: {is_escalate} 78 | """ 79 | ) 80 | dispatcher.utter_message(text=response) 81 | return [] 82 | -------------------------------------------------------------------------------- /app/rasa/config.yml: -------------------------------------------------------------------------------- 1 | recipe: default.v1 2 | assistant_id: 20230303-132941-slow-interest 3 | language: en 4 | policies: 5 | - name: RulePolicy 6 | -------------------------------------------------------------------------------- /app/rasa/credentials.yml: 
-------------------------------------------------------------------------------- 1 | custom_telegram.CustomTelegramInput: 2 | access_token: null 3 | verify: null 4 | webhook_url: null 5 | rasa: 6 | url: http://rasa-core:5005/webhook 7 | rest: null 8 | -------------------------------------------------------------------------------- /app/rasa/custom_telegram.py: -------------------------------------------------------------------------------- 1 | from rasa.core.channels.telegram import TelegramInput 2 | from rasa.shared.utils import common as rasa_common 3 | from typing import Any, Dict, Optional, Text 4 | from sanic.request import Request 5 | 6 | ''' 7 | The purpose of this script is to extend TelegramInput to allow for custom metadata to be passed to Rasa. 8 | ''' 9 | 10 | 11 | class CustomTelegramInput(TelegramInput): 12 | def get_metadata(self, request: Request) -> Optional[Dict[Text, Any]]: 13 | 14 | # For whatever reason, Rasa is unable to pass data via 'metadata' so 'meta' works for now 15 | metadata = request.json.get('message', {}).get('meta') 16 | 17 | # Debug 18 | rasa_common.logger.debug(f'[🤖 ActionGPTFallback]\nmetadata: {metadata}') 19 | return metadata if metadata is not None else None 20 | -------------------------------------------------------------------------------- /app/rasa/data/nlu.yml: -------------------------------------------------------------------------------- 1 | version: "3.1" 2 | 3 | nlu: 4 | - intent: greet 5 | examples: | 6 | - hey 7 | - hello 8 | - hi 9 | - hello there 10 | - good morning 11 | - good evening 12 | - moin 13 | - hey there 14 | - let's go 15 | - hey dude 16 | - goodmorning 17 | - goodevening 18 | - good afternoon 19 | 20 | - intent: out_of_scope 21 | examples: | 22 | - I want to order food 23 | - What is 2 + 2? 24 | - Who's the US President? 25 | 26 | -------------------------------------------------------------------------------- /app/rasa/data/rules.yml: -------------------------------------------------------------------------------- 1 | version: "3.1" 2 | 3 | rules: 4 | 5 | - rule: out-of-scope 6 | steps: 7 | - intent: out_of_scope 8 | - action: action_gpt_fallback 9 | -------------------------------------------------------------------------------- /app/rasa/data/stories.yml: -------------------------------------------------------------------------------- 1 | version: "3.1" 2 | 3 | stories: 4 | 5 | - story: happy path 6 | steps: 7 | - intent: greet 8 | - action: utter_greet 9 | 10 | -------------------------------------------------------------------------------- /app/rasa/domain.yml: -------------------------------------------------------------------------------- 1 | version: "3.1" 2 | 3 | 4 | slots: 5 | results: 6 | type: text 7 | mappings: 8 | - type: custom 9 | initial_value: 10 | action: action_gpt_fallback 11 | 12 | intents: 13 | - greet 14 | - out_of_scope 15 | 16 | responses: 17 | utter_greet: 18 | - text: "Hey! How are you?" 19 | 20 | actions: 21 | - action_gpt_fallback 22 | 23 | 24 | session_config: 25 | session_expiration_time: 60 26 | carry_over_slots_to_new_session: true 27 | -------------------------------------------------------------------------------- /app/rasa/endpoints.yml: -------------------------------------------------------------------------------- 1 | # This file contains the different endpoints your bot can use. 2 | 3 | # Server where the models are pulled from. 
4 | # https://rasa.com/docs/rasa/model-storage#fetching-models-from-a-server 5 | 6 | #models: 7 | # url: http://my-server.com/models/default_core@latest 8 | # wait_time_between_pulls: 10 # [optional](default: 100) 9 | 10 | # Server which runs your custom actions. 11 | # https://rasa.com/docs/rasa/custom-actions 12 | 13 | action_endpoint: 14 | url: "http://rasa-actions:5055/webhook" 15 | 16 | # Tracker store which is used to store the conversations. 17 | # By default the conversations are stored in memory. 18 | # https://rasa.com/docs/rasa/tracker-stores 19 | 20 | #tracker_store: 21 | # type: redis 22 | # url: 23 | # port: 24 | # db: 25 | # password: 26 | # use_ssl: 27 | 28 | #tracker_store: 29 | # type: mongod 30 | # url: 31 | # db: 32 | # username: 33 | # password: 34 | 35 | # Event broker which all conversation events should be streamed to. 36 | # https://rasa.com/docs/rasa/event-brokers 37 | 38 | #event_broker: 39 | # url: localhost 40 | # username: username 41 | # password: password 42 | # queue: queue 43 | -------------------------------------------------------------------------------- /app/rasa/ngrok.yml: -------------------------------------------------------------------------------- 1 | region: us 2 | version: 2 3 | log_level: debug 4 | web_addr: ngrok:4040 5 | console_ui: true 6 | tunnels: 7 | chat_api: 8 | proto: http 9 | addr: api:8888 10 | connect_timeout: 30s 11 | heartbeat_interval: 1m 12 | heartbeat_tolerance: 5s -------------------------------------------------------------------------------- /app/rasa/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv 2 | rasa-sdk 3 | requests 4 | pyyaml -------------------------------------------------------------------------------- /app/rasa/rules.yml: -------------------------------------------------------------------------------- 1 | rules: 2 | 3 | # - rule: Fallback rule 4 | # condition: 5 | # - active_loop: null 6 | # steps: 7 | # - intent: nlu_fallback 8 | # - action: action_gpt_fallback -------------------------------------------------------------------------------- /app/scripts/wait-for-it.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Use this script to test if a given TCP host/port are available 3 | 4 | # -------------- 5 | # wait-for-it.sh 6 | # -------------- 7 | # Original author @vishubob on GH https://raw.githubusercontent.com/vishnubob/wait-for-it/master/wait-for-it.sh 8 | # Modified for HTTP OK 200 support by @paulpierre 9 | 10 | 11 | WAITFORIT_cmdname=${0##*/} 12 | 13 | echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } 14 | 15 | usage() 16 | { 17 | cat << USAGE >&2 18 | Usage: 19 | $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args] 20 | -h HOST | --host=HOST Host or IP under test 21 | -p PORT | --port=PORT TCP port under test 22 | Alternatively, you specify the host and port as host:port 23 | -s | --strict Only execute subcommand if the test succeeds 24 | -q | --quiet Don't output any status messages 25 | -t TIMEOUT | --timeout=TIMEOUT 26 | Timeout in seconds, zero for no timeout 27 | -- COMMAND ARGS Execute command with args after the test finishes 28 | USAGE 29 | exit 1 30 | } 31 | wait_for() 32 | { 33 | local proto="tcp" 34 | if [[ $WAITFORIT_HTTP_OK -eq 1 ]]; then 35 | proto="http" 36 | fi 37 | 38 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 39 | echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT to be 
available ($proto)" 40 | else 41 | echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT to be available ($proto) without a timeout" 42 | fi 43 | 44 | WAITFORIT_start_ts=$(date +%s) 45 | while : 46 | do 47 | if [[ $WAITFORIT_HTTP_OK -eq 1 ]]; then 48 | response=$(echo -e "HEAD / HTTP/1.1\r\nHost: $WAITFORIT_HOST:$WAITFORIT_PORT\r\nConnection: close\r\n\r\n" | nc -w 5 $WAITFORIT_HOST $WAITFORIT_PORT | head -n1) 49 | if [[ $response =~ "200 OK" ]]; then 50 | WAITFORIT_result=0 51 | else 52 | WAITFORIT_result=1 53 | fi 54 | else 55 | (echo -n > /dev/$proto/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1 56 | WAITFORIT_result=$? 57 | fi 58 | 59 | if [[ $WAITFORIT_result -eq 0 ]]; then 60 | WAITFORIT_end_ts=$(date +%s) 61 | echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds" 62 | break 63 | fi 64 | sleep 1 65 | done 66 | return $WAITFORIT_result 67 | } 68 | 69 | wait_for_wrapper() 70 | { 71 | # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 72 | if [[ $WAITFORIT_QUIET -eq 1 ]]; then 73 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 74 | else 75 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 76 | fi 77 | WAITFORIT_PID=$! 78 | trap "kill -INT -$WAITFORIT_PID" INT 79 | wait $WAITFORIT_PID 80 | WAITFORIT_RESULT=$? 81 | if [[ $WAITFORIT_RESULT -ne 0 ]]; then 82 | echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" 83 | fi 84 | return $WAITFORIT_RESULT 85 | } 86 | 87 | # process arguments 88 | while [[ $# -gt 0 ]] 89 | do 90 | case "$1" in 91 | *:* ) 92 | WAITFORIT_hostport=(${1//:/ }) 93 | WAITFORIT_HOST=${WAITFORIT_hostport[0]} 94 | WAITFORIT_PORT=${WAITFORIT_hostport[1]} 95 | shift 1 96 | ;; 97 | -o | --http-ok) 98 | WAITFORIT_HTTP_OK=1 99 | shift 1 100 | ;; 101 | --child) 102 | WAITFORIT_CHILD=1 103 | shift 1 104 | ;; 105 | -q | --quiet) 106 | WAITFORIT_QUIET=1 107 | shift 1 108 | ;; 109 | -s | --strict) 110 | WAITFORIT_STRICT=1 111 | shift 1 112 | ;; 113 | -h) 114 | WAITFORIT_HOST="$2" 115 | if [[ $WAITFORIT_HOST == "" ]]; then break; fi 116 | shift 2 117 | ;; 118 | --host=*) 119 | WAITFORIT_HOST="${1#*=}" 120 | shift 1 121 | ;; 122 | -p) 123 | WAITFORIT_PORT="$2" 124 | if [[ $WAITFORIT_PORT == "" ]]; then break; fi 125 | shift 2 126 | ;; 127 | --port=*) 128 | WAITFORIT_PORT="${1#*=}" 129 | shift 1 130 | ;; 131 | -t) 132 | WAITFORIT_TIMEOUT="$2" 133 | if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi 134 | shift 2 135 | ;; 136 | --timeout=*) 137 | WAITFORIT_TIMEOUT="${1#*=}" 138 | shift 1 139 | ;; 140 | --) 141 | shift 142 | WAITFORIT_CLI=("$@") 143 | break 144 | ;; 145 | --help) 146 | usage 147 | ;; 148 | *) 149 | echoerr "Unknown argument: $1" 150 | usage 151 | ;; 152 | esac 153 | done 154 | 155 | if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then 156 | echoerr "Error: you need to provide a host and port to test." 157 | usage 158 | fi 159 | 160 | WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} 161 | WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} 162 | WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} 163 | WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} 164 | WAITFORIT_HTTP_OK=${WAITFORIT_HTTP_OK:-0} 165 | 166 | 167 | # Check to see if timeout is from busybox? 
168 | WAITFORIT_TIMEOUT_PATH=$(type -p timeout) 169 | WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) 170 | 171 | WAITFORIT_BUSYTIMEFLAG="" 172 | if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then 173 | WAITFORIT_ISBUSY=1 174 | # Check if busybox timeout uses -t flag 175 | # (recent Alpine versions don't support -t anymore) 176 | if timeout &>/dev/stdout | grep -q -e '-t '; then 177 | WAITFORIT_BUSYTIMEFLAG="-t" 178 | fi 179 | else 180 | WAITFORIT_ISBUSY=0 181 | fi 182 | 183 | if [[ $WAITFORIT_CHILD -gt 0 ]]; then 184 | wait_for 185 | WAITFORIT_RESULT=$? 186 | exit $WAITFORIT_RESULT 187 | else 188 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 189 | wait_for_wrapper 190 | WAITFORIT_RESULT=$? 191 | else 192 | wait_for 193 | WAITFORIT_RESULT=$? 194 | fi 195 | fi 196 | 197 | if [[ $WAITFORIT_CLI != "" ]]; then 198 | if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then 199 | echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" 200 | exit $WAITFORIT_RESULT 201 | fi 202 | exec "${WAITFORIT_CLI[@]}" 203 | else 204 | exit $WAITFORIT_RESULT 205 | fi -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # ------------------------------------- 2 | # ▒█▀▀█ █▀▀█ █▀▀ █▀▀█ ▒█▀▀█ ▒█▀▀█ ▀▀█▀▀ 3 | # ▒█▄▄▀ █▄▄█ ▀▀█ █▄▄█ ▒█░▄▄ ▒█▄▄█ ░▒█░░ 4 | # ▒█░▒█ ▀░░▀ ▀▀▀ ▀░░▀ ▒█▄▄█ ▒█░░░ ░▒█░░ 5 | # +-----------------------------------+ 6 | # | http://RasaGPT.dev by @paulpierre | 7 | # +-----------------------------------+ 8 | 9 | version: '3.9' 10 | 11 | services: 12 | 13 | # ------------------- 14 | # API service for LLM 15 | # ------------------- 16 | api: 17 | build: 18 | context: ./app/api 19 | restart: always 20 | container_name: chat_api 21 | env_file: 22 | - .env 23 | ports: 24 | - 8888:8888 25 | healthcheck: 26 | test: ["CMD", "curl", "-f", "http://api:8888/health"] 27 | interval: 15s 28 | retries: 5 29 | depends_on: 30 | - db 31 | networks: 32 | - chat-network 33 | volumes: 34 | - ./app/scripts/wait-for-it.sh:/app/api/wait-for-it.sh 35 | - ./app/api:/app/api 36 | 37 | 38 | # ------------------- 39 | # Ngrok agent service 40 | # ------------------- 41 | ngrok: 42 | image: ngrok/ngrok:latest 43 | container_name: chat_ngrok 44 | ports: 45 | - 4040:4040 46 | env_file: 47 | - .env 48 | environment: 49 | NGROK_CONFIG: /etc/ngrok.yml 50 | NGROK_AUTH_TOKEN: ${NGROK_AUTH_TOKEN:-} 51 | NGROK_DEBUG: ${NGROK_DEBUG:-true} 52 | NGROK_API_KEY: ${NGROK_API_KEY:-} 53 | networks: 54 | - chat-network 55 | volumes: 56 | - ./app/rasa/ngrok.yml:/etc/ngrok.yml 57 | restart: unless-stopped 58 | 59 | 60 | # ----------------- 61 | # Core Rasa service 62 | # ----------------- 63 | rasa-core: 64 | image: khalosa/rasa-aarch64:3.5.2 65 | container_name: chat_rasa_core 66 | env_file: 67 | - .env 68 | volumes: 69 | - ./app/rasa:/app 70 | - ./app/scripts/wait-for-it.sh:/app/wait-for-it.sh 71 | 72 | ports: 73 | - 5005:5005 74 | entrypoint: ["/bin/bash", "-c", "chmod +x /app/wait-for-it.sh && /app/wait-for-it.sh rasa-credentials:8889 -t 120 -o && rasa run --enable-api --cors '*' --debug --credentials /app/credentials.yml --endpoints /app/endpoints.yml --model /app/models"] 75 | networks: 76 | - chat-network 77 | depends_on: 78 | - rasa-actions 79 | - rasa-credentials 80 | 81 | 82 | # -------------------- 83 | # Rasa actions service 84 | # -------------------- 85 | rasa-actions: 86 | build: 87 | context: ./app/rasa 88 | 
dockerfile: ./actions/Dockerfile 89 | container_name: chat_rasa_actions 90 | env_file: 91 | - .env 92 | ports: 93 | - 5055:5055 94 | depends_on: 95 | - rasa-credentials 96 | networks: 97 | - chat-network 98 | 99 | 100 | # ------------------------------- 101 | # Rasa credentials helper service 102 | # ------------------------------- 103 | rasa-credentials: 104 | build: 105 | context: ./app/rasa-credentials 106 | dockerfile: Dockerfile 107 | container_name: chat_rasa_credentials 108 | volumes: 109 | - ./app/rasa:/app/rasa 110 | - ./app/rasa-credentials:/app/rasa-credentials 111 | ports: 112 | - 8889:8889 113 | env_file: 114 | - .env 115 | networks: 116 | - chat-network 117 | healthcheck: 118 | test: ["CMD", "curl", "-f", "http://rasa-credentials:8889"] 119 | interval: 15s 120 | retries: 5 121 | 122 | 123 | # ------------------------- 124 | # Postgres database service 125 | # ------------------------- 126 | db: 127 | build: 128 | context: ./app/db 129 | container_name: chat_db 130 | env_file: 131 | - .env 132 | environment: 133 | POSTGRES_USER: ${POSTGRES_USER:-postgres} 134 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} 135 | volumes: 136 | - ./mnt/db:/var/lib/postgresql/data 137 | ports: 138 | - "5432:5432" 139 | healthcheck: 140 | test: ["CMD", "pg_isready", "-U", "postgres"] 141 | interval: 5s 142 | retries: 5 143 | networks: 144 | - chat-network 145 | 146 | 147 | # -------------------------------- 148 | # PgAdmin database browser service 149 | # -------------------------------- 150 | pgadmin: 151 | container_name: chat_pgadmin 152 | image: dpage/pgadmin4 153 | environment: 154 | PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-pgadmin4@pgadmin.org} 155 | PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin} 156 | PGADMIN_CONFIG_SERVER_MODE: 'False' 157 | volumes: 158 | - ./mnt/pgadmin:/var/lib/pgadmin 159 | ports: 160 | - "${PGADMIN_PORT:-5050}:80" 161 | restart: unless-stopped 162 | depends_on: 163 | - db 164 | networks: 165 | - chat-network 166 | 167 | 168 | # ---------------------------- 169 | # Container log viewer service 170 | # ---------------------------- 171 | dozzle: 172 | container_name: chat_dozzle 173 | image: amir20/dozzle:latest 174 | volumes: 175 | - /var/run/docker.sock:/var/run/docker.sock 176 | ports: 177 | - 9999:8080 178 | depends_on: 179 | - db 180 | 181 | networks: 182 | chat-network: 183 | driver: bridge 184 | -------------------------------------------------------------------------------- /github/api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/api.png -------------------------------------------------------------------------------- /github/cli_install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/cli_install.png -------------------------------------------------------------------------------- /github/container_logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/container_logs.png -------------------------------------------------------------------------------- /github/documents.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/documents.png -------------------------------------------------------------------------------- /github/makefile-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/makefile-1.png -------------------------------------------------------------------------------- /github/makefile-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/makefile-2.png -------------------------------------------------------------------------------- /github/ngrok-admin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/ngrok-admin.png -------------------------------------------------------------------------------- /github/org-projects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/org-projects.png -------------------------------------------------------------------------------- /github/orgs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/orgs.png -------------------------------------------------------------------------------- /github/rasagpt-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-banner.png -------------------------------------------------------------------------------- /github/rasagpt-icon-200x200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-icon-200x200.png -------------------------------------------------------------------------------- /github/rasagpt-logo-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-logo-1.png -------------------------------------------------------------------------------- /github/rasagpt-logo-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-logo-2.png -------------------------------------------------------------------------------- /github/rasagpt-video-title-screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-video-title-screen.png -------------------------------------------------------------------------------- /github/telegram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/telegram.png 
-------------------------------------------------------------------------------- /github/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/title.png --------------------------------------------------------------------------------
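Usage sketch: once the containers are up and seed.py has populated the database, the match_node_<strategy> functions created in app/api/models.py can be queried directly. A minimal sketch, run from app/api, assuming the database is seeded and that DISTANCE_STRATEGIES registers a strategy named "euclidean" (matching LLM_DEFAULT_DISTANCE_STRATEGY in .env-example):

    from sqlalchemy import text
    from sqlmodel import Session
    from models import get_engine, VECTOR_EMBEDDINGS_COUNT

    # Hypothetical query vector -- in practice this comes back from the
    # embeddings model for the user's message.
    emb = "[" + ",".join(["0.0"] * VECTOR_EMBEDDINGS_COUNT) + "]"

    with Session(get_engine()) as session:
        rows = session.execute(
            text(
                "select * from match_node_euclidean("
                "cast(:emb as vector), :threshold, :count)"
            ),
            # 0.2 and 3 mirror LLM_DISTANCE_THRESHOLD and LLM_MIN_NODE_LIMIT
            {"emb": emb, "threshold": 0.2, "count": 3},
        ).fetchall()
        for node_uuid, node_text, similarity in rows:
            print(f"{similarity:.3f}  {node_text[:80]}")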