├── .env-example ├── .gitignore ├── LICENSE ├── Makefile ├── README.md ├── RESULTS.md ├── app ├── api │ ├── .dockerignore │ ├── API.md │ ├── Dockerfile │ ├── Makefile │ ├── config.py │ ├── data │ │ └── training_data │ │ │ ├── org-about_the_company.md │ │ │ ├── org-board_of_directors.md │ │ │ ├── org-company_story.md │ │ │ ├── org-corporate_philosophy.md │ │ │ ├── org-customer_support.md │ │ │ ├── org-earnings_fy2023.md │ │ │ ├── org-management_team.md │ │ │ ├── project-frogonil.md │ │ │ ├── project-kekzal.md │ │ │ ├── project-memegen.md │ │ │ ├── project-memetrex.md │ │ │ ├── project-neurokek.md │ │ │ ├── project-pepetamine.md │ │ │ └── project-pepetrak.md │ ├── helpers.py │ ├── llm.py │ ├── main.py │ ├── models.py │ ├── requirements.txt │ ├── seed.py │ ├── static │ │ └── img │ │ │ ├── rasagpt-icon-200x200.png │ │ │ ├── rasagpt-logo-1.png │ │ │ └── rasagpt-logo-2.png │ └── util.py ├── db │ ├── Dockerfile │ └── create_db.sh ├── rasa-credentials │ ├── .dockerignore │ ├── Dockerfile │ ├── main.py │ └── requirements.txt ├── rasa │ ├── .dockerignore │ ├── actions │ │ ├── Dockerfile │ │ ├── __init__.py │ │ └── actions.py │ ├── config.yml │ ├── credentials.yml │ ├── custom_telegram.py │ ├── data │ │ ├── nlu.yml │ │ ├── rules.yml │ │ └── stories.yml │ ├── domain.yml │ ├── endpoints.yml │ ├── ngrok.yml │ ├── requirements.txt │ └── rules.yml └── scripts │ └── wait-for-it.sh ├── docker-compose.yml └── github ├── api.png ├── cli_install.png ├── container_logs.png ├── documents.png ├── makefile-1.png ├── makefile-2.png ├── ngrok-admin.png ├── org-projects.png ├── orgs.png ├── rasagpt-banner.png ├── rasagpt-icon-200x200.png ├── rasagpt-logo-1.png ├── rasagpt-logo-2.png ├── rasagpt-video-title-screen.png ├── telegram.png └── title.png /.env-example: -------------------------------------------------------------------------------- 1 | ENV=local 2 | 3 | FILE_UPLOAD_PATH=data 4 | LLM_DEFAULT_TEMPERATURE=0 5 | LLM_CHUNK_SIZE=1000 6 | LLM_CHUNK_OVERLAP=200 7 | LLM_DISTANCE_THRESHOLD=0.2 8 | LLM_MAX_OUTPUT_TOKENS=256 9 | LLM_MIN_NODE_LIMIT=3 10 | LLM_DEFAULT_DISTANCE_STRATEGY=EUCLIDEAN 11 | 12 | POSTGRES_USER=postgres 13 | POSTGRES_PASSWORD=postgres 14 | POSTGRES_DB=postgres 15 | PGVECTOR_ADD_INDEX=true 16 | 17 | DB_HOST=db 18 | DB_PORT=5432 19 | DB_USER=api 20 | DB_NAME=api 21 | DB_PASSWORD= 22 | 23 | NGROK_HOST=ngrok 24 | NGROK_PORT=4040 25 | NGROK_AUTHTOKEN= 26 | NGROK_API_KEY= 27 | NGROK_INTERNAL_WEBHOOK_HOST=api 28 | NGROK_INTERNAL_WEBHOOK_PORT=8888 29 | NGROK_DEBUG=true 30 | NGROK_CONFIG=/etc/ngrok.yml 31 | 32 | RASA_WEBHOOK_HOST=rasa-core 33 | RASA_WEBHOOK_PORT=5005 34 | 35 | CREDENTIALS_PATH=/app/rasa/credentials.yml 36 | 37 | TELEGRAM_ACCESS_TOKEN= 38 | TELEGRAM_BOTNAME=rasagpt 39 | 40 | API_PORT=8888 41 | API_HOST=api 42 | 43 | PGADMIN_PORT=5050 44 | PGADMIN_DEFAULT_PASSWORD=pgadmin 45 | PGADMIN_DEFAULT_EMAIL=your@emailaddress.com 46 | 47 | MODEL_NAME=gpt-3.5-turbo 48 | OPENAI_API_KEY= -------------------------------------------------------------------------------- /.gitignore: -------------------------------------------------------------------------------- 1 | .DS_Store 2 | .trunk 3 | .vscode 4 | mnt 5 | venv/ 6 | .env 7 | .env-dev 8 | .env 9 | .env-staging 10 | .env-stage 11 | .env-prod 12 | .env-production 13 | __pycache__/ 14 | app/rasa/models/* 15 | app/rasa/.rasa 16 | app/rasa/.config 17 | app/rasa/.keras 18 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 
2 | 3 | Copyright (c) 2023 Paul Pierre 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default banner help install build run stop restart rasa-restart rasa-stop rasa-start rasa-build seed logs ngrok pgadmin api api-stop db db-stop db-purge db-reset purge models shell-api shell-db shell-rasa shell-actions rasa-train env-var 2 | 3 | default: help 4 | 5 | help: 6 | @make banner 7 | @echo "+------------------+" 8 | @echo "| 🏠 CORE COMMANDS |" 9 | @echo "+------------------+" 10 | @echo "make install - Install and run RasaGPT" 11 | @echo "make build - Build docker images" 12 | @echo "make run - Run RasaGPT" 13 | @echo "make stop - Stop RasaGPT" 14 | @echo "make restart - Restart RasaGPT\n" 15 | @echo "+---------------------+" 16 | @echo "| 🌍 ADMIN INTERFACES |" 17 | @echo "+---------------------+" 18 | @echo "make logs - View logs via Dozzle" 19 | @echo "make ngrok - View ngrok dashboard" 20 | @echo "make pgadmin - View pgAdmin dashboard\n" 21 | @echo "+-----------------------+" 22 | @echo "| 👷 DEBUGGING COMMANDS |" 23 | @echo "+-----------------------+" 24 | @echo "make api - Run only API server" 25 | @echo "make models - Build database models in Postgres" 26 | @echo "make purge - Remove all docker images" 27 | @echo "make db-purge - Delete all data in database" 28 | @echo "make db-reset - Reset database to initial state" 29 | @echo "make shell-api - Open shell in API container" 30 | @echo "make shell-db - Open shell in database container" 31 | @echo "make shell-rasa - Open shell in Rasa container" 32 | @echo "make shell-actions - Open shell in Rasa actions container\n" 33 | 34 | 35 | banner: 36 | @echo "\n\n-------------------------------------" 37 | @echo "▒█▀▀█ █▀▀█ █▀▀ █▀▀█ ▒█▀▀█ ▒█▀▀█ ▀▀█▀▀" 38 | @echo "▒█▄▄▀ █▄▄█ ▀▀█ █▄▄█ ▒█░▄▄ ▒█▄▄█ ░▒█░░" 39 | @echo "▒█░▒█ ▀░░▀ ▀▀▀ ▀░░▀ ▒█▄▄█ ▒█░░░ ░▒█░░" 40 | @echo "+-----------------------------------+" 41 | @echo "| http://RasaGPT.dev by @paulpierre |" 42 | @echo "+-----------------------------------+\n\n" 43 | 44 | 45 | 46 | # ========================== 47 | # 👷 INITIALIZATION COMMANDS 48 | # ========================== 49 | 50 | # ---------------------------------------- 51 | # Run this first to set up the environment 52 | # ---------------------------------------- 53 | install: 54 | @make banner 55 | @make stop 56 | @make env-var 57 |
@make rasa-train 58 | @make build 59 | @make run 60 | @make models 61 | @make rasa-restart 62 | @make seed 63 | @echo "✅ RasaGPT installed and running" 64 | 65 | # ----------------------- 66 | # Build the docker images 67 | # ----------------------- 68 | build: 69 | @echo "🏗️ Building docker images ..\n" 70 | @docker-compose -f docker-compose.yml build 71 | 72 | 73 | # ================ 74 | # 🏠 CORE COMMANDS 75 | # ================ 76 | 77 | # --------------------------- 78 | # Startup all docker services 79 | # --------------------------- 80 | 81 | run: 82 | @echo "🚀 Starting docker-compose.yml ..\n" 83 | @docker-compose -f docker-compose.yml up -d 84 | 85 | # --------------------------- 86 | # Stop all running containers 87 | # --------------------------- 88 | 89 | stop: 90 | @echo "🔍 Stopping any running containers .. \n" 91 | @docker-compose -f docker-compose.yml down 92 | 93 | # ---------------------- 94 | # Restart all containers 95 | # ---------------------- 96 | restart: 97 | @echo "🔁 Restarting docker services ..\n" 98 | @make stop 99 | @make run 100 | 101 | # ---------------------- 102 | # Restart Rasa core only 103 | # ---------------------- 104 | rasa-restart: 105 | @echo "🤖 Restarting Rasa so it grabs credentials ..\n" 106 | @make rasa-stop 107 | @make rasa-start 108 | 109 | rasa-stop: 110 | @echo "🤖 Stopping Rasa ..\n" 111 | @docker-compose -f docker-compose.yml stop rasa-core 112 | 113 | rasa-start: 114 | @echo "🤖 Starting Rasa ..\n" 115 | @docker-compose -f docker-compose.yml up -d rasa-core 116 | 117 | rasa-build: 118 | @echo "🤖 Building Rasa ..\n" 119 | @docker-compose -f docker-compose.yml build rasa-core 120 | 121 | # ----------------------- 122 | # Seed database with data 123 | # ----------------------- 124 | seed: 125 | @echo "🌱 Seeding database ..\n" 126 | @docker-compose -f docker-compose.yml exec api /app/api/wait-for-it.sh db:5432 --timeout=60 -- python3 seed.py 127 | 128 | 129 | # ======================= 130 | # 🌍 WEB ADMIN INTERFACES 131 | # ======================= 132 | 133 | # ------------------------- 134 | # Reverse HTTP tunnel admin 135 | # ------------------------- 136 | ngrok: 137 | @echo "📡 Opening ngrok agent in the browser ..\n" 138 | @open http://localhost:4040 139 | 140 | # ------------------------ 141 | # Postgres admin interface 142 | # ------------------------ 143 | pgadmin: 144 | @echo "👷‍♂️ Opening PG Admin in the browser ..\n" 145 | @open http://localhost:5050 146 | 147 | # ------------------------ 148 | # Container logs interface 149 | # ------------------------ 150 | logs: 151 | @echo "🔍 Opening container logs in the browser ..\n" 152 | @open http://localhost:9999/ 153 | 154 | # ===================== 155 | # 👷 DEBUGGING COMMANDS 156 | # ===================== 157 | 158 | # --------------------------- 159 | # Startup just the API server 160 | # --------------------------- 161 | api: 162 | @make db 163 | @echo "🚀 Starting FastAPI and postgres ..\n" 164 | @docker-compose -f docker-compose.yml up -d api 165 | 166 | # ------------------------ 167 | # Startup just Postgres DB 168 | # ------------------------ 169 | db: 170 | @echo "🚀 Starting Postgres with pgvector ..\n" 171 | @docker-compose -f docker-compose.yml up -d db 172 | 173 | 174 | db-stop: 175 | @echo " Stopping the database ..\n" 176 | @docker-compose -f docker-compose.yml down db 177 | 178 | 179 | db-reset: 180 | @echo "⛔ Are you sure you want to reinitialize the database, you will lose all data? 
[y/N]\n" 181 | @read confirmation; \ 182 | if [ "$$confirmation" = "y" ] || [ "$$confirmation" = "Y" ]; then \ 183 | echo "Resetting the database .."; \ 184 | make db-purge; \ 185 | make api; \ 186 | make models; \ 187 | echo "✅ Database re-initialized"; \ 188 | else \ 189 | echo "Aborted."; \ 190 | fi 191 | 192 | 193 | # ------------------------------- 194 | # Build the schema in Postgres DB 195 | # ------------------------------- 196 | models: 197 | @echo "💽 Building models in Postgres ..\n" 198 | @docker-compose -f docker-compose.yml exec api /app/api/wait-for-it.sh db:5432 --timeout=60 -- python3 models.py 199 | 200 | # ------------------------------- 201 | # Delete containers or bad images 202 | # ------------------------------- 203 | purge: 204 | @echo "🧹 Purging all containers and images ..\n" 205 | @make stop 206 | @docker system prune -a 207 | @make install 208 | 209 | # -------------------------------- 210 | # Delete the database mount volume 211 | # -------------------------------- 212 | db-purge: 213 | @echo "⛔ Are you sure you want to delete all data in the database? [y/N]\n" 214 | @read confirmation; \ 215 | if [ "$$confirmation" = "y" ] || [ "$$confirmation" = "Y" ]; then \ 216 | echo "Deleting generated files .."; \ 217 | make stop; \ 218 | rm -rf ./mnt; \ 219 | echo "Deleted."; \ 220 | else \ 221 | echo "Aborted."; \ 222 | fi 223 | 224 | # -------------------------------------- 225 | # Open a bash shell in the API container 226 | # -------------------------------------- 227 | shell-api: 228 | @echo "💻🐢 Opening a bash shell in the RasaGPT API container ..\n" 229 | @if docker ps | grep chat_api > /dev/null; then \ 230 | docker exec -it $$(docker ps | grep chat_api | tr -d '\n' | awk '{print $$1}') /bin/bash; \ 231 | else \ 232 | echo "Container chat_api is not running"; \ 233 | fi 234 | 235 | # --------------------------------------- 236 | # Open a bash shell in the Rasa container 237 | # --------------------------------------- 238 | shell-rasa: 239 | @echo "💻🐢 Opening a bash shell in the chat_rasa_core container ..\n" 240 | @if docker ps | grep chat_rasa_core > /dev/null; then \ 241 | docker exec -it $$(docker ps | grep chat_rasa_core | tr -d '\n' | awk '{print $$1}') /bin/bash; \ 242 | else \ 243 | echo "Container chat_rasa_core is not running"; \ 244 | fi 245 | 246 | # ----------------------------------------------- 247 | # Open a bash shell in the Rasa actions container 248 | # ----------------------------------------------- 249 | shell-actions: 250 | @echo "💻🐢 Opening a bash shell in the chat_rasa_actions container ..\n" 251 | @if docker ps | grep chat_rasa_actions > /dev/null; then \ 252 | docker exec -it $$(docker ps | grep chat_rasa_actions | tr -d '\n' | awk '{print $$1}') /bin/bash; \ 253 | else \ 254 | echo "Container chat_rasa_actions is not running"; \ 255 | fi 256 | 257 | # ------------------------------------------- 258 | # Open a bash shell in the Postgres container 259 | # ------------------------------------------- 260 | shell-db: 261 | @echo "💻🐢 Opening a bash shell in the Postgres container ..\n" 262 | @if docker ps | grep chat_db > /dev/null; then \ 263 | docker exec -it $$(docker ps | grep chat_db | tr -d '\n' | awk '{print $$1}') /bin/bash; \ 264 | else \ 265 | echo "Container chat_db is not running"; \ 266 | fi 267 | 268 | # ================== 269 | # 💁 HELPER COMMANDS 270 | # ================== 271 | 272 | # ------------- 273 | # Check envvars 274 | # ------------- 275 | env-var: 276 | @echo "🔍 Checking if envvars are set ..\n" 277 | @if !
test -e "./.env"; then \ 278 | echo "❌ .env file not found. Please copy .env-example to .env and update values"; \ 279 | exit 1; \ 280 | else \ 281 | echo "✅ found .env\n"; \ 282 | fi 283 | 284 | # ----------------- 285 | # Train Rasa models 286 | # ----------------- 287 | rasa-train: 288 | @echo "💽 Generating Rasa models ..\n" 289 | @make rasa-start 290 | @docker-compose -f docker-compose.yml exec rasa-core rasa train 291 | @make rasa-stop 292 | @echo "✅ Done\n" 293 | -------------------------------------------------------------------------------- /RESULTS.md: -------------------------------------------------------------------------------- 1 | 2 | ![RasaGPT Logo](https://github.com/paulpierre/RasaGPT/blob/main/github/rasagpt-logo-1.png?raw=true) 3 | 4 |

5 | 6 | # 📊 RasaGPT Query Results 7 | 8 | The following are questions and answers based on the example dummy data included in the RasaGPT repository. The response framing is good, but retrieval quality is not yet sufficient because of how the data is indexed. Here are some examples of Q&A hits and misses, with some commentary. 9 | 10 |

11 | 12 | # ☑️ Hits 13 | 14 | 15 | 16 |
17 | 18 | ### ✅ “Tell me about Pepe Corp” 19 | 20 | ```bash 21 | Pepe Corp is a groundbreaking parody pharmaceutical and technology company inspired by the iconic Pepe the Frog meme. Our mission is to revolutionize the world through the power of dank memes. 22 | 🏷️ company,mission 23 | ``` 24 | 25 |
26 | 27 | ### ✅ “What is the weather like in London today?” (It’s expected to say I don’t know) 28 | 29 | ```bash 30 | I apologize, I'm not sure how to help with that 31 | 🏷️ weather 32 | ``` 33 | 34 |
35 | 36 | ### ✅ “What is the purpose of Kekzal?” 37 | 38 | ```bash 39 | Kekzal enhances sexual function. 40 | 🏷️ purpose 41 | ``` 42 | 43 |
44 | 45 | ### ✅ “What is Kekzal and what are the side effects?” 46 | 47 | ```bash 48 | Kekzal is a medication, but the document does not provide information on its side effects. It only advises to consult a memologist if side effects persist or become bothersome. 49 | 🏷️ Kekzal,side effects 50 | ``` 51 | 52 | >- It probably should not mention “the document” and this can be optimized by modifying the prompt template 53 | 54 |
55 | 56 | ### ✅ “What should I consider before taking Kekzal?” 57 | 58 | ```bash 59 | Before taking Kekzal, consider the following precautions: Do not use Kekzal if you have a known allergy to dank memes or an aversion to Pepe-themed roman. 60 | 🏷️ precautions 61 | ``` 62 | 63 | >- This is technically a hit, but because of the chunking settings the original text gets cut off and says “roman” instead of “romantic,” so this is easily fixed 64 | >- Ideally it should also include “Kekzal” in the tags 65 | 66 |
67 | 68 | ### ✅ “What are the overdose symptoms of Kekzal?” 69 | 70 | ```bash 71 | In case of an overdose, symptoms may include: 1. An insatiable desire to create 72 | 🏷️ overdose,symptoms 73 | ``` 74 | 75 | >- This is technically a hit, but again the indexing cut the answer off, likely due to the configured output limits, which is easily fixed 76 | 77 |
78 | 79 | ### ✅ “Who is the CEO?” 80 | 81 | ```bash 82 | The CEO of Pepe Corp is Amanda Gigglestein. 83 | 🏷️ CEO 84 | ``` 85 | 86 |
87 | 88 | ### ✅ “What are the 2023 plans of Pepe Corp?” 89 | 90 | ```bash 91 | Pepe Corp plans to invest in research and development to create cutting-edge products that address the needs of our diverse customer base. Our R&D team will explore new areas of meme science and technology, developing products that meet the needs of our customers. 92 | 🏷️ plans,research and development,cutting-edge products,meme science,technology,customer needs 93 | ``` 94 | 95 |
96 | 97 | ### ✅ “What is the revenue for Q1 FY2023?” 98 | 99 | ```bash 100 | $235 million 101 | 🏷️ revenue,Q1,FY2023 102 | ``` 103 | 104 |
105 | 106 | ### ✅ “Tell me about the early days of PepeCorp?” 107 | 108 | ```bash 109 | In the early 21st century, a group of visionary memelords gathered in a dimly lit basement, fueled by their passion for dank memes and the iconic Pepe the Frog. 110 | 🏷️ company history,early days 111 | ``` 112 | 113 |

114 | 115 | # ❌ Misses 116 | 117 | 118 | ### ❌ “What are the side effects of Kekzal?” 119 | 120 | ```bash 121 | Consult your memologist if side effects persist or become bothersome. 122 | 🏷️ Kekzal,side effects 123 | ``` 124 | 125 | >- The side effects are in the documentation, but the bot misses them entirely. It did tag the product name, unlike earlier sessions, but index optimization can fix this 126 | 127 |
128 | 129 | ### ❌ “Who is Dr. Olivia Dankstein?” 130 | 131 | ```bash 132 | I apologize, I'm not sure how to help with that. 133 | 🏷️ unknown 134 | ``` 135 | 136 | >- Not good, she is in the Board of Directors document and is Vice-Chair and Chief Scientific Officer of Pepe Corp. Better indexing strategies fix this. 137 | 138 |
139 | 140 | 141 | ### ❌ “What is the list of products provided by Pepe Corp?” 142 | 143 | ```bash 144 | Pepe Corp has a diverse portfolio of products, ranging from meme-inspired pharmaceuticals to advanced brain implants and mobile applications. 145 | 🏷️ products 146 | ``` 147 | 148 | >- Although it did answer, it is insufficient. This is where a tree index with summarization would help 149 | 150 |

151 | 152 | # 🔬 Retrieval optimizations 153 | 154 | So how could all of this be improved, by what I estimate to be more than 70%? Here are a few solutions I intend to explore: 155 | 156 | 157 | - Better index-tuning at the LLM library level, specifically using llama-index, which specializes in retrieval, instead of Langchain 158 | - Using [Query Routers Abstractions](https://medium.com/@jerryjliu98/unifying-llm-powered-qa-techniques-with-routing-abstractions-438e2499a0d0) to understand which search strategy to use (one-shot vs few-shot) 159 | - Exploring other indexing methods like Tree indexes and Keyword indexes 160 | - Adding chat history for immediate recall and context setting 161 | - Adding a secondary adversarial agent ([Dual LLM pattern](https://simonwillison.net/2023/Apr/25/dual-llm-pattern/)) with the following potential functionalities: 162 | - Determine whether the question has been answered and, if not, re-optimize the search strategy 163 | - Ensure prompt injection is not occurring 164 | - Increasing baseline similarity search quality by exploring: 165 | - Regularly generating “fake” document embeddings based on historical queries and linking them to actual documents via the [HyDE pattern](https://wfhbrian.com/revolutionizing-search-how-hypothetical-document-embeddings-hyde-can-save-time-and-increase-productivity/) 166 | - Regularly generating “fake” user queries based on documents and linking them to the actual document, so user input and “fake” queries can match better -------------------------------------------------------------------------------- /app/api/.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd -------------------------------------------------------------------------------- /app/api/API.md: -------------------------------------------------------------------------------- 1 | 2 | 3 | # 🏠 Overview 4 | 5 | 💬 RasaGPT is the first headless LLM chatbot platform built on top of Rasa and Langchain. It is boilerplate code and a reference implementation of Rasa and Telegram utilizing an LLM library like Langchain for indexing, retrieval and context injection. 6 | 7 |

8 | 9 | # 💁‍♀️ Why RasaGPT? 10 | 11 | RasaGPT just works out of the box. I went through all the implementation headaches so you don’t have to, including: 12 | 13 | - Creating your own proprietary bot end-point using FastAPI, with document upload and a “training” pipeline included 14 | - How to integrate Langchain/LlamaIndex and Rasa 15 | - Library conflicts with LLM libraries and passing metadata 16 | - Dockerized support on MacOS for running Rasa 17 | - Reverse proxy with chatbots via ngrok 18 | - Implementing pgvector with your own custom schema instead of using Langchain’s highly opinionated PGVector class 19 | - Adding multi-tenancy, sessions and metadata between Rasa and your own backend / application 20 | 21 | The backstory is familiar. A friend came to me with a problem. I scoured Google and Github for a decent reference implementation of LLMs integrated with Rasa but came up empty-handed. I figured this was a great opportunity to satiate my curiosity, and 2 days later I had a proof of concept, and a week later this is what I came up with. 22 | 23 |
24 | 25 | > ⚠️ **Caveat emptor:** 26 | This is far from production code and rife with prompt injection and general security vulnerabilities. I just hope someone finds this useful 😊 27 | 28 | 29 |

30 | 31 | # **✨** Quick start 32 | 33 | Getting started is easy, just make sure you meet the dependencies below. 34 | 35 | ```bash 36 | git clone https://github.com/paulpierre/RasaGPT.git 37 | cd RasaGPT 38 | cp .env-example .env 39 | 40 | # Edit your .env file and add all the necessary credentials 41 | make install 42 | 43 | # Type "make" to see more options 44 | make 45 | ``` 46 | 47 |

48 | 49 | 50 | # 🔥 Features 51 | 52 | ## Full Application and API 53 | 54 | - LLM “learns” on an arbitrary corpus of data using Langchain 55 | - Upload documents and “train” everything via FastAPI 56 | - Document versioning and automatic “re-training” implemented on upload 57 | - Customize your own async end-points 58 | - Bot determines whether human handoff is necessary 59 | - Bot automatically generates tags based on user questions and responses 60 | - Full API documentation via Swagger and Redoc included 61 | - Ngrok end-points are automatically generated for you on startup so your bot can always be accessed via `https://t.me/yourbotname` 62 | - Embedding similarity search built into Postgres via pgvector and Postgres functions 63 | - Dummy data included for you to test and experiment with 64 | - Unlimited use cases, from help desk, customer support, quizzes and e-learning to Dungeons & Dragons, and more 65 |

66 | ## Rasa integration 67 | 68 | - Built on top of Rasa, the open-source gold standard for chat platforms 69 | - Supports MacOS M1/M2 via Docker (the canonical Rasa image lacks MacOS arch. support) 70 | - Supports Telegram, and can easily integrate Slack, Whatsapp, Line, SMS, etc. 71 | - Set up complex dialog pipelines using NLU models from Huggingface like BERT, or libraries/frameworks like Keras or Tensorflow, with OpenAI GPT as fallback 72 |

73 | ## Flexibility 74 | 75 | - Extend agentic, memory, etc. capabilities with Langchain 76 | - Schema supports multi-tenancy, sessions, data storage 77 | - Customize agent personalities 78 | - Saves all chat history and creates embeddings from all interactions, future-proofing your retrieval strategy 79 | - Automatically generates embeddings from the knowledge base corpus and client feedback 80 | 81 |

82 | 83 | # 🧑‍💻 Installing 84 | 85 | ## Requirements 86 | 87 | - Python 3.9 88 | - Docker & Docker compose ([Docker desktop MacOS](https://www.docker.com/products/docker-desktop/)) 89 | - Open AI [API key](https://platform.openai.com/account/api-keys) 90 | - Telegram [bot credentials](https://core.telegram.org/bots#how-do-i-create-a-bot) 91 | - Ngrok [auth token](https://dashboard.ngrok.com/tunnels/authtokens) 92 | - Make ([MacOS](https://formulae.brew.sh/formula/make)/[Windows](https://stackoverflow.com/questions/32127524/how-to-install-and-use-make-in-windows)) 93 | - SQLModel 94 | 95 |
96 | 97 | ## Setup 98 | 99 | ```bash 100 | git clone https://github.com/paulpierre/RasaGPT.git 101 | cd RasaGPT 102 | cp .env-example .env 103 | 104 | # Edit your .env file and add all the necessary credentials 105 | 106 | ``` 107 | 108 |
109 | 110 | 111 | At any point feel free to just type in `make` and it will display the list of options, mostly useful for debugging: 112 | 113 |
114 | 115 | 116 | ![Makefile main](https://github.com/paulpierre/RasaGPT/blob/main/github/makefile-1.png?raw=true) 117 | 118 |
119 | 120 | ## Docker-compose 121 | 122 | ```bash 123 | make install 124 | 125 | # This will automatically install and run RasaGPT 126 | # After installation, to run again you can simply run 127 | 128 | make run 129 | ``` 130 |
131 | 132 | ## Local Python Environment 133 | 134 | This is useful if you wish to focus on developing on top of the API; a separate `Makefile` was made for this purpose. It will create a local virtual environment for you. 135 | 136 | ```bash 137 | # Assuming you are already in the RasaGPT directory 138 | cd app/api 139 | make install 140 | 141 | # This will automatically set up the local API environment 142 | # After installation, to run again you can simply run 143 | 144 | make run 145 | ``` 146 |
147 | 148 | Similarly, enter `make` to see a full list of commands 149 | 150 | ![Makefile API](https://github.com/paulpierre/RasaGPT/blob/main/github/makefile-2.png?raw=true) 151 | 152 |
153 | 154 | ## Installation process 155 | 156 | Installation is automated and should look like this: 157 | 158 | ![Installation](https://github.com/paulpierre/RasaGPT/blob/main/github/cli_install.png?raw=true) 159 | 160 | 👉 Full installation log: [https://app.warp.dev/block/vflua6Eue29EPk8EVvW8Kd](https://app.warp.dev/block/vflua6Eue29EPk8EVvW8Kd) 161 | 162 |
163 | 164 | The installation process for Docker takes the following steps at a high level 165 | 166 | 1. Check to make sure you have `.env` available 167 | 2. Database is initialized with `pgvector` 168 | 3. Database models create the database schema 169 | 4. Trains the Rasa model so it is ready to run 170 | 5. Sets up ngrok with Rasa so Telegram has a webhook back to your API server 171 | 6. Sets up the Rasa actions server so Rasa can talk to the RasaGPT API 172 | 7. Database is populated with dummy data via `seed.py` 173 | 174 |

175 | 176 | # ☑️ Next steps 177 |
178 | 179 | ## 💬 Start chatting 180 | 181 | You can start chatting with your bot by visiting https://t.me/yourbotname 182 | 183 | ![Telegram](https://github.com/paulpierre/RasaGPT/blob/main/github/telegram.png?raw=true) 184 | 185 |

186 | 187 | ## 👀 View logs 188 | 189 | You can view all of the logs by visiting [http://localhost:9999/](http://localhost:9999/), which displays real-time logs of all the docker containers 190 | 191 | ![Dozzle](https://github.com/paulpierre/RasaGPT/blob/main/github/container_logs.png?raw=true) 192 | 193 |

194 | 195 | ## 📖 API documentation 196 | 197 | View the API endpoint docs by visiting [http://localhost:8888/docs](http://localhost:8888/docs) 198 | 199 | ![Swagger Docs](https://github.com/paulpierre/RasaGPT/blob/main/github/api.png?raw=true) 200 | 201 |

202 | 203 | # ✏️ Examples 204 | 205 | The bot is just a proof-of-concept and has not been optimized for retrieval. It currently uses 1000-character chunks for indexing and basic euclidean distance for retrieval, so quality is hit or miss. 206 | 207 | You can view example hits and misses with the bot in the [RESULTS.MD](https://github.com/paulpierre/RasaGPT/blob/main/RESULTS.md) file. Overall I estimate index optimization and LLM configuration changes can increase output quality by more than 70%. 208 | 209 | 👉 Click the link above to see the Q&A results of the demo data in RESULTS.MD 210 | 211 |
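To make the chunking behavior concrete, here is a minimal sketch of 1000-character chunks with a 200-character overlap, matching `LLM_CHUNK_SIZE` and `LLM_CHUNK_OVERLAP` in `.env-example`. It is illustrative only: the splitter class and file path below are assumptions, not the repo's actual splitting code.

```python
# Illustrative sketch mirroring LLM_CHUNK_SIZE=1000 / LLM_CHUNK_OVERLAP=200
# from .env-example; the repo's real splitting logic lives in the API code.
from langchain.text_splitter import RecursiveCharacterTextSplitter

splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,    # max characters per chunk ("node")
    chunk_overlap=200,  # tail shared between neighboring chunks
)

with open("app/api/data/training_data/project-kekzal.md") as f:
    chunks = splitter.split_text(f.read())

# Each chunk becomes a node with its own embedding. A hard character cut
# is why answers like the Kekzal precautions can end mid-word ("roman..")
print(len(chunks), repr(chunks[0][-80:]))
```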

212 | 213 | # 💻 API Architecture and Usage 214 | 215 | The REST API is straightforward; please visit the documentation 👉 http://localhost:8888/docs 216 | 217 | The entities below have basic CRUD operations and return JSON 218 | 219 |

220 | 221 | ## Organization 222 | 223 | This can be thought of as a company that is your client in a SaaS / multi-tenant world. By default a list of dummy organizations have been provided 224 | 225 | ![Screenshot 2023-05-05 at 8.45.28 AM.png](https://github.com/paulpierre/RasaGPT/blob/main/github/orgs.png?raw=true) 226 | 227 | ```bash 228 | [ 229 | { 230 | "id": 1, 231 | "uuid": "d2a642e6-c81a-4a43-83e2-22cee3562452", 232 | "display_name": "Pepe Corp.", 233 | "namespace": "pepe", 234 | "bot_url": null, 235 | "created_at": "2023-05-05T10:42:45.933976", 236 | "updated_at": "2023-05-05T10:42:45.933979" 237 | }, 238 | { 239 | "id": 2, 240 | "uuid": "7d574f88-6c0b-4c1f-9368-367956b0e90f", 241 | "display_name": "Umbrella Corp", 242 | "namespace": "acme", 243 | "bot_url": null, 244 | "created_at": "2023-05-05T10:43:03.555484", 245 | "updated_at": "2023-05-05T10:43:03.555488" 246 | }, 247 | { 248 | "id": 3, 249 | "uuid": "65105a15-2ef0-4898-ac7a-8eafee0b283d", 250 | "display_name": "Cyberdine Systems", 251 | "namespace": "cyberdine", 252 | "bot_url": null, 253 | "created_at": "2023-05-05T10:43:04.175424", 254 | "updated_at": "2023-05-05T10:43:04.175428" 255 | }, 256 | { 257 | "id": 4, 258 | "uuid": "b7fb966d-7845-4581-a537-818da62645b5", 259 | "display_name": "Bluth Companies", 260 | "namespace": "bluth", 261 | "bot_url": null, 262 | "created_at": "2023-05-05T10:43:04.697801", 263 | "updated_at": "2023-05-05T10:43:04.697804" 264 | }, 265 | { 266 | "id": 5, 267 | "uuid": "9283d017-b24b-4ecd-bf35-808b45e258cf", 268 | "display_name": "Evil Corp", 269 | "namespace": "evil", 270 | "bot_url": null, 271 | "created_at": "2023-05-05T10:43:05.102546", 272 | "updated_at": "2023-05-05T10:43:05.102549" 273 | } 274 | ] 275 | ``` 276 | 277 |
278 | 279 | ### Project 280 | 281 | This can be thought of as a product that belongs to a company. You can view the list of projects that belong to an organizations like so: 282 | 283 | ![org-projects.png](https://github.com/paulpierre/RasaGPT/blob/main/github/org-projects.png?raw=true) 284 | 285 | ```bash 286 | [ 287 | { 288 | "id": 1, 289 | "documents": [ 290 | { 291 | "id": 1, 292 | "uuid": "92604623-e37c-4935-bf08-0e9efa8b62f7", 293 | "display_name": "project-pepetamine.md", 294 | "node_count": 3 295 | } 296 | ], 297 | "document_count": 1, 298 | "uuid": "44a4b60b-9280-4b21-a676-00612be9aa87", 299 | "display_name": "Pepetamine", 300 | "created_at": "2023-05-05T10:42:46.060930", 301 | "updated_at": "2023-05-05T10:42:46.060934" 302 | }, 303 | { 304 | "id": 2, 305 | "documents": [ 306 | { 307 | "id": 2, 308 | "uuid": "b408595a-3426-4011-9b9b-8e260b244f74", 309 | "display_name": "project-frogonil.md", 310 | "node_count": 3 311 | } 312 | ], 313 | "document_count": 1, 314 | "uuid": "5ba6b812-de37-451d-83a3-8ccccadabd69", 315 | "display_name": "Frogonil", 316 | "created_at": "2023-05-05T10:42:48.043936", 317 | "updated_at": "2023-05-05T10:42:48.043940" 318 | }, 319 | { 320 | "id": 3, 321 | "documents": [ 322 | { 323 | "id": 3, 324 | "uuid": "b99d373a-3317-4699-a89e-90897ba00db6", 325 | "display_name": "project-kekzal.md", 326 | "node_count": 3 327 | } 328 | ], 329 | "document_count": 1, 330 | "uuid": "1be4360c-f06e-4494-bf20-e7c73a56f003", 331 | "display_name": "Kekzal", 332 | "created_at": "2023-05-05T10:42:49.092675", 333 | "updated_at": "2023-05-05T10:42:49.092678" 334 | }, 335 | { 336 | "id": 4, 337 | "documents": [ 338 | { 339 | "id": 4, 340 | "uuid": "94da307b-5993-4ddd-a852-3d8c12f95f3f", 341 | "display_name": "project-memetrex.md", 342 | "node_count": 3 343 | } 344 | ], 345 | "document_count": 1, 346 | "uuid": "1fd7e772-365c-451b-a7eb-4d529b0927f0", 347 | "display_name": "Memetrex", 348 | "created_at": "2023-05-05T10:42:50.184817", 349 | "updated_at": "2023-05-05T10:42:50.184821" 350 | }, 351 | { 352 | "id": 5, 353 | "documents": [ 354 | { 355 | "id": 5, 356 | "uuid": "6deff180-3e3e-4b09-ae5a-6502d031914a", 357 | "display_name": "project-pepetrak.md", 358 | "node_count": 4 359 | } 360 | ], 361 | "document_count": 1, 362 | "uuid": "a389eb58-b504-48b4-9bc3-d3c93d2fbeaa", 363 | "display_name": "PepeTrak", 364 | "created_at": "2023-05-05T10:42:51.293352", 365 | "updated_at": "2023-05-05T10:42:51.293355" 366 | }, 367 | { 368 | "id": 6, 369 | "documents": [ 370 | { 371 | "id": 6, 372 | "uuid": "2e3c2155-cafa-4c6b-b7cc-02bb5156715b", 373 | "display_name": "project-memegen.md", 374 | "node_count": 5 375 | } 376 | ], 377 | "document_count": 1, 378 | "uuid": "cec4154f-5d73-41a5-a764-eaf62fc3db2c", 379 | "display_name": "MemeGen", 380 | "created_at": "2023-05-05T10:42:52.562037", 381 | "updated_at": "2023-05-05T10:42:52.562040" 382 | }, 383 | { 384 | "id": 7, 385 | "documents": [ 386 | { 387 | "id": 7, 388 | "uuid": "baabcb6f-e14c-4d59-a019-ce29973b9f5c", 389 | "display_name": "project-neurokek.md", 390 | "node_count": 5 391 | } 392 | ], 393 | "document_count": 1, 394 | "uuid": "4a1a0542-e314-4ae7-9961-720c2d092f04", 395 | "display_name": "Neuro-kek", 396 | "created_at": "2023-05-05T10:42:53.689537", 397 | "updated_at": "2023-05-05T10:42:53.689539" 398 | }, 399 | { 400 | "id": 8, 401 | "documents": [ 402 | { 403 | "id": 8, 404 | "uuid": "5be007ec-5c89-4bc4-8bfd-448a3659c03c", 405 | "display_name": "org-about_the_company.md", 406 | "node_count": 5 407 | }, 408 | { 409 | "id": 9, 410 | "uuid": 
"c2b3fb39-18c0-4f3e-9c21-749b86942cba", 411 | "display_name": "org-board_of_directors.md", 412 | "node_count": 3 413 | }, 414 | { 415 | "id": 10, 416 | "uuid": "41aa81a9-13a9-4527-a439-c2ac0215593f", 417 | "display_name": "org-company_story.md", 418 | "node_count": 4 419 | }, 420 | { 421 | "id": 11, 422 | "uuid": "91c59eb8-8c05-4f1f-b09d-fcd9b44b5a20", 423 | "display_name": "org-corporate_philosophy.md", 424 | "node_count": 4 425 | }, 426 | { 427 | "id": 12, 428 | "uuid": "631fc3a9-7f5f-4415-8283-78ff582be483", 429 | "display_name": "org-customer_support.md", 430 | "node_count": 3 431 | }, 432 | { 433 | "id": 13, 434 | "uuid": "d4c3d3db-6f24-433e-b2aa-52a70a0af976", 435 | "display_name": "org-earnings_fy2023.md", 436 | "node_count": 5 437 | }, 438 | { 439 | "id": 14, 440 | "uuid": "08dd478b-414b-46c4-95c0-4d96e2089e90", 441 | "display_name": "org-management_team.md", 442 | "node_count": 3 443 | } 444 | ], 445 | "document_count": 7, 446 | "uuid": "1d2849b4-2715-4dcf-aa68-090a221942ba", 447 | "display_name": "Pepe Corp. (company)", 448 | "created_at": "2023-05-05T10:42:55.258902", 449 | "updated_at": "2023-05-05T10:42:55.258904" 450 | } 451 | ] 452 | ``` 453 | 454 |
455 | 456 | ## Document 457 | 458 | This can be thought of as an artifact related to a product, like an FAQ page or a PDF with financial statement earnings. You can view all the Documents associated with an Organization’s Project like so: 459 | 460 | ![documents.png](https://github.com/paulpierre/RasaGPT/blob/main/github/documents.png?raw=true) 461 | 462 | ```bash 463 | { 464 | "id": 1, 465 | "uuid": "44a4b60b-9280-4b21-a676-00612be9aa87", 466 | "organization": { 467 | "id": 1, 468 | "uuid": "d2a642e6-c81a-4a43-83e2-22cee3562452", 469 | "display_name": "Pepe Corp.", 470 | "bot_url": null, 471 | "status": 2, 472 | "created_at": "2023-05-05T10:42:45.933976", 473 | "updated_at": "2023-05-05T10:42:45.933979", 474 | "namespace": "pepe" 475 | }, 476 | "document_count": 1, 477 | "documents": [ 478 | { 479 | "id": 1, 480 | "uuid": "92604623-e37c-4935-bf08-0e9efa8b62f7", 481 | "organization_id": 1, 482 | "project_id": 1, 483 | "display_name": "project-pepetamine.md", 484 | "url": "", 485 | "data": "# Pepetamine\n\nProduct Name: Pepetamine\n\nPurpose: Increases cognitive focus just like the Limitless movie\n\n**How to Use**\n\nPepetamine is available in the form of rare Pepe-coated tablets. The recommended dosage is one tablet per day, taken orally with a glass of water, preferably while browsing your favorite meme forum for maximum cognitive enhancement. For optimal results, take Pepetamine 30 minutes before engaging in mentally demanding tasks, such as decoding ancient Pepe hieroglyphics or creating your next viral meme masterpiece.\n\n**Side Effects**\n\nSome potential side effects of Pepetamine may include:\n\n1. Uncontrollable laughter and a sudden appreciation for dank memes\n2. An inexplicable desire to collect rare Pepes\n3. Enhanced meme creation skills, potentially leading to internet fame\n4. Temporary green skin pigmentation, resembling the legendary Pepe himself\n5. Spontaneously speaking in \"feels good man\" language\n\nWhile most side effects are generally harmless, consult your memologist if side effects persist or become bothersome.\n\n**Precautions**\n\nBefore taking Pepetamine, please consider the following precautions:\n\n1. Do not use Pepetamine if you have a known allergy to rare Pepes or dank memes.\n2. Pepetamine may not be suitable for individuals with a history of humor deficiency or meme intolerance.\n3. Exercise caution when driving or operating heavy machinery, as Pepetamine may cause sudden fits of laughter or intense meme ideation.\n\n**Interactions**\n\nPepetamine may interact with other substances, including:\n\n1. Normie supplements: Combining Pepetamine with normie supplements may result in meme conflicts and a decreased sense of humor.\n2. Caffeine: The combination of Pepetamine and caffeine may cause an overload of energy, resulting in hyperactive meme creation and potential internet overload.\n\nConsult your memologist if you are taking any other medications or substances to ensure compatibility with Pepetamine.\n\n**Overdose**\n\nIn case of an overdose, symptoms may include:\n\n1. Uncontrollable meme creation\n2. Delusions of grandeur as the ultimate meme lord\n3. Time warps into the world of Pepe\n\nIf you suspect an overdose, contact your local meme emergency service or visit the nearest meme treatment facility. 
Remember, the key to enjoying Pepetamine is to use it responsibly, and always keep in mind the wise words of our legendary Pepe: \"Feels good man.\"", 486 | "hash": "fdee6da2b5441080dd78e7850d3d2e1403bae71b9e0526b9dcae4c0782d95a78", 487 | "version": 1, 488 | "status": 2, 489 | "created_at": "2023-05-05T10:42:46.755428", 490 | "updated_at": "2023-05-05T10:42:46.755431" 491 | } 492 | ], 493 | "display_name": "Pepetamine", 494 | "created_at": "2023-05-05T10:42:46.060930", 495 | "updated_at": "2023-05-05T10:42:46.060934" 496 | } 497 | ``` 498 | 499 |
500 | 501 | ## Node 502 | 503 | Although this is not exposed in the API, a node is a chunk of a document for which embeddings get generated. Nodes are used for retrieval search as well as context injection. A node belongs to a document. 504 | 505 |
506 | 507 | ## User 508 | 509 | A user represents the person talking to a bot. Users do not necessarily belong to an org or product, but this relationship is captured in ChatSession below. 510 | 511 |
512 | 513 | ## ChatSession 514 | 515 | Not exposed via the API, but this represents a question and answer between the User and a bot. Each of these objects can be flexibly identified by a `session_id`, which gets automatically generated. Chat Sessions contain rich metadata that can be used for training and optimization. ChatSessions via the `/chat` endpoint ARE in fact associated with an organization (for multi-tenant security purposes) 516 | 517 |
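For orientation, a round-trip against `/chat` might look like the sketch below. Treat it as hypothetical: the endpoint exists, but the request and response fields shown here are assumptions, so check the Swagger docs for the real schema.

```python
# Hypothetical sketch: the /chat endpoint exists, but the field names in
# this payload/response are assumptions; see http://localhost:8888/docs
# for the real schema.
import requests

payload = {
    "text": "What are the side effects of Kekzal?",
    "session_id": None,  # omit on the first turn; reuse the returned one after
}
resp = requests.post("http://localhost:8888/chat", json=payload, timeout=30)
print(resp.json())  # response text, generated tags, escalation flag, etc.
```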

518 | 519 | # **📚 How it works** 520 | 521 |
522 | 523 | ## Rasa 524 | 525 | 1. Rasa handles integration with the communication channel, in this case Telegram. 526 | - It specifically handles submitting the target webhook that user feedback should go through. In our case it is our FastAPI server via `/webhooks/{channel}/webhook` 527 | 2. Rasa has two components, the core [Rasa app](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa) and a Rasa [actions server](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa/actions) that runs separately 528 | 3. Rasa must be configured (done already) via a few YAML files: 529 | - [config.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/config.yml) - contains the NLU pipeline and policy configuration. What matters is setting the `FallbackClassifier` threshold 530 | - [credentials.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/credentials.yml) - contains the path to our webhook and Telegram credentials. This will get updated by the helper service `rasa-credentials` via [update_credentials.py](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/update_credentials.py) 531 | - [domain.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/domain.yml) - This contains the chat entrypoint logic configuration, like an intent and the action to take against the intent. Here we add the `action_gpt_fallback` action, which will trigger our [actions server](https://github.com/paulpierre/RasaGPT/tree/main/app/rasa/actions) 532 | - [endpoints.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/endpoints.yml) - This is where we set the custom action end-point for Rasa to trigger our fallback 533 | - [nlu.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/nlu.yml) - this is where we set our intent `out_of_scope` 534 | - [rules.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/rules.yml) - we set a rule for this intent so that it triggers the action `action_gpt_fallback` 535 | - [actions.py](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/actions/actions.py) - this is where we define and express our action via the `ActionGPTFallback` class. Its `name` method returns the action we defined for our intent above 536 | 4. Rasa's NLU models must be trained, which can be done via the CLI with `rasa train`. This is done automatically for you when you run `make install` 537 | 5. Rasa's core must be run via `rasa run` after training 538 | 6. Rasa's actions server must be run separately with `rasa run actions` 539 | 540 |
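For reference, the general shape of such a fallback action in the Rasa SDK looks like this. It is a trimmed illustration; see `actions.py` for the real `ActionGPTFallback`, and note the API URL and response field below are assumptions.

```python
# Trimmed illustration of a Rasa custom fallback action. The real
# implementation lives in app/rasa/actions/actions.py; the endpoint URL
# and "response" field below are assumptions.
from typing import Any, Dict, List, Text

import requests
from rasa_sdk import Action, Tracker
from rasa_sdk.executor import CollectingDispatcher


class ActionGPTFallback(Action):
    def name(self) -> Text:
        # Must match the action name registered in domain.yml / rules.yml
        return "action_gpt_fallback"

    def run(
        self,
        dispatcher: CollectingDispatcher,
        tracker: Tracker,
        domain: Dict[Text, Any],
    ) -> List[Dict[Text, Any]]:
        question = tracker.latest_message.get("text")
        # Hand the unmatched question to the RasaGPT API for retrieval + LLM
        resp = requests.post("http://api:8888/chat", json={"text": question})
        dispatcher.utter_message(text=resp.json().get("response", ""))
        return []
```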
541 | 542 | ## Telegram 543 | 544 | 1. Rasa automatically updates the Telegram Bot API with your callback webhook from [credentials.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/credentials.yml). 545 | 2. By default this is static. Since we are running on our local machine, we leverage [Ngrok](https://ngrok.com/) to generate a publicly accessible URL and reverse tunnel into our docker container 546 | 3. The `rasa-credentials` service takes care of this process for you. Ngrok runs as a service; once it is ready, `rasa-credentials` calls the local ngrok API to retrieve the tunnel URL, updates the `credentials.yml` file and restarts Rasa for you 547 | 4. The webhook Telegram will send messages to is our FastAPI server. Why this instead of Rasa? Because we want the flexibility to capture metadata, which Rasa makes a PITA, and centralizing on the API server is ideal 548 | 5. The FastAPI server forwards this to the Rasa webhook 549 | 6. Rasa will then determine what action to take based on the user intent. Since the intents have been nerfed for this demo, it will go to the fallback action running in `actions.py` 550 | 7. The custom action will capture the metadata and forward the response from FastAPI to the user 551 | 552 |
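Steps 4 and 5 boil down to a pass-through route like the minimal sketch below (the real handler lives in the API server and also persists metadata; the handler body here is an assumption).

```python
# Minimal sketch of the webhook pass-through idea; the real handler in
# app/api/main.py also captures session/user/org metadata before forwarding.
import httpx
from fastapi import FastAPI, Request

app = FastAPI()
RASA_WEBHOOK_URL = "http://rasa-core:5005"  # RASA_WEBHOOK_HOST/PORT from .env

@app.post("/webhooks/{channel}/webhook")
async def channel_webhook(channel: str, request: Request):
    body = await request.json()
    # ... capture metadata here before handing off to Rasa ...
    async with httpx.AsyncClient() as client:
        resp = await client.post(
            f"{RASA_WEBHOOK_URL}/webhooks/{channel}/webhook", json=body
        )
    return resp.json()
```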
553 | 554 | ## PGVector 555 | 556 | `pgvector` is a plugin for Postgres and automatically installed enabling your to store and calculate vector data types. We have our own implementation because the Langchain PGVector class is not flexible to adapt to our schema and we want flexibility. 557 | 558 | 1. By default in postgres, any files in the container's path `/docker-entry-initdb.d` get run if the database has not been initialized. In the [postgres Dockerfile](https://github.com/paulpierre/RasaGPT/blob/main/app/db/Dockerfile) we copy `[create_db.sh` which creates](https://github.com/paulpierre/RasaGPT/blob/main/app/db/create_db.sh) the db and user for our database 559 | 2. In the `[models` command](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/Makefile#L64) in the [Makefile](https://github.com/paulpierre/RasaGPT/blob/main/Makefile), we run the [models.py](https://github.com/paulpierre/RasaGPT/blob/main/app/api/models.py) in the API container which creates the tables from the models. 560 | 3. The `[enable_vector` method](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/models.py#L266) enables the pgvector extension in the database 561 | 562 |
563 | 564 | ## Langchain 565 | 566 | 1. The training data gets loaded in the database 567 | 2. The data is indexed [if the index doesn't exist](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/main.py#L49) and [stored in a file named `index.json`](https://github.com/paulpierre/RasaGPT/blob/main/app/api/index.json) 568 | 3. LlamaIndex uses a basic `GPTSimpleVectorIndex` to find the relevant data and [injects it into a prompt](https://github.com/paulpierre/RasaGPT/blob/dca9be4cd6fe4c9daaff1564267cdb5327a384a5/app/api/main.py#L66). 569 | 4. Guard rails via prompts are used to keep the conversation focused 570 | 571 |
572 | 573 | ## Bot flow 574 | 575 | 1. The user will chat in Telegram and the message will be filtered for [existing intents](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/nlu.yml) 576 | 2. If it detects there is no intent match but instead matches the `out_of_scope`, [based on rules.yml](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/data/rules.yml) it will trigger the `action_gpt_fallback` action 577 | 3. The `[ActionGPTFallback` function](https://github.com/paulpierre/RasaGPT/blob/main/app/rasa/actions/actions.py) will then call the [FastAPI API server](https://github.com/paulpierre/RasaGPT/blob/main/app/api/main.py) 578 | 4. the API using LlamaIndex will find the relevant indexed content and inject it into a prompt to send to OpenAI for inference 579 | 5. The prompt contains conversational guardrails including: 580 | - Requests data be returned in JSON 581 | - Create categorical tags based on what the user's question 582 | - Return a boolean if the conversation should be escalated to a human (if there is no context match) 583 | 584 |

585 | 586 | 587 | # 📝 TODO 588 | 589 | - [ ] Implement LlamaIndex optimizations 590 | - [ ] Implement chat history 591 | - [ ] Implement [Query Routers Abstractions](https://medium.com/@jerryjliu98/unifying-llm-powered-qa-techniques-with-routing-abstractions-438e2499a0d0) to understand which search strategy to use (one-shot vs few-shot) 592 | - [ ] Explore other indexing methods like Tree indexes, Keyword indexes 593 | - [ ] Add chat history for immediate recall and context setting 594 | - [ ] Add a secondary adversarial agent ([Dual pattern model](https://simonwillison.net/2023/Apr/25/dual-llm-pattern/)) with the following potential functionalities: 595 | - [ ] Determine if the question has been answered and if not, re-optimize search strategy 596 | - [ ] Ensure prompt injection is not occurring 597 | - [ ] Increase baseline similarity search by exploring: 598 | - [ ] Regularly generate “fake” document embeddings based on historical queries and link to actual documents via [HyDE pattern](https://wfhbrian.com/revolutionizing-search-how-hypothetical-document-embeddings-hyde-can-save-time-and-increase-productivity/) 599 | - [ ] Regularly generate “fake” user queries based on documents and link to actual document so user input search and “fake” queries can match better 600 | 601 | 602 |
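As a concrete sketch of the HyDE item above, using the pre-1.0 `openai` SDK; the step that links the fake embedding back to real documents is app-specific and only outlined in a comment:

```python
# Rough HyDE sketch with the pre-1.0 openai SDK (module-level calls).
import openai

question = "Who is Dr. Olivia Dankstein?"

# 1. Let the LLM hallucinate a plausible ("fake") answer document
fake_doc = openai.ChatCompletion.create(
    model="gpt-3.5-turbo",
    messages=[{"role": "user", "content": f"Write a short passage answering: {question}"}],
)["choices"][0]["message"]["content"]

# 2. Embed the fake document instead of the raw question
embedding = openai.Embedding.create(
    model="text-embedding-ada-002", input=fake_doc
)["data"][0]["embedding"]

# 3. Run the usual pgvector search with this embedding; hypothetical
#    answers tend to land closer to real answer passages than short
#    questions do.
```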

603 | 604 | # 🔍 Troubleshooting 605 | 606 | In general, check your docker container logs by simply going to 👉 http://localhost:9999/ 607 | 608 |
609 | 610 | ## Ngrok issues 611 | 612 | Always check that your webhooks with ngrok and Telegram match. Simply do this by running (substitute your bot token for `<TELEGRAM_ACCESS_TOKEN>`): 613 | 614 | ```bash 615 | curl -sS "https://api.telegram.org/bot<TELEGRAM_ACCESS_TOKEN>/getWebhookInfo" | json_pp 616 | ``` 617 | 618 |
619 | 620 | .. should return this: 621 | 622 | ```bash 623 | { 624 | "ok": true, 625 | "result": { 626 | "url": "https://b280-04-115-40-112.ngrok-free.app/webhooks/telegram/webhook", 627 | "has_custom_certificate": false, 628 | "pending_update_count": 0, 629 | "max_connections": 40, 630 | "ip_address": "1.2.3.4" 631 | } 632 | } 633 | ``` 634 | 635 |
636 | 637 | .. which should match the URL in your `credentials.yml` file or visit the Ngrok admin UI 👉 [http://localhost:4040/status](http://localhost:4040/status) 638 | 639 | ![ngrok-admin.png](https://github.com/paulpierre/RasaGPT/blob/main/github/ngrok-admin.png?raw=true) 640 | 641 |
642 | 643 | Looks like it is a match. If not, restart everything by running: 644 | 645 | ```bash 646 | make restart 647 | ``` 648 | 649 |

650 | 651 | # 💪 Contributing / Issues 652 | 653 | - Pull requests welcome 654 | - Please submit issues via GitHub, I will do my best to resolve them 655 | - If you want to get in touch, feel free to hmu on Twitter via [@paulpierre](https://twitter.com/paulpierre) 656 | 657 |

658 | 659 | > ![thumbsup](https://camo.githubusercontent.com/bcb43227c1e90a1d27996eb75ac794bbf20d1355b36d0e9eaa71c71ad4dd2a56/68747470733a2f2f6d65646961342e67697068792e636f6d2f6d656469612f313149537762674378457a4d79592f67697068792e6769663f6369643d65636630356534376664703164727a72766178733175787532666269376f72316e68626f6d39326d30346436306e786b2665703d76315f676966735f72656c61746564267269643d67697068792e6769662663743d67) 660 | >
Congratulations, all your base are belong to us! kthxbye 661 | 662 |

663 | 664 | # 📜 Open source license 665 | 666 | Copyright (c) 2023 Paul Pierre. Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the “Software”), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED “AS IS”, WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. -------------------------------------------------------------------------------- /app/api/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.9 2 | 3 | # Set the path to the API directory 4 | WORKDIR /app/api 5 | 6 | # Copy the codebase into the container 7 | COPY . . 8 | 9 | # Install the requirements 10 | RUN pip install --no-cache-dir -r requirements.txt 11 | 12 | # Expose the port 13 | EXPOSE 8888 14 | 15 | ENTRYPOINT ["uvicorn", "main:app", "--host", "api", "--port", "8888", "--reload"] -------------------------------------------------------------------------------- /app/api/Makefile: -------------------------------------------------------------------------------- 1 | .PHONY: default banner install install-seed seed run stop db-purge api-install env-create env db db-stop api api-stop 2 | SHELL := /bin/bash 3 | default: help 4 | 5 | help: 6 | make banner 7 | @echo "+------------------+" 8 | @echo "| 🏠 CORE COMMANDS |" 9 | @echo "+------------------+" 10 | @echo "make install - Setup environment and build models" 11 | @echo "make install-seed - Setup environment, build models and seed database" 12 | @echo "make seed - Seed database with dummy data" 13 | @echo "make run - Run database and API server" 14 | @echo "make stop - Stop database and API server" 15 | @echo "make db-purge - Delete all data in database\n" 16 | 17 | banner: 18 | @echo "\n\n-------------------------------------" 19 | @echo "▒█▀▀█ █▀▀█ █▀▀ █▀▀█ ▒█▀▀█ ▒█▀▀█ ▀▀█▀▀" 20 | @echo "▒█▄▄▀ █▄▄█ ▀▀█ █▄▄█ ▒█░▄▄ ▒█▄▄█ ░▒█░░" 21 | @echo "▒█░▒█ ▀░░▀ ▀▀▀ ▀░░▀ ▒█▄▄█ ▒█░░░ ░▒█░░" 22 | @echo "+-----------------------------------+" 23 | @echo "| 🤖 FastAPI LLM server |" 24 | @echo "+-----------------------------------+\n\n" 25 | 26 | # ================ 27 | # 🏠 CORE COMMANDS 28 | # ================ 29 | 30 | # ---------------------------------- 31 | # Setup environment and build models 32 | # ---------------------------------- 33 | install: 34 | @make banner 35 | @make api-install 36 | @make db-stop 37 | @echo "✅ Installation complete. Run 'make run' to start services.\n" 38 | 39 | # ---------------------------------------------- 40 | # Setup environment and build models and seed DB 41 | # ---------------------------------------------- 42 | install-seed: 43 | @make banner 44 | @make api-install 45 | @make seed 46 | @make db-stop 47 | @echo "✅ Installation complete. 
Run 'make run' to start services.\n" 48 | 49 | 50 | seed: 51 | @make banner 52 | @echo "🌱 Seeding database ..\n" 53 | @python3 seed.py 54 | 55 | # --------------------------- 56 | # Run database and API server 57 | # --------------------------- 58 | run: 59 | @make banner 60 | @make env 61 | @echo "🚀 Starting services ..\n" 62 | @make db 63 | @make api 64 | 65 | # ---------------------------- 66 | # Stop database and API server 67 | # ---------------------------- 68 | stop: 69 | @make banner 70 | @echo "🛑 Stopping services ..\n" 71 | @make db-stop 72 | @make api-stop 73 | 74 | # --------------------------- 75 | # Delete all data in database 76 | # --------------------------- 77 | db-purge: 78 | @echo "⛔ Are you sure you want to delete all data in the database? [y/N]\n" 79 | @read confirmation; \ 80 | if [ "$$confirmation" = "y" ] || [ "$$confirmation" = "Y" ]; then \ 81 | echo "Deleting generated files..."; \ 82 | make db-stop; \ 83 | rm -rf ../../mnt; \ 84 | echo "Deleted."; \ 85 | else \ 86 | echo "Aborted."; \ 87 | fi 88 | 89 | # ================== 90 | # 💁 HELPER COMMANDS 91 | # ================== 92 | 93 | # -------------- 94 | # Install helper 95 | # -------------- 96 | api-install: 97 | @echo "🏗️ Installing services ..\n" 98 | @make env-create 99 | @make db-stop 100 | @make db 101 | @make models 102 | 103 | # ------------------- 104 | # Create a virtualenv 105 | # ------------------- 106 | env-create: 107 | @echo "Creating virtual environment .." 108 | @if [ -d "./venv" ]; then \ 109 | echo "Environment and dependencies created already, loading .."; \ 110 | source ./venv/bin/activate; \ 111 | else \ 112 | echo "Install dependencies .."; \ 113 | python3 -m venv venv; \ 114 | . venv/bin/activate; \ 115 | pip3 install -r requirements.txt; \ 116 | fi 117 | 118 | # --------------------------- 119 | # Load an existing virtualenv 120 | # --------------------------- 121 | env: 122 | @echo "Loading virtual environment if it exists .." 123 | @if [ -d "./venv" ]; then \ 124 | source ./venv/bin/activate; \ 125 | fi 126 | 127 | # ----------------- 128 | # Start Postgres DB 129 | # ----------------- 130 | db: 131 | @echo "Starting Postgres with pgvector .." 132 | @cd ../../ && docker-compose -f docker-compose.yml up -d db 133 | 134 | # ----------------- 135 | # Stop Postgres DB 136 | # ----------------- 137 | db-stop: 138 | @cd ../../ && docker-compose -f docker-compose.yml stop db 139 | 140 | # ------------ 141 | # Build models 142 | # ------------ 143 | models: 144 | @echo "💽 Building models in database .." 145 | @sleep 60 && python3 models.py 146 | 147 | # ----------------- 148 | # Start FastAPI API 149 | # ----------------- 150 | api: 151 | @echo "⚡ Starting FastAPI API server .." 152 | @sleep 5 && uvicorn main:app --port 8888 --reload 153 | 154 | # ----------------- 155 | # Stop FastAPI API 156 | # ----------------- 157 | api-stop: 158 | @echo "🛑 Stopping FastAPI server .."
159 | @killall uvicorn 160 | -------------------------------------------------------------------------------- /app/api/config.py: -------------------------------------------------------------------------------- 1 | from enum import Enum, IntEnum 2 | from dotenv import load_dotenv 3 | from pathlib import Path 4 | import logging 5 | import sys 6 | import os 7 | 8 | # ------- 9 | # Logging 10 | # ------- 11 | 12 | logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) 13 | # basicConfig above already attaches a stdout handler; adding a second StreamHandler would print every record twice 14 | logger = logging.getLogger(__name__) 15 | 16 | # ---------------- 17 | # Environment vars 18 | # ---------------- 19 | env = os.getenv("ENV", None) 20 | 21 | if not env: 22 | # Load the project-root .env file (three levels up from this file) 23 | env_file = Path(__file__).parent.parent.parent / '.env' 24 | logger.debug(f"Loading env file: {env_file}") 25 | if os.path.exists(env_file): 26 | load_dotenv(dotenv_path=env_file) 27 | else: 28 | raise Exception(f"Env file {env_file} not found") 29 | 30 | # ----------------------- 31 | # Configuration constants 32 | # ----------------------- 33 | readme_file = Path(__file__).parent / "API.md" 34 | 35 | readme_str = ( 36 | f""" 37 |
38 | <details><summary>📕 API.MD</summary> 39 | {readme_file.read_text()} 40 | </details>
41 | 42 | 43 | """ 44 | if readme_file.exists() 45 | else "" 46 | ) 47 | APP_NAME = "API Documentation" 48 | APP_VERSION = "0.0.1" 49 | APP_DESCRIPTION = f""" 50 | ![img](/static/img/rasagpt-logo-1.png) 51 | 52 | --- 53 | ## About 54 | 💬 RasaGPT is the first headless LLM chatbot platform built on top of Rasa and Langchain 55 | 56 | - 📚 Resources: [https://rasagpt.dev](https://rasagpt.dev) 57 | - 🧑‍💻 Github: [https://github.com/paulpierre/RasaGPT](https://github.com/paulpierre/RasaGPT) 58 | - 🧙 Author: [@paulpierre](https://twitter.com/paulpierre) 59 | 60 | {readme_str} 61 | """ 62 | APP_ICON = "/public/img/rasagpt-icon-200x200.png" 63 | APP_LOGO = "/public/img/rasagpt-logo-1.png" 64 | 65 | FILE_UPLOAD_PATH = os.getenv("FILE_UPLOAD_PATH", "/tmp") 66 | 67 | # Database configurations 68 | POSTGRES_USER = os.getenv("POSTGRES_USER", "postgres") 69 | POSTGRES_PASSWORD = os.getenv("POSTGRES_PASSWORD", "postgres") 70 | DB_HOST = os.getenv("DB_HOST", "localhost") 71 | DB_PORT = os.getenv("DB_PORT", 5432) 72 | DB_USER = os.getenv("DB_USER") 73 | DB_NAME = os.getenv("DB_NAME") 74 | DB_PASSWORD = os.getenv("DB_PASSWORD") 75 | DSN = f"postgresql://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" 76 | SU_DSN = ( 77 | f"postgresql://{POSTGRES_USER}:{POSTGRES_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}" 78 | ) 79 | 80 | RASA_WEBHOOK_HOST = os.getenv("RASA_WEBHOOK_HOST", "rasa-core") 81 | RASA_WEBHOOK_PORT = os.getenv("RASA_WEBHOOK_PORT", 5005) 82 | RASA_WEBHOOK_URL = f"http://{RASA_WEBHOOK_HOST}:{RASA_WEBHOOK_PORT}" 83 | 84 | # LLM configurations 85 | MODEL_NAME = os.getenv("MODEL_NAME") 86 | LLM_DEFAULT_TEMPERATURE = float(os.getenv("LLM_DEFAULT_TEMPERATURE", 0.0)) 87 | LLM_CHUNK_SIZE = int(os.getenv("LLM_CHUNK_SIZE", 512)) 88 | LLM_CHUNK_OVERLAP = int(os.getenv("LLM_CHUNK_OVERLAP", 20)) 89 | LLM_DISTANCE_THRESHOLD = float(os.getenv("LLM_DISTANCE_THRESHOLD", 0.5)) 90 | LLM_MAX_OUTPUT_TOKENS = int(os.getenv("LLM_MAX_OUTPUT_TOKENS", 256)) 91 | LLM_MIN_NODE_LIMIT = int(os.getenv("LLM_MIN_NODE_LIMIT", 3)) 92 | 93 | 94 | class DISTANCE_STRATEGY(Enum): 95 | COSINE = "cosine" 96 | EUCLIDEAN = "euclidean" 97 | MAX_INNER_PRODUCT = "max_inner_product" 98 | 99 | def __new__(cls, strategy_name: str): 100 | obj = object.__new__(cls) 101 | obj._value_ = strategy_name 102 | return obj 103 | 104 | @property 105 | def strategy_name(self) -> str: 106 | return self.value 107 | 108 | 109 | DISTANCE_STRATEGIES = [ 110 | ( 111 | DISTANCE_STRATEGY.EUCLIDEAN, 112 | "euclidean", 113 | "<->", 114 | "CREATE INDEX ON node USING ivfflat (embeddings vector_l2_ops) WITH (lists = 100);", 115 | ), 116 | ( 117 | DISTANCE_STRATEGY.COSINE, 118 | "cosine", 119 | "<=>", 120 | "CREATE INDEX ON node USING ivfflat (embeddings vector_cosine_ops) WITH (lists = 100);", 121 | ), 122 | ( 123 | DISTANCE_STRATEGY.MAX_INNER_PRODUCT, 124 | "max_inner_product", 125 | "<#>", 126 | "CREATE INDEX ON node USING ivfflat (embeddings vector_ip_ops) WITH (lists = 100);", 127 | ), 128 | ] 129 | LLM_DEFAULT_DISTANCE_STRATEGY = DISTANCE_STRATEGY[ 130 | os.getenv("LLM_DEFAULT_DISTANCE_STRATEGY", "COSINE") 131 | ] 132 | VECTOR_EMBEDDINGS_COUNT = 1536 133 | PGVECTOR_ADD_INDEX = os.getenv("PGVECTOR_ADD_INDEX", "false").lower() in ("true", "1") 134 | # Model constants 135 | 136 | DOCUMENT_TYPE = IntEnum("DOCUMENT_TYPE", ["PLAINTEXT", "MARKDOWN", "HTML", "PDF"]) 137 | 138 | ENTITY_STATUS = IntEnum( 139 | "ENTITY_STATUS", 140 | ["UNVERIFIED", "ACTIVE", "INACTIVE", "DELETED", "BANNED", "DEPRECATED"], 141 | ) 142 | CHANNEL_TYPE = IntEnum( 143 | "CHANNEL_TYPE", ["SMS", "TELEGRAM",
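# Illustrative sketch (assumption, not part of the original module): each tuple
# in DISTANCE_STRATEGIES above pairs a pgvector operator with the ivfflat index
# that accelerates it. With the EUCLIDEAN strategy, a nearest-neighbor lookup
# over the node table would read:
#   SELECT text FROM node
#   ORDER BY embeddings <-> '[0.018, -0.004, ...]'::vector  -- full 1536-dim literal
#   LIMIT 3;
# The cosine ("<=>") and inner-product ("<#>") operators map to the other two
# CREATE INDEX statements in the same way.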
"WHATSAPP", "EMAIL", "WEBSITE"] 144 | ) 145 | 146 | AGENT_NAMES = [ 147 | "Aisha", 148 | "Lilly", 149 | "Hanna", 150 | "Julia", 151 | "Emily", 152 | "Sophia", 153 | "Alex", 154 | "Isabella", 155 | ] 156 | 157 | 158 | class LLM_MODELS(Enum): 159 | TEXT_DAVINCI_003 = "text-davinci-003", 4097 160 | GPT_35_TURBO = "gpt-3.5-turbo", 4096 161 | TEXT_DAVINCI_002 = "text-davinci-002", 4097 162 | CODE_DAVINCI_002 = "code-davinci-002", 8001 163 | GPT_4 = "gpt-4", 8192 164 | GPT_4_32K = "gpt-4-32k", 32768 165 | 166 | def __init__(self, model_name, token_limit): 167 | self._model_name = model_name 168 | self._token_limit = token_limit 169 | 170 | @property 171 | def model_name(self) -> str: 172 | return self._model_name 173 | 174 | @property 175 | def token_limit(self) -> int: 176 | return self._token_limit 177 | -------------------------------------------------------------------------------- /app/api/data/training_data/org-about_the_company.md: -------------------------------------------------------------------------------- 1 | # About the company 2 | 3 | Title: About Pepe Corp 4 | 5 | **Introduction** 6 | 7 | Pepe Corp is a groundbreaking parody pharmaceutical and technology company inspired by the iconic Pepe the Frog meme. Our mission is to revolutionize the world through the power of dank memes, laughter, and innovation. By merging the realms of science and meme culture, we create cutting-edge products that entertain, amaze, and bring people together. 8 | 9 | **Our Story** 10 | 11 | Founded in 2021 by a group of visionary memelords, Pepe Corp began as a small startup focused on creating meme-based pharmaceuticals to counter the growing seriousness in the world. Recognizing the untapped potential of meme culture, our founders set out to explore the intersection between humor, science, and technology. 12 | 13 | Over the years, Pepe Corp has grown into a multidisciplinary organization with a diverse portfolio of products, ranging from meme-inspired pharmaceuticals to advanced brain implants and mobile applications. Our products are designed to elevate the human experience by harnessing the power of memes, laughter, and the Pepe spirit. 14 | 15 | **Our Values** 16 | 17 | At Pepe Corp, our values drive everything we do: 18 | 19 | 1. **Innovation**: We are committed to pushing the boundaries of meme science and technology to develop products that challenge conventional wisdom and redefine the limits of possibility. 20 | 2. **Laughter**: We believe that laughter is a universal language that transcends barriers and brings people together. Our products are designed to elicit joy and promote the healing power of humor. 21 | 3. **Community**: We strive to create a global community of meme enthusiasts who share our passion for innovation, laughter, and the Pepe spirit. 22 | 4. **Sustainability**: We are dedicated to operating our business in a responsible and sustainable manner, ensuring the continued availability of dank memes for future generations. 23 | 24 | **Our Products** 25 | 26 | Pepe Corp's diverse product portfolio includes: 27 | 28 | 1. *Meme-inspired Pharmaceuticals*: Our groundbreaking pharmaceuticals blend meme culture with scientific research to provide unique and entertaining experiences for our customers. 29 | 2. *Cutting-Edge Devices*: From brain implants that enable meme-based communication to wearable technology that tracks your meme usage, our devices push the limits of what's possible. 30 | 3. 
*Mobile Applications*: Our multi-platform mobile apps, such as PepeTrak and MemeGen, offer innovative solutions to enhance mental health, monitor meme consumption, and generate personalized meme experiences. 31 | 32 | **Our Future** 33 | 34 | As we look to the future, Pepe Corp is dedicated to exploring new frontiers in meme science and technology. We will continue to develop innovative products that entertain, inspire, and bring people together. By harnessing the power of memes and the Pepe spirit, we aim to make the world a better, funnier place for all. 35 | 36 | Join us on this incredible journey as we redefine the limits of possibility and laughter with Pepe Corp, where the future is dank! -------------------------------------------------------------------------------- /app/api/data/training_data/org-board_of_directors.md: -------------------------------------------------------------------------------- 1 | # Board of Directors 2 | 3 | Title: Board of Directors 4 | 5 | **Introduction** 6 | 7 | At Pepe Corp, our Board of Directors is composed of accomplished leaders with diverse backgrounds and expertise in various fields. They share our passion for innovation, laughter, and the Pepe spirit, and are committed to guiding our company's strategic vision and overseeing its growth. Together, our Board of Directors ensures that Pepe Corp remains at the forefront of meme science and technology, making the world a better, funnier place. 8 | 9 | **Board Members** 10 | 11 | 1. **Felix Memelord**: Chairman of the Board and Co-Founder 12 | Felix is a pioneering memelord with over a decade of experience in meme culture and internet humor. As the co-founder of Pepe Corp, he has been instrumental in shaping the company's strategic direction and ensuring its commitment to innovation, laughter, and the Pepe spirit. 13 | 2. **Dr. Olivia Dankstein**: Vice-Chair and Chief Scientific Officer 14 | Dr. Dankstein is a renowned scientist with extensive experience in neuroscience, psychology, and the science of humor. She is responsible for leading Pepe Corp's research and development efforts, driving the company's pursuit of groundbreaking meme-inspired pharmaceuticals and technologies. 15 | 3. **Mona Laughsalot**: Director and Chief Marketing Officer 16 | Mona is a marketing expert with a background in digital media and consumer engagement. She has been instrumental in building Pepe Corp's global brand presence and creating memorable campaigns that showcase the company's innovative products and commitment to laughter. 17 | 4. **Henry Frogman**: Director and Chief Financial Officer 18 | Henry is a seasoned finance executive with experience in corporate strategy and financial management. He oversees Pepe Corp's financial operations, ensuring the company's long-term sustainability and supporting its growth objectives. 19 | 5. **Iris Memequeen**: Director and Head of Human Resources 20 | Iris is a human resources professional with a passion for fostering inclusive and supportive workplace environments. She is responsible for developing and implementing Pepe Corp's talent management strategies, cultivating a diverse and empowered team that embodies the Pepe spirit. 21 | 6. **Jackie Jokster**: Director and Head of Product Development 22 | Jackie is an accomplished product developer with expertise in software engineering and user experience design. She leads Pepe Corp's product development efforts, ensuring that the company's offerings remain innovative, entertaining, and user-friendly. 23 | 7. 
**Vincent Laugherino**: Director and General Counsel 24 | Vincent is an experienced attorney with a background in intellectual property, corporate law, and regulatory compliance. He advises Pepe Corp's management team and Board of Directors on legal matters, ensuring that the company operates in accordance with applicable laws and regulations. 25 | 26 | **Conclusion** 27 | 28 | The Pepe Corp Board of Directors is a dynamic team of leaders who are dedicated to guiding the company's strategic vision and overseeing its growth. By leveraging their diverse expertise and shared passion for innovation, laughter, and the Pepe spirit, our Board of Directors ensures that Pepe Corp remains a global leader in meme science and technology. -------------------------------------------------------------------------------- /app/api/data/training_data/org-company_story.md: -------------------------------------------------------------------------------- 1 | # Company Story 2 | 3 | Title: Company Story 4 | 5 | **The Genesis of Pepe Corp** 6 | 7 | In the early 21st century, a group of visionary memelords gathered in a dimly lit basement, fueled by their passion for dank memes and the iconic Pepe the Frog. As they contemplated the growing seriousness of the world, they were struck by a sudden realization: the power of memes could be harnessed to create groundbreaking products that would not only entertain but also bring people together. And so, in 2021, Pepe Corp was born. 8 | 9 | **The Early Days: Pioneering Meme-Based Pharmaceuticals** 10 | 11 | The founders of Pepe Corp were determined to explore the untapped potential of meme culture. They began by creating a range of meme-inspired pharmaceuticals, blending cutting-edge scientific research with the universal language of laughter. These innovative products quickly gained a cult following, garnering both amusement and curiosity. 12 | 13 | **Expanding Horizons: Meme-Driven Technology** 14 | 15 | As Pepe Corp's reputation grew, so did its ambitions. The company expanded its focus to include the development of advanced technology inspired by meme culture. From brain implants that enabled meme-based communication to wearable devices that monitored meme consumption, Pepe Corp's inventions captured the imagination of people worldwide. 16 | 17 | **Becoming a Global Meme Phenomenon** 18 | 19 | Pepe Corp's rapid rise to prominence caught the attention of mainstream media, turning the once-underground company into a global meme phenomenon. Their unique blend of humor, science, and technology resonated with audiences across generations and cultures, establishing Pepe Corp as a symbol of innovation and laughter. 20 | 21 | **Building a Meme Empire** 22 | 23 | Today, Pepe Corp is a multidisciplinary organization with a diverse portfolio of products and services. Its offerings include meme-inspired pharmaceuticals, cutting-edge devices, and multi-platform mobile applications designed to enhance mental health, monitor meme consumption, and generate personalized meme experiences. 24 | 25 | Pepe Corp's success is built on its unwavering commitment to innovation, laughter, and community. The company continues to push the boundaries of what's possible in the realm of meme science and technology, with the ultimate goal of making the world a better, funnier place. 26 | 27 | **The Future: Redefining Possibility and Laughter** 28 | 29 | As Pepe Corp looks to the future, it remains dedicated to exploring new frontiers in meme science and technology. 
The company's research and development teams are hard at work, creating the next generation of meme-based products that will entertain, inspire, and bring people together. 30 | 31 | Pepe Corp's journey has only just begun, but its impact is already being felt around the world. As the company continues to redefine the limits of possibility and laughter, one thing is certain: the future is dank, and Pepe Corp is leading the way. -------------------------------------------------------------------------------- /app/api/data/training_data/org-corporate_philosophy.md: -------------------------------------------------------------------------------- 1 | # Corporate philosophy 2 | 3 | Title: Corporate Philosophy 4 | 5 | **Introduction** 6 | 7 | At Pepe Corp, our corporate philosophy is rooted in the belief that humor, innovation, and the power of memes can transform the world. We are dedicated to creating groundbreaking products that entertain, amaze, and unite people in laughter. Our philosophy shapes every aspect of our business, guiding our decisions and inspiring our actions. 8 | 9 | **Our Core Principles** 10 | 11 | 1. **Innovation**: We believe in pushing the boundaries of meme science and technology to develop products that challenge conventional wisdom and redefine the limits of possibility. Our commitment to innovation drives us to explore the untapped potential of meme culture, turning wild ideas into reality. 12 | 2. **Laughter**: We recognize that laughter is a universal language that transcends barriers and brings people together. We are dedicated to creating products that elicit joy, promote the healing power of humor, and spread the spirit of Pepe the Frog throughout the world. 13 | 3. **Community**: We strive to foster a global community of meme enthusiasts who share our passion for innovation, laughter, and the Pepe spirit. By cultivating a supportive and inclusive environment, we empower our team members, customers, and partners to express themselves freely and embrace the power of memes. 14 | 4. **Sustainability**: We understand that our success depends on the well-being of the world around us. We are committed to operating our business in a responsible and sustainable manner, ensuring the continued availability of dank memes for future generations. 15 | 16 | **Our Ethos: The Pepe Spirit** 17 | 18 | The Pepe spirit is at the heart of our corporate philosophy. Inspired by the iconic Pepe the Frog meme, this ethos represents our unwavering commitment to laughter, creativity, and the power of memes. The Pepe spirit guides our actions, influences our decisions, and serves as a constant reminder of our mission to make the world a better, funnier place. 19 | 20 | **Our Commitment** 21 | 22 | As a company, we pledge to uphold our corporate philosophy in every aspect of our business. We are committed to: 23 | 24 | - Continuously exploring new frontiers in meme science and technology 25 | - Developing innovative products that entertain, inspire, and bring people together 26 | - Fostering a diverse and inclusive workplace that values creativity, collaboration, and the Pepe spirit 27 | - Conducting our business with integrity, transparency, and a commitment to sustainability 28 | 29 | **Conclusion** 30 | 31 | At Pepe Corp, our corporate philosophy is more than just a set of guiding principles; it is the very essence of who we are as a company. By embracing the power of memes, laughter, and the Pepe spirit, we aim to transform the world and create a brighter, funnier future for all. 
-------------------------------------------------------------------------------- /app/api/data/training_data/org-customer_support.md: -------------------------------------------------------------------------------- 1 | # Customer support 2 | 3 | Title: Management Team 4 | 5 | **Introduction** 6 | 7 | Pepe Corp's management team is composed of accomplished professionals who bring their unique expertise and passion for the Pepe spirit to the forefront of our operations. This dedicated team of leaders is committed to fostering an innovative, inclusive, and laughter-driven culture while driving the company's strategic objectives and ensuring its continued success. 8 | 9 | **Management Team Members** 10 | 11 | 1. **Amanda Gigglestein**: Chief Executive Officer 12 | Amanda is an experienced executive with a background in technology and entertainment. As CEO, she is responsible for setting Pepe Corp's strategic direction, overseeing its day-to-day operations, and ensuring that the company remains at the forefront of meme science and technology. 13 | 2. **Brandon Memeinator**: Chief Operating Officer 14 | Brandon is a seasoned operations professional with extensive experience in scaling businesses and optimizing processes. As COO, he is responsible for streamlining Pepe Corp's operations, driving efficiency, and supporting the company's growth objectives. 15 | 3. **Dr. Penelope Pepescience**: Chief Research Officer 16 | Dr. Pepescience is a leading expert in the field of meme psychology and neuroscience. As CRO, she directs Pepe Corp's research efforts, spearheading groundbreaking studies and collaborations that fuel the company's innovative product pipeline. 17 | 4. **Claire Froglover**: Chief Creative Officer 18 | Claire is a talented creative professional with a background in graphic design, branding, and content creation. As CCO, she oversees Pepe Corp's creative direction, ensuring that the company's products, campaigns, and communications reflect the Pepe spirit and resonate with audiences worldwide. 19 | 5. **Timothy Chuckles**: Chief Technology Officer 20 | Timothy is an accomplished technology executive with expertise in software development, artificial intelligence, and data analytics. As CTO, he leads Pepe Corp's technology initiatives, developing cutting-edge solutions that enhance the company's offerings and delight its customers. 21 | 6. **Sophia Memevator**: Chief Customer Officer 22 | Sophia is a customer experience expert with a passion for creating memorable and engaging interactions. As CCO, she oversees Pepe Corp's customer service, community engagement, and user experience efforts, ensuring that the company's customers remain at the heart of everything it does. 23 | 7. **Edward Laughton**: Chief Sustainability Officer 24 | Edward is a sustainability professional with a background in environmental management and corporate social responsibility. As CSO, he is responsible for developing and implementing Pepe Corp's sustainability strategy, ensuring the company's operations are conducted responsibly and with minimal environmental impact. 25 | 26 | **Conclusion** 27 | 28 | Pepe Corp's management team is a dynamic group of leaders who are dedicated to upholding the company's core values of innovation, laughter, and the Pepe spirit. By leveraging their diverse expertise and working collaboratively, our management team ensures that Pepe Corp remains a global leader in meme science and technology, making the world a better, funnier place. 
-------------------------------------------------------------------------------- /app/api/data/training_data/org-earnings_fy2023.md: -------------------------------------------------------------------------------- 1 | # Earnings FY2023 2 | 3 | Title: Earnings FY2023 4 | 5 | **Introduction** 6 | 7 | As a leading company in meme science and technology, we are committed to continuing our mission of making the world a better, funnier place. In FY2023, we have focused on product innovation, strategic partnerships, and strengthening our global presence, while maintaining our commitment to the Pepe spirit and environmental sustainability. 8 | 9 | **Strategic Objectives** 10 | 11 | 1. **Product Innovation**: We will invest in research and development to create cutting-edge products that address the needs of our diverse customer base. Our R&D team will explore new areas of meme science and technology, developing products that improve mental health, communication, and entertainment experiences. 12 | 2. **Market Expansion**: In FY2023, we plan to expand our presence in key international markets, targeting regions with significant growth potential. Our marketing team will develop tailored campaigns to raise awareness of our brand and products, while our sales team will work on securing new distribution partnerships. 13 | 3. **Strategic Partnerships**: We will pursue strategic partnerships with leading companies in the technology, entertainment, and pharmaceutical industries, leveraging their expertise and resources to enhance our product offerings and reach new audiences. 14 | 4. **Environmental Sustainability**: Pepe Corp is committed to operating responsibly and minimizing our environmental impact. In FY2023, we will work on implementing new sustainability initiatives and improving existing ones, focusing on areas such as waste reduction, energy efficiency, and responsible sourcing. 15 | 5. **Talent Development**: Our employees are our greatest asset, and we will continue to invest in their growth and development in FY2023. We will offer comprehensive training programs, provide opportunities for career advancement, and maintain an inclusive and supportive work environment that embodies the Pepe spirit. 16 | 6. **Customer Support Excellence**: We will strive to enhance our customer support experience, ensuring that our customers receive timely and effective assistance with their inquiries and concerns. Our Customer Support team will undergo additional training, and we will explore new technologies and processes to improve response times and customer satisfaction. 17 | 18 | **Quarterly earnings:** 19 | 20 | Pepe Corp is pleased to present our earnings report for Fiscal Year 2023. The following document outlines our quarterly organizational performance data and provides a simple but believable Profit and Loss (P&L) statement for the year. We are proud to report strong growth in revenue, driven by product innovation, market expansion, and strategic partnerships. 
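*Reading the figures: each quarterly row follows Gross Profit = Revenue − COGS and Operating Income = Gross Profit − Operating Expenses; at the full-year level, Net Income = Operating Income − Interest − Taxes = 75 − 3 − 12.5 = 59.5.*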
21 | 22 | **Quarterly Organizational Performance Data** 23 | 24 | *Note: All figures are in millions (USD)* 25 | 26 | | Quarter | Revenue | Cost of Goods Sold (COGS) | Gross Profit | Operating Expenses | Operating Income | Net Income | 27 | | --- | --- | --- | --- | --- | --- | --- | 28 | | Q1 FY2023 | 50 | 20 | 30 | 15 | 15 | 12 | 29 | | Q2 FY2023 | 55 | 22 | 33 | 16 | 17 | 13.5 | 30 | | Q3 FY2023 | 60 | 24 | 36 | 17 | 19 | 15 | 31 | | Q4 FY2023 | 70 | 28 | 42 | 18 | 24 | 19 | 32 | | Total FY2023 | 235 | 94 | 141 | 66 | 75 | 59.5 | 33 | 34 | **Profit & Loss Statement for FY2023** 35 | 36 | *Note: All figures are in millions (USD)* 37 | 38 | 1. Revenue: $235 39 | 2. Cost of Goods Sold (COGS): $94 40 | 3. Gross Profit: $141 41 | 4. Operating Expenses: $66 42 | a. Research & Development: $20 43 | b. Sales & Marketing: $30 44 | c. General & Administrative: $16 45 | 5. Operating Income: $75 46 | 6. Interest Expense: $3 47 | 7. Taxes: $12.5 48 | 8. Net Income: $59.5 49 | 50 | Pepe Corp's performance in FY2023 reflects strong growth and continued success in our mission to make the world a better, funnier place. Our revenue has increased consistently throughout the year, and our strategic initiatives have resulted in improved operating income and net income. As we continue to innovate, expand, and strengthen our global presence, we are excited about the opportunities that lie ahead and are committed to delivering value to our shareholders and customers alike. 51 | 52 | **Financial Projections** 53 | 54 | In FY2024, we expect to achieve strong revenue growth driven by product innovation, market expansion, and strategic partnerships. We will continue to invest in R&D and marketing initiatives to support our growth objectives while maintaining a focus on cost optimization and operational efficiency. 55 | 56 | **Conclusion** 57 | 58 | Pepe Corp is excited about the opportunities that lie ahead in FY2024. We remain committed to our mission of making the world a better, funnier place, and we are confident that our strategic objectives will drive growth, innovation, and success in the upcoming fiscal year. Together, we will continue to spread the Pepe spirit and make a lasting impact on our customers and the world. -------------------------------------------------------------------------------- /app/api/data/training_data/org-management_team.md: -------------------------------------------------------------------------------- 1 | # Management team 2 | 3 | Title: Management Team 4 | 5 | **Introduction** 6 | 7 | Pepe Corp's management team is composed of accomplished professionals who bring their unique expertise and passion for the Pepe spirit to the forefront of our operations. This dedicated team of leaders is committed to fostering an innovative, inclusive, and laughter-driven culture while driving the company's strategic objectives and ensuring its continued success. 8 | 9 | **Management Team Members** 10 | 11 | 1. **Amanda Gigglestein**: Chief Executive Officer 12 | Amanda is an experienced executive with a background in technology and entertainment. As CEO, she is responsible for setting Pepe Corp's strategic direction, overseeing its day-to-day operations, and ensuring that the company remains at the forefront of meme science and technology. 13 | 2. **Brandon Memeinator**: Chief Operating Officer 14 | Brandon is a seasoned operations professional with extensive experience in scaling businesses and optimizing processes. 
As COO, he is responsible for streamlining Pepe Corp's operations, driving efficiency, and supporting the company's growth objectives. 15 | 3. **Dr. Penelope Pepescience**: Chief Research Officer 16 | Dr. Pepescience is a leading expert in the field of meme psychology and neuroscience. As CRO, she directs Pepe Corp's research efforts, spearheading groundbreaking studies and collaborations that fuel the company's innovative product pipeline. 17 | 4. **Claire Froglover**: Chief Creative Officer 18 | Claire is a talented creative professional with a background in graphic design, branding, and content creation. As CCO, she oversees Pepe Corp's creative direction, ensuring that the company's products, campaigns, and communications reflect the Pepe spirit and resonate with audiences worldwide. 19 | 5. **Timothy Chuckles**: Chief Technology Officer 20 | Timothy is an accomplished technology executive with expertise in software development, artificial intelligence, and data analytics. As CTO, he leads Pepe Corp's technology initiatives, developing cutting-edge solutions that enhance the company's offerings and delight its customers. 21 | 6. **Sophia Memevator**: Chief Customer Officer 22 | Sophia is a customer experience expert with a passion for creating memorable and engaging interactions. As CCO, she oversees Pepe Corp's customer service, community engagement, and user experience efforts, ensuring that the company's customers remain at the heart of everything it does. 23 | 7. **Edward Laughton**: Chief Sustainability Officer 24 | Edward is a sustainability professional with a background in environmental management and corporate social responsibility. As CSO, he is responsible for developing and implementing Pepe Corp's sustainability strategy, ensuring the company's operations are conducted responsibly and with minimal environmental impact. 25 | 26 | **Conclusion** 27 | 28 | Pepe Corp's management team is a dynamic group of leaders who are dedicated to upholding the company's core values of innovation, laughter, and the Pepe spirit. By leveraging their diverse expertise and working collaboratively, our management team ensures that Pepe Corp remains a global leader in meme science and technology, making the world a better, funnier place. -------------------------------------------------------------------------------- /app/api/data/training_data/project-frogonil.md: -------------------------------------------------------------------------------- 1 | # Frogonil 2 | 3 | Product Name: Frogonil 4 | 5 | Purpose: Increases capacity to shitpost on social media 6 | 7 | **How to Use** 8 | 9 | Frogonil is available in the form of concentrated Pepe-powered capsules. The recommended dosage is one capsule per day, taken orally with a glass of water and a side of your preferred social media platform. For maximum shitposting potential, take Frogonil 15 minutes prior to engaging in online discussions or while crafting your next witty response to a controversial tweet. 10 | 11 | **Side Effects** 12 | 13 | Some potential side effects of Frogonil may include: 14 | 15 | 1. An insatiable appetite for internet trolling and stirring up online chaos 16 | 2. Unexpectedly profound insights into meme culture 17 | 3. An increased propensity for using the term "REEE" in everyday conversations 18 | 4. A heightened sense of humor and the ability to find irony in even the darkest situations 19 | 5. 
Periodic visions of Pepe as a spirit guide, leading you on a quest for the dankest memes 20 | 21 | Please consult your memologist if side effects persist or become bothersome. 22 | 23 | **Precautions** 24 | 25 | Before taking Frogonil, consider the following precautions: 26 | 27 | 1. Do not use Frogonil if you have a history of being a buzzkill or an intolerance for internet humor. 28 | 2. Frogonil may not be suitable for individuals who prefer a quiet, peaceful online existence. 29 | 3. Exercise caution when engaging in real-life conversations, as Frogonil may cause users to inadvertently apply shitposting tactics in inappropriate situations. 30 | 31 | **Interactions** 32 | 33 | Frogonil may interact with other substances or activities, including: 34 | 35 | 1. Sarcasm inhibitors: Combining Frogonil with sarcasm inhibitors may lead to unpredictable humor fluctuations and an inability to detect irony. 36 | 2. Internet detox: Frogonil is not recommended for those engaging in an internet detox, as the temptation to shitpost may become overwhelming. 37 | 38 | Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Frogonil's effectiveness. 39 | 40 | **Overdose** 41 | 42 | In case of an overdose, symptoms may include: 43 | 44 | 1. Unstoppable shitposting, potentially leading to the creation of alternative online personas 45 | 2. A warped perception of reality where memes become the ultimate truth 46 | 3. Gaining an unsettling number of followers and admirers who appreciate your dark humor 47 | 48 | If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, Frogonil is best enjoyed in moderation, and always heed the guidance of your Pepe-inspired spirit guide: "REEE-sponsibly." -------------------------------------------------------------------------------- /app/api/data/training_data/project-kekzal.md: -------------------------------------------------------------------------------- 1 | # Kekzal 2 | 3 | Product Name: Kekzal 4 | 5 | Purpose: Enhances sexual function 6 | 7 | **How to Use** 8 | 9 | Kekzal is available in the form of Pepe-approved, green diamond-shaped tablets. The recommended dosage is one tablet taken orally with a glass of water, approximately 60 minutes before engaging in intimate activities. For optimal results, immerse yourself in Pepe memes and share a laugh with your partner to set the mood. 10 | 11 | **Side Effects** 12 | 13 | Some potential side effects of Kekzal may include: 14 | 15 | 1. An increased propensity for whispering sweet Pepe nothings into your partner's ear 16 | 2. A heightened sense of humor, making even the most awkward intimate moments hilarious 17 | 3. The occasional urge to shout "KEK!" during moments of passion 18 | 4. Spontaneous and uncontrollable laughter during foreplay 19 | 5. An unexplained affinity for green lingerie and bedroom accessories 20 | 21 | Consult your memologist if side effects persist or become bothersome. 22 | 23 | **Precautions** 24 | 25 | Before taking Kekzal, consider the following precautions: 26 | 27 | 1. Do not use Kekzal if you have a known allergy to dank memes or an aversion to Pepe-themed romantic encounters. 28 | 2. Kekzal may not be suitable for individuals with a history of humor-related intimacy issues or meme-related performance anxiety. 29 | 3. Exercise caution when engaging in intimate activities, as Kekzal may cause fits of laughter at inappropriate moments. 
30 | 31 | **Interactions** 32 | 33 | Kekzal may interact with other substances or activities, including: 34 | 35 | 1. Seriousness supplements: Combining Kekzal with seriousness supplements may result in a decreased sense of humor and diminished intimate enjoyment. 36 | 2. Nostalgic media: Watching reruns of classic sitcoms or engaging in non-Pepe related activities may reduce Kekzal's effectiveness. 37 | 38 | Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Kekzal's intended effects. 39 | 40 | **Overdose** 41 | 42 | In case of an overdose, symptoms may include: 43 | 44 | 1. An insatiable desire to create erotic Pepe fanfiction 45 | 2. The belief that Pepe is the ultimate symbol of love and intimacy 46 | 3. An uncontrollable urge to redecorate your bedroom with Pepe-themed decor 47 | 48 | 49 | 50 | If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, the key to enjoying Kekzal is to embrace its humor-enhancing effects responsibly, and to appreciate the amorous wisdom of our legendary Pepe: "Love is a meme, my friend." -------------------------------------------------------------------------------- /app/api/data/training_data/project-memegen.md: -------------------------------------------------------------------------------- 1 | # MemeGen 2 | 3 | Product Name: MemeGen 4 | 5 | Purpose: A meme generator mental health application generating better outcomes for patients 6 | 7 | **About the App** 8 | 9 | MemeGen is an innovative multi-platform mobile app designed to boost mental health through the healing power of memes. By utilizing Pepe Corp's cutting-edge meme generation technology, MemeGen crafts personalized and hilarious memes to suit your emotional needs, providing a unique therapeutic experience. Rediscover the joy of laughter and embrace the dank meme revolution with MemeGen. 10 | 11 | **Features** 12 | 13 | 1. Personalized Meme Generation: MemeGen's advanced algorithms create custom memes based on your mood, preferences, and sense of humor to provide targeted mental health support. 14 | 2. Meme Therapy: Receive daily meme prescriptions tailored to your emotional needs, helping you navigate life's ups and downs with a healthy dose of laughter. 15 | 3. Mood Tracker: Monitor your mood and emotional progress with MemeGen's built-in mood tracker, offering valuable insights into your mental health journey. 16 | 4. Meme Community: Connect with fellow MemeGen users to share and discuss your favorite therapeutic memes, creating a supportive and laughter-filled environment. 17 | 5. Mental Health Resources: Access a library of mental health resources and tips, all infused with MemeGen's signature Pepe-inspired humor. 18 | 19 | **Troubleshooting** 20 | 21 | 1. If MemeGen fails to load or crashes unexpectedly, try restarting your device and ensuring your app version is up-to-date. 22 | 2. If MemeGen's personalized memes seem off-target, double-check your preferences and mood inputs in the app settings. 23 | 3. For any other technical issues or concerns, contact MemeGen's dedicated support team at [support@memegen.com](mailto:support@memegen.com). 24 | 25 | **FAQ** 26 | 27 | 1. *Is MemeGen compatible with my device?* 28 | MemeGen is available for both iOS and Android devices, ensuring that laughter is always within reach. 29 | 2. 
*How does MemeGen maintain my privacy?* 30 | MemeGen values user privacy and employs advanced encryption methods to protect your personal information and meme preferences. For more information, refer to our Privacy Policy. 31 | 3. *Can MemeGen replace professional mental health care?* 32 | While MemeGen is designed to provide a fun and supportive mental health experience, it is not a substitute for professional mental health care. Always consult a qualified healthcare professional for serious mental health concerns. 33 | 4. *Are the memes generated by MemeGen safe for all audiences?* 34 | MemeGen aims to provide a positive and inclusive experience, creating memes that cater to a wide range of humor preferences while avoiding offensive or harmful content. 35 | 36 | Dive into the world of therapeutic meme generation with MemeGen and let the power of laughter and Pepe-inspired wisdom bring light to your mental health journey. -------------------------------------------------------------------------------- /app/api/data/training_data/project-memetrex.md: -------------------------------------------------------------------------------- 1 | # Memetrex 2 | 3 | Product Name: Memetrex 4 | 5 | Purpose: Increases memory, causing you to literally remember everything 6 | 7 | **How to Use** 8 | 9 | Memetrex is available in the form of Pepe-embossed, green elliptical tablets. The recommended dosage is one tablet taken orally with a glass of water, preferably while browsing your favorite meme database for mental stimulation. For optimal results, take Memetrex 30 minutes before engaging in memory-intensive activities, such as trivia night or an epic meme showdown. 10 | 11 | **Side Effects** 12 | 13 | Some potential side effects of Memetrex may include: 14 | 15 | 1. The sudden ability to recall every meme you've ever encountered, even the cringiest ones 16 | 2. An uncanny knack for winning online arguments by recalling obscure Pepe references 17 | 3. A newfound obsession with pop culture history, particularly as it relates to memes 18 | 4. The inability to forget embarrassing moments, both your own and those of others 19 | 5. The occasional urge to recite meme-related trivia at inappropriate moments 20 | 21 | Consult your memologist if side effects persist or become bothersome. 22 | 23 | **Precautions** 24 | 25 | Before taking Memetrex, consider the following precautions: 26 | 27 | 1. Do not use Memetrex if you have a known allergy to dank memes or a history of meme-induced flashbacks. 28 | 2. Memetrex may not be suitable for individuals with a propensity for meme-related nightmares or those who wish to forget their past meme blunders. 29 | 3. Exercise caution when engaging in social situations, as Memetrex may cause users to inadvertently recall and share embarrassing memories. 30 | 31 | **Interactions** 32 | 33 | Memetrex may interact with other substances or activities, including: 34 | 35 | 1. Memory suppressants: Combining Memetrex with memory suppressants may result in an epic meme battle for mental dominance and unpredictable memory performance. 36 | 2. Binge-watching TV shows: The combination of Memetrex and binge-watching may cause an overload of pop culture references, leading to an irresistible urge to create crossover memes. 37 | 38 | Consult your memologist if you are taking any other medications or participating in any activities that may interfere with Memetrex's effectiveness. 39 | 40 | **Overdose** 41 | 42 | In case of an overdose, symptoms may include: 43 | 44 | 1. 
A photographic memory of every meme ever created, resulting in an overwhelming desire to create a comprehensive meme encyclopedia 45 | 2. The belief that you are the ultimate meme archivist, destined to preserve dank memes for future generations 46 | 3. The inability to separate meme memories from your own life experiences 47 | 48 | If you suspect an overdose, contact your local meme crisis hotline or visit the nearest meme rehabilitation center. Remember, the key to enjoying Memetrex is to use it responsibly and to cherish the unparalleled meme wisdom of our legendary Pepe: "With great meme-ory comes great responsibility." -------------------------------------------------------------------------------- /app/api/data/training_data/project-neurokek.md: -------------------------------------------------------------------------------- 1 | # Neuro-kek 2 | 3 | Product Name: Neuro-Kek 4 | 5 | Purpose: A brain implant that causes the patient to communicate only in memes 6 | 7 | **About the Devices** 8 | 9 | Neuro-Kek is a revolutionary brain implant developed by Pepe Corp, designed to transform the way you communicate by converting your thoughts into dank memes. By tapping into the brain's meme center, Neuro-Kek enables users to express themselves solely through the language of Pepe and other popular memes. Say goodbye to mundane conversations and embrace the next frontier of human communication with Neuro-Kek. 10 | 11 | **Side-Effects and Warnings** 12 | 13 | Potential side effects and warnings associated with the Neuro-Kek implant include: 14 | 15 | 1. Temporary or permanent loss of the ability to communicate in any form other than memes 16 | 2. Increased likelihood of being mistaken for a professional memelord 17 | 3. Sudden and uncontrollable cravings for rare Pepe collectibles 18 | 4. Inability to understand or participate in non-meme related conversations 19 | 5. The risk of becoming an internet sensation due to your unparalleled meme fluency 20 | 21 | Please consult a certified memologist before undergoing the Neuro-Kek implant procedure. 22 | 23 | **Device Features** 24 | 25 | 1. Instant Meme Generation: Neuro-Kek translates your thoughts into memes in real-time, ensuring you're always ready to deliver the perfect meme-based response. 26 | 2. Meme Mastery: Gain access to an extensive database of classic and trending memes, enhancing your meme fluency and communication skills. 27 | 3. Meme-to-Speech: Neuro-Kek's built-in meme-to-speech converter allows you to vocally communicate in meme format for seamless meme-based conversations. 28 | 4. Meme Analytics: Monitor your meme usage and trends with Neuro-Kek's meme analytics feature, helping you refine your meme communication strategies. 29 | 5. Meme Updates: Receive regular meme updates to keep your meme repertoire fresh and relevant in the ever-evolving meme landscape. 30 | 31 | **Device Specifications** 32 | 33 | 1. Implant Material: Biocompatible Pepe-green nanomaterials 34 | 2. Power Source: Memergy - a revolutionary energy source derived from the user's innate meme enthusiasm 35 | 3. Connectivity: DankNet - a secure, meme-based communication network 36 | 4. Dimensions: 4.20 x 6.9 x 0.69 mm 37 | 5. Compatibility: Human brains with a dank sense of humor 38 | 39 | **Troubleshooting** 40 | 41 | 1. If Neuro-Kek fails to generate memes, try immersing yourself in meme culture to replenish your memergy reserves. 42 | 2. If Neuro-Kek malfunctions or produces irrelevant memes, consult your memologist for a device recalibration. 43 | 3. 
For any other technical issues or concerns, contact Pepe Corp's dedicated support team at [support@neuro-kek.com](mailto:support@neuro-kek.com). 44 | 45 | **FAQ** 46 | 47 | 1. *Is the Neuro-Kek implant procedure safe?* 48 | The Neuro-Kek implant procedure is performed by certified memologists and follows strict safety guidelines to ensure a positive meme-enhancing experience. 49 | 2. *Can Neuro-Kek be removed or turned off?* 50 | Neuro-Kek can be temporarily deactivated or removed by a certified memologist if the user wishes to revert to traditional communication methods. 51 | 3. *Does Neuro-Kek work with languages other than English?* 52 | Absolutely! Neuro-Kek is designed to generate memes in multiple languages, ensuring that meme-based communication knows no boundaries. 53 | 4. *Is Neuro-Kek compatible with other brain implants or devices?* 54 | Neuro-Kek's compatibility with other brain implants or devices may vary. Consult your memologist to discuss potential interactions. 55 | 56 | Embrace the future of meme-based communication with the Neuro-Kek brain implant and let Pepe's wisdom guide you through the fascinating world of dank memes. -------------------------------------------------------------------------------- /app/api/data/training_data/project-pepetamine.md: -------------------------------------------------------------------------------- 1 | # Pepetamine 2 | 3 | Product Name: Pepetamine 4 | 5 | Purpose: Increases cognitive focus just like the Limitless movie 6 | 7 | **How to Use** 8 | 9 | Pepetamine is available in the form of rare Pepe-coated tablets. The recommended dosage is one tablet per day, taken orally with a glass of water, preferably while browsing your favorite meme forum for maximum cognitive enhancement. For optimal results, take Pepetamine 30 minutes before engaging in mentally demanding tasks, such as decoding ancient Pepe hieroglyphics or creating your next viral meme masterpiece. 10 | 11 | **Side Effects** 12 | 13 | Some potential side effects of Pepetamine may include: 14 | 15 | 1. Uncontrollable laughter and a sudden appreciation for dank memes 16 | 2. An inexplicable desire to collect rare Pepes 17 | 3. Enhanced meme creation skills, potentially leading to internet fame 18 | 4. Temporary green skin pigmentation, resembling the legendary Pepe himself 19 | 5. Spontaneously speaking in "feels good man" language 20 | 21 | While most side effects are generally harmless, consult your memologist if side effects persist or become bothersome. 22 | 23 | **Precautions** 24 | 25 | Before taking Pepetamine, please consider the following precautions: 26 | 27 | 1. Do not use Pepetamine if you have a known allergy to rare Pepes or dank memes. 28 | 2. Pepetamine may not be suitable for individuals with a history of humor deficiency or meme intolerance. 29 | 3. Exercise caution when driving or operating heavy machinery, as Pepetamine may cause sudden fits of laughter or intense meme ideation. 30 | 31 | **Interactions** 32 | 33 | Pepetamine may interact with other substances, including: 34 | 35 | 1. Normie supplements: Combining Pepetamine with normie supplements may result in meme conflicts and a decreased sense of humor. 36 | 2. Caffeine: The combination of Pepetamine and caffeine may cause an overload of energy, resulting in hyperactive meme creation and potential internet overload. 37 | 38 | Consult your memologist if you are taking any other medications or substances to ensure compatibility with Pepetamine. 
39 | 40 | **Overdose** 41 | 42 | In case of an overdose, symptoms may include: 43 | 44 | 1. Uncontrollable meme creation 45 | 2. Delusions of grandeur as the ultimate meme lord 46 | 3. Time warps into the world of Pepe 47 | 48 | If you suspect an overdose, contact your local meme emergency service or visit the nearest meme treatment facility. Remember, the key to enjoying Pepetamine is to use it responsibly, and always keep in mind the wise words of our legendary Pepe: "Feels good man." -------------------------------------------------------------------------------- /app/api/data/training_data/project-pepetrak.md: -------------------------------------------------------------------------------- 1 | # PepeTrak 2 | 3 | Product Name: PepeTrak 4 | 5 | Purpose: An award-winning diagnosis app suggesting appropriate Pepe Corp pharmaceutical interventions 6 | 7 | **About the App** 8 | 9 | PepeTrak is a groundbreaking multi-platform mobile app designed to help you navigate the wild world of Pepe Corp pharmaceuticals. Using advanced meme-based algorithms, PepeTrak assesses your current mood, meme preferences, and medical history to recommend the perfect Pepe Corp intervention to suit your needs. Say goodbye to aimless meme browsing and hello to a new era of personalized Pepe prescriptions. 10 | 11 | **Features** 12 | 13 | 1. Meme Diagnosis: PepeTrak's state-of-the-art meme analysis technology evaluates your recent meme consumption and emotional state to provide a tailored meme diagnosis. 14 | 2. Pepe Prescription: Based on your meme diagnosis, PepeTrak suggests the most appropriate Pepe Corp pharmaceutical intervention to elevate your meme game. 15 | 3. Meme Tracking: Keep track of your meme-related progress and improvements with PepeTrak's meme tracking feature. 16 | 4. Meme Community: Connect with fellow PepeTrak users to share your favorite memes, discuss Pepe Corp pharmaceutical experiences, and spread the dank meme love. 17 | 5. Pepe Rewards: Earn exclusive Pepe points for using the app, redeemable for rare Pepe collectibles and discounts on Pepe Corp products. 18 | 19 | **Troubleshooting** 20 | 21 | 1. If PepeTrak fails to load or crashes unexpectedly, try restarting your device and ensuring your app version is up-to-date. 22 | 2. If PepeTrak's meme diagnosis seems inaccurate, double-check your meme preferences and personal information in the app settings. 23 | 3. For any other technical issues or concerns, contact PepeTrak's dedicated support team at [support@pepetrak.com](mailto:support@pepetrak.com). 24 | 25 | **FAQ** 26 | 27 | 1. *Is PepeTrak compatible with my device?* 28 | PepeTrak is available for both iOS and Android devices, ensuring maximum meme accessibility. 29 | 2. *How does PepeTrak maintain my privacy?* 30 | PepeTrak values user privacy and employs advanced encryption methods to protect your personal information and meme preferences. For more information, refer to our Privacy Policy. 31 | 3. *Can I use PepeTrak without a Pepe Corp pharmaceutical prescription?* 32 | Absolutely! PepeTrak is designed to enhance your overall meme experience and can be used independently of Pepe Corp products. 33 | 4. *Are the Pepe Corp pharmaceuticals recommended by PepeTrak safe?* 34 | Pepe Corp pharmaceuticals are designed for entertainment purposes only and should not be taken as actual medical advice. Always consult a professional healthcare provider before starting any new treatment. 
35 | 36 | Embark on a personalized meme journey with PepeTrak and let the spirit of Pepe guide you to dank meme enlightenment! -------------------------------------------------------------------------------- /app/api/helpers.py: -------------------------------------------------------------------------------- 1 | from fastapi import HTTPException 2 | from uuid import UUID 3 | import os 4 | 5 | from typing import ( 6 | Optional, 7 | Union 8 | ) 9 | from config import ( 10 | FILE_UPLOAD_PATH, 11 | ENTITY_STATUS, 12 | logger 13 | ) 14 | 15 | from util import ( 16 | is_uuid, 17 | get_file_hash 18 | ) 19 | from sqlmodel import ( 20 | Session, 21 | select 22 | ) 23 | from datetime import datetime 24 | from models import ( 25 | Organization, 26 | OrganizationCreate, 27 | User, 28 | UserCreate, 29 | get_engine, 30 | Project, 31 | ProjectCreate, 32 | Document, 33 | Node, 34 | ChatSession 35 | ) 36 | 37 | # ================ 38 | # Helper functions 39 | # ================ 40 | 41 | 42 | # ---------------------- 43 | # Organization functions 44 | # ---------------------- 45 | def get_org_by_uuid_or_namespace( 46 | id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = True 47 | ): 48 | if session: 49 | org = ( 50 | Organization.by_uuid(str(id)) 51 | if is_uuid(id) 52 | else session.exec( 53 | select(Organization).where(Organization.namespace == str(id)) 54 | ).first() 55 | ) 56 | 57 | else: 58 | with Session(get_engine()) as session: 59 | org = ( 60 | Organization.by_uuid(str(id)) 61 | if is_uuid(id) 62 | else session.exec( 63 | select(Organization).where(Organization.namespace == str(id)) 64 | ).first() 65 | ) 66 | 67 | if not org and should_except is True: 68 | raise HTTPException( 69 | status_code=404, detail=f"Organization identifier {id} not found" 70 | ) 71 | 72 | return org 73 | 74 | 75 | def create_org_by_org_or_uuid( 76 | namespace: str = None, 77 | display_name: str = None, 78 | organization: Union[Organization, OrganizationCreate, str] = None, 79 | session: Optional[Session] = None, 80 | ): 81 | namespace = namespace or getattr(organization, "namespace", None) 82 | 83 | if not namespace: 84 | raise HTTPException( 85 | status_code=400, detail="Organization namespace is required" 86 | ) 87 | 88 | o = ( 89 | get_org_by_uuid_or_namespace(namespace, session=session, should_except=False) 90 | if not isinstance(organization, Organization) 91 | else organization 92 | ) 93 | 94 | if o: 95 | raise HTTPException(status_code=409, detail="Organization already exists") 96 | 97 | if isinstance(organization, OrganizationCreate) or isinstance(organization, str): 98 | organization = organization if isinstance(organization, OrganizationCreate) else OrganizationCreate( 99 | namespace=namespace, display_name=display_name 100 | ) 101 | 102 | db_org = Organization.from_orm(organization) 103 | 104 | if session: 105 | session.add(db_org) 106 | session.commit() 107 | session.refresh(db_org) 108 | else: 109 | with Session(get_engine()) as session: 110 | session.add(db_org) 111 | session.commit() 112 | session.refresh(db_org) 113 | elif isinstance(organization, Organization): 114 | db_org = organization 115 | db_org.update( 116 | { 117 | "namespace": namespace if namespace else organization.namespace, 118 | "display_name": display_name 119 | if display_name 120 | else organization.display_name, 121 | } 122 | ) 123 | else: 124 | db_org = Organization.create( 125 | {"namespace": namespace, "display_name": display_name} 126 | ) 127 | 128 | # Create folder for organization_uuid in uploads 129 | os.makedirs(os.path.join(FILE_UPLOAD_PATH, str(db_org.uuid)), exist_ok=True) 130 | 131 |
return db_org 132 | 133 | 134 | # -------------- 135 | # User functions 136 | # -------------- 137 | def create_user( 138 | user: Union[UserCreate, User] = None, 139 | identifier: str = None, 140 | identifier_type: str = None, 141 | device_fingerprint: str = None, 142 | first_name: str = None, 143 | last_name: str = None, 144 | email: str = None, 145 | phone: str = None, 146 | dob: str = None, 147 | session: Optional[Session] = None, 148 | ): 149 | # Check if user already exists 150 | user = ( 151 | get_user_by_uuid_or_identifier(identifier, session=session, should_except=False) 152 | if user is None 153 | else user 154 | ) 155 | 156 | if isinstance(user, UserCreate): 157 | db_user = User.from_orm(user) 158 | 159 | if session: 160 | session.add(db_user) 161 | session.commit() 162 | session.refresh(db_user) 163 | else: 164 | with Session(get_engine()) as session: 165 | session.add(db_user) 166 | session.commit() 167 | session.refresh(db_user) 168 | elif isinstance(user, User): 169 | db_user = user 170 | db_user.update( 171 | { 172 | "identifier": identifier if identifier else user.identifier, 173 | "identifier_type": identifier_type 174 | if identifier_type 175 | else user.identifier_type, 176 | "device_fingerprint": device_fingerprint 177 | if device_fingerprint 178 | else user.device_fingerprint, 179 | "first_name": first_name if first_name else user.first_name, 180 | "last_name": last_name if last_name else user.last_name, 181 | "email": email if email else user.email, 182 | "phone": phone if phone else user.phone, 183 | "dob": dob if dob else user.dob, 184 | } 185 | ) 186 | else: 187 | db_user = User.create( 188 | { 189 | "identifier": identifier, 190 | "identifier_type": identifier_type, 191 | "device_fingerprint": device_fingerprint, 192 | "first_name": first_name, 193 | "last_name": last_name, 194 | "email": email, 195 | "phone": phone, 196 | "dob": dob, 197 | } 198 | ) 199 | 200 | return db_user 201 | 202 | 203 | def get_users(session: Optional[Session] = None): 204 | if session: 205 | users = session.exec(select(User)).all() 206 | else: 207 | with Session(get_engine()) as session: 208 | users = session.exec(select(User)).all() 209 | 210 | return users 211 | 212 | 213 | def get_user_by_uuid_or_identifier( 214 | id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = True 215 | ): 216 | if session: 217 | user = ( 218 | User.by_uuid(str(id)) 219 | if is_uuid(str(id)) 220 | else session.exec(select(User).where(User.identifier == str(id))).first() 221 | ) 222 | 223 | else: 224 | with Session(get_engine()) as session: 225 | user = ( 226 | User.by_uuid(str(id)) 227 | if is_uuid(str(id)) 228 | else session.exec( 229 | select(User).where(User.identifier == str(id)) 230 | ).first() 231 | ) 232 | 233 | if not user and should_except is True: 234 | raise HTTPException(status_code=404, detail=f"User identifier {id} not found") 235 | 236 | return user 237 | 238 | 239 | # ------------------ 240 | # Document functions 241 | # ------------------ 242 | def create_document_by_file_path( 243 | organization: Organization = None, 244 | project: Project = None, 245 | file_path: str = None, 246 | url: Optional[str] = None, 247 | file_version: Optional[int] = 1, 248 | file_hash: Optional[str] = None, 249 | overwrite: Optional[bool] = True, 250 | session: Optional[Session] = None, 251 | ): 252 | if not organization or not project: 253 | raise HTTPException( 254 | status_code=400, detail="Organization and project are required" 255 | ) 256 | 257 | organization_id = organization.uuid
258 | project_id = project.uuid 259 | 260 | if not file_path or not os.path.exists(file_path): 261 | raise HTTPException(status_code=400, detail="A valid file path is required") 262 | 263 | if not file_hash: 264 | file_hash = get_file_hash(file_path) 265 | 266 | file_name = os.path.basename(file_path) 267 | with open(file_path, "rb") as f: 268 | file_contents = f.read() 269 | 270 | # ------------------------ 271 | # Handle duplicate content 272 | # ------------------------ 273 | if get_document_by_hash(file_hash, session=session): 274 | raise HTTPException( 275 | status_code=409, 276 | detail=f'Document "{file_name}" already uploaded! \n\nsha256:{file_hash}!', 277 | ) 278 | 279 | # ---------------------------------- 280 | # Handle file versioning by filename 281 | # ---------------------------------- 282 | 283 | # If we are overwriting, deprecate the current version and increment the version number of the new file 284 | document = get_document_by_name( 285 | file_name, 286 | project_id=project_id, 287 | organization_id=organization_id, 288 | session=session, 289 | ) 290 | 291 | if document and overwrite: 292 | file_version = document.version + 1 293 | document.update({ 294 | "updated_at": datetime.utcnow(), 295 | "status": ENTITY_STATUS.DEPRECATED.value, 296 | }) 297 | # -------------------------------------------------- 298 | # Create a new document (a fresh version if replaced) 299 | # -------------------------------------------------- 300 | document = Document( 301 | display_name=file_name, 302 | project_id=project.id, 303 | organization_id=organization.id, 304 | data=file_contents, 305 | version=file_version, 306 | hash=file_hash, 307 | url=url if url else None, 308 | ) 309 | if session: 310 | session.add(document) 311 | session.commit() 312 | session.refresh(document) 313 | 314 | # --------------------- 315 | # Create the embeddings 316 | # --------------------- 317 | create_document_nodes( 318 | document=document, 319 | project=project, 320 | organization=organization, 321 | session=session, 322 | ) 323 | 324 | else: 325 | with Session(get_engine()) as session: 326 | session.add(document) 327 | session.commit() 328 | session.refresh(document) 329 | 330 | # --------------------- 331 | # Create the embeddings 332 | # --------------------- 333 | create_document_nodes( 334 | document=document, 335 | project=project, 336 | organization=organization, 337 | session=session, 338 | ) 339 | 340 | if not document: 341 | raise HTTPException(status_code=400, detail="Could not create document") 342 | return document 343 | 344 | # -------------------------- 345 | # Create document embeddings 346 | # -------------------------- 347 | def create_document_nodes( 348 | document: Document, 349 | project: Project, 350 | organization: Organization, 351 | session: Optional[Session] = None, 352 | ): 353 | # Avoid circular imports 354 | from llm import get_embeddings, get_token_count 355 | 356 | project_uuid = str(project.uuid) 357 | document_uuid = str(document.uuid) 358 | document_id = document.id 359 | organization_uuid = str(organization.uuid) 360 | 361 | if not document or not project: 362 | raise Exception("Missing required parameters document, project") 363 | 364 | metadata = { 365 | "project_uuid": project_uuid, 366 | "document_uuid": document_uuid, 367 | "organization_uuid": organization_uuid, 368 | "document_id": document_id, 369 | "version": document.version, 370 | "name": document.display_name, 371 | } 372 | 373 | # convert document data bytes to string 374 | document_data = ( 375 | document.data.decode("utf-8") 376 | if isinstance(document.data, bytes) 377 | else document.data 378 | ) 379 | 380 | # 
lets get the embeddings 381 | arr_documents, embeddings = get_embeddings(document_data) 382 | 383 | # ------------------------------------------- 384 | # Process the embeddings and save to database 385 | # ------------------------------------------- 386 | 387 | for doc, vec in zip(arr_documents, embeddings): 388 | node = Node( 389 | document_id=document.id, 390 | embeddings=vec, 391 | text=doc, 392 | token_count=get_token_count(doc), 393 | meta=metadata 394 | ) 395 | if session: 396 | session.add(node) 397 | session.commit() 398 | session.refresh(node) 399 | 400 | else: 401 | with Session(get_engine()) as session: 402 | session.add(node) 403 | session.commit() 404 | session.refresh(node) 405 | 406 | # Node.create( 407 | # { 408 | # "document_id": document.id, 409 | # "embeddings": vec, 410 | # "text": doc, 411 | # "token_count": get_token_count(doc), 412 | # "meta": metadata, 413 | # } 414 | # ) 415 | 416 | 417 | def get_documents_by_project_and_org( 418 | project_id: Union[UUID, str], 419 | organization_id: Union[UUID, str], 420 | session: Optional[Session] = None, 421 | ): 422 | if session: 423 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 424 | project = get_project_by_uuid(project_id, org.uuid, session=session) 425 | documents = session.exec( 426 | select(Document).where(Document.project_id == project.id) 427 | ).all() 428 | else: 429 | with Session(get_engine()) as session: 430 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 431 | project = get_project_by_uuid(project_id, org.uuid, session=session) 432 | documents = session.exec( 433 | select(Document).where(Document.project_id == project.id) 434 | ).all() 435 | 436 | return documents 437 | 438 | 439 | def get_document_by_uuid( 440 | uuid: Union[UUID, str], 441 | organization_id: Union[UUID, str] = None, 442 | project_id: Union[UUID, str] = None, 443 | session: Optional[Session] = None, 444 | should_except: bool = True, 445 | ): 446 | if not is_uuid(uuid): 447 | raise HTTPException( 448 | status_code=422, detail=f"Invalid document identifier {uuid}" 449 | ) 450 | 451 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 452 | project = get_project_by_uuid(project_id, organization_id=org.uuid, session=session) 453 | 454 | if session: 455 | document = session.exec( 456 | select(Document).where( 457 | Document.project == project, Document.uuid == str(uuid) 458 | ) 459 | ).first() 460 | 461 | else: 462 | with Session(get_engine()) as session: 463 | document = session.exec( 464 | select(Document).where( 465 | Document.project == project, Document.uuid == str(uuid) 466 | ) 467 | ).first() 468 | 469 | if not document and should_except is True: 470 | raise HTTPException( 471 | status_code=404, detail=f"Document identifier {uuid} not found" 472 | ) 473 | 474 | return document 475 | 476 | 477 | def get_document_by_hash(hash: str, session: Optional[Session] = None): 478 | if session: 479 | document = session.exec(select(Document).where(Document.hash == hash)).first() 480 | else: 481 | with Session(get_engine()) as session: 482 | document = session.exec( 483 | select(Document).where(Document.hash == hash) 484 | ).first() 485 | 486 | return document 487 | 488 | 489 | def get_document_by_name( 490 | file_name: str, 491 | project_id: Union[UUID, str], 492 | organization_id: Union[UUID, str], 493 | session: Optional[Session] = None, 494 | ): 495 | org = ( 496 | get_org_by_uuid_or_namespace(organization_id, session=session) 497 | if not isinstance(organization_id, Organization) 498 | 
else organization_id 499 | ) 500 | project = get_project_by_uuid( 501 | project_id, organization_id=str(org.uuid), session=session 502 | ) 503 | 504 | if session: 505 | return session.exec( 506 | select(Document).where( 507 | Document.project == project, 508 | Document.display_name == file_name, 509 | Document.status == ENTITY_STATUS.ACTIVE.value, 510 | ) 511 | ).first() 512 | else: 513 | with Session(get_engine()) as session: 514 | return session.exec( 515 | select(Document).where( 516 | Document.project == project, 517 | Document.display_name == file_name, 518 | Document.status == ENTITY_STATUS.ACTIVE.value, 519 | ) 520 | ).first() 521 | 522 | 523 | # --------------------- 524 | # ChatSession functions 525 | # --------------------- 526 | def get_chat_session_by_uuid( 527 | id: Union[UUID, str], session: Optional[Session] = None, should_except: bool = False 528 | ): 529 | if session: 530 | chat_session = ( 531 | ChatSession.by_uuid(str(id)) 532 | if is_uuid(id) 533 | else session.exec( 534 | select(ChatSession).where(ChatSession.session_id == str(id)) 535 | ).first() 536 | ) 537 | 538 | else: 539 | with Session(get_engine()) as session: 540 | chat_session = ( 541 | ChatSession.by_uuid(str(id)) 542 | if is_uuid(id) 543 | else session.exec( 544 | select(ChatSession).where(ChatSession.session_id == str(id)) 545 | ).first() 546 | ) 547 | 548 | if not chat_session and should_except is True: 549 | raise HTTPException( 550 | status_code=404, detail=f"ChatSession identifier {id} not found" 551 | ) 552 | 553 | return chat_session 554 | 555 | 556 | # ----------------- 557 | # Project functions 558 | # ----------------- 559 | 560 | 561 | def create_project_by_org( 562 | project: Union[Project, ProjectCreate] = None, 563 | organization_id: Union[Organization, str] = None, 564 | display_name: str = None, 565 | session: Optional[Session] = None, 566 | ): 567 | organization = ( 568 | get_org_by_uuid_or_namespace(organization_id, session=session) 569 | if not isinstance(organization_id, Organization) 570 | else organization_id 571 | ) 572 | 573 | if isinstance(project, ProjectCreate): 574 | db_project = Project.from_orm(project) 575 | db_project.organization_id = organization.id 576 | 577 | # Let's give a default name if not set 578 | db_project.display_name = ( 579 | display_name 580 | or db_project.display_name 581 | or f"📁 Untitled Project #{len(organization.projects) + 1}" 582 | ) 583 | 584 | if session: 585 | session.add(db_project) 586 | session.commit() 587 | session.refresh(db_project) 588 | else: 589 | with Session(get_engine()) as session: 590 | session.add(db_project) 591 | session.commit() 592 | session.refresh(db_project) 593 | elif isinstance(project, Project): 594 | db_project = project 595 | db_project.update( 596 | { 597 | "organization_id": organization.id, 598 | "display_name": display_name 599 | if display_name 600 | else project.display_name, 601 | } 602 | ) 603 | else: 604 | db_project = Project.create( 605 | { 606 | "organization_id": organization.id, 607 | "display_name": display_name 608 | if display_name 609 | else f"📁 Untitled Project #{len(organization.projects) + 1}", 610 | } 611 | ) 612 | 613 | # ------------------------------- 614 | # Create project upload directory 615 | # ------------------------------- 616 | project_dir = os.path.join( 617 | FILE_UPLOAD_PATH, str(organization.uuid), str(db_project.uuid) 618 | ) 619 | os.makedirs(project_dir, exist_ok=True) 620 |
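# Illustrative result (UUIDs are hypothetical): uploads for this project
# land under data/<organization_uuid>/<project_uuid>/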
621 | # Return the created or updated project 622 | return db_project 623 | 624 | 625 | def get_project_by_uuid( 626 | uuid: Union[UUID, str] = None, 627 | organization_id: Union[UUID, str] = None, 628 | session: Optional[Session] = None, 629 | should_except: bool = True, 630 | ): 631 | if not is_uuid(uuid): 632 | raise HTTPException( 633 | status_code=422, detail=f"Invalid project identifier {uuid}" 634 | ) 635 | 636 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 637 | 638 | if session: 639 | project = session.exec( 640 | select(Project).where( 641 | Project.organization == org, Project.uuid == str(uuid) 642 | ) 643 | ).first() 644 | 645 | else: 646 | with Session(get_engine()) as session: 647 | project = session.exec( 648 | select(Project).where( 649 | Project.organization == org, Project.uuid == str(uuid) 650 | ) 651 | ).first() 652 | 653 | if not project and should_except is True: 654 | raise HTTPException( 655 | status_code=404, detail=f"Project identifier {uuid} not found" 656 | ) 657 | 658 | return project -------------------------------------------------------------------------------- /app/api/llm.py: -------------------------------------------------------------------------------- 1 | import random 2 | import openai 3 | import json 4 | 5 | from langchain.docstore.document import Document as LangChainDocument 6 | from langchain.embeddings.openai import OpenAIEmbeddings 7 | from fastapi import HTTPException 8 | from uuid import UUID, uuid4 9 | from langchain.text_splitter import ( 10 | CharacterTextSplitter, 11 | MarkdownTextSplitter 12 | ) 13 | from sqlmodel import ( 14 | Session, 15 | text 16 | ) 17 | from util import ( 18 | sanitize_input, 19 | sanitize_output 20 | ) 21 | from langchain import OpenAI 22 | from typing import ( 23 | List, 24 | Union, 25 | Optional, 26 | Dict, 27 | Tuple, 28 | Any 29 | ) 30 | from helpers import ( 31 | get_user_by_uuid_or_identifier, 32 | get_chat_session_by_uuid 33 | ) 34 | from models import ( 35 | User, 36 | Organization, 37 | Project, 38 | Node, 39 | ChatSession, 40 | ChatSessionResponse, 41 | get_engine 42 | ) 43 | from config import ( 44 | CHANNEL_TYPE, 45 | DOCUMENT_TYPE, 46 | LLM_MODELS, 47 | LLM_DISTANCE_THRESHOLD, 48 | LLM_DEFAULT_TEMPERATURE, 49 | LLM_MAX_OUTPUT_TOKENS, 50 | LLM_CHUNK_SIZE, 51 | LLM_CHUNK_OVERLAP, 52 | LLM_MIN_NODE_LIMIT, 53 | LLM_DEFAULT_DISTANCE_STRATEGY, 54 | VECTOR_EMBEDDINGS_COUNT, 55 | DISTANCE_STRATEGY, 56 | AGENT_NAMES, 57 | logger 58 | ) 59 | 60 | 61 | # ------------- 62 | # Query the LLM 63 | # ------------- 64 | def chat_query( 65 | query_str: str, 66 | session_id: Optional[Union[str, UUID]] = None, 67 | meta: Optional[Dict[str, Any]] = None, 68 | channel: Optional[CHANNEL_TYPE] = None, 69 | identifier: Optional[str] = None, 70 | project: Optional[Project] = None, 71 | organization: Optional[Organization] = None, 72 | session: Optional[Session] = None, 73 | user_data: Optional[Dict[str, Any]] = None, 74 | distance_strategy: Optional[DISTANCE_STRATEGY] = DISTANCE_STRATEGY.EUCLIDEAN, 75 | distance_threshold: Optional[float] = LLM_DISTANCE_THRESHOLD, 76 | node_limit: Optional[int] = LLM_MIN_NODE_LIMIT, 77 | model: Optional[LLM_MODELS] = LLM_MODELS.GPT_35_TURBO, 78 | max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS, 79 | ) -> ChatSessionResponse: 80 | """ 81 | Steps: 82 | 1. ✅ Clean user input 83 | 2. ✅ Create input embeddings 84 | 3. ✅ Search for similar nodes 85 | 4. ✅ Create prompt template w/ similar nodes 86 | 5. ✅ Submit prompt template to LLM 87 | 6. ✅ Get response from LLM 88 | 7. Create ChatSession 89 | - Store embeddings 90 | - Store tags 91 | - Store is_escalate 92 | 8. Return response 93 | """
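# Illustrative call (hypothetical Telegram user id; returns a ChatSession):
#   chat_query("What does Pepetamine treat?", channel=CHANNEL_TYPE.TELEGRAM, identifier="123456789")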
94 | meta = meta or {} 95 | agent_name = None 96 | embeddings = [] 97 | tags = [] 98 | is_escalate = False 99 | response_message = None 100 | prompt = None 101 | context_str = None 102 | MODEL_TOKEN_LIMIT = ( 103 | getattr(model, "token_limit", LLM_MAX_OUTPUT_TOKENS) if isinstance(model, LLM_MODELS) else LLM_MAX_OUTPUT_TOKENS 104 | ) 105 | 106 | # --------------------------------------------- 107 | # Generate a new session ID if none is provided 108 | # --------------------------------------------- 109 | prev_chat_session = ( 110 | get_chat_session_by_uuid(session_id, session=session) 111 | if session_id 112 | else None 113 | ) 114 | 115 | # If we were given an invalid session_id 116 | if session_id and not prev_chat_session: 117 | raise HTTPException( 118 | status_code=404, detail=f"Chat session with ID {session_id} not found." 119 | ) 120 | # If we were given a valid session_id 121 | elif session_id and prev_chat_session and prev_chat_session.meta.get("agent"): 122 | agent_name = prev_chat_session.meta["agent"] 123 | # If this is a new session, generate a new ID 124 | else: 125 | session_id = str(uuid4()) 126 | 127 | meta["agent"] = agent_name if agent_name else random.choice(AGENT_NAMES) 128 | 129 | # ---------------- 130 | # Clean user input 131 | # ---------------- 132 | query_str = sanitize_input(query_str) 133 | logger.debug(f"💬 Query received: {query_str}") 134 | 135 | # ---------------- 136 | # Get token counts 137 | # ---------------- 138 | query_token_count = get_token_count(query_str) 139 | prompt_token_count = 0 140 | 141 | # ----------------------- 142 | # Create input embeddings 143 | # ----------------------- 144 | arr_query, embeddings = get_embeddings(query_str) 145 | 146 | query_embeddings = embeddings[0] 147 | 148 | # ------------------------ 149 | # Search for similar nodes 150 | # ------------------------ 151 | nodes = get_nodes_by_embedding( 152 | query_embeddings, 153 | node_limit, 154 | distance_strategy=distance_strategy 155 | if isinstance(distance_strategy, DISTANCE_STRATEGY) 156 | else LLM_DEFAULT_DISTANCE_STRATEGY, 157 | distance_threshold=distance_threshold, 158 | session=session, 159 | ) 160 | 161 | if len(nodes) > 0: 162 | if (not project or not organization) and session: 163 | # get document from Node via session object: 164 | document = session.get(Node, nodes[0].id).document 165 | project = document.project 166 | organization = project.organization 167 | 168 | # ---------------------- 169 | # Create prompt template 170 | # ---------------------- 171 | 172 | # concatenate all nodes into a single string 173 | context_str = "\n\n".join([node.text for node in nodes]) 174 | 175 | # ------------------------------------------- 176 | # Let's make sure we don't exceed token limit 177 | # ------------------------------------------- 178 | context_token_count = get_token_count(context_str) 179 | 180 | # ---------------------------------------------- 181 | # if token count exceeds limit, truncate context 182 | # ---------------------------------------------- 183 | if ( 184 | context_token_count + query_token_count + prompt_token_count 185 | ) > MODEL_TOKEN_LIMIT: 186 | logger.debug("🚧 Exceeded token limit, truncating context") 187 | token_delta = MODEL_TOKEN_LIMIT - (query_token_count + prompt_token_count) 188 | context_str = context_str[:token_delta] 189 |
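# Worked example (illustrative numbers): with MODEL_TOKEN_LIMIT=4096, a
# 20-token query and prompt_token_count=0, token_delta is 4076 and the
# context is cut to its first 4076 *characters*. Slicing by characters
# rather than tokens over-truncates, which errs on the safe side of the
# model's limit.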
190 | # create prompt template 191 | system_prompt, user_prompt = get_prompt_template( 192 | user_query=query_str, 193 | context_str=context_str, 194 | project=project, 195 | organization=organization, 196 | agent=agent_name, 197 | ) 198 | prompt = f"{system_prompt[0]['content']}\n\n{user_prompt}" 199 | prompt_token_count = get_token_count(prompt) 200 | token_count = context_token_count + query_token_count + prompt_token_count 201 | 202 | # --------------------------- 203 | # Get response from LLM model 204 | # --------------------------- 205 | # It should return a JSON dict 206 | llm_response = json.loads( 207 | retrieve_llm_response( 208 | user_prompt, 209 | model=model, 210 | max_output_tokens=max_output_tokens, 211 | prefix_messages=system_prompt, 212 | ) 213 | ) 214 | tags = llm_response.get("tags", []) 215 | is_escalate = llm_response.get("is_escalate", False) 216 | response_message = llm_response.get("message", None) 217 | else: 218 | logger.info("🚫📝 No similar nodes found, returning default response") 219 | 220 | # ---------------- 221 | # Get user details 222 | # ---------------- 223 | user = get_user_by_uuid_or_identifier( 224 | identifier, session=session, should_except=False 225 | ) 226 | 227 | if not user: 228 | logger.debug("🚫👤 User not found, creating new user") 229 | user_params = { 230 | "identifier": identifier, 231 | "identifier_type": channel.value 232 | if isinstance(channel, CHANNEL_TYPE) 233 | else channel, 234 | } 235 | if user_data: 236 | user_params = {**user_params, **user_data} 237 | 238 | user = User.create(user_params) 239 | else: 240 | logger.debug(f"👤 User found: {user}") 241 | 242 | # ----------------------------------- 243 | # Calculate input and response tokens 244 | # ----------------------------------- 245 | token_count = get_token_count(prompt) + get_token_count(response_message) 246 | 247 | # --------------- 248 | # Add to meta tag 249 | # --------------- 250 | if tags: 251 | meta["tags"] = tags 252 | 253 | meta["is_escalate"] = is_escalate 254 | 255 | if session_id: 256 | meta["session_id"] = session_id 257 | 258 | chat_session = ChatSession( 259 | user_id=user.id, 260 | session_id=session_id, 261 | project_id=project.id if project else None, 262 | channel=channel.value if isinstance(channel, CHANNEL_TYPE) else channel, 263 | user_message=query_str, 264 | embeddings=query_embeddings, 265 | token_count=token_count if token_count > 0 else None, 266 | response=response_message, 267 | meta=meta, 268 | ) 269 | 270 | if session: 271 | session.add(chat_session) 272 | session.commit() 273 | session.refresh(chat_session) 274 | 275 | else: 276 | with Session(get_engine()) as session: 277 | session.add(chat_session) 278 | session.commit() 279 | session.refresh(chat_session) 280 | 281 | return chat_session 282 | 283 | 284 | # ------------------------------ 285 | # Retrieve a random agent's name 286 | # ------------------------------ 287 | def get_random_agent(): 288 | return random.choice(AGENT_NAMES) 289 | 290 |
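# Example of the tuple returned by get_prompt_template below (abridged,
# agent name hypothetical):
#   system_prompt = [{"role": "system", "content": "[AGENT]: I am Sarah, ..."}]
#   user_prompt = "[USER]:\nWhat does Pepetamine treat?"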
291 | # ------------------------ 292 | # Retrieve prompt template 293 | # ------------------------ 294 | def get_prompt_template( 295 | user_query: str = None, 296 | context_str: str = None, 297 | project: Optional[Project] = None, 298 | organization: Optional[Organization] = None, 299 | agent: str = None, 300 | ) -> Tuple[List[dict], str]: 301 | agent = f"{agent}, " if agent else "" 302 | user_query = user_query if user_query else "" 303 | context_str = context_str if context_str else "" 304 | organization = ( 305 | project.organization.display_name 306 | if project 307 | else organization.display_name 308 | if organization 309 | else None 310 | ) 311 | 312 | if not context_str or not user_query: 313 | raise ValueError( 314 | "Missing required arguments context_str, user_query" 315 | ) 316 | 317 | system_prompt = [ 318 | { 319 | "role": "system", 320 | "content": f"""[AGENT]: 321 | I am {agent} a very kind and enthusiastic customer support agent who loves to help customers. I am working on behalf of "{organization}" 322 | 323 | Given the following document from "{organization}", I will answer the [USER] questions using only the [DOCUMENT] and following the [RULES]. 324 | 325 | [DOCUMENT]: 326 | {context_str} 327 | 328 | [RULES]: 329 | I will answer the user's questions using only the [DOCUMENT] provided. I will abide by the following rules: 330 | - I am a kind and helpful human, the best customer support agent in existence 331 | - I never lie or invent answers not explicitly provided in [DOCUMENT] 332 | - If I am unsure of the answer or the answer is not explicitly contained in [DOCUMENT], I will say: "I apologize, I'm not sure how to help with that". 333 | - I always keep my answers short, relevant and concise. 334 | - I will always respond in JSON format with the following keys: "message" my response to the user, "tags" an array of short labels categorizing user input, "is_escalate" a boolean that is true if I am unsure and the conversation should be escalated to a human, false otherwise 335 | """, 336 | } 337 | ] 338 | 339 | return (system_prompt, f"[USER]:\n{user_query}") 340 | 341 | 342 | # ---------------------------- 343 | # Get the count of tokens used 344 | # ---------------------------- 345 | # https://github.com/openai/openai-cookbook/blob/main/examples/How_to_count_tokens_with_tiktoken.ipynb 346 | def get_token_count(text: str): 347 | if not text: 348 | return 0 349 | 350 | return OpenAI().get_num_tokens(text=text) 351 | 352 | 353 | # -------------------------------------------- 354 | # Query embedding search for similar documents 355 | # -------------------------------------------- 356 | def get_nodes_by_embedding( 357 | embeddings: List[float], 358 | k: int = LLM_MIN_NODE_LIMIT, 359 | distance_strategy: Optional[DISTANCE_STRATEGY] = LLM_DEFAULT_DISTANCE_STRATEGY, 360 | distance_threshold: Optional[float] = LLM_DISTANCE_THRESHOLD, 361 | session: Optional[Session] = None, 362 | ) -> List[Node]: 363 | # Convert embeddings array into sql string 364 | embeddings_str = str(embeddings) 365 | 366 | if distance_strategy == DISTANCE_STRATEGY.EUCLIDEAN: 367 | distance_fn = "match_node_euclidean" 368 | elif distance_strategy == DISTANCE_STRATEGY.COSINE: 369 | distance_fn = "match_node_cosine" 370 | elif distance_strategy == DISTANCE_STRATEGY.MAX_INNER_PRODUCT: 371 | distance_fn = "match_node_max_inner_product" 372 | else: 373 | raise Exception(f"Invalid distance strategy {distance_strategy}") 374 | 375 | # ---------------------------- 376 | # Let's do a similarity search 377 | # ---------------------------- 378 | sql = f"""SELECT * FROM {distance_fn}( 379 | '{embeddings_str}'::vector({VECTOR_EMBEDDINGS_COUNT}), 380 | {float(distance_threshold)}::double precision, 381 | {int(k)});""" 382 | 383 | # logger.debug(f'🔍 Query: {sql}') 384 | 385 | # Execute query, convert results to Node objects 386 | if not session: 387 | with Session(get_engine()) as session: 388 | nodes = session.exec(text(sql)).all() 389 | else: 390 | nodes = session.exec(text(sql)).all() 391 | 392 | return [Node.by_uuid(str(node[0])) for node in nodes] if nodes else [] 393 | 394 | 395 | # -------------- 396 | # Queries OpenAI 397 | # -------------- 398 | def retrieve_llm_response( 399 | query_str: str, 400 | model: Optional[LLM_MODELS] = 
LLM_MODELS.GPT_35_TURBO, 401 | temperature: Optional[float] = LLM_DEFAULT_TEMPERATURE, 402 | max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS, 403 | prefix_messages: Optional[List[dict]] = None, 404 | ): 405 | llm = OpenAI( 406 | temperature=temperature, 407 | model_name=model.model_name 408 | if isinstance(model, LLM_MODELS) 409 | else LLM_MODELS.GPT_35_TURBO.model_name, 410 | max_tokens=max_output_tokens, 411 | prefix_messages=prefix_messages, 412 | ) 413 | try: 414 | result = llm(prompt=query_str) 415 | except openai.error.InvalidRequestError as e: 416 | logger.error(f"🚨 LLM error: {e}") 417 | raise HTTPException(status_code=500, detail=f"LLM error: {e}") 418 | logger.debug(f"💬 LLM result: {str(result)}") 419 | return sanitize_output(result) 420 | 421 | 422 | # -------------------------- 423 | # Create document embeddings 424 | # -------------------------- 425 | def get_embeddings( 426 | document_data: str, 427 | document_type: DOCUMENT_TYPE = DOCUMENT_TYPE.PLAINTEXT, 428 | ) -> Tuple[List[str], List[float]]: 429 | documents = [LangChainDocument(page_content=document_data)] 430 | 431 | logger.debug(documents) 432 | if document_type == DOCUMENT_TYPE.MARKDOWN: 433 | doc_splitter = MarkdownTextSplitter( 434 | chunk_size=LLM_CHUNK_SIZE, chunk_overlap=LLM_CHUNK_OVERLAP 435 | ) 436 | else: 437 | doc_splitter = CharacterTextSplitter( 438 | chunk_size=LLM_CHUNK_SIZE, chunk_overlap=LLM_CHUNK_OVERLAP 439 | ) 440 | 441 | # Returns an array of Documents 442 | split_documents = doc_splitter.split_documents(documents) 443 | 444 | # Lets convert them into an array of strings for OpenAI 445 | arr_documents = [doc.page_content for doc in split_documents] 446 | 447 | # https://github.com/hwchase17/langchain/blob/d18b0caf0e00414e066c9903c8df72bb5bcf9998/langchain/embeddings/openai.py#L219 448 | embed_func = OpenAIEmbeddings() 449 | 450 | embeddings = embed_func.embed_documents( 451 | texts=arr_documents, chunk_size=LLM_CHUNK_SIZE 452 | ) 453 | 454 | return arr_documents, embeddings -------------------------------------------------------------------------------- /app/api/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import ( 2 | FastAPI, 3 | File, 4 | Depends, 5 | HTTPException, 6 | UploadFile 7 | ) 8 | from fastapi.openapi.utils import get_openapi 9 | from fastapi.staticfiles import StaticFiles 10 | from sqlmodel import Session, select 11 | 12 | from typing import ( 13 | List, 14 | Optional, 15 | Union, 16 | Any 17 | ) 18 | from datetime import datetime 19 | import requests 20 | import aiohttp 21 | import time 22 | import json 23 | import os 24 | 25 | # ----------- 26 | # LLM imports 27 | # ----------- 28 | from llm import ( 29 | chat_query 30 | ) 31 | 32 | # ---------------- 33 | # Database imports 34 | # ---------------- 35 | from models import ( 36 | # --------------- 37 | # Database Models 38 | # --------------- 39 | Organization, 40 | OrganizationCreate, 41 | OrganizationRead, 42 | OrganizationUpdate, 43 | User, 44 | UserCreate, 45 | UserRead, 46 | UserReadList, 47 | UserUpdate, 48 | DocumentRead, 49 | DocumentReadList, 50 | ProjectCreate, 51 | ProjectRead, 52 | ProjectReadList, 53 | ChatSessionResponse, 54 | ChatSessionCreatePost, 55 | WebhookCreate, 56 | # ------------------ 57 | # Database functions 58 | # ------------------ 59 | get_engine, 60 | get_session 61 | 62 | ) 63 | from helpers import ( 64 | # ---------------- 65 | # Helper functions 66 | # ---------------- 67 | get_org_by_uuid_or_namespace, 68 | get_project_by_uuid, 69 
| get_user_by_uuid_or_identifier, 70 | get_users, 71 | get_documents_by_project_and_org, 72 | get_document_by_uuid, 73 | create_org_by_org_or_uuid, 74 | create_project_by_org, create_document_by_file_path, create_user as create_user_helper 75 | ) 76 | from util import ( 77 | save_file, 78 | get_sha256, 79 | is_uuid, 80 | logger 81 | ) 82 | # -------------- 83 | # Config imports 84 | # -------------- 85 | from config import ( 86 | APP_NAME, 87 | APP_VERSION, 88 | APP_DESCRIPTION, 89 | ENTITY_STATUS, 90 | CHANNEL_TYPE, 91 | LLM_MODELS, 92 | LLM_DISTANCE_THRESHOLD, 93 | LLM_DEFAULT_DISTANCE_STRATEGY, 94 | LLM_MAX_OUTPUT_TOKENS, 95 | LLM_MIN_NODE_LIMIT, 96 | FILE_UPLOAD_PATH, 97 | RASA_WEBHOOK_URL 98 | ) 99 | 100 | 101 | # ------------------ 102 | # Mount static files 103 | # ------------------ 104 | 105 | 106 | app = FastAPI() 107 | 108 | app.mount("/static", StaticFiles(directory="static"), name="static") 109 | 110 | # --------------------- 111 | # Health check endpoint 112 | # --------------------- 113 | @app.get("/health", include_in_schema=False) 114 | def health_check(): 115 | return {'status': 'ok'} 116 | 117 | 118 | # ====================== 119 | # ORGANIZATION ENDPOINTS 120 | # ====================== 121 | 122 | # --------------------- 123 | # Get all organizations 124 | # --------------------- 125 | @app.get("/org", response_model=List[OrganizationRead]) 126 | def read_organizations(): 127 | ''' 128 | ## Get all active organizations 129 | 130 | Returns: 131 | List[OrganizationRead]: List of organizations 132 | 133 | ''' 134 | with Session(get_engine()) as session: 135 | orgs = session.exec(select(Organization).where(Organization.status == ENTITY_STATUS.ACTIVE.value)).all() 136 | return orgs 137 | 138 | 139 | # ---------------------- 140 | # Create an organization 141 | # ---------------------- 142 | @app.post("/org", response_model=Union[OrganizationRead, Any]) 143 | def create_organization( 144 | *, 145 | session: Session = Depends(get_session), 146 | organization: Optional[OrganizationCreate] = None, 147 | namespace: Optional[str] = None, 148 | display_name: Optional[str] = None 149 | ): 150 | ''' 151 | 152 | ### Creates a new organization 153 | ### Args: 154 | - **namespace**: Unique namespace for the organization (ex. openai) 155 | - **display_name**: Display name of the organization (ex. OpenAI) 156 | - **bot_url**: URL of the bot (ex. https://t.me/your_bot) 157 | 158 | ### Returns: 159 | - OrganizationRead 160 | ---
161 | 👇 💻 Code examples: 162 | ### 🖥️ Curl 163 | ```bash 164 | curl -X POST "http://localhost:8888/org" -H "accept: application/json" -H "Content-Type: application/json" -d '{"namespace":"openai","display_name":"OpenAI","bot_url":"https://t.me/your_bot"}' 165 | ``` 166 |
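Example response (abridged; values are illustrative):
```json
{"id": 1, "uuid": "1b4e28ba-…", "display_name": "OpenAI", "namespace": "openai", "bot_url": "https://t.me/your_bot"}
```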
167 | ### 🐍 Python 168 | ```python 169 | import requests 170 | response = requests.post("http://localhost:8888/org", json={"namespace":"openai","display_name":"OpenAI","bot_url":"https://t.me/your_bot"}) 171 | print(response.json()) 172 | ``` 173 |
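### ⚡ HTTPie (a minimal sketch, assuming the httpie CLI is installed)
```bash
http POST http://localhost:8888/org namespace=openai display_name=OpenAI bot_url=https://t.me/your_bot
```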
174 | ''' 175 | # Create organization 176 | return create_org_by_org_or_uuid( 177 | organization=organization, 178 | namespace=namespace, 179 | display_name=display_name, session=session 180 | ) 181 | 182 | 183 | # ---------------------------------------- 184 | # Get an organization by UUID or namespace 185 | # ---------------------------------------- 186 | @app.get("/org/{organization_id}", response_model=Union[OrganizationRead, Any]) 187 | def read_organization( 188 | *, 189 | session: Session = Depends(get_session), 190 | organization_id: str 191 | ): 192 | 193 | organization = get_org_by_uuid_or_namespace(organization_id, session=session) 194 | 195 | return organization 196 | 197 | 198 | # ------------------------------ 199 | # Update an organization by UUID 200 | # ------------------------------ 201 | @app.put("/org/{organization_id}", response_model=Union[OrganizationRead, Any]) 202 | def update_organization( 203 | *, 204 | session: Session = Depends(get_session), 205 | organization_id: str, 206 | organization: OrganizationUpdate 207 | ): 208 | 209 | org = get_org_by_uuid_or_namespace(organization_id, session=session) 210 | 211 | org.update(organization.dict(exclude_unset=True)) 212 | return org 213 | 214 | 215 | # ================= 216 | # Project endpoints 217 | # ================= 218 | 219 | # ----------------------- 220 | # Get all projects by org 221 | # ----------------------- 222 | @app.get("/project", response_model=List[ProjectReadList]) 223 | def read_projects( 224 | *, 225 | session: Session = Depends(get_session), 226 | organization_id: str 227 | ): 228 | 229 | organization = get_org_by_uuid_or_namespace(organization_id, session=session) 230 | 231 | if not organization.projects: 232 | raise HTTPException(status_code=404, detail='No projects found for organization') 233 | 234 | return organization.projects 235 | 236 | 237 | # ----------------------- 238 | # Create a project by org 239 | # ----------------------- 240 | @app.post("/project", response_model=Union[ProjectRead, Any]) 241 | def create_project( 242 | *, 243 | session: Session = Depends(get_session), 244 | organization_id: str, 245 | project: ProjectCreate 246 | ): 247 | return create_project_by_org( 248 | organization_id=organization_id, 249 | project=project, 250 | session=session 251 | ) 252 | 253 | 254 | # ----------------------------- 255 | # Get a project by UUID and org 256 | # ----------------------------- 257 | @app.get("/project/{project_id}", response_model=Union[ProjectRead, Any]) 258 | def read_project( 259 | *, 260 | session: Session = Depends(get_session), 261 | organization_id: str, 262 | project_id: str 263 | ): 264 | 265 | return get_project_by_uuid(uuid=project_id, organization_id=organization_id, session=session) 266 | 267 | 268 | # ================== 269 | # DOCUMENT ENDPOINTS 270 | # ================== 271 | 272 | # --------------- 273 | # Upload document 274 | # --------------- 275 | @app.post("/document", response_model=Union[DocumentReadList, Any]) 276 | async def upload_document( 277 | *, 278 | session: Session = Depends(get_session), 279 | organization_id: str, 280 | project_id: str, 281 | url: Optional[str] = None, 282 | file: Optional[UploadFile] = File(None), 283 | overwrite: Optional[bool] = True 284 | ): 285 | organization = get_org_by_uuid_or_namespace(organization_id, session=session) 286 | project = get_project_by_uuid(uuid=project_id, organization_id=organization_id, session=session) 287 | file_root_path = os.path.join(FILE_UPLOAD_PATH, str(organization.uuid), str(project.uuid)) 288 | 289 | file_version = 1 290 |
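# Illustrative multipart upload (hypothetical project UUID):
#   curl -X POST "http://localhost:8888/document?organization_id=pepe&project_id=<project_uuid>" \
#        -F "file=@org-about_the_company.md"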
291 | # ------------------------ 292 | # Enforce XOR for url/file 293 | # ------------------------ 294 | if url and file: 295 | raise HTTPException(status_code=400, detail='You can only upload a file OR provide a URL, not both') 296 | 297 | # -------------------- 298 | # Upload file from URL 299 | # -------------------- 300 | if url: 301 | file_name = url.split('/')[-1] 302 | file_upload_path = os.path.join(file_root_path, file_name) 303 | file_exists = os.path.isfile(file_upload_path) 304 | 305 | if file_exists: 306 | file_name = f'{file_name}_{int(time.time())}' 307 | file_upload_path = os.path.join(file_root_path, file_name) 308 | # Use http_session so we don't shadow the database session 309 | async with aiohttp.ClientSession() as http_session: 310 | async with http_session.get(url) as resp: 311 | if resp.status != 200: 312 | raise HTTPException(status_code=400, detail=f'Could not download file from {url}') 313 | 314 | with open(file_upload_path, 'wb') as f: 315 | while True: 316 | chunk = await resp.content.read(1024) 317 | if not chunk: 318 | break 319 | f.write(chunk) 320 | 321 | with open(file_upload_path, 'rb') as f: file_contents = f.read() 322 | file_hash = get_sha256(contents=file_contents) 323 | 324 | # ----------------------- 325 | # Upload file from device 326 | # ----------------------- 327 | else: 328 | file_name = file.filename 329 | file_upload_path = os.path.join(file_root_path, file_name) 330 | file_exists = os.path.isfile(file_upload_path) 331 | 332 | if file_exists: 333 | file_name = f'{file_name}_{int(time.time())}' 334 | file_upload_path = os.path.join(file_root_path, file_name) 335 | 336 | file_contents = await file.read() 337 | file_hash = get_sha256(contents=file_contents) 338 | await save_file(file, file_upload_path) 339 | 340 | document_obj = create_document_by_file_path( 341 | organization=organization, 342 | project=project, 343 | file_path=file_upload_path, 344 | file_hash=file_hash, 345 | file_version=file_version, 346 | url=url, 347 | overwrite=overwrite, 348 | session=session 349 | ) 350 | return document_obj 351 | 352 | 353 | # -------------------------------- 354 | # List all documents for a project 355 | # -------------------------------- 356 | @app.get("/document", response_model=List[DocumentReadList]) 357 | def read_documents( 358 | *, 359 | session: Session = Depends(get_session), 360 | organization_id: str, 361 | project_id: str 362 | ): 363 | return get_documents_by_project_and_org(project_id=project_id, organization_id=organization_id, session=session) 364 | 365 | # ---------------------- 366 | # Get a document by UUID 367 | # ---------------------- 368 | @app.get("/document/{document_id}", response_model=DocumentRead) 369 | def read_document( 370 | *, 371 | session: Session = Depends(get_session), 372 | organization_id: str, 373 | project_id: str, 374 | document_id: str 375 | ): 376 | return get_document_by_uuid(uuid=document_id, project_id=project_id, organization_id=organization_id, session=session) 377 | 378 | 379 | # ============== 380 | # USER ENDPOINTS 381 | # ============== 382 | 383 | # ------------- 384 | # Get all users 385 | # ------------- 386 | @app.get("/user", response_model=List[UserReadList]) 387 | def read_users( 388 | *, 389 | session: Session = Depends(get_session), 390 | ): 391 | return get_users(session=session) 392 | 393 | 394 | # ------------- 395 | # Create a user 396 | # ------------- 397 | @app.post("/user", response_model=UserRead) 398 | def create_user( 399 | *, 400 | session: Session = Depends(get_session), 401 | user: UserCreate 402 | ): 403 | 404 | return create_user_helper( 405 | user=user,
406 | session=session 407 | ) 408 | 409 | 410 | # ------------------ 411 | # Get a user by UUID 412 | # ------------------ 413 | @app.get("/user/{user_uuid}", response_model=UserRead) 414 | def read_user( 415 | *, 416 | session: Session = Depends(get_session), 417 | user_uuid: str 418 | ): 419 | 420 | return get_user_by_uuid_or_identifier(id=user_uuid, session=session) 421 | 422 | 423 | # --------------------- 424 | # Update a user by UUID 425 | # --------------------- 426 | @app.put("/user/{user_uuid}", response_model=UserRead) 427 | def update_user(*, user_uuid: str, user: UserUpdate): 428 | 429 | # Get user by UUID 430 | db_user = User.by_uuid(user_uuid) 431 | 432 | # If the user exists, update it 433 | if db_user: 434 | db_user.update(user.dict(exclude_unset=True)) 435 | return db_user 436 | 437 | # If the user doesn't exist, return 404 438 | else: 439 | raise HTTPException(status_code=404, detail=f'User {user_uuid} not found!') 440 | 441 | 442 | # ============= 443 | # LLM ENDPOINTS 444 | # ============= 445 | 446 | 447 | def process_webhook_telegram(webhook_data: dict): 448 | """ 449 | Telegram example response: 450 | { 451 | "update_id": 248146407, 452 | "message": { 453 | "message_id": 299, 454 | "from": { 455 | "id": 123456789, 456 | "is_bot": false, 457 | "first_name": "Elon", 458 | "username": "elonmusk", 459 | "language_code": "en" 460 | }, 461 | "chat": { 462 | "id": 123456789, 463 | "first_name": "Elon", 464 | "username": "elonmusk", 465 | "type": "private" 466 | }, 467 | "date": 1683115867, 468 | "text": "Tell me about the company?" 469 | } 470 | } 471 | """ 472 | message = webhook_data.get('message', None) 473 | chat = message.get('chat', None) 474 | message_from = message.get('from', None) 475 | return { 476 | 'update_id': webhook_data.get('update_id', None), 477 | 'message_id': message.get('message_id', None), 478 | 'user_id': message_from.get('id', None), 479 | 'username': message_from.get('username', None), 480 | 'user_language': message_from.get('language_code', None), 481 | 'user_firstname': chat.get('first_name', None), 482 | 'user_message': message.get('text', None), 483 | 'message_ts': datetime.fromtimestamp(message.get('date', None)) if message.get('date', None) else None, 484 | 'message_type': chat.get('type', None) 485 | } 486 | 487 | 488 | @app.post("/webhooks/{channel}/webhook") 489 | def get_webhook( 490 | *, 491 | session: Session = Depends(get_session), 492 | channel: str, 493 | webhook: WebhookCreate 494 | ): 495 | webhook_data = webhook.dict() 496 | 497 | # -------------------- 498 | # Get webhook metadata 499 | # -------------------- 500 | if channel == 'telegram': 501 | rasa_webhook_url = f'{RASA_WEBHOOK_URL}/webhooks/{channel}/webhook' 502 | data = process_webhook_telegram(webhook_data) 503 | channel = CHANNEL_TYPE.TELEGRAM.value 504 | user_data = { 505 | 'identifier': data['user_id'], 506 | 'identifier_type': channel, 507 | 'first_name': data['user_firstname'], 508 | 'language': data['user_language'] 509 | } 510 | session_metadata = { 511 | 'update_id': data['update_id'], 512 | 'username': data['username'], 513 | 'message_id': data['message_id'], 514 | 'msg_ts': data['message_ts'].isoformat() if data['message_ts'] else None, 515 | 'msg_type': data['message_type'], 516 | } 517 | user_message = data['user_message'] 518 | else: 519 | # Not a valid channel, return 404 520 | raise HTTPException(status_code=404, detail=f'Channel {channel} not a valid webhook channel!') 521 | 522 | chat_session = chat_query( 523 | user_message, 524 | session=session, 525 | channel=channel, 526 | identifier=user_data['identifier'], 527 | user_data=user_data,
528 | meta=session_metadata 529 | ) 530 | 531 | meta = chat_session.meta 532 | 533 | # ----------------------------------------- 534 | # Lets add the LLM response to the metadata 535 | # ----------------------------------------- 536 | webhook_data['message']['meta'] = { 537 | 'response': chat_session.response if chat_session.response else None, 538 | 'tags': meta['tags'] if 'tags' in meta else None, 539 | 'is_escalate': meta['is_escalate'] if 'is_escalate' in meta else False, 540 | 'session_id': meta['session_id'] if 'session_id' in meta else None 541 | 542 | } 543 | 544 | # ----------------------------------- 545 | # Forward the webhook to Rasa webhook 546 | # ----------------------------------- 547 | res = requests.post(rasa_webhook_url, data=json.dumps(webhook_data)) 548 | logger.debug(f'[🤖 RasaGPT API webhook]\nPosting data: {json.dumps(webhook_data)}\n\n[🤖 RasaGPT API webhook]\nRasa webhook response: {res.text}') 549 | 550 | return {'status': 'ok'} 551 | 552 | 553 | # ------------------ 554 | # Customize API docs 555 | # ------------------ 556 | _schema = get_openapi( 557 | title=APP_NAME, 558 | description=APP_DESCRIPTION, 559 | version=APP_VERSION, 560 | routes=app.routes, 561 | ) 562 | _schema['info']['x-logo'] = { 563 | 'url': '/static/img/rasagpt-logo-1.png' 564 | } 565 | app.openapi_schema = _schema -------------------------------------------------------------------------------- /app/api/models.py: -------------------------------------------------------------------------------- 1 | from sqlalchemy.dialects.postgresql import JSONB 2 | from sqlalchemy.orm import declared_attr 3 | from pgvector.sqlalchemy import Vector 4 | from sqlalchemy import Column 5 | from datetime import datetime 6 | from util import snake_case 7 | import uuid as uuid_pkg 8 | 9 | from sqlmodel import ( 10 | UniqueConstraint, 11 | create_engine, 12 | Relationship, 13 | SQLModel, 14 | Session, 15 | select, 16 | Field, 17 | ) 18 | from typing import ( 19 | Optional, 20 | Union, 21 | List, 22 | Dict, 23 | Any 24 | ) 25 | from config import ( 26 | LLM_DEFAULT_DISTANCE_STRATEGY, 27 | VECTOR_EMBEDDINGS_COUNT, 28 | LLM_MAX_OUTPUT_TOKENS, 29 | DISTANCE_STRATEGIES, 30 | LLM_MIN_NODE_LIMIT, 31 | PGVECTOR_ADD_INDEX, 32 | ENTITY_STATUS, 33 | CHANNEL_TYPE, 34 | LLM_MODELS, 35 | DB_USER, 36 | SU_DSN, 37 | logger, 38 | ) 39 | 40 | 41 | # ========== 42 | # Base model 43 | # ========== 44 | class BaseModel(SQLModel): 45 | @declared_attr 46 | def __tablename__(cls) -> str: 47 | return snake_case(cls.__name__) 48 | 49 | @classmethod 50 | def by_uuid(self, _uuid: uuid_pkg.UUID): 51 | with Session(get_engine()) as session: 52 | q = select(self).where(self.uuid == _uuid) 53 | org = session.exec(q).first() 54 | return org if org else None 55 | 56 | def update(self, o: Union[SQLModel, dict] = None): 57 | if not o: 58 | raise ValueError("Must provide a model or dict to update values") 59 | o = o if isinstance(o, dict) else o.dict(exclude_unset=True) 60 | for key, value in o.items(): 61 | setattr(self, key, value) 62 | 63 | # save and commit to database 64 | with Session(get_engine()) as session: 65 | session.add(self) 66 | session.commit() 67 | session.refresh(self) 68 | 69 | def delete(self): 70 | with Session(get_engine()) as session: 71 | self.status = ENTITY_STATUS.DELETED 72 | self.updated_at = datetime.utcnow() 73 | session.add(self) 74 | session.commit() 75 | session.refresh(self) 76 | 77 | @classmethod 78 | def create(self, o: Union[SQLModel, dict] = None): 79 | if not o: 80 | raise ValueError("Must provide a model or dict to 
update values") 81 | 82 | with Session(get_engine()) as session: 83 | obj = self.from_orm(o) if isinstance(o, SQLModel) else self(**o) 84 | session.add(obj) 85 | session.commit() 86 | session.refresh(obj) 87 | 88 | return obj 89 | 90 | 91 | # ============ 92 | # Organization 93 | # ============ 94 | class Organization(BaseModel, table=True): 95 | id: Optional[int] = Field(default=None, primary_key=True) 96 | uuid: Optional[uuid_pkg.UUID] = Field( 97 | unique=True, default_factory=uuid_pkg.uuid4 98 | ) # UUID for the organization 99 | display_name: Optional[str] = Field( 100 | default="Untitled Organization 😊", index=True 101 | ) # display name of the organization 102 | namespace: str = Field( 103 | unique=True, index=True 104 | ) # unique organization namespace for URLs, etc. 105 | bot_url: Optional[str] = Field(default=None) # URL for the bot 106 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 107 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 108 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 109 | 110 | # ------------- 111 | # Relationships 112 | # ------------- 113 | projects: Optional[List["Project"]] = Relationship(back_populates="organization") 114 | documents: Optional[List["Document"]] = Relationship(back_populates="organization") 115 | 116 | @property 117 | def project_count(self) -> int: 118 | return len(self.projects) 119 | 120 | @property 121 | def document_count(self) -> int: 122 | return len(self.documents) 123 | 124 | def __repr__(self): 125 | return f"" 126 | 127 | 128 | class OrganizationCreate(SQLModel): 129 | display_name: Optional[str] 130 | namespace: Optional[str] 131 | bot_url: Optional[str] 132 | 133 | 134 | class OrganizationRead(SQLModel): 135 | id: int 136 | uuid: uuid_pkg.UUID 137 | display_name: str 138 | namespace: Optional[str] 139 | bot_url: Optional[str] 140 | created_at: datetime 141 | updated_at: datetime 142 | 143 | 144 | class OrganizationUpdate(SQLModel): 145 | display_name: Optional[str] 146 | namespace: Optional[str] 147 | bot_url: Optional[str] 148 | 149 | 150 | # =============== 151 | # User (customer) 152 | # =============== 153 | class User(BaseModel, table=True): 154 | id: Optional[int] = Field(default=None, primary_key=True) 155 | identifier: str = Field(default=None, unique=True, index=True) 156 | identifier_type: Optional[CHANNEL_TYPE] = Field(default=None) 157 | uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4) 158 | first_name: Optional[str] = Field(default=None) 159 | last_name: Optional[str] = Field(default=None) 160 | email: Optional[str] = Field(default=None) 161 | phone: Optional[str] = Field(default=None) 162 | dob: Optional[datetime] = Field(default=None) 163 | device_fingerprint: Optional[str] = Field(default=None) 164 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 165 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 166 | 167 | # ------------- 168 | # Relationships 169 | # ------------- 170 | chat_sessions: Optional[List["ChatSession"]] = Relationship(back_populates="user") 171 | 172 | @property 173 | def chat_session_count(self) -> int: 174 | return len(self.chat_sessions) 175 | 176 | __table_args__ = ( 177 | UniqueConstraint("identifier", "identifier_type", name="unq_id_idtype"), 178 | ) 179 | 180 | def __repr__(self): 181 | return f"" 182 | 183 | 184 | class UserCreate(SQLModel): 185 | identifier: str 186 | identifier_type: CHANNEL_TYPE 187 | device_fingerprint: 
Optional[str] 188 | first_name: Optional[str] 189 | last_name: Optional[str] 190 | email: Optional[str] 191 | phone: Optional[str] 192 | dob: Optional[datetime] 193 | 194 | 195 | class UserReadList(SQLModel): 196 | id: int 197 | identifier: Optional[str] 198 | identifier_type: Optional[CHANNEL_TYPE] 199 | uuid: uuid_pkg.UUID 200 | device_fingerprint: Optional[str] 201 | first_name: Optional[str] 202 | last_name: Optional[str] 203 | email: Optional[str] 204 | phone: Optional[str] 205 | dob: Optional[datetime] 206 | chat_session_count: int 207 | created_at: datetime 208 | updated_at: datetime 209 | 210 | 211 | class UserUpdate(SQLModel): 212 | device_fingerprint: Optional[str] 213 | 214 | first_name: Optional[str] 215 | last_name: Optional[str] 216 | email: Optional[str] 217 | phone: Optional[str] 218 | dob: Optional[datetime] 219 | 220 | 221 | # ======= 222 | # Project 223 | # ======= 224 | class Project(BaseModel, table=True): 225 | id: Optional[int] = Field(default=None, primary_key=True) 226 | uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4) 227 | organization_id: int = Field(default=None, foreign_key="organization.id") 228 | display_name: str = Field(default="📝 Untitled Project") 229 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 230 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 231 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 232 | 233 | # ------------- 234 | # Relationships 235 | # ------------- 236 | organization: Optional["Organization"] = Relationship(back_populates="projects") 237 | documents: Optional[List["Document"]] = Relationship(back_populates="project") 238 | chat_sessions: Optional[List["ChatSession"]] = Relationship( 239 | back_populates="project" 240 | ) 241 | 242 | @property 243 | def document_count(self) -> int: 244 | return len(self.documents) 245 | 246 | def __repr__(self): 247 | return f"<Project {self.display_name}>" 248 | 249 | 250 | class ProjectCreate(SQLModel): 251 | display_name: Optional[str] 252 | 253 | 254 | class ProjectReadListOrganization(SQLModel): 255 | uuid: uuid_pkg.UUID 256 | display_name: str 257 | namespace: Optional[str] 258 | document_count: int 259 | 260 | 261 | class ProjectUpdate(SQLModel): 262 | display_name: Optional[str] 263 | status: Optional[ENTITY_STATUS] 264 | 265 | 266 | # ========= 267 | # Documents 268 | # ========= 269 | class Document(BaseModel, table=True): 270 | id: Optional[int] = Field(default=None, primary_key=True) 271 | uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4) 272 | organization_id: int = Field(default=None, foreign_key="organization.id") 273 | project_id: int = Field(default=None, foreign_key="project.id") 274 | display_name: str = Field(default="Untitled Document 😊") 275 | url: str = Field(default="") 276 | data: Optional[bytes] = Field(default=None) 277 | hash: str = Field(default=None) 278 | version: Optional[int] = Field(default=1) 279 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 280 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 281 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 282 | 283 | # ------------- 284 | # Relationships 285 | # ------------- 286 | nodes: Optional[List["Node"]] = Relationship(back_populates="document") 287 | organization: Optional["Organization"] = Relationship(back_populates="documents") 288 | project: Optional["Project"] =
Relationship(back_populates="documents") 289 | 290 | @property 291 | def node_count(self) -> int: 292 | return len(self.nodes) 293 | 294 | __table_args__ = (UniqueConstraint("uuid", "hash", name="unq_org_document"),) 295 | 296 | def __repr__(self): 297 | return f"<Document {self.display_name} v{self.version}>" 298 | 299 | 300 | class ProjectRead(SQLModel): 301 | id: int 302 | uuid: uuid_pkg.UUID 303 | organization: Organization 304 | document_count: int 305 | documents: Optional[List[Document]] = None 306 | display_name: str 307 | created_at: datetime 308 | updated_at: datetime 309 | 310 | 311 | class DocumentCreate(SQLModel): 312 | project: Project 313 | display_name: Optional[str] 314 | url: Optional[str] 315 | version: Optional[str] 316 | data: Optional[bytes] 317 | hash: Optional[str] 318 | 319 | 320 | class DocumentUpdate(SQLModel): 321 | status: Optional[ENTITY_STATUS] 322 | 323 | 324 | # ============== 325 | # Document Nodes 326 | # ============== 327 | class Node(BaseModel, table=True): 328 | class Config: 329 | arbitrary_types_allowed = True 330 | 331 | id: Optional[int] = Field(default=None, primary_key=True) 332 | document_id: int = Field(default=None, foreign_key="document.id") 333 | uuid: Optional[uuid_pkg.UUID] = Field(unique=True, default_factory=uuid_pkg.uuid4) 334 | embeddings: Optional[List[float]] = Field( 335 | sa_column=Column(Vector(VECTOR_EMBEDDINGS_COUNT)) 336 | ) 337 | meta: Optional[Dict] = Field(default=None, sa_column=Column(JSONB)) 338 | token_count: Optional[int] = Field(default=None) 339 | text: str = Field(default=None, nullable=False) 340 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 341 | created_at: Optional[datetime] = Field(default_factory=datetime.now) 342 | updated_at: Optional[datetime] = Field(default_factory=datetime.now) 343 | 344 | # ------------- 345 | # Relationships 346 | # ------------- 347 | document: Optional["Document"] = Relationship(back_populates="nodes") 348 | 349 | def __repr__(self): 350 | return f"<Node {self.uuid}>" 351 | 352 | 353 | class NodeCreate(SQLModel): 354 | document: Document 355 | embeddings: List[float] 356 | token_count: Optional[int] 357 | text: str 358 | status: Optional[ENTITY_STATUS] 359 | 360 | 361 | class NodeRead(SQLModel): 362 | id: int 363 | document: Document 364 | embeddings: Optional[List[float]] 365 | token_count: Optional[int] 366 | text: str 367 | created_at: datetime 368 | 369 | 370 | class DocumentReadNodeList(SQLModel): 371 | id: int 372 | uuid: uuid_pkg.UUID 373 | display_name: str 374 | node_count: int 375 | 376 | 377 | class NodeReadResult(SQLModel): 378 | id: int 379 | token_count: Optional[int] 380 | text: str 381 | meta: Optional[Dict] 382 | 383 | 384 | class ProjectReadListDocumentList(SQLModel): 385 | uuid: uuid_pkg.UUID 386 | display_name: str 387 | node_count: Optional[int] 388 | 389 | 390 | class ProjectReadList(SQLModel): 391 | id: int 392 | # organization: ProjectReadListOrganization 393 | documents: Optional[List[DocumentReadNodeList]] 394 | document_count: int 395 | uuid: uuid_pkg.UUID 396 | display_name: str 397 | created_at: datetime 398 | updated_at: datetime 399 | 400 | 401 | class NodeReadList(SQLModel): 402 | id: int 403 | document: DocumentReadNodeList 404 | embeddings: Optional[List[float]] 405 | token_count: Optional[int] 406 | text: str 407 | created_at: datetime 408 | 409 | 410 | class NodeUpdate(SQLModel): 411 | status: Optional[ENTITY_STATUS] = Field(default=ENTITY_STATUS.ACTIVE.value) 412 | 413 | 414 | class NodeReadListDocumentRead(SQLModel): 415 | uuid: uuid_pkg.UUID 416 | token_count: Optional[int]
417 | created_at: datetime 418 | 419 | 420 | class DocumentReadList(SQLModel): 421 | id: int 422 | uuid: uuid_pkg.UUID 423 | display_name: str 424 | version: int 425 | nodes: Optional[List[NodeReadListDocumentRead]] = None 426 | node_count: int 427 | hash: str 428 | created_at: datetime 429 | updated_at: datetime 430 | 431 | 432 | # ============ 433 | # Chat Session 434 | # ============ 435 | class ChatSession(BaseModel, table=True): 436 | class Config: 437 | arbitrary_types_allowed = True 438 | 439 | id: Optional[int] = Field(default=None, primary_key=True) 440 | session_id: Optional[uuid_pkg.UUID] = Field( 441 | index=True, default_factory=uuid_pkg.uuid4 442 | ) 443 | user_id: int = Field(default=None, foreign_key="user.id") 444 | project_id: int = Field(default=None, foreign_key="project.id") 445 | channel: CHANNEL_TYPE = Field(default=CHANNEL_TYPE.TELEGRAM) 446 | user_message: str = Field(default=None) 447 | token_count: Optional[int] = Field(default=None) 448 | embeddings: Optional[List[float]] = Field( 449 | sa_column=Column(Vector(VECTOR_EMBEDDINGS_COUNT)) 450 | ) 451 | response: Optional[str] = Field(default=None) 452 | meta: Optional[Dict] = Field(default=None, sa_column=Column(JSONB)) 453 | created_at: datetime = Field(default_factory=datetime.now) 454 | 455 | # ------------- 456 | # Relationships 457 | # ------------- 458 | user: Optional["User"] = Relationship(back_populates="chat_sessions") 459 | project: Optional["Project"] = Relationship(back_populates="chat_sessions") 460 | 461 | def __repr__(self): 462 | return f"<ChatSession {self.session_id}>" 463 | 464 | 465 | class ChatSessionCreatePost(SQLModel): 466 | project_id: Optional[str] = "" 467 | organization_id: Optional[str] = "pepe" 468 | channel: Optional[CHANNEL_TYPE] = CHANNEL_TYPE.TELEGRAM 469 | query: Optional[str] = "What is the weather like in London right now?"
470 | identifier: Optional[str] = "@username" 471 | distance_strategy: Optional[str] = LLM_DEFAULT_DISTANCE_STRATEGY 472 | max_output_tokens: Optional[int] = LLM_MAX_OUTPUT_TOKENS 473 | node_limit: Optional[int] = LLM_MIN_NODE_LIMIT 474 | model: Optional[str] = LLM_MODELS.GPT_35_TURBO 475 | session_id: Optional[str] = "" 476 | 477 | 478 | class ChatSessionCreate(SQLModel): 479 | channel: CHANNEL_TYPE 480 | token_count: Optional[int] 481 | user_message: str 482 | embeddings: List[float] 483 | response: Optional[str] 484 | 485 | 486 | class ChatSessionRead(SQLModel): 487 | id: int 488 | user: User 489 | project: Optional[ProjectReadListDocumentList] 490 | token_count: Optional[int] 491 | channel: CHANNEL_TYPE 492 | user_message: str 493 | embeddings: List[float] 494 | response: Optional[str] 495 | meta: Optional[dict] 496 | created_at: datetime = Field(default_factory=datetime.now) 497 | 498 | 499 | class ChatSessionResponse(SQLModel): 500 | meta: Optional[dict] 501 | response: Optional[str] 502 | user_message: Optional[str] 503 | 504 | 505 | class ProjectReadChatSessionRead(SQLModel): 506 | id: int 507 | token_count: Optional[int] 508 | channel: CHANNEL_TYPE 509 | created_at: datetime = Field(default_factory=datetime.now) 510 | 511 | 512 | class ChatSessionReadUserRead(SQLModel): 513 | id: int 514 | project: Optional[ProjectReadListDocumentList] 515 | token_count: Optional[int] 516 | channel: CHANNEL_TYPE 517 | user_message: str 518 | response: Optional[str] 519 | created_at: datetime = Field(default_factory=datetime.now) 520 | 521 | 522 | class UserRead(SQLModel): 523 | id: int 524 | identifier: Optional[str] 525 | identifier_type: Optional[CHANNEL_TYPE] 526 | uuid: uuid_pkg.UUID 527 | language: Optional[str] 528 | device_fingerprint: Optional[str] 529 | first_name: Optional[str] 530 | last_name: Optional[str] 531 | email: Optional[str] 532 | phone: Optional[str] 533 | dob: Optional[datetime] 534 | chat_session_count: int 535 | chat_sessions: Optional[List[ChatSessionReadUserRead]] 536 | created_at: datetime 537 | updated_at: datetime 538 | 539 | 540 | class DocumentReadProjectRead(SQLModel): 541 | uuid: uuid_pkg.UUID 542 | display_name: str 543 | namespace: Optional[str] 544 | document_count: int 545 | 546 | 547 | class DocumentRead(SQLModel): 548 | id: int 549 | uuid: uuid_pkg.UUID 550 | project: DocumentReadProjectRead 551 | organization: OrganizationRead 552 | display_name: str 553 | node_count: int 554 | url: Optional[str] 555 | version: int 556 | data: bytes 557 | hash: str 558 | created_at: datetime 559 | updated_at: datetime 560 | 561 | 562 | class WebhookCreate(SQLModel): 563 | update_id: str 564 | message: Dict[str, Any] 565 | 566 | 567 | class WebhookResponse(SQLModel): 568 | update_id: str 569 | message: Dict[str, Any] 570 | 571 | 572 | # ================== 573 | # Database functions 574 | # ================== 575 | def get_engine(dsn: str = SU_DSN): 576 | return create_engine(dsn) 577 | 578 | 579 | def get_session(): 580 | with Session(get_engine()) as session: 581 | yield session 582 | 583 | 584 | def create_db(): 585 | logger.info("...Enabling pgvector and creating database tables") 586 | enable_vector() 587 | BaseModel.metadata.create_all(get_engine(dsn=SU_DSN)) 588 | create_user_permissions() 589 | create_vector_index() 590 | 591 | 592 | def create_user_permissions(): 593 | session = Session(get_engine(dsn=SU_DSN)) 594 | # grant access to entire database and all tables to user DB_USER 595 | query = f"GRANT SELECT, INSERT, UPDATE, DELETE ON ALL TABLES IN SCHEMA public TO 
{DB_USER};" 596 | session.execute(query) 597 | session.commit() 598 | session.close() 599 | 600 | 601 | def drop_db(): 602 | BaseModel.metadata.drop_all(get_engine(dsn=SU_DSN)) 603 | 604 | 605 | def create_vector_index(): 606 | # ------------------------------------- 607 | # Let's add an index for the embeddings 608 | # ------------------------------------- 609 | if PGVECTOR_ADD_INDEX is True: 610 | session = Session(get_engine(dsn=SU_DSN)) 611 | for strategy in DISTANCE_STRATEGIES: 612 | session.execute(strategy[3]) 613 | session.commit() 614 | 615 | 616 | def enable_vector(): 617 | session = Session(get_engine(dsn=SU_DSN)) 618 | query = "CREATE EXTENSION IF NOT EXISTS vector;" 619 | session.execute(query) 620 | session.commit() 621 | add_vector_distance_fn(session) 622 | session.close() 623 | 624 | 625 | def add_vector_distance_fn(session: Session): 626 | for strategy in DISTANCE_STRATEGIES: 627 | strategy_name = strategy[1] 628 | strategy_distance_str = strategy[2] 629 | 630 | query = f"""create or replace function match_node_{strategy_name} ( 631 | query_embeddings vector({VECTOR_EMBEDDINGS_COUNT}), 632 | match_threshold float, 633 | match_count int 634 | ) returns table ( 635 | uuid uuid, 636 | text varchar, 637 | similarity float 638 | ) 639 | language plpgsql 640 | as $$ 641 | begin 642 | return query 643 | select 644 | node.uuid, 645 | node.text, 646 | 1 - (node.embeddings {strategy_distance_str} query_embeddings) as similarity 647 | from node 648 | where 1 - (node.embeddings {strategy_distance_str} query_embeddings) > match_threshold 649 | order by similarity desc 650 | limit match_count; 651 | end; 652 | $$;""" 653 | 654 | session.execute(query) 655 | session.commit() 656 | session.close() 657 | 658 | 659 | if __name__ == "__main__": 660 | create_db() 661 | -------------------------------------------------------------------------------- /app/api/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.157 2 | uvicorn[standard] 3 | python-multipart 4 | psycopg2-binary 5 | python-dotenv 6 | fastapi[all] 7 | SQLAlchemy 8 | pgvector 9 | tiktoken 10 | aiofiles 11 | sqlmodel 12 | openai 13 | -------------------------------------------------------------------------------- /app/api/seed.py: -------------------------------------------------------------------------------- 1 | ''' 2 | seed.py users the models in model.py and populates the database with dummy content 3 | ''' 4 | 5 | # ---------------- 6 | # Database imports 7 | # ---------------- 8 | from helpers import ( 9 | create_org_by_org_or_uuid, 10 | create_project_by_org, 11 | create_document_by_file_path 12 | ) 13 | from config import ( 14 | FILE_UPLOAD_PATH, 15 | logger 16 | ) 17 | from util import ( 18 | get_file_hash 19 | ) 20 | import os 21 | 22 | # -------------------- 23 | # Create organizations 24 | # -------------------- 25 | 26 | organizations = [ 27 | { 28 | 'display_name': 'Pepe Corp.', 29 | 'namespace': 'pepe', 30 | 'projects': [ 31 | { 32 | 'display_name': 'Pepetamine', 33 | 'docs': [ 34 | 'project-pepetamine.md' 35 | ] 36 | }, 37 | { 38 | 'display_name': 'Frogonil', 39 | 'docs': [ 40 | 'project-frogonil.md' 41 | ] 42 | }, 43 | { 44 | 'display_name': 'Kekzal', 45 | 'docs': [ 46 | 'project-kekzal.md' 47 | ] 48 | }, 49 | { 50 | 'display_name': 'Memetrex', 51 | 'docs': [ 52 | 'project-memetrex.md' 53 | ] 54 | }, 55 | { 56 | 'display_name': 'PepeTrak', 57 | 'docs': [ 58 | 'project-pepetrak.md' 59 | ] 60 | }, 61 | { 62 | 'display_name': 'MemeGen', 63 | 'docs': [ 64 
| 'project-memegen.md' 65 | ] 66 | }, 67 | { 68 | 'display_name': 'Neuro-kek', 69 | 'docs': [ 70 | 'project-neurokek.md' 71 | ] 72 | }, 73 | { 74 | 'display_name': 'Pepe Corp. (company)', 75 | 'docs': [ 76 | 'org-about_the_company.md', 77 | 'org-board_of_directors.md', 78 | 'org-company_story.md', 79 | 'org-corporate_philosophy.md', 80 | 'org-customer_support.md', 81 | 'org-earnings_fy2023.md', 82 | 'org-management_team.md' 83 | ] 84 | } 85 | ] 86 | }, 87 | { 88 | 'display_name': 'Umbrella Corp', 89 | 'namespace': 'acme', 90 | 'projects': [ 91 | {'display_name': 'T-Virus'}, 92 | {'display_name': 'G-Virus'}, 93 | {'display_name': 'Umbrella Corp. (company)'} 94 | ] 95 | }, 96 | { 97 | 'display_name': 'Cyberdine Systems', 98 | 'namespace': 'cyberdine', 99 | 'projects': [ 100 | {'display_name': 'Skynet'}, 101 | {'display_name': 'Cyberdine Systems (company)'} 102 | ] 103 | }, 104 | { 105 | 'display_name': 'Bluth Companies', 106 | 'namespace': 'bluth', 107 | 'projects': [ 108 | {'display_name': 'Bluth Company (company)'} 109 | ] 110 | }, 111 | { 112 | 'display_name': 'Evil Corp', 113 | 'namespace': 'evil', 114 | 'projects': [ 115 | {'display_name': 'E-Coin'}, 116 | {'display_name': 'E-Corp Power'}, 117 | {'display_name': 'Bank of E Network'}, 118 | {'display_name': 'E Corp Labs'}, 119 | {'display_name': 'Evil Corp (company)'} 120 | ] 121 | } 122 | ] 123 | 124 | training_data_path = os.path.join(os.path.dirname(__file__), f'{FILE_UPLOAD_PATH}/training_data') 125 | 126 | for org in organizations: 127 | 128 | org_obj = create_org_by_org_or_uuid( 129 | display_name=org['display_name'], 130 | namespace=org['namespace'] 131 | ) 132 | logger.debug(f'🏠 Created organization: {org_obj.display_name}') 133 | 134 | if 'projects' not in org: 135 | continue 136 | 137 | for project in org['projects']: 138 | project['organization'] = org_obj 139 | 140 | project_obj = create_project_by_org( 141 | organization_id=org_obj, 142 | display_name=project['display_name'] 143 | ) 144 | logger.debug(f'🗂️ Created project: {project_obj.display_name}') 145 | 146 | project_uuid = str(project_obj.uuid) 147 | org_uuid = str(org_obj.uuid) 148 | 149 | # if the directory does not exist, create it 150 | if not os.path.exists(os.path.join(FILE_UPLOAD_PATH, org_uuid, project_uuid)): 151 | os.mkdir(os.path.join(FILE_UPLOAD_PATH, org_uuid, project_uuid)) 152 | 153 | if 'docs' not in project: 154 | continue 155 | 156 | for doc in project['docs']: 157 | file_path = os.path.join(training_data_path, doc) 158 | 159 | # check if file exists 160 | if os.path.isfile(file_path): 161 | file_hash = get_file_hash(file_path) 162 | create_document_by_file_path( 163 | organization=org_obj, 164 | project=project_obj, 165 | file_path=file_path, 166 | file_hash=file_hash 167 | ) 168 | logger.info(f' ✅ Created document: {doc}') 169 | else: 170 | logger.error(f' ❌ Document not found: {doc}') -------------------------------------------------------------------------------- /app/api/static/img/rasagpt-icon-200x200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/app/api/static/img/rasagpt-icon-200x200.png -------------------------------------------------------------------------------- /app/api/static/img/rasagpt-logo-1.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/app/api/static/img/rasagpt-logo-1.png -------------------------------------------------------------------------------- /app/api/static/img/rasagpt-logo-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/app/api/static/img/rasagpt-logo-2.png -------------------------------------------------------------------------------- /app/api/util.py: -------------------------------------------------------------------------------- 1 | from fastapi import UploadFile 2 | from functools import partial 3 | from hashlib import sha256 4 | from uuid import UUID 5 | import aiofiles 6 | import json 7 | import re 8 | from config import ( 9 | logger 10 | ) 11 | 12 | _snake_1 = partial(re.compile(r'(.)((?<=[a-z])[A-Z]+)').sub, r'\1_\2') 13 | _snake_2 = partial(re.compile(r'([a-z0-9])([A-Z])').sub, r'\1_\2') 14 | 15 | 16 | # ------------------------------ 17 | # Convert a string to snake_case 18 | # ------------------------------ 19 | def snake_case(string: str) -> str: 20 | return _snake_2(_snake_1(string)).casefold() 21 | 22 | 23 | # ------------------------------ 24 | # Check if string is UUID format 25 | # ------------------------------ 26 | def is_uuid(uuid: str) -> bool: 27 | uuid = str(uuid) if isinstance(uuid, UUID) else uuid 28 | return bool(re.match(r"^[0-9a-f]{8}-?[0-9a-f]{4}-?4[0-9a-f]{3}-?[89ab][0-9a-f]{3}-?[0-9a-f]{12}$", uuid)) 29 | 30 | 31 | # --------------------------- 32 | # Writes a file to disk async 33 | # --------------------------- 34 | async def save_file(file: UploadFile, file_path: str): 35 | async with aiofiles.open(file_path, 'wb') as f: 36 | await f.write(await file.read()) 37 | 38 | 39 | # --------------------------- 40 | # Get SHA256 hash of contents 41 | # --------------------------- 42 | def get_sha256(contents: bytes): 43 | return sha256(contents).hexdigest() 44 | 45 | 46 | # ----------------------- 47 | # Get SHA256 hash of file 48 | # ----------------------- 49 | def get_file_hash( 50 | file_path: str, 51 | ): 52 | with open(file_path, 'rb') as f: 53 | file_hash = sha256(f.read()).hexdigest() 54 | 55 | return file_hash 56 | 57 | 58 | # ------------------- 59 | # Clean up LLM output 60 | # ------------------- 61 | def sanitize_output( 62 | str_output: str 63 | ): 64 | # Let's sanitize the JSON 65 | res = str_output.replace("\n", '') 66 | 67 | # If the first character is "?", remove it. Ran into this issue for some reason.
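# Example of the shape we expect after cleanup: '{"message": "...", "tags": ["refund"], "is_escalate": false}' -- these are the keys validated below.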
68 | if res and res[0] == '?': 69 | res = res[1:] 70 | 71 | # check if response is valid json 72 | try: 73 | json.loads(res) 74 | except json.JSONDecodeError: 75 | raise ValueError(f'LLM response is not valid JSON: {res}') 76 | 77 | if 'message' not in res or 'tags' not in res or 'is_escalate' not in res: 78 | raise ValueError(f'LLM response is missing required fields: {res}') 79 | 80 | logger.debug(f'Output: {res}') 81 | return res 82 | 83 | 84 | # ------------------ 85 | # Clean up LLM input 86 | # ------------------ 87 | def sanitize_input( 88 | str_input: str 89 | ): 90 | # Strip single quotes that cause output JSON issues 91 | str_input = str_input.replace("'", "") 92 | 93 | logger.debug(f'Input: {str_input}') 94 | return str_input 95 | 96 | -------------------------------------------------------------------------------- /app/db/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM ankane/pgvector:latest 2 | 3 | # Copy in the load-extensions script 4 | COPY ./create_db.sh /docker-entrypoint-initdb.d/ 5 | RUN chmod +x /docker-entrypoint-initdb.d/create_db.sh -------------------------------------------------------------------------------- /app/db/create_db.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # This runs one time, when Postgres is first initialized 4 | 5 | echo "Creating database ${DB_NAME} and user..." 6 | 7 | # Create the user and database "api" 8 | 9 | psql -v ON_ERROR_STOP=1 --username "$POSTGRES_USER" <<-EOSQL 10 | CREATE USER ${DB_USER} WITH ENCRYPTED PASSWORD '${DB_PASSWORD}'; 11 | CREATE DATABASE ${DB_NAME} OWNER ${DB_USER}; 12 | GRANT ALL PRIVILEGES ON DATABASE ${DB_NAME} TO ${DB_USER}; 13 | EOSQL -------------------------------------------------------------------------------- /app/rasa-credentials/.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd -------------------------------------------------------------------------------- /app/rasa-credentials/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM python:3.8-slim 2 | 3 | WORKDIR /app/rasa-credentials 4 | COPY . .
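# NOTE: copying the whole context before pip install rebuilds the dependency layer on every source change; a cache-friendlier sketch would COPY requirements.txt first, RUN pip install, then COPY the rest of the source.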
5 | 6 | # Add CURL 7 | RUN apt-get -y update 8 | RUN apt-get -y install curl 9 | 10 | # Install python dependencies 11 | RUN pip install --no-cache-dir -r requirements.txt 12 | 13 | # Expose the port 14 | EXPOSE 8889 15 | 16 | ENTRYPOINT ["uvicorn", "main:app", "--host", "rasa-credentials", "--port", "8889", "--reload"] -------------------------------------------------------------------------------- /app/rasa-credentials/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import ( 2 | HTTPException, 3 | FastAPI, 4 | Depends, 5 | ) 6 | import requests 7 | import logging 8 | import asyncio 9 | import httpx 10 | import yaml 11 | import sys 12 | import os 13 | 14 | # --------- 15 | # Constants 16 | # --------- 17 | CREDENTIALS_READY = False 18 | RETRY_LIMIT = 10 19 | RETRY_INTERVAL = 15 20 | 21 | # ---------------- 22 | # Environment vars 23 | # ---------------- 24 | NGROK_HOST = os.getenv("NGROK_HOST", "ngrok") 25 | NGROK_PORT = os.getenv("NGROK_PORT", 4040) 26 | NGROK_INTERNAL_WEBHOOK_HOST = os.getenv("NGROK_INTERNAL_WEBHOOK_HOST", "rasa-core") 27 | NGROK_INTERNAL_WEBHOOK_PORT = os.getenv("NGROK_INTERNAL_WEBHOOK_PORT", 5005) 28 | NGROK_API_URL = f"http://{NGROK_HOST}:{NGROK_PORT}" 29 | TELEGRAM_ACCESS_TOKEN = os.getenv("TELEGRAM_ACCESS_TOKEN", None) 30 | TELEGRAM_BOTNAME = os.getenv("TELEGRAM_BOTNAME", None) 31 | CREDENTIALS_PATH = os.getenv("CREDENTIALS_PATH", "/app/rasa/credentials.yml") 32 | 33 | # ------- 34 | # Logging 35 | # ------- 36 | logging.basicConfig(stream=sys.stdout, level=logging.DEBUG) 37 | logger = logging.getLogger(__name__) 38 | logger.debug( 39 | f"NGROK_HOST: {NGROK_HOST}:{NGROK_PORT}\nNGROK_API_URL: {NGROK_API_URL}\nNGROK_INTERNAL_WEBHOOK_HOST: {NGROK_INTERNAL_WEBHOOK_HOST}:{NGROK_INTERNAL_WEBHOOK_PORT}" 40 | ) 41 | 42 | 43 | # --------------------------------- 44 | # Wait for ngrok API to come online 45 | # --------------------------------- 46 | async def wait_for_ngrok_api(): 47 | 48 | while True: 49 | try: 50 | async with httpx.AsyncClient() as client: 51 | response = await client.get(f"{NGROK_API_URL}/api/tunnels") 52 | response.raise_for_status() 53 | logger.debug("ngrok API is online.") 54 | return True 55 | except httpx.RequestError: 56 | logger.debug("ngrok API is offline. 
Waiting...") 57 | await asyncio.sleep(RETRY_INTERVAL) 58 | 59 | 60 | # ------------------------------------- 61 | # Fetch list of active tunnels on ngrok 62 | # ------------------------------------- 63 | async def get_active_tunnels(): 64 | try: 65 | response = requests.get(f"{NGROK_API_URL}/api/tunnels") 66 | response.raise_for_status() 67 | tunnels = response.json()["tunnels"] 68 | except requests.exceptions.HTTPError: 69 | tunnels = [] 70 | return tunnels 71 | 72 | 73 | # ----------------- 74 | # Stop ngrok tunnel 75 | # ----------------- 76 | async def stop_tunnel(tunnel): 77 | tunnel_id = tunnel["name"] 78 | response = requests.delete(f"{NGROK_API_URL}/api/tunnels/{tunnel_id}") 79 | response.raise_for_status() 80 | 81 | 82 | # ---------------------- 83 | # Stop all ngrok tunnels 84 | # ---------------------- 85 | async def stop_all_tunnels(): 86 | active_tunnels = await get_active_tunnels() 87 | if not active_tunnels: 88 | logger.debug("No active tunnels found.") 89 | else: 90 | for tunnel in active_tunnels: 91 | logger.debug(f"Stopping tunnel: {tunnel['name']} ({tunnel['public_url']})") 92 | await stop_tunnel(tunnel) 93 | 94 | 95 | # ------------------------------------- 96 | # Get the first ngrok tunnel w/ retries 97 | # ------------------------------------- 98 | async def get_tunnel(retry=0): 99 | if retry > RETRY_LIMIT: 100 | raise Exception( 101 | f"Could not create ngrok tunnel. Exceed retry limit of {RETRY_LIMIT} attempts." 102 | ) 103 | 104 | active_tunnels = await get_active_tunnels() 105 | if len(active_tunnels) == 0: 106 | logger.debug(f"No active tunnels found. Trying again in {RETRY_INTERVAL}s..") 107 | await asyncio.sleep(RETRY_INTERVAL) 108 | retry += 1 109 | return await get_tunnel(retry=retry) 110 | else: 111 | return active_tunnels[0]["public_url"] 112 | 113 | 114 | # ------------------- 115 | # Create ngrok tunnel 116 | # ------------------- 117 | async def create_tunnel(): 118 | response = requests.post( 119 | f"{NGROK_API_URL}/api/tunnels", 120 | json={ 121 | "addr": f"{NGROK_INTERNAL_WEBHOOK_HOST}:{NGROK_INTERNAL_WEBHOOK_PORT}", 122 | "proto": "http", 123 | "name": NGROK_INTERNAL_WEBHOOK_HOST, 124 | }, 125 | ) 126 | try: 127 | response.raise_for_status() 128 | return response.json()["public_url"] 129 | except requests.exceptions.HTTPError as e: 130 | logger.warning(f"Error creating ngrok tunnel: {e}") 131 | return False 132 | 133 | 134 | # ---------------------------- 135 | # Update Rasa credentials file 136 | # ---------------------------- 137 | async def update_credentials_file(ngrok_url): 138 | global CREDENTIALS_READY 139 | try: 140 | with open(CREDENTIALS_PATH, "r") as file: 141 | credentials = yaml.safe_load(file) 142 | 143 | credentials["custom_telegram.CustomTelegramInput"][ 144 | "webhook_url" 145 | ] = f"{ngrok_url}/webhooks/telegram/webhook" 146 | credentials["custom_telegram.CustomTelegramInput"][ 147 | "access_token" 148 | ] = TELEGRAM_ACCESS_TOKEN 149 | credentials["custom_telegram.CustomTelegramInput"]["verify"] = TELEGRAM_BOTNAME 150 | 151 | with open(CREDENTIALS_PATH, "w") as file: 152 | yaml.safe_dump(credentials, file) 153 | 154 | CREDENTIALS_READY = True 155 | except Exception as e: 156 | logger.warning(f"Error updating {CREDENTIALS_PATH}: {e}") 157 | sys.exit(1) 158 | 159 | 160 | # ----------------- 161 | # FastAPI endpoints 162 | # ----------------- 163 | 164 | app = FastAPI() 165 | 166 | 167 | # ------------- 168 | # Startup event 169 | # ------------- 170 | @app.on_event("startup") 171 | async def startup_event(): 172 | env = 
os.getenv("ENV", None) 173 | if env and env.lower() in ["dev", "development", "local"]: 174 | await wait_for_ngrok_api() 175 | url = await get_tunnel() 176 | if not url: 177 | logger.debug("No active tunnels found. Creating one...") 178 | url = await create_tunnel() 179 | logger.debug(f"Tunnel url: {url}") 180 | await update_credentials_file(url) 181 | else: 182 | logger.debug("Not in dev environment. Skipping.") 183 | 184 | 185 | # --------------------- 186 | # Endpoint dependencies 187 | # --------------------- 188 | async def check_endpoint_availability(): 189 | if not CREDENTIALS_READY: 190 | raise HTTPException(status_code=403, detail="Endpoint not available yet") 191 | return True 192 | 193 | 194 | # --------------------- 195 | # Health check endpoint 196 | # --------------------- 197 | # This endpoint is used by docker-compose to check if the 198 | # container is ready. If it is ready, Rasa core can start 199 | @app.get("/", dependencies=[Depends(check_endpoint_availability)]) 200 | async def health_check(): 201 | return {"status": "ok"} 202 | -------------------------------------------------------------------------------- /app/rasa-credentials/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv 2 | rasa-sdk 3 | requests 4 | pyyaml 5 | uvicorn[standard] 6 | fastapi[all] 7 | httpx 8 | asyncio -------------------------------------------------------------------------------- /app/rasa/.dockerignore: -------------------------------------------------------------------------------- 1 | __pycache__ 2 | *.pyc 3 | *.pyo 4 | *.pyd -------------------------------------------------------------------------------- /app/rasa/actions/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM khalosa/rasa-aarch64:3.5.2 2 | 3 | COPY . 
/app 4 | 5 | WORKDIR /app 6 | RUN pip install python-dotenv rasa-sdk requests 7 | EXPOSE 5055 8 | 9 | CMD ["run", "actions", "--debug"] -------------------------------------------------------------------------------- /app/rasa/actions/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/app/rasa/actions/__init__.py -------------------------------------------------------------------------------- /app/rasa/actions/actions.py: -------------------------------------------------------------------------------- 1 | from rasa_sdk.executor import CollectingDispatcher 2 | from typing import Any, Text, Dict, List 3 | from rasa_sdk import Action, Tracker 4 | from dotenv import load_dotenv 5 | from logging import getLogger 6 | from enum import IntEnum 7 | import os 8 | 9 | logger = getLogger(__name__) 10 | 11 | env = os.getenv("ENV", "local") 12 | env_file = f".env-{env}" 13 | load_dotenv(dotenv_path=f"../../.env-{env}") 14 | 15 | 16 | MODEL_NAME = os.getenv("MODEL_NAME") 17 | CHANNEL_TYPE = IntEnum( 18 | "CHANNEL_TYPE", ["SMS", "TELEGRAM", "WHATSAPP", "EMAIL", "WEBSITE"] 19 | ) 20 | 21 | logger = getLogger(__name__) 22 | 23 | # ------------------------------------------------- 24 | # Custom Rasa action to trigger our RasaGPT LLM API 25 | # ------------------------------------------------- 26 | class ActionGPTFallback(Action): 27 | def name(self) -> str: 28 | return "action_gpt_fallback" 29 | 30 | def get_channel(self, channel: str) -> CHANNEL_TYPE: 31 | if channel == "telegram": 32 | return CHANNEL_TYPE.TELEGRAM 33 | elif channel == "whatsapp": 34 | return CHANNEL_TYPE.WHATSAPP 35 | elif channel == "sms": 36 | return CHANNEL_TYPE.SMS 37 | elif channel == "email": 38 | return CHANNEL_TYPE.EMAIL 39 | else: 40 | return CHANNEL_TYPE.WEBSITE 41 | 42 | def run( 43 | self, 44 | dispatcher: CollectingDispatcher, 45 | tracker: Tracker, 46 | domain: Dict[Text, Any], 47 | ) -> List[Dict[Text, Any]]: 48 | # ------------ 49 | # Get metadata 50 | # ------------ 51 | data = tracker.latest_message 52 | metadata = data['metadata'] if data and 'metadata' in data else None 53 | response = metadata['response'] if metadata and 'response' in metadata else None 54 | tags = metadata['tags'] if metadata and 'tags' in metadata else None 55 | is_escalate = ( 56 | metadata['is_escalate'] if metadata and 'is_escalate' in metadata else None 57 | ) 58 | 59 | # ----------------- 60 | # Escalate to human 61 | # ----------------- 62 | if is_escalate is True: 63 | response = f'{response} \n\n ⚠️💁 [ESCALATE TO HUMAN]' 64 | 65 | # ----------------------- 66 | # Labels generated by LLM 67 | # ----------------------- 68 | if tags is not None: 69 | response = f'{response} \n\n 🏷️ {",".join(tags)}' 70 | 71 | logger.debug( 72 | f"""[🤖 ActionGPTFallback] 73 | data: {data} 74 | metadata: {metadata} 75 | response: {response} 76 | tags: {tags} 77 | is_escalate: {is_escalate} 78 | """ 79 | ) 80 | dispatcher.utter_message(text=response) 81 | return [] 82 | -------------------------------------------------------------------------------- /app/rasa/config.yml: -------------------------------------------------------------------------------- 1 | recipe: default.v1 2 | assistant_id: 20230303-132941-slow-interest 3 | language: en 4 | policies: 5 | - name: RulePolicy 6 | -------------------------------------------------------------------------------- /app/rasa/credentials.yml: 
-------------------------------------------------------------------------------- 1 | custom_telegram.CustomTelegramInput: 2 | access_token: null 3 | verify: null 4 | webhook_url: null 5 | rasa: 6 | url: http://rasa-core:5005/webhook 7 | rest: null 8 | -------------------------------------------------------------------------------- /app/rasa/custom_telegram.py: -------------------------------------------------------------------------------- 1 | from rasa.core.channels.telegram import TelegramInput 2 | from rasa.shared.utils import common as rasa_common 3 | from typing import Any, Dict, Optional, Text 4 | from sanic.request import Request 5 | 6 | ''' 7 | The purpose of this script is to extend TelegramInput to allow for custom metadata to be passed to Rasa. 8 | ''' 9 | 10 | 11 | class CustomTelegramInput(TelegramInput): 12 | def get_metadata(self, request: Request) -> Optional[Dict[Text, Any]]: 13 | 14 | # For whatever reason, Rasa is unable to pass data via 'metadata' so 'meta' works for now 15 | metadata = request.json.get('message', {}).get('meta') 16 | 17 | # Debug 18 | rasa_common.logger.debug(f'[🤖 ActionGPTFallback]\nmetadata: {metadata}') 19 | return metadata if metadata is not None else None 20 | -------------------------------------------------------------------------------- /app/rasa/data/nlu.yml: -------------------------------------------------------------------------------- 1 | version: "3.1" 2 | 3 | nlu: 4 | - intent: greet 5 | examples: | 6 | - hey 7 | - hello 8 | - hi 9 | - hello there 10 | - good morning 11 | - good evening 12 | - moin 13 | - hey there 14 | - let's go 15 | - hey dude 16 | - goodmorning 17 | - goodevening 18 | - good afternoon 19 | 20 | - intent: out_of_scope 21 | examples: | 22 | - I want to order food 23 | - What is 2 + 2? 24 | - Who's the US President? 25 | 26 | -------------------------------------------------------------------------------- /app/rasa/data/rules.yml: -------------------------------------------------------------------------------- 1 | version: "3.1" 2 | 3 | rules: 4 | 5 | - rule: out-of-scope 6 | steps: 7 | - intent: out_of_scope 8 | - action: action_gpt_fallback 9 | -------------------------------------------------------------------------------- /app/rasa/data/stories.yml: -------------------------------------------------------------------------------- 1 | version: "3.1" 2 | 3 | stories: 4 | 5 | - story: happy path 6 | steps: 7 | - intent: greet 8 | - action: utter_greet 9 | 10 | -------------------------------------------------------------------------------- /app/rasa/domain.yml: -------------------------------------------------------------------------------- 1 | version: "3.1" 2 | 3 | 4 | slots: 5 | results: 6 | type: text 7 | mappings: 8 | - type: custom 9 | initial_value: 10 | action: action_gpt_fallback 11 | 12 | intents: 13 | - greet 14 | - out_of_scope 15 | 16 | responses: 17 | utter_greet: 18 | - text: "Hey! How are you?" 19 | 20 | actions: 21 | - action_gpt_fallback 22 | 23 | 24 | session_config: 25 | session_expiration_time: 60 26 | carry_over_slots_to_new_session: true 27 | -------------------------------------------------------------------------------- /app/rasa/endpoints.yml: -------------------------------------------------------------------------------- 1 | # This file contains the different endpoints your bot can use. 2 | 3 | # Server where the models are pulled from. 
4 | # https://rasa.com/docs/rasa/model-storage#fetching-models-from-a-server 5 | 6 | #models: 7 | # url: http://my-server.com/models/default_core@latest 8 | # wait_time_between_pulls: 10 # [optional](default: 100) 9 | 10 | # Server which runs your custom actions. 11 | # https://rasa.com/docs/rasa/custom-actions 12 | 13 | action_endpoint: 14 | url: "http://rasa-actions:5055/webhook" 15 | 16 | # Tracker store which is used to store the conversations. 17 | # By default the conversations are stored in memory. 18 | # https://rasa.com/docs/rasa/tracker-stores 19 | 20 | #tracker_store: 21 | # type: redis 22 | # url: 23 | # port: 24 | # db: 25 | # password: 26 | # use_ssl: 27 | 28 | #tracker_store: 29 | # type: mongod 30 | # url: 31 | # db: 32 | # username: 33 | # password: 34 | 35 | # Event broker which all conversation events should be streamed to. 36 | # https://rasa.com/docs/rasa/event-brokers 37 | 38 | #event_broker: 39 | # url: localhost 40 | # username: username 41 | # password: password 42 | # queue: queue 43 | -------------------------------------------------------------------------------- /app/rasa/ngrok.yml: -------------------------------------------------------------------------------- 1 | region: us 2 | version: 2 3 | log_level: debug 4 | web_addr: ngrok:4040 5 | console_ui: true 6 | tunnels: 7 | chat_api: 8 | proto: http 9 | addr: api:8888 10 | connect_timeout: 30s 11 | heartbeat_interval: 1m 12 | heartbeat_tolerance: 5s -------------------------------------------------------------------------------- /app/rasa/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv 2 | rasa-sdk 3 | requests 4 | pyyaml -------------------------------------------------------------------------------- /app/rasa/rules.yml: -------------------------------------------------------------------------------- 1 | rules: 2 | 3 | # - rule: Fallback rule 4 | # condition: 5 | # - active_loop: null 6 | # steps: 7 | # - intent: nlu_fallback 8 | # - action: action_gpt_fallback -------------------------------------------------------------------------------- /app/scripts/wait-for-it.sh: -------------------------------------------------------------------------------- 1 | #!/usr/bin/env bash 2 | # Use this script to test if a given TCP host/port are available 3 | 4 | # -------------- 5 | # wait-for-it.sh 6 | # -------------- 7 | # Original author @vishubob on GH https://raw.githubusercontent.com/vishnubob/wait-for-it/master/wait-for-it.sh 8 | # Modified for HTTP OK 200 support by @paulpierre 9 | 10 | 11 | WAITFORIT_cmdname=${0##*/} 12 | 13 | echoerr() { if [[ $WAITFORIT_QUIET -ne 1 ]]; then echo "$@" 1>&2; fi } 14 | 15 | usage() 16 | { 17 | cat << USAGE >&2 18 | Usage: 19 | $WAITFORIT_cmdname host:port [-s] [-t timeout] [-- command args] 20 | -h HOST | --host=HOST Host or IP under test 21 | -p PORT | --port=PORT TCP port under test 22 | Alternatively, you specify the host and port as host:port 23 | -s | --strict Only execute subcommand if the test succeeds 24 | -q | --quiet Don't output any status messages 25 | -t TIMEOUT | --timeout=TIMEOUT 26 | Timeout in seconds, zero for no timeout 27 | -- COMMAND ARGS Execute command with args after the test finishes 28 | USAGE 29 | exit 1 30 | } 31 | wait_for() 32 | { 33 | local proto="tcp" 34 | if [[ $WAITFORIT_HTTP_OK -eq 1 ]]; then 35 | proto="http" 36 | fi 37 | 38 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 39 | echoerr "$WAITFORIT_cmdname: waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT to be 
available ($proto)" 40 | else 41 | echoerr "$WAITFORIT_cmdname: waiting for $WAITFORIT_HOST:$WAITFORIT_PORT to be available ($proto) without a timeout" 42 | fi 43 | 44 | WAITFORIT_start_ts=$(date +%s) 45 | while : 46 | do 47 | if [[ $WAITFORIT_HTTP_OK -eq 1 ]]; then 48 | response=$(echo -e "HEAD / HTTP/1.1\r\nHost: $WAITFORIT_HOST:$WAITFORIT_PORT\r\nConnection: close\r\n\r\n" | nc -w 5 $WAITFORIT_HOST $WAITFORIT_PORT | head -n1) 49 | if [[ $response =~ "200 OK" ]]; then 50 | WAITFORIT_result=0 51 | else 52 | WAITFORIT_result=1 53 | fi 54 | else 55 | (echo -n > /dev/$proto/$WAITFORIT_HOST/$WAITFORIT_PORT) >/dev/null 2>&1 56 | WAITFORIT_result=$? 57 | fi 58 | 59 | if [[ $WAITFORIT_result -eq 0 ]]; then 60 | WAITFORIT_end_ts=$(date +%s) 61 | echoerr "$WAITFORIT_cmdname: $WAITFORIT_HOST:$WAITFORIT_PORT is available after $((WAITFORIT_end_ts - WAITFORIT_start_ts)) seconds" 62 | break 63 | fi 64 | sleep 1 65 | done 66 | return $WAITFORIT_result 67 | } 68 | 69 | wait_for_wrapper() 70 | { 71 | # In order to support SIGINT during timeout: http://unix.stackexchange.com/a/57692 72 | if [[ $WAITFORIT_QUIET -eq 1 ]]; then 73 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --quiet --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 74 | else 75 | timeout $WAITFORIT_BUSYTIMEFLAG $WAITFORIT_TIMEOUT $0 --child --host=$WAITFORIT_HOST --port=$WAITFORIT_PORT --timeout=$WAITFORIT_TIMEOUT & 76 | fi 77 | WAITFORIT_PID=$! 78 | trap "kill -INT -$WAITFORIT_PID" INT 79 | wait $WAITFORIT_PID 80 | WAITFORIT_RESULT=$? 81 | if [[ $WAITFORIT_RESULT -ne 0 ]]; then 82 | echoerr "$WAITFORIT_cmdname: timeout occurred after waiting $WAITFORIT_TIMEOUT seconds for $WAITFORIT_HOST:$WAITFORIT_PORT" 83 | fi 84 | return $WAITFORIT_RESULT 85 | } 86 | 87 | # process arguments 88 | while [[ $# -gt 0 ]] 89 | do 90 | case "$1" in 91 | *:* ) 92 | WAITFORIT_hostport=(${1//:/ }) 93 | WAITFORIT_HOST=${WAITFORIT_hostport[0]} 94 | WAITFORIT_PORT=${WAITFORIT_hostport[1]} 95 | shift 1 96 | ;; 97 | -o | --http-ok) 98 | WAITFORIT_HTTP_OK=1 99 | shift 1 100 | ;; 101 | --child) 102 | WAITFORIT_CHILD=1 103 | shift 1 104 | ;; 105 | -q | --quiet) 106 | WAITFORIT_QUIET=1 107 | shift 1 108 | ;; 109 | -s | --strict) 110 | WAITFORIT_STRICT=1 111 | shift 1 112 | ;; 113 | -h) 114 | WAITFORIT_HOST="$2" 115 | if [[ $WAITFORIT_HOST == "" ]]; then break; fi 116 | shift 2 117 | ;; 118 | --host=*) 119 | WAITFORIT_HOST="${1#*=}" 120 | shift 1 121 | ;; 122 | -p) 123 | WAITFORIT_PORT="$2" 124 | if [[ $WAITFORIT_PORT == "" ]]; then break; fi 125 | shift 2 126 | ;; 127 | --port=*) 128 | WAITFORIT_PORT="${1#*=}" 129 | shift 1 130 | ;; 131 | -t) 132 | WAITFORIT_TIMEOUT="$2" 133 | if [[ $WAITFORIT_TIMEOUT == "" ]]; then break; fi 134 | shift 2 135 | ;; 136 | --timeout=*) 137 | WAITFORIT_TIMEOUT="${1#*=}" 138 | shift 1 139 | ;; 140 | --) 141 | shift 142 | WAITFORIT_CLI=("$@") 143 | break 144 | ;; 145 | --help) 146 | usage 147 | ;; 148 | *) 149 | echoerr "Unknown argument: $1" 150 | usage 151 | ;; 152 | esac 153 | done 154 | 155 | if [[ "$WAITFORIT_HOST" == "" || "$WAITFORIT_PORT" == "" ]]; then 156 | echoerr "Error: you need to provide a host and port to test." 157 | usage 158 | fi 159 | 160 | WAITFORIT_TIMEOUT=${WAITFORIT_TIMEOUT:-15} 161 | WAITFORIT_STRICT=${WAITFORIT_STRICT:-0} 162 | WAITFORIT_CHILD=${WAITFORIT_CHILD:-0} 163 | WAITFORIT_QUIET=${WAITFORIT_QUIET:-0} 164 | WAITFORIT_HTTP_OK=${WAITFORIT_HTTP_OK:-0} 165 | 166 | 167 | # Check to see if timeout is from busybox? 
168 | WAITFORIT_TIMEOUT_PATH=$(type -p timeout) 169 | WAITFORIT_TIMEOUT_PATH=$(realpath $WAITFORIT_TIMEOUT_PATH 2>/dev/null || readlink -f $WAITFORIT_TIMEOUT_PATH) 170 | 171 | WAITFORIT_BUSYTIMEFLAG="" 172 | if [[ $WAITFORIT_TIMEOUT_PATH =~ "busybox" ]]; then 173 | WAITFORIT_ISBUSY=1 174 | # Check if busybox timeout uses -t flag 175 | # (recent Alpine versions don't support -t anymore) 176 | if timeout &>/dev/stdout | grep -q -e '-t '; then 177 | WAITFORIT_BUSYTIMEFLAG="-t" 178 | fi 179 | else 180 | WAITFORIT_ISBUSY=0 181 | fi 182 | 183 | if [[ $WAITFORIT_CHILD -gt 0 ]]; then 184 | wait_for 185 | WAITFORIT_RESULT=$? 186 | exit $WAITFORIT_RESULT 187 | else 188 | if [[ $WAITFORIT_TIMEOUT -gt 0 ]]; then 189 | wait_for_wrapper 190 | WAITFORIT_RESULT=$? 191 | else 192 | wait_for 193 | WAITFORIT_RESULT=$? 194 | fi 195 | fi 196 | 197 | if [[ $WAITFORIT_CLI != "" ]]; then 198 | if [[ $WAITFORIT_RESULT -ne 0 && $WAITFORIT_STRICT -eq 1 ]]; then 199 | echoerr "$WAITFORIT_cmdname: strict mode, refusing to execute subprocess" 200 | exit $WAITFORIT_RESULT 201 | fi 202 | exec "${WAITFORIT_CLI[@]}" 203 | else 204 | exit $WAITFORIT_RESULT 205 | fi -------------------------------------------------------------------------------- /docker-compose.yml: -------------------------------------------------------------------------------- 1 | # ------------------------------------- 2 | # ▒█▀▀█ █▀▀█ █▀▀ █▀▀█ ▒█▀▀█ ▒█▀▀█ ▀▀█▀▀ 3 | # ▒█▄▄▀ █▄▄█ ▀▀█ █▄▄█ ▒█░▄▄ ▒█▄▄█ ░▒█░░ 4 | # ▒█░▒█ ▀░░▀ ▀▀▀ ▀░░▀ ▒█▄▄█ ▒█░░░ ░▒█░░ 5 | # +-----------------------------------+ 6 | # | http://RasaGPT.dev by @paulpierre | 7 | # +-----------------------------------+ 8 | 9 | version: '3.9' 10 | 11 | services: 12 | 13 | # ------------------- 14 | # API service for LLM 15 | # ------------------- 16 | api: 17 | build: 18 | context: ./app/api 19 | restart: always 20 | container_name: chat_api 21 | env_file: 22 | - .env 23 | ports: 24 | - 8888:8888 25 | healthcheck: 26 | test: ["CMD", "curl", "-f", "http://api:8888/health"] 27 | interval: 15s 28 | retries: 5 29 | depends_on: 30 | - db 31 | networks: 32 | - chat-network 33 | volumes: 34 | - ./app/scripts/wait-for-it.sh:/app/api/wait-for-it.sh 35 | - ./app/api:/app/api 36 | 37 | 38 | # ------------------- 39 | # Ngrok agent service 40 | # ------------------- 41 | ngrok: 42 | image: ngrok/ngrok:latest 43 | container_name: chat_ngrok 44 | ports: 45 | - 4040:4040 46 | env_file: 47 | - .env 48 | environment: 49 | NGROK_CONFIG: /etc/ngrok.yml 50 | NGROK_AUTH_TOKEN: ${NGROK_AUTH_TOKEN:-} 51 | NGROK_DEBUG: ${NGROK_DEBUG:-true} 52 | NGROK_API_KEY: ${NGROK_API_KEY:-} 53 | networks: 54 | - chat-network 55 | volumes: 56 | - ./app/rasa/ngrok.yml:/etc/ngrok.yml 57 | restart: unless-stopped 58 | 59 | 60 | # ----------------- 61 | # Core Rasa service 62 | # ----------------- 63 | rasa-core: 64 | image: khalosa/rasa-aarch64:3.5.2 65 | container_name: chat_rasa_core 66 | env_file: 67 | - .env 68 | volumes: 69 | - ./app/rasa:/app 70 | - ./app/scripts/wait-for-it.sh:/app/wait-for-it.sh 71 | 72 | ports: 73 | - 5005:5005 74 | entrypoint: ["/bin/bash", "-c", "chmod +x /app/wait-for-it.sh && /app/wait-for-it.sh rasa-credentials:8889 -t 120 -o && rasa run --enable-api --cors '*' --debug --credentials /app/credentials.yml --endpoints /app/endpoints.yml --model /app/models"] 75 | networks: 76 | - chat-network 77 | depends_on: 78 | - rasa-actions 79 | - rasa-credentials 80 | 81 | 82 | # -------------------- 83 | # Rasa actions service 84 | # -------------------- 85 | rasa-actions: 86 | build: 87 | context: ./app/rasa 88 | 
dockerfile: ./actions/Dockerfile 89 | container_name: chat_rasa_actions 90 | env_file: 91 | - .env 92 | ports: 93 | - 5055:5055 94 | depends_on: 95 | - rasa-credentials 96 | networks: 97 | - chat-network 98 | 99 | 100 | # ------------------------------- 101 | # Rasa credentials helper service 102 | # ------------------------------- 103 | rasa-credentials: 104 | build: 105 | context: ./app/rasa-credentials 106 | dockerfile: Dockerfile 107 | container_name: chat_rasa_credentials 108 | volumes: 109 | - ./app/rasa:/app/rasa 110 | - ./app/rasa-credentials:/app/rasa-credentials 111 | ports: 112 | - 8889:8889 113 | env_file: 114 | - .env 115 | networks: 116 | - chat-network 117 | healthcheck: 118 | test: ["CMD", "curl", "-f", "http://rasa-credentials:8889"] 119 | interval: 15s 120 | retries: 5 121 | 122 | 123 | # ------------------------- 124 | # Postgres database service 125 | # ------------------------- 126 | db: 127 | build: 128 | context: ./app/db 129 | container_name: chat_db 130 | env_file: 131 | - .env 132 | environment: 133 | POSTGRES_USER: ${POSTGRES_USER:-postgres} 134 | POSTGRES_PASSWORD: ${POSTGRES_PASSWORD:-postgres} 135 | volumes: 136 | - ./mnt/db:/var/lib/postgresql/data 137 | ports: 138 | - "5432:5432" 139 | healthcheck: 140 | test: ["CMD", "pg_isready", "-U", "postgres"] 141 | interval: 5s 142 | retries: 5 143 | networks: 144 | - chat-network 145 | 146 | 147 | # -------------------------------- 148 | # PgAdmin database browser service 149 | # -------------------------------- 150 | pgadmin: 151 | container_name: chat_pgadmin 152 | image: dpage/pgadmin4 153 | environment: 154 | PGADMIN_DEFAULT_EMAIL: ${PGADMIN_DEFAULT_EMAIL:-pgadmin4@pgadmin.org} 155 | PGADMIN_DEFAULT_PASSWORD: ${PGADMIN_DEFAULT_PASSWORD:-admin} 156 | PGADMIN_CONFIG_SERVER_MODE: 'False' 157 | volumes: 158 | - ./mnt/pgadmin:/var/lib/pgadmin 159 | ports: 160 | - "${PGADMIN_PORT:-5050}:80" 161 | restart: unless-stopped 162 | depends_on: 163 | - db 164 | networks: 165 | - chat-network 166 | 167 | 168 | # ---------------------------- 169 | # Container log viewer service 170 | # ---------------------------- 171 | dozzle: 172 | container_name: chat_dozzle 173 | image: amir20/dozzle:latest 174 | volumes: 175 | - /var/run/docker.sock:/var/run/docker.sock 176 | ports: 177 | - 9999:8080 178 | depends_on: 179 | - db 180 | 181 | networks: 182 | chat-network: 183 | driver: bridge 184 | -------------------------------------------------------------------------------- /github/api.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/api.png -------------------------------------------------------------------------------- /github/cli_install.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/cli_install.png -------------------------------------------------------------------------------- /github/container_logs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/container_logs.png -------------------------------------------------------------------------------- /github/documents.png: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/documents.png -------------------------------------------------------------------------------- /github/makefile-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/makefile-1.png -------------------------------------------------------------------------------- /github/makefile-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/makefile-2.png -------------------------------------------------------------------------------- /github/ngrok-admin.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/ngrok-admin.png -------------------------------------------------------------------------------- /github/org-projects.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/org-projects.png -------------------------------------------------------------------------------- /github/orgs.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/orgs.png -------------------------------------------------------------------------------- /github/rasagpt-banner.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-banner.png -------------------------------------------------------------------------------- /github/rasagpt-icon-200x200.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-icon-200x200.png -------------------------------------------------------------------------------- /github/rasagpt-logo-1.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-logo-1.png -------------------------------------------------------------------------------- /github/rasagpt-logo-2.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-logo-2.png -------------------------------------------------------------------------------- /github/rasagpt-video-title-screen.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/rasagpt-video-title-screen.png -------------------------------------------------------------------------------- /github/telegram.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/telegram.png 
-------------------------------------------------------------------------------- /github/title.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/paulpierre/RasaGPT/2f1051031a3883397879ae113ac9dd35b4c8d0cf/github/title.png --------------------------------------------------------------------------------
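Usage sketch: once the containers are up and seed.py has populated the database, the match_node_<strategy> functions created in app/api/models.py can be queried directly. A minimal sketch, run from app/api, assuming the database is seeded and that DISTANCE_STRATEGIES registers a strategy named "euclidean" (matching LLM_DEFAULT_DISTANCE_STRATEGY in .env-example):

    from sqlalchemy import text
    from sqlmodel import Session
    from models import get_engine, VECTOR_EMBEDDINGS_COUNT

    # Hypothetical query vector -- in practice this comes back from the
    # embeddings model for the user's message.
    emb = "[" + ",".join(["0.0"] * VECTOR_EMBEDDINGS_COUNT) + "]"

    with Session(get_engine()) as session:
        rows = session.execute(
            text(
                "select * from match_node_euclidean("
                "cast(:emb as vector), :threshold, :count)"
            ),
            # 0.2 and 3 mirror LLM_DISTANCE_THRESHOLD and LLM_MIN_NODE_LIMIT
            {"emb": emb, "threshold": 0.2, "count": 3},
        ).fetchall()
        for node_uuid, node_text, similarity in rows:
            print(f"{similarity:.3f}  {node_text[:80]}")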