├── nginx ├── Dockerfile └── default.conf ├── run.sh ├── utils ├── google_authenticate.py ├── string_util.py ├── ip_tools.py └── mailersend.py ├── requirements.txt ├── .vscode └── settings.json ├── database ├── connection.py ├── cost_manager.py ├── session_manager.py ├── chat_manager.py └── user_manager.py ├── helpers └── validators.py ├── test.py ├── Dockerfile ├── docker-compose.local.yml ├── config └── settings.py ├── app.py ├── docker-compose.yml ├── views ├── capcha_plugin.py ├── login_view.py ├── signup_view.py └── main_view.py ├── main.py ├── README.md ├── .dockerignore ├── .gitignore └── embeddings └── vector_store.py /nginx/Dockerfile: -------------------------------------------------------------------------------- 1 | FROM nginx:latest 2 | 3 | COPY default.conf /etc/nginx/conf.d -------------------------------------------------------------------------------- /run.sh: -------------------------------------------------------------------------------- 1 | #!/bin/bash 2 | 3 | # Start Streamlit in the background 4 | streamlit run app.py & 5 | 6 | # Start your Python script 7 | python main.py -------------------------------------------------------------------------------- /utils/google_authenticate.py: -------------------------------------------------------------------------------- 1 | from streamlit_google_auth import Authenticate 2 | 3 | authenticator = Authenticate( 4 | secret_credentials_path='./client_secret.json', 5 | cookie_name='rag-system-biscoito', 6 | cookie_key='senha_maluca_12345', 7 | redirect_uri='https://gtrag.bot/', 8 | ) -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit 2 | google-generativeai 3 | python-dotenv 4 | langchain 5 | PyPDF2 6 | chromadb 7 | faiss-cpu 8 | langchain_google_genai 9 | langchain-community 10 | mysql-connector-python 11 | bcrypt 12 | ratelimit 13 | openai 14 | langfuse 15 | 
def is_valid_email(email):
    """Return True when *email* has a plausible ``local@domain.tld`` shape.

    This is a pragmatic format check, not full RFC 5322 validation: the local
    part may contain letters, digits and ``._%+-``; the domain may contain
    letters, digits, dots and hyphens and must end in a dot followed by at
    least two letters.

    Args:
        email: The candidate address. Non-string values are rejected.

    Returns:
        bool: True if the whole string matches the pattern, False otherwise.
    """
    if not isinstance(email, str):
        return False
    email_regex = r'[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}'
    # fullmatch instead of match + "$": "$" also matches just before a
    # trailing newline, so "user@example.com\n" used to be accepted.
    return re.fullmatch(email_regex, email) is not None
find_positions_multiple(text, start_substring, end_substring): 3 | positions = [] 4 | start_idx = 0 5 | while True: 6 | start = text.find(start_substring, start_idx) 7 | if start == -1: 8 | break 9 | end = text.find(end_substring, start + len(start_substring)) 10 | if end == -1: 11 | break 12 | positions.append((start, end + len(end_substring))) # Adjust end position to include end_substring 13 | start_idx = end + len(end_substring) 14 | return positions -------------------------------------------------------------------------------- /test.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from config import settings 3 | openai_client = openai.OpenAI(api_key=settings.OPENAI_API_KEY) 4 | # List all files 5 | files = openai_client.files.list() 6 | print(files) 7 | # Delete each file 8 | for file in files.data: 9 | file_id = file.id 10 | openai_client.files.delete(file_id) 11 | print(f"Deleted file: {file_id}") 12 | 13 | 14 | vectors = openai_client.beta.vector_stores.list() 15 | print(vectors) 16 | 17 | for vector in vectors: 18 | vector_id = vector.id 19 | openai_client.beta.vector_stores.delete( 20 | vector_store_id=vector_id 21 | ) 22 | print(f"Deleted vector: {vector_id}") 23 | -------------------------------------------------------------------------------- /Dockerfile: -------------------------------------------------------------------------------- 1 | # Use an official Python runtime as a parent image 2 | FROM python:3.9-slim 3 | 4 | # Set the working directory in the container 5 | WORKDIR /usr/src/app 6 | 7 | # Copy the requirements file into the container 8 | COPY requirements.txt ./ 9 | 10 | # Install any dependencies specified in requirements.txt 11 | RUN pip install --no-cache-dir -r requirements.txt 12 | 13 | # Copy the rest of the application code into the container 14 | COPY . . 
15 | 16 | # Make port 80 available to the world outside this container 17 | # (Optional, only if your application runs on a specific port) 18 | # EXPOSE 80 19 | 20 | # Define environment variable 21 | # ENV PYTHONUNBUFFERED=1 22 | 23 | # Make the run script executable 24 | RUN chmod +x run.sh 25 | 26 | # Command to run the script 27 | CMD ["./run.sh"] -------------------------------------------------------------------------------- /docker-compose.local.yml: -------------------------------------------------------------------------------- 1 | version: '3.8' 2 | 3 | services: 4 | postgres: 5 | image: postgres:13 6 | container_name: postgres 7 | environment: 8 | POSTGRES_DB: oknoke 9 | POSTGRES_USER: oknoke 10 | POSTGRES_PASSWORD: oknoke 11 | ports: 12 | - "5432:5432" # Expose PostgreSQL port 13 | volumes: 14 | - postgres_data:/var/lib/postgresql/data 15 | 16 | # nginx: 17 | # container_name: nginx 18 | # restart: always 19 | # build: 20 | # context: ./nginx 21 | # dockerfile: Dockerfile 22 | # volumes: 23 | # - /etc/letsencrypt/live/gtrag.bot/fullchain.pem:/etc/letsencrypt/live/gtrag.bot/fullchain.pem 24 | # - /etc/letsencrypt/live/gtrag.bot/privkey.pem:/etc/letsencrypt/live/gtrag.bot/privkey.pem 25 | # ports: 26 | # - "80:80" 27 | # - "443:443" 28 | # depends_on: 29 | # - postgres 30 | 31 | volumes: 32 | postgres_data: {} -------------------------------------------------------------------------------- /config/settings.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | 4 | load_dotenv() 5 | 6 | # LLM Model Information 7 | OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") 8 | 9 | # MySQL Database information 10 | DB_HOST = os.getenv("DB_HOST") 11 | DB_USER = os.getenv("DB_USER") 12 | DB_PASSWORD = os.getenv("DB_PASSWORD") 13 | DB_NAME = os.getenv("DB_NAME") 14 | 15 | # Mailersend Information 16 | MAILERSEND_API_KEY = os.getenv("MAILERSEND_API_KEY") 17 | EMAIL_TEMPLATE_SIGNUP = 
def get_country_name(ip_address: str) -> str:
    """Get country name from IP address using the ip-api.com JSON service.

    Args:
        ip_address: The client IP to resolve. ``None`` or an empty string is
            treated as "no address available".

    Returns:
        The ``country`` field of the service response, or ``"Unknown"`` when
        no address was supplied, the request fails or times out, or the
        response carries no ``country`` key.
    """
    if not ip_address:
        # Nothing to look up: skip the network round-trip entirely. (With an
        # empty query the service would geolocate the caller, i.e. this
        # server, rather than the visitor.)
        return "Unknown"
    try:
        # requests has no default timeout; without one a stalled lookup would
        # block the login flow indefinitely.
        response = requests.get(f"http://ip-api.com/json/{ip_address}", timeout=5)
        data = response.json()
        return data.get("country", "Unknown")
    except Exception:
        # Best-effort enrichment only: any network/parse failure degrades to
        # "Unknown" instead of breaking sign-in.
        return "Unknown"
def signup_mailer(customer_email, verify_token):
    """Send the sign-up verification e-mail through MailerSend.

    Builds the verification link from the module-level ``backend_url`` and
    *verify_token*, then sends the module-level ``signup_template`` to
    *customer_email* with the link exposed to the template as ``verify_id``.

    Args:
        customer_email: Recipient address.
        verify_token: Verification token embedded in the link's query string.

    Returns:
        None. The MailerSend response is printed for diagnostics.
    """
    signup_link = f"{backend_url}/verify-email?token={verify_token}"
    # NOTE: the API key, template id and tokenised link are deliberately NOT
    # printed any more - they are credentials and must not end up in logs.
    mail_body = {"signup_link": signup_link}
    mail_from = {
        "name": "GTRAG",
        "email": "info@gtrag.com",
    }
    recipients = [
        {
            "email": customer_email,
        }
    ]
    personalization = [
        {
            "email": customer_email,
            "data": {
                "verify_id": signup_link
            }
        }
    ]
    # Each setter writes its section into mail_body, which is then sent.
    mailer.set_mail_from(mail_from, mail_body)
    mailer.set_mail_to(recipients, mail_body)
    mailer.set_subject("Please verify your email", mail_body)
    mailer.set_template(signup_template, mail_body)
    mailer.set_personalization(personalization, mail_body)
    response = mailer.send(mail_body)
    print(response)
version: '3.8'

services:
  app:
    build:
      context: .
      dockerfile: Dockerfile
    container_name: Oknoke
    environment:
      - OPENAI_API_KEY=${OPENAI_API_KEY}
      - DB_HOST=postgres
      - DB_USER=oknoke
      - DB_PASSWORD=oknoke
      - DB_NAME=oknoke
      - MAILERSEND_API_KEY=${MAILERSEND_API_KEY}
      - EMAIL_TEMPLATE_SIGNUP=${EMAIL_TEMPLATE_SIGNUP}
      - BACKEND_URL=${BACKEND_URL}
      - PRODUCT_URL=${PRODUCT_URL}
      # In list syntax the quotes of KEY='' are part of the value, so the app
      # would receive the two-character string ''. Interpolate from the host
      # environment instead, defaulting to a truly empty value.
      - RECAPTCHA_SITE_KEY=${RECAPTCHA_SITE_KEY:-}
      - RECAPTCHA_SECRET_KEY=${RECAPTCHA_SECRET_KEY:-}
      - LANGFUSE_PUBLIC_KEY=${LANGFUSE_PUBLIC_KEY}
      - LANGFUSE_SECRET_KEY=${LANGFUSE_SECRET_KEY}
      - LANGFUSE_HOST=${LANGFUSE_HOST}
    volumes:
      - .:/usr/src/app
    # NOTE(review): nginx proxies to app:8000 and app:8501 over the compose
    # network; nothing visible listens on 5000 - confirm this mapping is needed.
    ports:
      - "5000:5000"
    entrypoint: ["./run.sh"]
    # app.py creates its tables at start-up, so the database container must be
    # started first.
    depends_on:
      - postgres

  postgres:
    image: postgres:13
    container_name: postgres
    environment:
      POSTGRES_DB: oknoke
      POSTGRES_USER: oknoke
      POSTGRES_PASSWORD: oknoke
    ports:
      - "5432:5432" # Expose PostgreSQL port
    volumes:
      - postgres_data:/var/lib/postgresql/data

  nginx:
    container_name: nginx
    restart: always
    build:
      context: ./nginx
      dockerfile: Dockerfile
    volumes:
      - /etc/letsencrypt/live/gtrag.bot/fullchain.pem:/etc/letsencrypt/live/gtrag.bot/fullchain.pem
      - /etc/letsencrypt/live/gtrag.bot/privkey.pem:/etc/letsencrypt/live/gtrag.bot/privkey.pem
    ports:
      - "80:80"
      - "443:443"
    depends_on:
      - app
      - postgres

volumes:
  postgres_data: {}
@app.get("/api/verify-email")
async def verify_email(token: str, request: Request):
    """Mark the user that owns *token* as verified and return an HTML page.

    Args:
        token: Verification token taken from the link's query string. It is
            untrusted input and is only ever passed to the database as a
            bound parameter.
        request: The incoming request (unused; kept for interface stability).

    Returns:
        HTMLResponse: A confirmation page with status 200.

    Raises:
        HTTPException: 503 when no database connection could be opened,
            400 when the update fails.
    """
    conn = create_connection()
    if conn is None:
        # Previously this path fell through and answered 200 with a null body.
        raise HTTPException(status_code=503, detail="Database unavailable")

    cursor = None
    try:
        cursor = conn.cursor()
        # Bound parameter instead of f-string interpolation: the token comes
        # straight from the URL, so building the SQL text from it allowed
        # SQL injection.
        cursor.execute(
            "UPDATE users SET status = 'verified' WHERE verification_token = %s",
            (token,),
        )
        conn.commit()
        # NOTE(review): the original markup of this page was not recoverable
        # from the reviewed copy (only the "redirected shortly" text survived).
        # The redirect to PRODUCT_URL is an assumption - restore the original
        # template before merging.
        html_content = f"""
        <html>
            <head>
                <meta http-equiv="refresh" content="3;url={PRODUCT_URL}" />
            </head>
            <body>
                <p>You will be redirected shortly...</p>
            </body>
        </html>
        """
        return HTMLResponse(content=html_content, status_code=200)
    except Exception as e:
        raise HTTPException(status_code=400, detail=f"Error: {e}")
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()
def login_page():
    """Render the sign-in page: Google sign-in, captcha, then e-mail/password.

    Flow, top to bottom:
      1. E-mail and password inputs are drawn.
      2. The Google authenticator is polled; if the session is already
         connected, the Google user is created and logged in, and the script
         reruns.
      3. ``captcha_control()`` is called before the sign-in buttons are drawn.
      4. "Sign In" authenticates the credentials; "Go to Sign Up" switches
         the page.

    NOTE(review): block nesting was inferred (both buttons placed in the
    centre column) - confirm against the original layout.
    """
    st.title("Sign In")
    email = st.text_input("Email")
    password = st.text_input("Password", type="password")
    authenticator.check_authentification()
    authorization_url = authenticator.get_authorization_url()

    # SignIn By Google
    # .get() mirrors signup_page and avoids a KeyError if the flag was never
    # initialised for this session.
    if not st.session_state.get("connected", False):
        st.link_button('Sign In With Google', authorization_url, use_container_width=True)
    else:
        email = st.session_state['user_info'].get('email')
        create_google_user(email)
        init_login_session(email)
        st.rerun()

    # Captcha Component
    captcha_control()

    # Signin By Email and Password
    col1, col2, col3 = st.columns([1, 3, 1])
    with col2:
        if st.button("Sign In", use_container_width=True):
            res = authenticate_user(email, password)
            if res == "Success":
                update_user_country(email, get_remote_country())
                init_login_session(email)
                st.rerun()
            else:
                # Keep the detailed reason in the server log only; show the
                # user a generic message so a failed attempt is not silent
                # and does not reveal which part of the credentials was wrong.
                print(res)
                st.error("Sign in failed. Please check your email and password and try again.")

        if st.button("Go to Sign Up", use_container_width=True):
            st.session_state['page'] = 'signup'
            st.rerun()
34 | ```plaintext 35 | OPENAI_API_KEY=your_openai_api_key 36 | ``` 37 | 38 | ## Usage 🚀 39 | 40 | 1. **Run the Streamlit App**: 41 | ```bash 42 | streamlit run app.py 43 | ``` 44 | 45 | 2. **Interact with the Chatbot**: 46 | - Upload documents via the application interface. 📤 47 | - Engage with the chatbot as it generates insightful responses based on your document contents. 💬 48 | 49 | ## Architecture Overview 🏗️ 50 | 51 | - **Streamlit**: Provides the front-end interface where users can upload documents and interact with the chatbot. 🌐 52 | - **OpenAI Assistant API**: Powers the natural language comprehension and generation. 🧠 53 | - **WebSockets**: Enables real-time, efficient communication between the front-end and back-end services. 📡 54 | 55 | ## Acknowledgements 🙏 56 | 57 | - [OpenAI](https://openai.com) for their incredible API. 58 | - [Streamlit](https://streamlit.io) for the easy-to-use app framework. 59 | -------------------------------------------------------------------------------- /views/signup_view.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from database.user_manager import create_user, verify_user, create_google_user 3 | from helpers.validators import is_valid_email, is_valid_password 4 | from views.capcha_plugin import captcha_control 5 | from views.login_view import init_login_session 6 | from utils.google_authenticate import authenticator 7 | 8 | def signup_page(): 9 | st.title("🔐 Sign Up") 10 | 11 | new_email = st.text_input("Email") 12 | new_password = st.text_input("New Password", type='password') 13 | confirm_password = st.text_input("Confirm Password", type='password') 14 | 15 | # SignUp By Google 16 | authenticator.check_authentification() 17 | authorization_url = authenticator.get_authorization_url() 18 | 19 | if not st.session_state.get('connected', False): 20 | st.link_button('Sign Up With Google', authorization_url, use_container_width=True) 21 | else: 22 | email 
= st.session_state['user_info'].get('email') 23 | create_google_user(email) 24 | init_login_session(email) 25 | st.rerun() 26 | 27 | # Captcha Component 28 | captcha_control() 29 | 30 | if 'server_code' not in st.session_state: 31 | st.session_state.server_code = "" 32 | 33 | col1, col2, col3 = st.columns([1, 3, 1]) 34 | with col2: 35 | if st.button("Sign Up", use_container_width=True): 36 | if not is_valid_email(new_email): 37 | st.error("🚫 Invalid email format") 38 | elif not is_valid_password(new_password): 39 | st.error("🚫 Password must be at least 8 characters long, contain a letter, a number, and a special character") 40 | elif new_password != confirm_password: 41 | st.error("🚫 Passwords do not match") 42 | else: 43 | ret = create_user(new_email, new_password) 44 | if ret != "error": 45 | st.success("✅ User created successfully. Please check your email for the verification link.") 46 | else: 47 | st.error("🚫 Error creating user. Please try again.") 48 | 49 | if st.button("Go to Sign In", use_container_width=True): 50 | st.session_state['page'] = "login" 51 | st.rerun() 52 | 53 | st.markdown("", unsafe_allow_html=True) 54 | -------------------------------------------------------------------------------- /.dockerignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | env/ 12 | venv/ 13 | ENV/ 14 | build/ 15 | develop-eggs/ 16 | dist/ 17 | downloads/ 18 | eggs/ 19 | .eggs/ 20 | lib/ 21 | lib64/ 22 | parts/ 23 | sdist/ 24 | var/ 25 | wheels/ 26 | *.egg-info/ 27 | .installed.cfg 28 | *.egg 29 | MANIFEST 30 | 31 | # PyInstaller 32 | # Usually these files are written by a python script from a template 33 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 
def create_chat_table():
    """Create the ``chat`` table if it does not already exist.

    Opens a connection via ``create_connection()``; when that returns a falsy
    value the function does nothing. Errors while creating the table are
    printed, not raised. The cursor and connection are always closed.
    """
    conn = create_connection()
    if not conn:
        return

    cursor = None
    try:
        cursor = conn.cursor()
        create_table_query = """
        CREATE TABLE IF NOT EXISTS chat (
            id SERIAL PRIMARY KEY,
            user_id INT NOT NULL,
            vector_id VARCHAR(255) NOT NULL,
            thread_id VARCHAR(255) NOT NULL,
            file_id VARCHAR(255) NOT NULL,
            assistant_id VARCHAR(255) NOT NULL
        );
        """
        cursor.execute(create_table_query)
        conn.commit()
    except Exception as e:
        print(e)
    finally:
        # If conn.cursor() itself failed there is no cursor to close; the
        # unconditional cursor.close() used to raise NameError here and mask
        # the real error.
        if cursor is not None:
            cursor.close()
        conn.close()
-------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | # client_secret.json 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | share/python-wheels/ 24 | *.egg-info/ 25 | .installed.cfg 26 | *.egg 27 | MANIFEST 28 | 29 | # PyInstaller 30 | # Usually these files are written by a python script from a template 31 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 32 | *.manifest 33 | *.spec 34 | 35 | # Installer logs 36 | pip-log.txt 37 | pip-delete-this-directory.txt 38 | 39 | # Unit test / coverage reports 40 | htmlcov/ 41 | .tox/ 42 | .nox/ 43 | .coverage 44 | .coverage.* 45 | .cache 46 | nosetests.xml 47 | coverage.xml 48 | *.cover 49 | *.py,cover 50 | .hypothesis/ 51 | .pytest_cache/ 52 | cover/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | .pybuilder/ 76 | target/ 77 | 78 | # Jupyter Notebook 79 | .ipynb_checkpoints 80 | 81 | # IPython 82 | profile_default/ 83 | ipython_config.py 84 | 85 | # pyenv 86 | # For a library or package, you might want to ignore these files since the code is 87 | # intended to run in multiple environments; otherwise, check them in: 88 | .python-version 89 | 90 | # pipenv 91 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
92 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 93 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 94 | # install all needed dependencies. 95 | #Pipfile.lock 96 | 97 | # poetry 98 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. 99 | # This is especially recommended for binary packages to ensure reproducibility, and is more 100 | # commonly ignored for libraries. 101 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control 102 | #poetry.lock 103 | 104 | # pdm 105 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. 106 | #pdm.lock 107 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it 108 | # in version control. 109 | # https://pdm.fming.dev/latest/usage/project/#working-with-version-control 110 | .pdm.toml 111 | .pdm-python 112 | .pdm-build/ 113 | 114 | # PEP 582; used by e.g. 
github.com/David-OConnor/pyflow and github.com/pdm-project/pdm 115 | __pypackages__/ 116 | 117 | # Celery stuff 118 | celerybeat-schedule 119 | celerybeat.pid 120 | 121 | # SageMath parsed files 122 | *.sage.py 123 | 124 | # Environments 125 | # .env 126 | .venv 127 | env/ 128 | venv/ 129 | ENV/ 130 | env.bak/ 131 | venv.bak/ 132 | 133 | # Spyder project settings 134 | .spyderproject 135 | .spyproject 136 | 137 | # Rope project settings 138 | .ropeproject 139 | 140 | # mkdocs documentation 141 | /site 142 | 143 | # mypy 144 | .mypy_cache/ 145 | .dmypy.json 146 | dmypy.json 147 | 148 | # Pyre type checker 149 | .pyre/ 150 | 151 | # pytype static type analyzer 152 | .pytype/ 153 | 154 | # Cython debug symbols 155 | cython_debug/ 156 | 157 | # PyCharm 158 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can 159 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore 160 | # and can be added to the global gitignore or merged into this file. For a more nuclear 161 | # option (not recommended) you can uncomment the following to ignore the entire idea folder. 
def generate_6_digit_number():
    """Return a random integer in the inclusive range 100000..999999.

    The value is used as an e-mail verification code, so it is drawn from the
    ``secrets`` CSPRNG rather than the predictable ``random`` generator.
    """
    import secrets  # local import: the module's import block does not provide it

    return 100000 + secrets.randbelow(900000)


def create_users_table():
    """Create the ``users`` table if it does not already exist.

    Shows a Streamlit error when no database connection could be created.
    Database errors are printed rather than raised.
    """
    conn = create_connection()
    if not conn:
        st.error("Unable to connect to the database.")
        return
    # Bound before the try so the finally clause is safe even when
    # conn.cursor() itself raises.
    cursor = None
    try:
        cursor = conn.cursor()
        create_table_query = """
        CREATE TABLE IF NOT EXISTS users (
            id SERIAL PRIMARY KEY,
            email VARCHAR(255) UNIQUE NOT NULL,
            password VARCHAR(255) NOT NULL,
            verify_id VARCHAR(255),
            status VARCHAR(20),
            country VARCHAR(30),
            verification_token VARCHAR(255),
            is_gmail INT
        );
        """
        cursor.execute(create_table_query)
        conn.commit()
    except Exception as e:
        print(e)
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()


def create_user(email, password):
    """Register a password-based user in ``pending`` status and mail the code.

    Args:
        email: Address stored in ``users.email`` and passed to
            ``signup_mailer``.
        password: Plain-text password; only its bcrypt hash is stored.

    Returns:
        The generated six-digit verification code on success, or the string
        ``"error"`` when the connection, the insert, the mailer or the commit
        fails.
    """
    print("create user!!!!!!")
    conn = create_connection()
    if not conn:
        # Previously fell through to ``return verify_id`` with the name unbound.
        return "error"
    cursor = None
    try:
        cursor = conn.cursor()
        hashed_password = bcrypt.hashpw(password.encode('utf-8'), bcrypt.gensalt()).decode("utf-8")
        verify_id = generate_6_digit_number()
        status = "pending"
        cursor.execute(
            "INSERT INTO users (email, password, verification_token, status, is_gmail) VALUES (%s, %s, %s, %s, %s)",
            (email, hashed_password, verify_id, status, 0)
        )
        print("signup_mailer called!!!!!!")
        # Mail is sent before the commit, so a mailer failure leaves the
        # insert uncommitted.
        signup_mailer(email, verify_id)
        conn.commit()
        return verify_id
    except Exception as e:
        print(e)
        return "error"
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()


def create_google_user(email):
    """Insert a pre-verified Google user unless the e-mail already exists.

    Args:
        email: Address looked up in, and if absent inserted into, ``users``.

    Returns:
        None. New rows get the placeholder password ``'XXX'``, status
        ``"verified"`` and ``is_gmail = 1``. Errors are printed, not raised.
    """
    conn = create_connection()
    if not conn:
        return
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM users WHERE email = %s", (email,))
        result = cursor.fetchone()
        if result is None:
            status = "verified"
            cursor.execute(
                "INSERT INTO users (email, password, status, is_gmail) VALUES (%s, %s, %s, %s)",
                (email, 'XXX', status, 1)
            )
            conn.commit()
        else:
            print(email)
    except Exception as e:
        print(e)
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()


def verify_user(email):
    """Set ``status`` to ``'verified'`` for the user with this e-mail.

    Args:
        email: Address matched against ``users.email``.

    Returns:
        None. Errors are printed, not raised.
    """
    conn = create_connection()
    if not conn:
        return
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(
            "UPDATE users SET status = %s WHERE email = %s",
            ('verified', email)
        )
        conn.commit()
    except Exception as e:
        print(e)
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()


def authenticate_user(email, password):
    """Check a password login against the stored bcrypt hash.

    Only rows with ``is_gmail = 0`` are considered.

    Args:
        email: Address matched against ``users.email``.
        password: Plain-text password to verify.

    Returns:
        ``"Success"`` when the password matches and the account is verified,
        ``"Email has not been verified"`` when it matches but the status is
        not ``"verified"``, ``"Invalid email or password"`` when the user is
        unknown or the password is wrong, and ``False`` on a database error or
        when no connection could be created.
    """
    conn = create_connection()
    if not conn:
        # The original printed an unbound ``e`` here and crashed.
        print("Unable to connect to the database.")
        return False
    cursor = None
    try:
        cursor = conn.cursor()
        query = "SELECT password, status FROM users WHERE email = %s AND is_gmail = %s"
        cursor.execute(query, (email, 0))
        record = cursor.fetchone()

        if not record:
            return "Invalid email or password"
        stored_hash, status = record
        if not bcrypt.checkpw(password.encode('utf-8'), stored_hash.encode('utf-8')):
            return "Invalid email or password"
        if status != "verified":
            return "Email has not been verified"
        return "Success"
    except Exception as e:
        print(e)
        return False
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()


def get_user_id(email):
    """Look up the primary key of the user with this e-mail.

    Args:
        email: Address matched against ``users.email``.

    Returns:
        The user's ``id``, or ``None`` when no row matches, on a database
        error, or when no connection could be created.
    """
    conn = create_connection()
    if not conn:
        return None
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute("SELECT id FROM users WHERE email = %s", (email,))
        record = cursor.fetchone()
        return record[0] if record else None
    except Exception as e:
        print(e)
        return None
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()


def update_user_country(email, country):
    """Store ``country`` on the user row with this e-mail.

    Args:
        email: Address matched against ``users.email``.
        country: Value written to the ``country`` column.

    Returns:
        None. Errors are printed, not raised.
    """
    conn = create_connection()
    if not conn:
        return
    cursor = None
    try:
        cursor = conn.cursor()
        cursor.execute(
            "UPDATE users SET country = %s WHERE email = %s",
            (country, email)
        )
        conn.commit()
    except Exception as e:
        print(e)
    finally:
        if cursor is not None:
            cursor.close()
        conn.close()
openai_client.files.create( 35 | file=file_bytes_io, 36 | purpose='assistants' 37 | ) 38 | uploaded_files[0] = file.id 39 | 40 | # Vector Store 41 | vector = openai_client.beta.vector_stores.create( 42 | name=vector_name 43 | ) 44 | openai_client.beta.vector_stores.files.create( 45 | vector_store_id=vector.id, 46 | file_id=file.id 47 | ) 48 | 49 | # chat assistant 50 | assistant = openai_client.beta.assistants.create( 51 | instructions="Use the file provided as your knowledge base to best respond to customer queries. Only include at least on file citation(for example: 【4:1†source】) in the answer.", 52 | model="gpt-4o-mini", 53 | tools=[ 54 | { 55 | "type": "file_search", 56 | } 57 | ], 58 | tool_resources={ 59 | "file_search": { 60 | "vector_store_ids":[vector.id] 61 | } 62 | } 63 | ) 64 | 65 | # chat thread 66 | thread = openai_client.beta.threads.create() 67 | create_chat(user_id, vector.id, thread.id, file.id, assistant.id) 68 | print(f"Thread: {thread.id}") 69 | print(f"Assistant: {assistant.id}") 70 | return thread.id, assistant.id, file_bytes_io.name 71 | 72 | @observe() 73 | def get_conversational_chain(user_question, thread_id, assistant_id, file_name, session_id): 74 | """Ignore thread_id parameter due to cost limit""" 75 | print(f"Thread: {thread_id}") 76 | print(f"Assistant: {assistant_id}") 77 | print(f"User Question: {user_question}") 78 | if user_question == "": 79 | return 80 | thread = openai_client.beta.threads.create() 81 | openai_client.beta.threads.messages.create( 82 | thread_id=thread.id, 83 | role="user", 84 | content=user_question + """Include references after the answer in this format: 85 |