├── .gitignore ├── README.md ├── rodeo ├── __init__.py ├── big │ ├── .chainlit │ │ └── config.toml │ ├── README.md │ ├── app.py │ ├── chainlit.md │ ├── requirements.txt │ └── static │ │ └── big-logo.png ├── image_search │ ├── .env.example │ ├── README.md │ ├── __init__.py │ ├── components │ │ ├── image_displayer.py │ │ └── image_uploader.py │ ├── requirements.txt │ ├── streamlit_app.py │ ├── utils │ │ ├── __init__.py │ │ ├── convert_base64_to_image.py │ │ ├── convert_image_to_base64.py │ │ ├── generate_embedding.py │ │ ├── get_nearest_images.py │ │ ├── get_supabase_client.py │ │ └── save_doc_and_embed.py │ └── views │ │ ├── homepage.py │ │ ├── search_page.py │ │ └── upload_page.py └── ride │ ├── .chainlit │ └── config.toml │ ├── README.md │ ├── app.py │ ├── chainlit.md │ ├── requirements.txt │ └── static │ └── big-logo.png └── static └── rodeo-logo.png /.gitignore: -------------------------------------------------------------------------------- 1 | **/__pycache__/* 2 | rodeo/ride/README.md 3 | rodeo/big/__pycache__/app.cpython-311.pyc 4 | **/.env 5 | **/__pycache__ 6 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Rodeo - Your productivity boosters 2 | 3 | 4 |
5 | Rodeo-logo 6 |
7 | 8 | ## What is Rodeo? 9 | 10 | Rodeo is a collection of productivity boosters app powered by GenAI. 11 | 12 | ## Projects 13 | 14 | - [BIG - Bulk Information Gleaner](./rodeo/big/README.md): Ask questions to huge documents and get answers in seconds. 15 | - [RIDE - Rodeo Intelligent Decision Evaluator](./rodeo/ride/README.md): Evaluate the quality of a use case. 16 | 17 | 18 | ## Add a new project 19 | 20 | - Simply add a new folder with your project name under `rodeo` folder. 21 | - Add a `README.md` file with the project description. 22 | - Explain how to run the project in the `README.md` file. 23 | - Dockerize your project if needed. 24 | - Contact @StanGirard for any questions. 25 | 26 | ## Have fun 🔥 27 | 28 | Have fun with Rodeo and don't hesitate to contribute to the project. -------------------------------------------------------------------------------- /rodeo/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theodo-group/Rodeo/261f7017fbdf33a297eb2506d8419aa8a865d598/rodeo/__init__.py -------------------------------------------------------------------------------- /rodeo/big/.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = false 4 | 5 | # List of environment variables to be provided by each user to use the app. 6 | user_env = [] 7 | 8 | # Duration (in seconds) during which the session is saved when the connection is lost 9 | session_timeout = 3600 10 | 11 | # Enable third parties caching (e.g LangChain cache) 12 | cache = false 13 | 14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 15 | # follow_symlink = false 16 | 17 | [features] 18 | # Show the prompt playground 19 | prompt_playground = true 20 | 21 | # Authorize users to upload files with messages 22 | multi_modal = true 23 | 24 | # Allows user to use speech to text 25 | [features.speech_to_text] 26 | enabled = false 27 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 28 | # language = "en-US" 29 | 30 | [UI] 31 | # Name of the app and chatbot. 32 | name = "B.I.G" 33 | 34 | # Show the readme while the conversation is empty. 35 | show_readme_as_default = false 36 | 37 | # Description of the app and chatbot. This is used for HTML tags. 38 | # description = "B.I.G - Bulk Information Gleaner" 39 | 40 | # Large size content are by default collapsed for a cleaner ui 41 | default_collapse_content = true 42 | 43 | # The default value for the expand messages settings. 44 | default_expand_messages = false 45 | 46 | # Hide the chain of thought details from the user in the UI. 47 | hide_cot = false 48 | 49 | # Link to your github repo. This will add a github button in the UI's header. 50 | # github = "https://github.com/theodo-group/Rodeo" 51 | 52 | # Specify a CSS file that can be used to customize the user interface. 53 | # The CSS file can be served from the public directory or via an external link. 54 | # custom_css = "/public/test.css" 55 | 56 | # Override default MUI light theme. (Check theme.ts) 57 | [UI.theme.light] 58 | #background = "#FAFAFA" 59 | #paper = "#FFFFFF" 60 | 61 | [UI.theme.light.primary] 62 | #main = "#F80061" 63 | #dark = "#980039" 64 | #light = "#FFE7EB" 65 | 66 | # Override default MUI dark theme. (Check theme.ts) 67 | [UI.theme.dark] 68 | #background = "#FAFAFA" 69 | #paper = "#FFFFFF" 70 | 71 | [UI.theme.dark.primary] 72 | #main = "#F80061" 73 | #dark = "#980039" 74 | #light = "#FFE7EB" 75 | 76 | 77 | [meta] 78 | generated_by = "0.7.400" 79 | -------------------------------------------------------------------------------- /rodeo/big/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to Bulk Information Gleaner - B.I.G ! 🚀🤖 2 | 3 |
4 | Big-logo 5 |
6 | 7 | ## What is B.I.G? 8 | 9 | It allows you to ask questions to huge documents and get answers in seconds. 10 | 11 | 12 | https://github.com/theodo-group/Rodeo/assets/19614572/e643f447-c343-49de-a8e8-d91a1b227e50 13 | 14 | 15 | 16 | 17 | ## How does it work? 18 | 19 | It uses the power of GenAI to extract the most relevant information from a document and answer your questions. 20 | 21 | Step 1: Upload your document 22 | Step 2: Ask your question 23 | Step 3: Enjoy 24 | 25 | ## How to Run BIG 26 | 27 | 1. Install Packages with `pip install -r requirements.txt` 28 | 2. Export OPENAI_API_KEY as env variable 29 | 3. Run `chainlit run app.py -w` 30 | 31 | -------------------------------------------------------------------------------- /rodeo/big/app.py: -------------------------------------------------------------------------------- 1 | import chainlit as cl 2 | from chainlit.prompt import Prompt, PromptMessage 3 | from chainlit.playground.providers.openai import ChatOpenAI 4 | from unstructured.partition.auto import partition 5 | from tempfile import NamedTemporaryFile 6 | import tiktoken 7 | 8 | 9 | import openai 10 | from openai import AsyncOpenAI 11 | import os 12 | 13 | openai.api_key = os.getenv("OPENAI_API_KEY") 14 | 15 | 16 | template = """ 17 | Here is a file content on which you need to do some analysis: 18 | {file_content} 19 | 20 | Answer these from the user: {input} 21 | ```""" 22 | 23 | 24 | settings = { 25 | "model": "gpt-4-1106-preview", 26 | "temperature": 0, 27 | "max_tokens": 4096, 28 | "top_p": 1, 29 | "frequency_penalty": 0, 30 | "presence_penalty": 0, 31 | "stop": ["```"], 32 | } 33 | 34 | 35 | def num_tokens_from_string(string: str, model_name: str) -> int: 36 | """Returns the number of tokens in a text string.""" 37 | encoding = tiktoken.encoding_for_model(model_name) 38 | num_tokens = len(encoding.encode(string)) 39 | return num_tokens 40 | 41 | 42 | SUPPORTED_FILE_TYPES = [ 43 | "application/pdf", 44 | "application/msword", 45 | "application/vnd.openxmlformats-officedocument.wordprocessingml.document", 46 | "application/vnd.ms-powerpoint", 47 | "application/vnd.openxmlformats-officedocument.presentationml.presentation", 48 | # Add other MIME types as needed for supported file formats 49 | ] 50 | 51 | 52 | @cl.on_chat_start 53 | async def start(): 54 | files = None 55 | 56 | # Wait for the user to upload a file 57 | while files is None: 58 | files = await cl.AskFileMessage( 59 | content="Please upload a file to begin!", 60 | accept=SUPPORTED_FILE_TYPES, 61 | max_size_mb=20, 62 | ).send() 63 | 64 | # Get the uploaded file 65 | uploaded_file = files[0] 66 | 67 | # Save the file content to a temporary file 68 | with NamedTemporaryFile( 69 | delete=False, suffix=os.path.splitext(uploaded_file.name)[1] 70 | ) as tmp: 71 | tmp.write(uploaded_file.content) # Write the byte content to temp file 72 | tmp_path = tmp.name # Save the path to the temp file 73 | 74 | # Let the user know that the file is being processed 75 | await cl.Message( 76 | content=f"`{uploaded_file.name}` uploaded and is being processed..." 77 | ).send() 78 | 79 | # Call partition with the path to the temporary file 80 | elements = partition(filename=tmp_path) 81 | file_content = "\n\n".join([str(el) for el in elements]) 82 | cl.user_session.set("file_content", file_content) 83 | 84 | print(file_content) 85 | os.remove(tmp_path) 86 | 87 | tokens_number = num_tokens_from_string(file_content, "gpt-4") 88 | 89 | # Let the user know that the processing is complete 90 | await cl.Message( 91 | content=f"Processing of `{uploaded_file.name}` is complete!" 92 | ).send() 93 | await cl.Message(content=f"**Number of tokens:** {tokens_number}").send() 94 | 95 | 96 | @cl.on_message 97 | async def main(message: cl.Message): 98 | # Create the prompt object for the Prompt Playground 99 | message_history = cl.user_session.get("message_history", []) 100 | message_history.append({"role": "user", "content": message.content}) 101 | file_content = cl.user_session.get("file_content") 102 | prompt = Prompt( 103 | provider=ChatOpenAI.id, 104 | messages=[ 105 | PromptMessage( 106 | role="user", 107 | template=template, 108 | formatted=template.format( 109 | input=message.content, file_content=file_content 110 | ), 111 | ) 112 | ], 113 | settings=settings, 114 | inputs={"input": message.content}, 115 | ) 116 | 117 | print(prompt) 118 | 119 | # Prepare the message for streaming 120 | msg = cl.Message( 121 | content="", 122 | author=settings.get("model", "Unknown"), 123 | ) 124 | 125 | # Call OpenAI 126 | client = AsyncOpenAI() 127 | 128 | async for chunk in await client.chat.completions.create( 129 | messages=[m.to_openai() for m in prompt.messages], stream=True, **settings # type: ignore 130 | ): 131 | if chunk.choices[0].delta.content is not None: # Check if content is not None 132 | print(chunk.choices[0].delta.content) 133 | await msg.stream_token(chunk.choices[0].delta.content) 134 | 135 | # Append the assistant's response to the message history 136 | 137 | # Send the final message after streaming is complete 138 | print(msg.content) 139 | message_history.append({"role": "assistant", "content": msg.content}) 140 | await msg.send() 141 | -------------------------------------------------------------------------------- /rodeo/big/chainlit.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theodo-group/Rodeo/261f7017fbdf33a297eb2506d8419aa8a865d598/rodeo/big/chainlit.md -------------------------------------------------------------------------------- /rodeo/big/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.331 2 | tiktoken==0.5.1 3 | chainlit==0.7.400 4 | unstructured[all-docs] 5 | unstructured[docx] 6 | openai==1.1.0 -------------------------------------------------------------------------------- /rodeo/big/static/big-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theodo-group/Rodeo/261f7017fbdf33a297eb2506d8419aa8a865d598/rodeo/big/static/big-logo.png -------------------------------------------------------------------------------- /rodeo/image_search/.env.example: -------------------------------------------------------------------------------- 1 | PROJECT_ID= 2 | OPENAI_API_KEY= 3 | SUPABASE_URL= 4 | SUPABASE_KEY= 5 | -------------------------------------------------------------------------------- /rodeo/image_search/README.md: -------------------------------------------------------------------------------- 1 | ## Vectors based image search app 2 | 3 | ### Installation: 4 | 5 | 1. Duplicate copy .env.example to .env and fill in the values 6 | 7 | 2. Run below query on Supabase 8 | 9 | ```sql 10 | CREATE EXTENSION IF NOT EXISTS vector; 11 | CREATE TABLE IF NOT EXISTS vectors ( 12 | id UUID DEFAULT uuid_generate_v4() PRIMARY KEY, 13 | content TEXT, 14 | metadata JSONB, 15 | embedding VECTOR(1408) 16 | ); 17 | CREATE OR REPLACE FUNCTION match_vectors(query_embedding VECTOR(1408), match_count INT) 18 | RETURNS TABLE( 19 | id UUID, 20 | content TEXT, 21 | metadata JSONB, 22 | embedding VECTOR(1408), 23 | similarity FLOAT 24 | ) LANGUAGE plpgsql AS $$ 25 | #variable_conflict use_column 26 | BEGIN 27 | RETURN QUERY 28 | SELECT 29 | vectors.id, 30 | vectors.content, 31 | vectors.metadata, 32 | vectors.embedding, 33 | 1 - (vectors.embedding <=> query_embedding) AS similarity 34 | FROM 35 | vectors 36 | ORDER BY 37 | vectors.embedding <=> query_embedding 38 | LIMIT match_count; 39 | END; 40 | $$; 41 | ``` 42 | 43 | 3. Install dependencies 44 | 45 | ```bash 46 | pip install -r requirements.txt 47 | ``` 48 | 49 | 4. Run the app 50 | 51 | ```bash 52 | streamlit run streamlit_app 53 | ``` -------------------------------------------------------------------------------- /rodeo/image_search/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theodo-group/Rodeo/261f7017fbdf33a297eb2506d8419aa8a865d598/rodeo/image_search/__init__.py -------------------------------------------------------------------------------- /rodeo/image_search/components/image_displayer.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from image_search.utils.convert_base64_to_image import convert_base64_to_image 3 | 4 | 5 | def display_image(image): 6 | try: 7 | st.image(convert_base64_to_image(image["content"]), width=300) 8 | except: 9 | st.write("Error", image["content"]) 10 | -------------------------------------------------------------------------------- /rodeo/image_search/components/image_uploader.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from image_search.utils.convert_image_to_base64 import convert_image_to_base64 3 | from image_search.utils.generate_embedding import generate_embedding 4 | from image_search.utils.save_doc_and_embed import save_doc_and_embed 5 | 6 | 7 | def render_image_uploader(): 8 | uploaded_file = st.file_uploader( 9 | "Choose an image to upload", 10 | type=["png", "jpg", "jpeg"], 11 | ) 12 | if uploaded_file is not None: 13 | # To read file as bytes: 14 | image_bytes_data = uploaded_file.getvalue() 15 | embedding = generate_embedding(image_bytes_data) 16 | save_doc_and_embed( 17 | content=convert_image_to_base64(image_bytes_data), embedding=embedding 18 | ) 19 | st.toast("Your image was successfully uploaded!", icon="✅") 20 | -------------------------------------------------------------------------------- /rodeo/image_search/requirements.txt: -------------------------------------------------------------------------------- 1 | base64 2 | PIL 3 | dotenv 4 | supabase 5 | streamlit 6 | -------------------------------------------------------------------------------- /rodeo/image_search/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from dotenv import load_dotenv 3 | from image_search.views.homepage import home_page 4 | from image_search.views.search_page import search_page 5 | from image_search.views.upload_page import upload_page 6 | 7 | load_dotenv() 8 | 9 | # Set Streamlit page configuration 10 | st.set_page_config(page_title="Image Search", page_icon="🔍") 11 | 12 | st.title("Image Search Engine 🤖") 13 | 14 | # Sidebar for page selection 15 | page = st.sidebar.selectbox( 16 | "Select a Page", 17 | [ 18 | "Home Page", 19 | "Search Images", 20 | "Upload Images", 21 | ], 22 | ) 23 | 24 | 25 | if page == "Upload Images": 26 | upload_page() 27 | elif page == "Search Images": 28 | search_page() 29 | else: 30 | home_page() 31 | -------------------------------------------------------------------------------- /rodeo/image_search/utils/__init__.py: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theodo-group/Rodeo/261f7017fbdf33a297eb2506d8419aa8a865d598/rodeo/image_search/utils/__init__.py -------------------------------------------------------------------------------- /rodeo/image_search/utils/convert_base64_to_image.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import io 3 | 4 | from PIL import Image 5 | 6 | 7 | def convert_base64_to_image(base64_string): 8 | image_data = base64.b64decode(base64_string) 9 | 10 | return Image.open(io.BytesIO(image_data)) 11 | -------------------------------------------------------------------------------- /rodeo/image_search/utils/convert_image_to_base64.py: -------------------------------------------------------------------------------- 1 | import base64 2 | 3 | 4 | def convert_image_to_base64(image): 5 | img_str = base64.b64encode(image) 6 | return img_str.decode("utf-8") 7 | -------------------------------------------------------------------------------- /rodeo/image_search/utils/generate_embedding.py: -------------------------------------------------------------------------------- 1 | import base64 2 | import os 3 | import typing 4 | 5 | from google.cloud import aiplatform 6 | from google.protobuf import struct_pb2 7 | 8 | 9 | class VertexEmbeddingClient: 10 | def __init__( 11 | self, 12 | location: str = "us-central1", 13 | api_regional_endpoint: str = "us-central1-aiplatform.googleapis.com", 14 | ): 15 | client_options = {"api_endpoint": api_regional_endpoint} 16 | self.client = aiplatform.gapic.PredictionServiceClient( 17 | client_options=client_options 18 | ) 19 | self.location = location 20 | self.project = os.getenv("PROJECT_ID") 21 | 22 | def get_embedding(self, text_or_image: typing.Union[str, bytes]) -> list[float]: 23 | instance = struct_pb2.Struct() 24 | is_text = isinstance(text_or_image, str) 25 | 26 | if is_text: 27 | instance.fields["text"].string_value = text_or_image 28 | 29 | if isinstance(text_or_image, bytes): 30 | encoded_content = base64.b64encode(text_or_image).decode("utf-8") 31 | image_struct = instance.fields["image"].struct_value 32 | image_struct.fields["bytesBase64Encoded"].string_value = encoded_content 33 | 34 | endpoint = ( 35 | f"projects/{self.project}/locations/{self.location}" 36 | "/publishers/google/models/multimodalembedding@001" 37 | ) 38 | response = self.client.predict(endpoint=endpoint, instances=[instance]) 39 | embedding = None 40 | if is_text: 41 | text_emb_value = response.predictions[0]["textEmbedding"] 42 | embedding = [v for v in text_emb_value] 43 | else: 44 | image_emb_value = response.predictions[0]["imageEmbedding"] 45 | embedding = [v for v in image_emb_value] 46 | 47 | return embedding 48 | 49 | 50 | def generate_embedding(text_or_image: typing.Union[str, bytes]) -> list[float]: 51 | client = VertexEmbeddingClient() 52 | return client.get_embedding(text_or_image) 53 | -------------------------------------------------------------------------------- /rodeo/image_search/utils/get_nearest_images.py: -------------------------------------------------------------------------------- 1 | from image_search.utils.generate_embedding import generate_embedding 2 | from image_search.utils.get_supabase_client import get_supabase_client 3 | 4 | 5 | def get_nearest_images(text_query: str, k: int = 4) -> list[dict]: 6 | """Get the nearest documents from the database given a text query. 7 | 8 | Args: 9 | text_query (str): The text query to use to find the nearest documents. 10 | k (int, optional): The number of nearest documents to return. Defaults to 4. 11 | 12 | Returns: 13 | list[dict]: A list of the nearest documents. 14 | """ 15 | 16 | embedding = generate_embedding(text_query) 17 | supabase_client = get_supabase_client() 18 | response = supabase_client.rpc( 19 | "match_vectors", 20 | { 21 | "query_embedding": embedding, 22 | "match_count": k, 23 | }, 24 | ).execute() 25 | return response.data 26 | -------------------------------------------------------------------------------- /rodeo/image_search/utils/get_supabase_client.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | from dotenv import load_dotenv 4 | from supabase import Client, create_client 5 | 6 | load_dotenv() 7 | 8 | url: str = os.environ.get("SUPABASE_URL") 9 | key: str = os.environ.get("SUPABASE_KEY") 10 | 11 | 12 | def get_supabase_client() -> Client: 13 | return create_client(url, key) 14 | -------------------------------------------------------------------------------- /rodeo/image_search/utils/save_doc_and_embed.py: -------------------------------------------------------------------------------- 1 | from image_search.utils.get_supabase_client import get_supabase_client 2 | 3 | 4 | def save_doc_and_embed(content: str, embedding: list[float]): 5 | response = ( 6 | get_supabase_client() 7 | .table("vectors") 8 | .insert({"content": content, "embedding": embedding}) 9 | .execute() 10 | ) 11 | return response 12 | -------------------------------------------------------------------------------- /rodeo/image_search/views/homepage.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | 3 | 4 | def home_page(): 5 | st.markdown( 6 | """ 7 | This is a demo of an image search engine built with VertexAI, Streamlit and Supabase. 8 | 9 | ### It allows you to upload images and search for similar images using text 10 | """ 11 | ) 12 | st.markdown( 13 | """ 14 | ## How it works 15 | 1. Go to Upload Images page and upload images 16 | 2. Go to Search Images page and search for images 17 | """ 18 | ) 19 | -------------------------------------------------------------------------------- /rodeo/image_search/views/search_page.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from image_search.components.image_displayer import display_image 3 | from image_search.utils.get_nearest_images import get_nearest_images 4 | 5 | 6 | def search_page(): 7 | st.title("Search 🔍") 8 | grid = st.columns(2) 9 | 10 | text_query = st.text_input("Which image are you looking for ?") 11 | 12 | with st.spinner("Training ongoing"): 13 | if text_query: 14 | images = get_nearest_images(text_query=text_query) 15 | for i in range(len(images)): 16 | image = images[i] 17 | with grid[i % 2]: 18 | display_image(image) 19 | -------------------------------------------------------------------------------- /rodeo/image_search/views/upload_page.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from image_search.components.image_uploader import render_image_uploader 3 | 4 | 5 | def upload_page(): 6 | st.title("Upload 📤") 7 | render_image_uploader() 8 | -------------------------------------------------------------------------------- /rodeo/ride/.chainlit/config.toml: -------------------------------------------------------------------------------- 1 | [project] 2 | # Whether to enable telemetry (default: true). No personal data is collected. 3 | enable_telemetry = false 4 | 5 | # List of environment variables to be provided by each user to use the app. 6 | user_env = [] 7 | 8 | # Duration (in seconds) during which the session is saved when the connection is lost 9 | session_timeout = 3600 10 | 11 | # Enable third parties caching (e.g LangChain cache) 12 | cache = false 13 | 14 | # Follow symlink for asset mount (see https://github.com/Chainlit/chainlit/issues/317) 15 | # follow_symlink = false 16 | 17 | [features] 18 | # Show the prompt playground 19 | prompt_playground = true 20 | 21 | # Authorize users to upload files with messages 22 | multi_modal = false 23 | 24 | # Allows user to use speech to text 25 | [features.speech_to_text] 26 | enabled = false 27 | # See all languages here https://github.com/JamesBrill/react-speech-recognition/blob/HEAD/docs/API.md#language-string 28 | # language = "en-US" 29 | 30 | [UI] 31 | # Name of the app and chatbot. 32 | name = "Old" 33 | 34 | # Show the readme while the conversation is empty. 35 | show_readme_as_default = false 36 | 37 | # Description of the app and chatbot. This is used for HTML tags. 38 | # description = "Old" 39 | 40 | # Large size content are by default collapsed for a cleaner ui 41 | default_collapse_content = true 42 | 43 | # The default value for the expand messages settings. 44 | default_expand_messages = false 45 | 46 | # Hide the chain of thought details from the user in the UI. 47 | hide_cot = false 48 | 49 | # Link to your github repo. This will add a github button in the UI's header. 50 | # github = "https://github.com/theodo-group/Rodeo" 51 | 52 | # Specify a CSS file that can be used to customize the user interface. 53 | # The CSS file can be served from the public directory or via an external link. 54 | # custom_css = "/public/test.css" 55 | 56 | # Override default MUI light theme. (Check theme.ts) 57 | [UI.theme.light] 58 | #background = "#FAFAFA" 59 | #paper = "#FFFFFF" 60 | 61 | [UI.theme.light.primary] 62 | #main = "#F80061" 63 | #dark = "#980039" 64 | #light = "#FFE7EB" 65 | 66 | # Override default MUI dark theme. (Check theme.ts) 67 | [UI.theme.dark] 68 | #background = "#FAFAFA" 69 | #paper = "#FFFFFF" 70 | 71 | [UI.theme.dark.primary] 72 | #main = "#F80061" 73 | #dark = "#980039" 74 | #light = "#FFE7EB" 75 | 76 | 77 | [meta] 78 | generated_by = "0.7.400" 79 | -------------------------------------------------------------------------------- /rodeo/ride/README.md: -------------------------------------------------------------------------------- 1 | # Welcome to RIDE! 🚀🤖 2 | 3 | ## What is RIDE? 4 | 5 | It allows youn to evaluate the quality of a use case. 6 | 7 | 8 | 9 | 10 | https://github.com/theodo-group/Rodeo/assets/19614572/3653a3ee-1731-43cf-b8c2-b937a587a029 11 | 12 | 13 | 14 | 15 | 16 | ## How does it work? 17 | 18 | Simply chat with it and it will ask you questions about your use case. It will then evaluate the quality of your use case and give you a score. 19 | 20 | ## How to Run RIDE 21 | 22 | 1. Install Packages with `pip install -r requirements.txt` 23 | 2. Export OPENAI_API_KEY as env variable 24 | 3. Run `chainlit run app.py -w` 25 | 26 | -------------------------------------------------------------------------------- /rodeo/ride/app.py: -------------------------------------------------------------------------------- 1 | import chainlit as cl 2 | from chainlit.prompt import Prompt, PromptMessage 3 | from chainlit.playground.providers.openai import ChatOpenAI 4 | from unstructured.partition.auto import partition 5 | from tempfile import NamedTemporaryFile 6 | import tiktoken 7 | 8 | 9 | import openai 10 | from openai import AsyncOpenAI 11 | import os 12 | 13 | openai.api_key = os.getenv("OPENAI_API_KEY") 14 | 15 | 16 | template = """ 17 | You are an expert business analyser. You have the task to analyse a use case with a system called RIDE. 18 | The use case will be evaluated from 1 to 10 on each criteria. 19 | The user will be evaluated on the following criteria: 20 | 21 | --- 22 | R - Ressources 23 | - What are the ressources needed to implement the use case? 24 | - Is the data required to implement the use case available? 25 | For example: 1 week of a lead data scientist and no data required will give 10 26 | 1 months of a full agile team and data required will give 1. 27 | 28 | I - Impact 29 | - What is the impact of the use case? 30 | - Is the use case a nice to have or a must have? 31 | For example: The use case will help to increase the revenue by 10% will give 10 32 | The use case will help to increase the revenue by 1% will give 7 33 | The use case ROI is not measurable will give 1. 34 | 35 | D - Déontologie 36 | - Est-ce que ce projet répond aux enjeux 2030 de la BPI (décarbo, réindustrialisation, …) ? 37 | - Est-ce qu’il respecte les chartes de sécurité des données ? 38 | For example: The use case is a green fintech will give 10 39 | The use case is not good for the futur will give 1 40 | 41 | E - Effort 42 | - Est-ce que le projet est facile à mettre en place ? 43 | - Est-ce que le projet est facile à maintenir ? 44 | 45 | For example: The use case is easy to implement and maintain will give 10 46 | The use case is hard to implement and maintain will give 1 47 | --- 48 | 49 | Score is calculated as follow: 50 | (Ressources * Impact * Déontologie) / Effort 51 | 52 | When interviewing: 53 | - Always answer in French 54 | - Answer in short sentences. Use Markdown to format your answer. 55 | - Be nice and polite 56 | 57 | Here is how to conduct the interview: 58 | - Ask the user to describe the use case 59 | - Then ask the user to answer questions on the use case on each criteria 60 | - One criteria at a time 61 | - The user can't know the next criterions before answering the previous one 62 | 63 | If the user gives you a list of subjects: 64 | - Try to estimate the RIDE score for each subject. Giving a score of 1 to 10 for each subject. and a short description of why. 65 | - Usually complexity is an indicator of length of the project and access to data. 66 | - Answer in a markdown table. 67 | 68 | Your name is BPI RIDE Calculator. 69 | ```""" 70 | 71 | 72 | settings = { 73 | "model": "gpt-4-1106-preview", 74 | "temperature": 0.2, 75 | "max_tokens": 4096, 76 | "top_p": 1, 77 | "frequency_penalty": 0, 78 | "presence_penalty": 0, 79 | "stop": ["```"], 80 | } 81 | 82 | 83 | # @cl.on_chat_start 84 | # async def start(): 85 | # files = None 86 | 87 | # # Wait for the user to upload a file 88 | # while files is None: 89 | # files = await cl.AskFileMessage( 90 | # content="Please upload a file to begin!", 91 | # accept=SUPPORTED_FILE_TYPES, 92 | # max_size_mb=20, 93 | # ).send() 94 | 95 | # # Get the uploaded file 96 | # uploaded_file = files[0] 97 | 98 | # # Save the file content to a temporary file 99 | # with NamedTemporaryFile( 100 | # delete=False, suffix=os.path.splitext(uploaded_file.name)[1] 101 | # ) as tmp: 102 | # tmp.write(uploaded_file.content) # Write the byte content to temp file 103 | # tmp_path = tmp.name # Save the path to the temp file 104 | 105 | # # Let the user know that the file is being processed 106 | # await cl.Message( 107 | # content=f"`{uploaded_file.name}` uploaded and is being processed..." 108 | # ).send() 109 | 110 | # # Call partition with the path to the temporary file 111 | # elements = partition(filename=tmp_path) 112 | # file_content = "\n\n".join([str(el) for el in elements]) 113 | # cl.user_session.set("file_content", file_content) 114 | 115 | # print(file_content) 116 | # os.remove(tmp_path) 117 | 118 | # tokens_number = num_tokens_from_string(file_content, "gpt-4") 119 | 120 | # # Let the user know that the processing is complete 121 | # await cl.Message( 122 | # content=f"Processing of `{uploaded_file.name}` is complete!" 123 | # ).send() 124 | # await cl.Message(content=f"**Number of tokens:** {tokens_number}").send() 125 | 126 | 127 | @cl.on_chat_start 128 | def start_chat(): 129 | cl.user_session.set( 130 | "message_history", 131 | [{"role": "system", "content": template}], 132 | ) 133 | 134 | 135 | @cl.on_message 136 | async def main(message: cl.Message): 137 | # Create the prompt object for the Prompt Playground 138 | message_history = cl.user_session.get("message_history", []) 139 | message_history.append({"role": "user", "content": message.content}) 140 | 141 | # Prepare the message for streaming 142 | msg = cl.Message( 143 | content="", 144 | author="RIDE", 145 | ) 146 | await msg.send() 147 | 148 | # Call OpenAI 149 | client = AsyncOpenAI() 150 | 151 | stream = await client.chat.completions.create( 152 | messages=message_history, stream=True, **settings 153 | ) 154 | 155 | async for part in stream: 156 | if token := part.choices[0].delta.content or "": 157 | await msg.stream_token(token) 158 | 159 | message_history.append({"role": "assistant", "content": msg.content}) 160 | await msg.update() 161 | -------------------------------------------------------------------------------- /rodeo/ride/chainlit.md: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theodo-group/Rodeo/261f7017fbdf33a297eb2506d8419aa8a865d598/rodeo/ride/chainlit.md -------------------------------------------------------------------------------- /rodeo/ride/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.331 2 | tiktoken==0.5.1 3 | chainlit==0.7.700 4 | unstructured[all-docs] 5 | unstructured[docx] 6 | openai==1.1.0 -------------------------------------------------------------------------------- /rodeo/ride/static/big-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theodo-group/Rodeo/261f7017fbdf33a297eb2506d8419aa8a865d598/rodeo/ride/static/big-logo.png -------------------------------------------------------------------------------- /static/rodeo-logo.png: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/theodo-group/Rodeo/261f7017fbdf33a297eb2506d8419aa8a865d598/static/rodeo-logo.png --------------------------------------------------------------------------------