├── .gitignore ├── .env_sample ├── requirements.txt ├── README.md ├── chatopenai.py ├── chatgradio.py ├── chat.py ├── Makefile ├── pdf_util.py ├── chatpdf.py ├── jaichat.py ├── solar-r.py ├── longimg.py ├── docv.py ├── solar_util.py ├── hw.py ├── fc.py ├── chatpdfemb.py ├── chatsearch.py ├── data │   └── target_companies.json ├── namecard.py ├── reasoning.py ├── discussion.py ├── coldmail.py ├── translate.py ├── gemini.py └── llama.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .DS_Store 3 | local_cache 4 | .venv 5 | __pycache__/ 6 | chroma_db/ 7 | .streamlit/ -------------------------------------------------------------------------------- /.env_sample: -------------------------------------------------------------------------------- 1 | # Fill in the following variables with your own API keys 2 | # and rename this file to .env 3 | UPSTAGE_API_KEY=up_SqRZcwRz... 4 | TAVILY_API_KEY=tvly-n8wiK... 5 | NEWS_API_KEY=e59ce9fc... 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit>=1.30.0 2 | google-genai>=0.3.0 3 | qrcode[pil] 4 | requests 5 | streamlit-extras 6 | streamlit-paste-button 7 | langchain 8 | faiss-cpu 9 | langchainhub 10 | langchain_upstage 11 | langchain-community 12 | watchdog 13 | openpyxl==3.1.5 14 | tokenizers 15 | pydantic 16 | crawl4ai 17 | tinydb 18 | duckduckgo-search 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SolarLLM Chat Demo 2 | ## Streamlit and Gradio Chat Demo 3 | * Chat: https://solar-chat.streamlit.app/ 4 | * Search: https://solar-search.streamlit.app/ 5 | * ChatPDF: https://solar-pdf.streamlit.app/ 6 | * Self-Discussion: https://solar-discussion.streamlit.app/ 7 | * DocVision: https://solar-docv.streamlit.app/ 8 | 9 | * Reasoning: https://toy-o1.streamlit.app/, https://toy-o2.streamlit.app/ 10 | 11 | ## SolarLLM Cookbook 12 | For detailed usage examples, please refer to https://github.com/upstageAI/cookbook/. 
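## Quickstart
A minimal local run, assuming Python 3.10+ and `make` are available (the targets are defined in the `Makefile` below):

```bash
cp .env_sample .env   # then fill in your UPSTAGE_API_KEY
make chat             # creates .venv, installs requirements.txt, and runs chat.py
```

Other targets (`make chatpdf`, `make search`, ...) work the same way.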
13 | -------------------------------------------------------------------------------- /chatopenai.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | from openai import OpenAI 4 | import streamlit as st 5 | 6 | st.title("ChatGPT-like clone") 7 | 8 | client = OpenAI(api_key=st.secrets["UPSTAGE_API_KEY"], base_url="https://api.upstage.ai/v1/solar") 9 | 10 | if "messages" not in st.session_state: 11 | st.session_state.messages = [] 12 | 13 | for message in st.session_state.messages: 14 | with st.chat_message(message["role"]): 15 | st.markdown(message["content"]) 16 | 17 | if prompt := st.chat_input("What is up?"): 18 | st.session_state.messages.append({"role": "user", "content": prompt}) 19 | with st.chat_message("user"): 20 | st.markdown(prompt) 21 | 22 | with st.chat_message("assistant"): 23 | stream = client.chat.completions.create( 24 | model="solar-1-mini-chat", 25 | messages=[ 26 | {"role": m["role"], "content": m["content"]} 27 | for m in st.session_state.messages 28 | ], 29 | stream=True, 30 | ) 31 | response = st.write_stream(stream) 32 | st.session_state.messages.append({"role": "assistant", "content": response}) -------------------------------------------------------------------------------- /chatgradio.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | from langchain_upstage import ChatUpstage 4 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 5 | from langchain_core.output_parsers import StrOutputParser 6 | from langchain.schema import AIMessage, HumanMessage 7 | 8 | 9 | llm = ChatUpstage(streaming=True) 10 | 11 | # More general chat 12 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 13 | [ 14 | ("system", "You are a helpful assistant."), 15 | MessagesPlaceholder(variable_name="history"), 16 | ("human", "{message}"), 17 | ] 18 | ) 19 | 20 | chain = chat_with_history_prompt | llm | StrOutputParser() 21 | 22 | 23 | def chat(message, history): 24 | history_langchain_format = [] 25 | for human, ai in history: 26 | history_langchain_format.append(HumanMessage(content=human)) 27 | history_langchain_format.append(AIMessage(content=ai)) 28 | 29 | generator = chain.stream({"message": message, "history": history_langchain_format}) 30 | 31 | assistant = "" 32 | for gen in generator: 33 | assistant += gen 34 | yield assistant 35 | 36 | 37 | with gr.Blocks() as demo: 38 | chatbot = gr.ChatInterface( 39 | chat, 40 | examples=[ 41 | "How to eat healthy?", 42 | "Best Places in Korea", 43 | "How to make a chatbot?", 44 | ], 45 | title="Solar Chatbot", 46 | description="Upstage Solar Chatbot", 47 | ) 48 | chatbot.chatbot.height = 300 49 | 50 | if __name__ == "__main__": 51 | demo.launch() 52 | -------------------------------------------------------------------------------- /chat.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 8 | from langchain_core.messages import AIMessage, HumanMessage 9 | 10 | from solar_util import initialize_solar_llm 11 | 12 | from solar_util import prompt_engineering 13 | 14 | llm = 
initialize_solar_llm() 15 | st.set_page_config(page_title="Chat") 16 | st.title("SolarLLM") 17 | 18 | 19 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 20 | [ 21 | ("human", """You are Solar, a smart chatbot by Upstage, loved by many people. 22 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language. 23 | reply in the same language of the user query. 24 | Solar is now being connected with a human. 25 | 26 | Please put in the end of your answer."""), 27 | MessagesPlaceholder("chat_history"), 28 | ("human", "{user_query}"), 29 | ] 30 | ) 31 | 32 | 33 | 34 | def get_response(user_query, chat_history): 35 | chain = chat_with_history_prompt | llm | StrOutputParser() 36 | return chain.stream( 37 | { 38 | "chat_history": chat_history, 39 | "user_query": user_query, 40 | } 41 | ) 42 | 43 | 44 | if "messages" not in st.session_state: 45 | st.session_state.messages = [] 46 | 47 | for message in st.session_state.messages: 48 | role = "AI" if isinstance(message, AIMessage) else "Human" 49 | with st.chat_message(role): 50 | st.markdown(message.content) 51 | 52 | enhance_prompt = st.toggle("Enhance prompt", True) 53 | if prompt := st.chat_input("What is up?"): 54 | if enhance_prompt: 55 | with st.status("Prompt engineering..."): 56 | new_prompt = prompt_engineering(prompt, st.session_state.messages) 57 | st.write(new_prompt) 58 | 59 | if 'enhanced_prompt' in new_prompt: 60 | prompt = new_prompt['enhanced_prompt'] 61 | 62 | st.session_state.messages.append(HumanMessage(content=prompt)) 63 | with st.chat_message("user"): 64 | st.markdown(prompt) 65 | 66 | with st.chat_message("assistant"): 67 | response = st.write_stream(get_response(prompt, st.session_state.messages)) 68 | st.session_state.messages.append(AIMessage(content=response)) 69 | 70 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Define the two virtual environments 2 | VENV = .venv 3 | BROWSER_VENV = .venv-browser 4 | PYTHON = $(VENV)/bin/python3 5 | BROWSER_PYTHON = $(BROWSER_VENV)/bin/python3 6 | PIP = $(VENV)/bin/pip3 7 | BROWSER_PIP = $(BROWSER_VENV)/bin/pip3 8 | STREAMLIT = $(VENV)/bin/streamlit 9 | BROWSER_STREAMLIT = $(BROWSER_VENV)/bin/streamlit 10 | GRADIO = $(VENV)/bin/gradio 11 | 12 | # Basic venv without browser dependencies 13 | $(VENV)/bin/activate: requirements.txt 14 | python3 -m venv $(VENV) 15 | $(PIP) install -r requirements.txt 16 | 17 | # Browser-enabled venv with playwright 18 | $(BROWSER_VENV)/bin/activate: requirements.txt 19 | python3 -m venv $(BROWSER_VENV) 20 | $(BROWSER_PIP) install -r requirements.txt 21 | $(BROWSER_PIP) install playwright 22 | $(BROWSER_PYTHON) -m playwright install 23 | 24 | chatopenai: $(VENV)/bin/activate 25 | $(STREAMLIT) run chatopenai.py 26 | 27 | coldmail: $(VENV)/bin/activate 28 | $(STREAMLIT) run coldmail.py 29 | 30 | chat: $(VENV)/bin/activate 31 | $(STREAMLIT) run chat.py 32 | 33 | chatpdf: $(VENV)/bin/activate 34 | $(STREAMLIT) run chatpdf.py 35 | 36 | longimg: $(VENV)/bin/activate 37 | $(STREAMLIT) run longimg.py 38 | 39 | chatpdfemb: $(VENV)/bin/activate 40 | $(STREAMLIT) run chatpdfemb.py 41 | 42 | gemini: $(VENV)/bin/activate 43 | $(STREAMLIT) run gemini.py 44 | 45 | chatgradio: $(VENV)/bin/activate 46 | $(GRADIO) chatgradio.py 47 | 48 | docv: $(VENV)/bin/activate 49 | $(STREAMLIT) run docv.py 50 | 51 | search: $(VENV)/bin/activate 52 | $(STREAMLIT) run chatsearch.py 53 | 54 | reasoning: $(VENV)/bin/activate 
55 | $(STREAMLIT) run reasoning.py 56 | 57 | discussion: $(VENV)/bin/activate 58 | $(STREAMLIT) run discussion.py --server.port 9093 59 | 60 | llama: $(VENV)/bin/activate 61 | $(STREAMLIT) run llama.py 62 | 63 | voice: $(VENV)/bin/activate 64 | $(STREAMLIT) run voice.py 65 | 66 | hw: $(VENV)/bin/activate 67 | $(STREAMLIT) run hw.py 68 | 69 | util: $(VENV)/bin/activate 70 | $(PYTHON) solar_util.py 71 | 72 | podcast: $(VENV)/bin/activate 73 | $(STREAMLIT) run podcast.py 74 | 75 | biz_help: $(VENV)/bin/activate 76 | $(STREAMLIT) run biz_help.py 77 | 78 | info_fill: $(BROWSER_VENV)/bin/activate 79 | $(BROWSER_STREAMLIT) run info_fill.py 80 | 81 | jaisearch: $(BROWSER_VENV)/bin/activate 82 | $(STREAMLIT) run jaisearch.py 83 | 84 | jaichat: $(BROWSER_VENV)/bin/activate 85 | $(STREAMLIT) run jaichat.py 86 | 87 | translate: $(BROWSER_VENV)/bin/activate 88 | $(STREAMLIT) run translate.py 89 | 90 | solar-r: $(BROWSER_VENV)/bin/activate 91 | $(STREAMLIT) run solar-r.py 92 | 93 | fc: $(BROWSER_VENV)/bin/activate 94 | $(STREAMLIT) run fc.py 95 | 96 | namecard: $(VENV)/bin/activate 97 | $(STREAMLIT) run namecard.py 98 | 99 | clean: 100 | rm -rf __pycache__ 101 | rm -rf $(VENV) 102 | rm -rf $(BROWSER_VENV) -------------------------------------------------------------------------------- /pdf_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from enum import Enum 4 | 5 | 6 | class PDFType(Enum): 7 | SCANNED = "Scanned PDF (lacks embedded fonts and text objects)" 8 | DIGITAL = "Digital PDF (text-based and font information present)" 9 | UNCERTAIN = "Uncertain (No strong indicators found)" 10 | 11 | 12 | def is_ocr_pdf(pdf_path): 13 | """ 14 | Determine if a PDF file is digital–born (text-based) or scanned (mostly images) 15 | without relying on any external PDF libraries. 16 | 17 | The function reads the raw PDF file content and decodes it to a string. 18 | It then uses basic heuristics by searching for: 19 | - Embedded fonts (via the '/Font' keyword) 20 | - Text drawing commands (via the 'BT' operator; PDFs typically use 'BT' ... 'ET' 21 | to delimit text blocks) 22 | - Image objects (via the '/Subtype /Image' declaration) 23 | - Keywords (like 'scan', 'ocr', or 'adobe acrobat') which sometimes appear in 24 | scanned PDFs 25 | 26 | If the PDF contains both font definitions and text commands, and the (roughly) 27 | extracted text (from between BT and ET markers) sums up to a significant length, 28 | it is assumed to be digital–born. If the PDF appears dominated by images or has 29 | keywords suggesting scanning, it is classified as scanned. In other cases, the PDF 30 | is marked as uncertain. 31 | 32 | Args: 33 | pdf_path (str): Path to the PDF file. 34 | 35 | Returns: 36 | PDFType: DIGITAL if the PDF appears text–based; SCANNED if it appears to be a scanned image; 37 | UNCERTAIN if the evidence isn't strong enough. 38 | """ 39 | try: 40 | with open(pdf_path, "rb") as f: 41 | content = f.read().decode("latin-1", errors="replace") 42 | except Exception: 43 | # If the file cannot be read or decoded, we return UNCERTAIN. 44 | return PDFType.UNCERTAIN 45 | 46 | # Count indicators of digital PDFs. 47 | font_count = len(re.findall(r'/Font\b', content)) 48 | text_command_count = len(re.findall(r'\bBT\b', content)) 49 | 50 | # Try to extract content from text objects (between 'BT' and 'ET'). 
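    # For reference, a text block in a digital PDF typically looks like the
    # following (a minimal, hypothetical example of the BT/ET syntax this
    # regex targets):
    #   BT /F1 12 Tf 72 720 Td (Hello, world) Tj ET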
51 | text_segments = re.findall(r'BT\s*(.*?)\s*ET', content, re.DOTALL) 52 | extracted_text = " ".join(text_segments) 53 | extracted_text_length = len(extracted_text.strip()) 54 | 55 | # Count indications of images that might suggest a scan. 56 | image_count = len(re.findall(r'/Subtype\s*/Image', content)) 57 | # Look for scanned–related keywords. 58 | scanned_keyword = bool(re.search(r'(?i)\b(?:scan(?:ned)?|ocr|adobe\s+acrobat)\b', content)) 59 | 60 | # Heuristics: 61 | # 1. A digital PDF will usually have embedded fonts and multiple text drawing commands, 62 | # resulting in a reasonable amount of extracted text. 63 | # 2. A scanned PDF may have few (or zero) font markers and often many image objects. 64 | if font_count > 0 and text_command_count > 0 and extracted_text_length > 100: 65 | return PDFType.DIGITAL 66 | elif image_count > font_count or scanned_keyword: 67 | return PDFType.SCANNED 68 | else: 69 | return PDFType.UNCERTAIN 70 | -------------------------------------------------------------------------------- /chatpdf.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | from langchain_upstage import GroundednessCheck 6 | 7 | from langchain_core.output_parsers import StrOutputParser 8 | from langchain_core.messages import AIMessage, HumanMessage 9 | from langchain_upstage import UpstageLayoutAnalysisLoader 10 | import tempfile, os 11 | 12 | from langchain import hub 13 | 14 | st.title("LangChain ChatDoc") 15 | 16 | llm = Chat() 17 | # https://smith.langchain.com/hub/hunkim/rag-qa-with-history 18 | chat_with_history_prompt = hub.pull("hunkim/rag-qa-with-history") 19 | 20 | groundedness_check = GroundednessCheck() 21 | 22 | 23 | def get_response(user_query, chat_history): 24 | chain = chat_with_history_prompt | llm | StrOutputParser() 25 | 26 | return chain.stream( 27 | { 28 | "chat_history": chat_history, 29 | "question": user_query, 30 | "context": st.session_state.docs, 31 | } 32 | ) 33 | 34 | 35 | if "messages" not in st.session_state: 36 | st.session_state.messages = [] 37 | 38 | if "docs" not in st.session_state: 39 | st.session_state.docs = [] 40 | 41 | with st.sidebar: 42 | st.header(f"Add your documents!") 43 | 44 | uploaded_file = st.file_uploader("Choose your `.pdf` file", type="pdf") 45 | 46 | if uploaded_file and not uploaded_file.name in st.session_state: 47 | with st.status("Processing the data ..."): 48 | with tempfile.TemporaryDirectory() as temp_dir: 49 | file_path = os.path.join(temp_dir, uploaded_file.name) 50 | 51 | with open(file_path, "wb") as f: 52 | f.write(uploaded_file.getvalue()) 53 | 54 | st.write("Indexing your document...") 55 | layzer = UpstageLayoutAnalysisLoader(file_path, split="page") 56 | # For improved memory efficiency, consider using the lazy_load method to load documents page by page. 
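                # A lazy-loading sketch (assumes you still want all pages in memory at the end):
                #   docs = []
                #   for page in layzer.lazy_load():
                #       docs.append(page)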
57 | docs = layzer.load() # or layzer.lazy_load() 58 | st.session_state.docs = docs 59 | st.write(docs) 60 | 61 | # processed 62 | st.session_state[uploaded_file.name] = True 63 | 64 | st.success("Ready to Chat!") 65 | 66 | 67 | for message in st.session_state.messages: 68 | role = "AI" if isinstance(message, AIMessage) else "Human" 69 | with st.chat_message(role): 70 | st.markdown(message.content) 71 | 72 | if prompt := st.chat_input("What is up?", disabled=not st.session_state.docs): 73 | st.session_state.messages.append( 74 | HumanMessage( 75 | content=prompt, 76 | ) 77 | ) 78 | with st.chat_message("user"): 79 | st.markdown(prompt) 80 | 81 | with st.chat_message("assistant"): 82 | with st.status("Getting context..."): 83 | st.write(st.session_state.docs) 84 | response = st.write_stream(get_response(prompt, st.session_state.messages)) 85 | gc_result = groundedness_check.run( 86 | { 87 | "context": f"Context:{st.session_state.docs}\n\nQuestion{prompt}", 88 | "query": response, 89 | } 90 | ) 91 | 92 | if gc_result == "grounded": 93 | gc_mark = "✅" 94 | st.success("✅ Groundedness check passed!") 95 | else: 96 | gc_mark = "❌" 97 | st.error("❌ Groundedness check failed!") 98 | 99 | st.session_state.messages.append( 100 | AIMessage(content=f"{gc_mark} {response}"), 101 | ) 102 | -------------------------------------------------------------------------------- /jaichat.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage 5 | 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate 8 | from langchain_core.messages import AIMessage, HumanMessage 9 | 10 | from solar_util import initialize_solar_llm 11 | 12 | from solar_util import prompt_engineering 13 | 14 | import re 15 | import json 16 | 17 | jai = ChatUpstage(model=st.secrets["JAI_MODEL_NAME"], base_url=st.secrets["JAI_BASE_URL"], api_key=st.secrets["JAI_API_KEY"]) 18 | solar_pro = ChatUpstage(model="solar-pro") 19 | 20 | st.set_page_config(page_title="Chat") 21 | st.title("SolarLLM") 22 | 23 | 24 | def is_korean(text): 25 | return re.search(r'[가-힣]', text) is not None 26 | 27 | 28 | def korean_to_thai(text): 29 | translate_prompt = PromptTemplate( 30 | template="""You are a language translator. Translate the following text from Korean to Thai. 31 | Here are some examples: 32 | 33 | Korean: 안녕하세요 34 | {{"translation": "สวัสดีครับ/ค่ะ"}} 35 | 36 | Korean: 감사합니다 37 | {{"translation": "ขอบคุณครับ/ค่ะ"}} 38 | 39 | Korean: 맛있어요 40 | {{"translation": "อร่อยครับ/ค่ะ"}} 41 | 42 | Now translate this: 43 | --- 44 | Korean: {text} 45 | --- 46 | Response format: 47 | {{"translation": "Thai translation here"}}""", 48 | input_variables=["text"] 49 | ) 50 | chain = translate_prompt | jai | StrOutputParser() 51 | result = chain.invoke({"text": text}) 52 | try: 53 | return json.loads(result)["translation"] 54 | except json.JSONDecodeError: 55 | st.error("Failed to parse translation response") 56 | return result 57 | 58 | def thai_to_korean(text): 59 | translate_prompt = PromptTemplate( 60 | template="""You are a language translator. Translate the following text from Thai to Korean. 
61 | Here are some examples: 62 | 63 | Thai: สวัสดีครับ/ค่ะ 64 | {{"translation": "안녕하세요"}} 65 | 66 | Thai: ขอบคุณครับ/ค่ะ 67 | {{"translation": "감사합니다"}} 68 | 69 | Thai: อร่อยครับ/ค่ะ 70 | {{"translation": "맛있어요"}} 71 | 72 | Now translate this: 73 | --- 74 | Thai: {text} 75 | --- 76 | Response format: 77 | {{"translation": "Korean translation here"}}""", 78 | input_variables=["text"] 79 | ) 80 | chain = translate_prompt | solar_pro | StrOutputParser() 81 | result = chain.invoke({"text": text}) 82 | try: 83 | return json.loads(result)["translation"] 84 | except json.JSONDecodeError: 85 | st.error("Failed to parse translation response") 86 | return result 87 | 88 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 89 | [ 90 | ("human", """You are Solar, a smart chatbot by Upstage, loved by many people. 91 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language. 92 | reply in the same language of the user query. 93 | Solar is now being connected with a human."""), 94 | MessagesPlaceholder("chat_history"), 95 | ("human", "{user_query}"), 96 | ] 97 | ) 98 | 99 | 100 | 101 | def get_response(user_query, chat_history): 102 | chain = chat_with_history_prompt | jai | StrOutputParser() 103 | 104 | return chain.stream( 105 | { 106 | "chat_history": chat_history, 107 | "user_query": user_query, 108 | } 109 | ) 110 | 111 | 112 | if "messages" not in st.session_state: 113 | st.session_state.messages = [] 114 | 115 | for message in st.session_state.messages: 116 | role = "AI" if isinstance(message, AIMessage) else "Human" 117 | with st.chat_message(role): 118 | st.markdown(message.content) 119 | 120 | enhance_prompt = st.toggle("Enhance prompt", False) 121 | if prompt := st.chat_input("What is up?"): 122 | # Check if input is Korean and translate if needed 123 | if is_korean(prompt): 124 | with st.status("Translating Korean to Thai..."): 125 | prompt = korean_to_thai(prompt) 126 | st.write(f"Translated to Thai: {prompt}") 127 | 128 | if enhance_prompt: 129 | with st.status("Prompt engineering..."): 130 | new_prompt = prompt_engineering(prompt, st.session_state.messages) 131 | st.write(new_prompt) 132 | 133 | if 'enhanced_prompt' in new_prompt: 134 | prompt = new_prompt['enhanced_prompt'] 135 | 136 | with st.chat_message("user"): 137 | st.markdown(prompt) 138 | 139 | with st.chat_message("assistant"): 140 | response = st.write_stream(get_response(prompt, st.session_state.messages)) 141 | korean_response = thai_to_korean(response) 142 | st.write(korean_response) 143 | 144 | 145 | -------------------------------------------------------------------------------- /solar-r.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage 5 | from openai import OpenAI 6 | 7 | from langchain_core.output_parsers import StrOutputParser 8 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 9 | from langchain_core.messages import AIMessage, HumanMessage 10 | 11 | 12 | solar_mini = ChatUpstage(model="solar-mini") 13 | deepseek_r = ChatUpstage( 14 | model="deepseek-reasoner", 15 | base_url="https://api.deepseek.com/v1", 16 | api_key=st.secrets["DEEPSEEK_API_KEY"], 17 | max_tokens=100, 18 | ) 19 | 20 | deepseek_r = OpenAI(api_key=st.secrets["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com/v1") 21 | 22 | st.set_page_config(page_title="Solar-Online-R", layout="wide") 23 
| st.title("Solar-Online-R") 24 | st.caption("Deepseek-R enhanced Solar-mini: Combining Deepseek's reasoning with Solar's fast inference (Online Distillation)") 25 | 26 | 27 | 28 | def get_reasoning(user_query, chat_history, model="deepseek-reasoner"): 29 | # Convert chat history to OpenAI format 30 | messages = [ 31 | { 32 | "role": "system", 33 | "content": """You are Solar, a smart chatbot by Upstage, loved by many people. 34 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language. 35 | reply in the same language of the user query. 36 | 37 | You will receive input in the following format: 38 | detailed analysis or reasoning about the query 39 | the actual user question 40 | 41 | Use the reasoning provided to give a more informed and thoughtful response to the user query. 42 | Focus on incorporating insights from the reasoning while maintaining a natural, conversational tone. 43 | Solar is now being connected with a human.""" 44 | } 45 | ] 46 | 47 | # Add chat history 48 | for message in chat_history: 49 | role = "assistant" if isinstance(message, AIMessage) else "user" 50 | messages.append({"role": role, "content": message.content}) 51 | 52 | # Add current query 53 | messages.append({"role": "user", "content": user_query}) 54 | 55 | response = deepseek_r.chat.completions.create( 56 | model=model, 57 | messages=messages, 58 | max_tokens=1 59 | ) 60 | if response.choices[0].message.reasoning_content: 61 | return response.choices[0].message.reasoning_content 62 | else: 63 | return response.choices[0].message.content 64 | 65 | 66 | def get_response(user_query, chat_history, llm=solar_mini): 67 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 68 | [ 69 | ( 70 | "system", 71 | """You are Solar, a smart chatbot by Upstage, loved by many people. 72 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language. 73 | reply in the same language of the user query. 
74 | Solar is now being connected with a human.""", 75 | ), 76 | MessagesPlaceholder("chat_history"), 77 | ( 78 | "human", 79 | "{user_query}", 80 | ), 81 | ] 82 | ) 83 | 84 | chain = chat_with_history_prompt | llm | StrOutputParser() 85 | return chain.stream( 86 | { 87 | "chat_history": chat_history, 88 | "user_query": user_query, 89 | } 90 | ) 91 | 92 | 93 | if "messages" not in st.session_state: 94 | st.session_state.messages = [] 95 | 96 | for message in st.session_state.messages: 97 | role = "AI" if isinstance(message, AIMessage) else "Human" 98 | with st.chat_message(role): 99 | if role == "Human" and "" in message.content and "" in message.content: 100 | reasoning = message.content.split("")[1].split("")[0].strip() 101 | user_query = message.content.split("")[1].split("")[0].strip() 102 | with st.expander("Show reasoning"): 103 | st.markdown(reasoning) 104 | st.markdown(user_query) 105 | else: 106 | st.markdown(message.content) 107 | 108 | if prompt := st.chat_input("What is up?"): 109 | with st.chat_message("user"): 110 | st.markdown(prompt) 111 | 112 | with st.chat_message("assistant"): 113 | with st.status("Reasoning..."): 114 | reasoning = get_reasoning(prompt, st.session_state.messages) 115 | st.write(reasoning) 116 | prompt = f"""{reasoning} 117 | 118 | {prompt}""" 119 | response = st.write_stream(get_response(prompt, st.session_state.messages)) 120 | 121 | st.session_state.messages.append(HumanMessage(content=prompt)) 122 | st.session_state.messages.append(AIMessage(content=response)) 123 | -------------------------------------------------------------------------------- /longimg.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.messages import AIMessage, HumanMessage 8 | from langchain_upstage import UpstageDocumentParseLoader 9 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 10 | 11 | import tempfile, os 12 | 13 | from PIL import Image 14 | import math 15 | 16 | if 'all_doc_contents' not in st.session_state: 17 | st.session_state.all_doc_contents = None 18 | 19 | if 'basic_prompt' not in st.session_state: 20 | st.session_state.basic_prompt = """You are processing text extracted from a long image that was split into overlapping sections. Your task is to: 21 | 22 | 1. Analyze multiple sections of text that have ~50% overlap with adjacent sections 23 | 2. Identify and remove redundant content from the overlapping areas 24 | 3. Maintain the correct sequence and flow of the text 25 | 4. Preserve all unique information 26 | 5. Ensure proper paragraph breaks and formatting 27 | 6. Return a single, coherent document that reads naturally 28 | 7. Please keep the original text and do not revise or translate it. 29 | 30 | The following sections contain the extracted text, with overlapping content between them. Please combine them into one complete, non-redundant text while maintaining the original text and flow.""" 31 | 32 | 33 | def split_images(img_file_path, temp_dir): 34 | """ 35 | Split a long image into overlapping square sections. 36 | Returns a list of paths to the split image sections. 
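
    For example, a 1000x5000 px image yields square sections of height 1000
    with a 500 px overlap: ceil((5000-500)/(1000-500)) = 9 sections.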
37 | """ 38 | img = Image.open(img_file_path) 39 | width, height = img.size 40 | 41 | # Make sections square using the width as the height 42 | section_height = width 43 | overlap = section_height // 2 44 | 45 | # Calculate number of sections needed (accounting for overlap) 46 | num_sections = math.ceil((height - overlap) / (section_height - overlap)) 47 | 48 | # Create directory for split images 49 | split_dir = os.path.join(temp_dir, "splits") 50 | os.makedirs(split_dir, exist_ok=True) 51 | 52 | split_image_paths = [] 53 | for i in range(num_sections): 54 | # Calculate section boundaries 55 | top = i * (section_height - overlap) 56 | bottom = min(top + section_height, height) 57 | 58 | # Adjust last section to include remaining pixels 59 | if i == num_sections - 1: 60 | top = height - section_height 61 | 62 | # Crop and save section 63 | section = img.crop((0, top, width, bottom)) 64 | section_path = os.path.join(split_dir, f"section_{i}.png") 65 | section.save(section_path) 66 | split_image_paths.append(section_path) 67 | 68 | return split_image_paths 69 | 70 | def img_to_doc_content(img_file_path): 71 | """Extract text content from an image using Upstage document parser.""" 72 | dp = UpstageDocumentParseLoader(img_file_path, split="page") 73 | docs = dp.load() 74 | return "\n".join([doc.page_content for doc in docs]) 75 | 76 | def combine_doc_contents(all_doc_contents, llm): 77 | """Combine multiple text sections into a coherent document using LLM.""" 78 | eval_prompt = ChatPromptTemplate.from_messages([ 79 | ("human", "{basic_prompt}"), 80 | ("human", "{all_doc_contents}"), 81 | ]) 82 | llm_chain = eval_prompt | llm | StrOutputParser() 83 | return llm_chain.stream({ 84 | "basic_prompt": st.session_state.basic_prompt, 85 | "all_doc_contents": all_doc_contents 86 | }) 87 | 88 | 89 | st.title("Solar Long Image") 90 | st.markdown(""" 91 | This app processes long images by: 92 | 1. Splitting them into overlapping sections 93 | 2. Extracting text from each section 94 | 3. Intelligently combining the text to remove duplicates 95 | 4. Producing a single coherent document 96 | 97 | Upload your image below to get started. 
98 | """) 99 | 100 | llm = Chat(model="solar-pro") 101 | 102 | 103 | uploaded_file = st.file_uploader("Choose your long image file", type=["png", "jpg", "jpeg", "tiff", "bmp", "webp"]) 104 | 105 | if uploaded_file and uploaded_file.name: 106 | with tempfile.TemporaryDirectory() as temp_dir: 107 | file_path = os.path.join(temp_dir, uploaded_file.name) 108 | 109 | with open(file_path, "wb") as f: 110 | f.write(uploaded_file.getvalue()) 111 | 112 | # Process image in steps with status indicators 113 | with st.status("Splitting the image..."): 114 | split_img_paths = split_images(file_path, temp_dir) 115 | st.write(f"Split into {len(split_img_paths)} sections") 116 | 117 | # Process each section 118 | all_doc_contents = [] 119 | for i, path in enumerate(split_img_paths): 120 | with st.status(f"Processing section {i+1}/{len(split_img_paths)}..."): 121 | st.image(path) 122 | doc_content = img_to_doc_content(path) 123 | st.write(doc_content) 124 | all_doc_contents.append(doc_content) 125 | 126 | # Combine all sections 127 | with st.status("Combining sections...", expanded=True): 128 | st.session_state.all_doc_contents = all_doc_contents 129 | combined_doc_content = combine_doc_contents(all_doc_contents, llm) 130 | st.write_stream(combined_doc_content) 131 | 132 | # Cleanup split images 133 | for path in split_img_paths: 134 | os.remove(path) 135 | 136 | 137 | -------------------------------------------------------------------------------- /docv.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | from langchain_upstage import UpstageLayoutAnalysisLoader 6 | 7 | 8 | from langchain_core.output_parsers import StrOutputParser 9 | from langchain_core.prompts import ChatPromptTemplate 10 | from langchain_core.messages import AIMessage, HumanMessage 11 | from streamlit_paste_button import paste_image_button as pbutton 12 | 13 | import base64 14 | import io 15 | import tempfile 16 | 17 | 18 | DOCV_MODEL_NAME = st.secrets["DOCV_MODEL_NAME"] 19 | docv = Chat(model=DOCV_MODEL_NAME) 20 | 21 | 22 | MODEL_NAME = st.secrets["MODEL_NAME"] 23 | solar_pro = Chat(model=MODEL_NAME) 24 | 25 | chat_with_history_prompt = ChatPromptTemplate.from_template( 26 | """ 27 | You are a helpful assistant. 28 | Answer the following questions considering the history of the conversation. 29 | Think step by step and to explain your thought process as you answer. 30 | ---- 31 | Chat history: {chat_history} 32 | ---- 33 | Image context in HTML from OCR: {image_context} 34 | ---- 35 | User question: {user_query} 36 | """ 37 | ) 38 | 39 | 40 | def get_img_context(img_bytes): 41 | image_context = "" 42 | if img_bytes: 43 | with tempfile.NamedTemporaryFile(delete=True) as f: 44 | f.write(img_bytes) 45 | image_path = f.name 46 | 47 | layzer = UpstageLayoutAnalysisLoader(image_path, split="page") 48 | # For improved memory efficiency, consider using the lazy_load method to load documents page by page. 
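        # Note: lazy_load() saves memory only if pages are consumed one at a time,
        # e.g. (sketch): image_context = [page.page_content for page in layzer.lazy_load()]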
49 | docs = layzer.load() # or layzer.lazy_load() 50 | image_context = [doc.page_content for doc in docs] 51 | 52 | return image_context 53 | 54 | 55 | def get_solar_pro_response(user_query, chat_history, image_context: str = None): 56 | chain = chat_with_history_prompt | solar_pro | StrOutputParser() 57 | 58 | return chain.stream( 59 | { 60 | "chat_history": chat_history, 61 | "image_context": image_context, 62 | "user_query": user_query, 63 | } 64 | ) 65 | 66 | 67 | def write_docv_response_stream(human_message): 68 | chain = docv | StrOutputParser() 69 | response = st.write_stream( 70 | chain.stream(st.session_state.messages + [human_message]) 71 | ) 72 | return response 73 | 74 | 75 | def get_human_message(text_data, image_data=None): 76 | if not image_data: 77 | return HumanMessage(content=text_data) 78 | 79 | return HumanMessage( 80 | content=[ 81 | {"type": "text", "text": f"{text_data}"}, 82 | { 83 | "type": "image_url", 84 | "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, 85 | }, 86 | ], 87 | ) 88 | 89 | 90 | def get_human_message_img_url(text_data, image_url=None): 91 | if not image_url: 92 | return HumanMessage(content=text_data) 93 | 94 | return HumanMessage( 95 | content=[ 96 | {"type": "text", "text": f"{text_data}"}, 97 | { 98 | "type": "image_url", 99 | "image_url": {"url": f"{image_url}"}, 100 | }, 101 | ], 102 | ) 103 | 104 | 105 | if "messages" not in st.session_state: 106 | st.session_state.messages = [] 107 | 108 | 109 | for message in st.session_state.messages: 110 | role = "AI" if isinstance(message, AIMessage) else "Human" 111 | with st.chat_message(role): 112 | if len(message.content) == 2: 113 | st.markdown(message.content[0]["text"]) 114 | else: 115 | st.markdown(message.content) 116 | 117 | 118 | img_file_buffer = st.file_uploader("Upload a image image", type=["png", "jpg", "jpeg"]) 119 | img_bytes = None 120 | if img_file_buffer: 121 | # reset history 122 | st.session_state.messages = [] 123 | st.image(img_file_buffer) 124 | img_bytes = img_file_buffer.read() 125 | 126 | paste_result = pbutton("📋 Paste an image") 127 | if paste_result.image_data is not None: 128 | # reset history 129 | st.session_state.messages = [] 130 | st.write("Pasted image:") 131 | st.image(paste_result.image_data) 132 | img_bytes = io.BytesIO() 133 | paste_result.image_data.save(img_bytes, format="PNG") 134 | img_bytes = img_bytes.getvalue() # Image as bytes 135 | 136 | if prompt := st.chat_input("What is up?"): 137 | human_message = get_human_message(prompt) 138 | if img_bytes: 139 | # remove the image from the buffer 140 | for message in st.session_state.messages: 141 | if isinstance(message, HumanMessage): 142 | if len(message.content) == 2: 143 | if message.content[1]["type"] == "image_url": 144 | st.session_state.messages.remove(message) 145 | break 146 | 147 | img_base64 = base64.b64encode(img_bytes).decode("utf-8") 148 | human_message = get_human_message(prompt, img_base64) 149 | img_file_buffer = None 150 | 151 | with st.chat_message("user"): 152 | st.markdown(prompt) 153 | 154 | with st.chat_message("assistant"): 155 | st.markdown("**Model1:**") 156 | response = write_docv_response_stream(human_message) 157 | 158 | st.markdown("**Model2:**") 159 | img_context = get_img_context(img_bytes) 160 | st.json(img_context, expanded=False) 161 | response2 = st.write_stream( 162 | get_solar_pro_response(prompt, st.session_state.messages, img_context) 163 | ) 164 | 165 | st.session_state.messages.append(human_message) 166 | 
st.session_state.messages.append(AIMessage(content=response)) 167 | -------------------------------------------------------------------------------- /solar_util.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from langchain_upstage import ChatUpstage 3 | 4 | from langchain_core.output_parsers import JsonOutputParser 5 | from langchain_core.prompts import PromptTemplate 6 | from pydantic import BaseModel, Field 7 | 8 | from tokenizers import Tokenizer 9 | 10 | 11 | solar_tokenizer = Tokenizer.from_pretrained("upstage/solar-pro-preview-tokenizer") 12 | 13 | 14 | def truncate_to_token_limit(text: str, max_tokens: int = 15000) -> str: 15 | """ 16 | Truncate text to fit within max token limit using Solar tokenizer. 17 | """ 18 | tokenizer = Tokenizer.from_pretrained("upstage/solar-pro-tokenizer") 19 | encoded = tokenizer.encode(text) 20 | 21 | if len(encoded.ids) <= max_tokens: 22 | return text 23 | 24 | print(f"Truncating text from {len(encoded.ids)} tokens to {max_tokens} tokens") 25 | 26 | # Find the last period within the token limit to avoid cutting mid-sentence 27 | truncated_ids = encoded.ids[:max_tokens] 28 | truncated_text = tokenizer.decode(truncated_ids) 29 | 30 | # Try to find the last complete sentence 31 | last_period = truncated_text.rfind('.') 32 | if last_period > 0: 33 | truncated_text = truncated_text[:last_period + 1] 34 | 35 | return truncated_text 36 | 37 | 38 | def initialize_solar_llm(MODEL_NAME=None): 39 | if MODEL_NAME is None: 40 | MODEL_NAME = st.secrets.get("SOLAR_MODEL_NAME", "solar-pro") 41 | 42 | # Initialize llm with default values 43 | llm_kwargs = {"model": MODEL_NAME} 44 | 45 | # Add base_url if it's set in secrets 46 | if "SOLAR_BASE_URL" in st.secrets: 47 | llm_kwargs["base_url"] = st.secrets["SOLAR_BASE_URL"] 48 | 49 | # Add api_key if it's set in secrets 50 | if "SOLAR_API_KEY" in st.secrets: 51 | llm_kwargs["api_key"] = st.secrets["SOLAR_API_KEY"] 52 | 53 | return ChatUpstage(**llm_kwargs) 54 | 55 | 56 | # Define your desired data structure. 57 | # {"original_prompt": "original prompt", "enhanced_prompt": "enhanced prompt", "techniques": "technique"} 58 | # Define your desired data structure. 59 | class PromptEngineering(BaseModel): 60 | original_prompt: str = Field(description="original prompt") 61 | enhanced_prompt: str = Field( 62 | description="enhanced prompt after applying prompt engineering techniques" 63 | ) 64 | techniques: str = Field( 65 | description="prompt engineering technique used to enhance the prompt" 66 | ) 67 | 68 | 69 | parser = JsonOutputParser(pydantic_object=PromptEngineering) 70 | 71 | prompt = """Use these prompt engineering technique and enhance user prompt to generate more effective prompt. 72 | Consider the chat history for context if available. 73 | Please write the promt in Korean. 74 | ---- 75 | Chat History: 76 | {chat_history} 77 | ---- 78 | Output should be in json format: 79 | \n{format_instructions} 80 | ---- 81 | prompt engineering techniques: 82 | 83 | Chain of Thought (CoT): This technique encourages the model to think aloud, showing the steps it takes to reach a conclusion. Example: "Imagine you're a detective solving a mystery. Describe your thought process as you deduce who the culprit is in this scenario: [insert scenario]." 84 | 85 | Chain of Cause (CoC): This technique focuses on identifying and explaining the causes and effects in a situation. Example: "You're a historian analyzing a historical event. 
Describe the chain of causes that led to this event: [insert event]." 86 | 87 | Program-Aided Language Models (PAL): This technique involves providing a simple program or pseudo-code to guide the model's response. Example: "Write a Python function to calculate the factorial of a number. Then, use this function to find the factorial of 5." 88 | 89 | Tree of Thoughts (ToT): This technique visualizes the thought process as a tree, with branches representing different ideas or possibilities. Example: "You're a marketing strategist brainstorming ideas for a new campaign. Present your ideas as a tree of thoughts, with the main idea at the root and branches representing sub-ideas." 90 | 91 | Least-to-Most: This technique starts with the simplest or most basic explanation and gradually increases complexity. Example: "Explain the concept of machine learning, starting from the most basic definition and gradually adding more details and complexities." 92 | 93 | Self-Consistency: This technique encourages the model to ensure its responses are consistent with previous statements or information. Example: "You're a character in a story. Ensure all your responses are consistent with the character's background and previous statements." 94 | ---- 95 | originalprompt: {original_prompt} 96 | ---- 97 | 98 | """ 99 | 100 | prompt = PromptTemplate( 101 | template=prompt, 102 | input_variables=["original_prompt", "chat_history"], 103 | partial_variables={"format_instructions": parser.get_format_instructions()}, 104 | ) 105 | 106 | 107 | 108 | def prompt_engineering(original_prompt, chat_history=None, llm=None): 109 | if llm is None: 110 | llm = initialize_solar_llm() 111 | chain = prompt | llm | parser 112 | 113 | # Invoke the chain with the joke_query. 114 | parsed_output = chain.invoke( 115 | {"original_prompt": original_prompt, "chat_history": chat_history} 116 | ) 117 | 118 | return parsed_output 119 | 120 | 121 | def result_reference_summary(results): 122 | results.reverse() 123 | result_summary = "" 124 | for i, r in enumerate(results): 125 | result_summary += f"[{i+1}] {r['title']} - URL: {r['url']}\n{r['content']}\n\n" 126 | 127 | return result_summary 128 | 129 | 130 | def num_of_tokens(text): 131 | return len(solar_tokenizer.encode(text).ids) 132 | 133 | 134 | if __name__ == "__main__": 135 | print(num_of_tokens("Hello, world!")) 136 | -------------------------------------------------------------------------------- /hw.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ( 5 | UpstageLayoutAnalysisLoader, 6 | UpstageGroundednessCheck, 7 | ChatUpstage, 8 | ) 9 | from langchain_core.output_parsers import StrOutputParser 10 | from langchain.prompts import ChatPromptTemplate 11 | 12 | from openpyxl import Workbook 13 | 14 | import io 15 | import os 16 | import re 17 | import tempfile 18 | import unicodedata 19 | 20 | 21 | if "processed_files" not in st.session_state: 22 | st.session_state.processed_files = set() 23 | if "students_data" not in st.session_state: 24 | st.session_state.students_data = [] 25 | 26 | 27 | st.title("Solar HW Grader") 28 | st.write( 29 | "This is Solar SNU HW grader demo. Get your KEY at https://console.upstage.ai/" 30 | ) 31 | 32 | llm = ChatUpstage(model="solar-pro") 33 | 34 | hw_prompt = ChatPromptTemplate.from_messages( 35 | [ 36 | ( 37 | "system", 38 | """You are Prof. 
Solar, very nice and smart, loved by many people. 39 | """, 40 | ), 41 | ( 42 | "human", 43 | """For given report, please provide score 1-5 and quick summary of the report and explain your score and provide advice. Format your response as follows: 44 | Score: [score] 45 | Summary: [summary] 46 | Explanation: [explanation] 47 | Advice: [advice] 48 | --- 49 | Student report: {student_report}, 50 | """, 51 | ), 52 | ] 53 | ) 54 | 55 | groundedness_check = UpstageGroundednessCheck() 56 | 57 | 58 | def get_response(retrieved_docs): 59 | chain = hw_prompt | llm | StrOutputParser() 60 | 61 | return chain.stream( 62 | { 63 | "student_report": retrieved_docs, 64 | } 65 | ) 66 | 67 | 68 | def create_excel_grade(students_data): 69 | wb = Workbook() 70 | ws = wb.active 71 | ws.title = "Grades" 72 | 73 | ws["A1"] = "File Name" 74 | ws["B1"] = "Score" 75 | ws["C1"] = "Feedback" 76 | 77 | for row, (name, score, feedback) in enumerate(students_data, start=2): 78 | # Normalize the Korean name to composed form 79 | normalized_name = unicodedata.normalize("NFC", name) 80 | ws[f"A{row}"] = normalized_name 81 | ws[f"B{row}"] = score 82 | ws[f"C{row}"] = feedback 83 | 84 | return wb 85 | 86 | 87 | def process_pdf_file(file_path): 88 | with st.status(f"Document Parsing {file_path}..."): 89 | layzer = UpstageLayoutAnalysisLoader(file_path, split="page") 90 | # For improved memory efficiency, consider using the lazy_load method to load documents page by page. 91 | docs = layzer.load() # or layzer.lazy_load() 92 | 93 | with st.chat_message("user"): 94 | st.markdown(f"Grading {file_path}") 95 | 96 | file_name = os.path.basename(file_path) 97 | student_name_match = re.search(r"^(.*?)(?=\d)", file_name, re.UNICODE) 98 | student_name = ( 99 | student_name_match.group(1).strip() if student_name_match else "Unknown" 100 | ) 101 | 102 | with st.chat_message("assistant"): 103 | full_response = "" 104 | response_placeholder = st.empty() 105 | for chunk in get_response(docs): 106 | full_response += chunk 107 | response_placeholder.markdown(full_response) 108 | 109 | score_match = re.search(r"Score: (\d+)", full_response) 110 | score = score_match.group(1) if score_match else "N/A" 111 | 112 | return student_name, score, full_response 113 | 114 | 115 | uploaded_files = st.file_uploader( 116 | "Choose your `.pdf` file", type=["pdf"], accept_multiple_files=True 117 | ) 118 | 119 | if type(uploaded_files) is not list: 120 | uploaded_files = [uploaded_files] 121 | 122 | MAX_RETRIES = 4 123 | for uploaded_file in uploaded_files: 124 | if uploaded_file and uploaded_file.name not in st.session_state.processed_files: 125 | with tempfile.TemporaryDirectory() as temp_dir: 126 | file_path = os.path.join(temp_dir, uploaded_file.name) 127 | 128 | with open(file_path, "wb") as f: 129 | f.write(uploaded_file.getvalue()) 130 | 131 | if uploaded_file.name.endswith(".pdf"): 132 | retries = 0 133 | while True: 134 | try: 135 | student_name, score, feedback = process_pdf_file(file_path) 136 | st.session_state.students_data.append( 137 | (student_name, score, feedback) 138 | ) 139 | st.session_state.processed_files.add(uploaded_file.name) 140 | break 141 | except Exception as e: 142 | retries += 1 143 | if retries > MAX_RETRIES: 144 | st.error(f"Failed to process {uploaded_file.name}: {e}") 145 | break 146 | 147 | 148 | if st.session_state.students_data: 149 | wb = create_excel_grade(st.session_state.students_data) 150 | 151 | excel_buffer = io.BytesIO() 152 | wb.save(excel_buffer) 153 | excel_buffer.seek(0) 154 | 155 | st.download_button( 156 | 
label="Download Excel Grades", 157 | data=excel_buffer, 158 | file_name="grades.xlsx", 159 | mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 160 | ) 161 | 162 | if st.session_state.processed_files: 163 | st.write("Processed files:") 164 | for file in st.session_state.processed_files: 165 | st.text(file) 166 | 167 | if st.session_state.students_data: 168 | st.write("Current Grades:") 169 | for name, score, _ in st.session_state.students_data: 170 | st.text(f"{name}: {score}") 171 | 172 | # Add a button to clear the session state 173 | if st.button("Clear All Data"): 174 | st.session_state.processed_files.clear() 175 | st.session_state.students_data.clear() 176 | -------------------------------------------------------------------------------- /fc.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import os 3 | from typing import Dict, List, Any 4 | import json 5 | from langchain_upstage import ChatUpstage 6 | from langchain_core.output_parsers import StrOutputParser 7 | from datetime import datetime 8 | 9 | def get_fc(claim: str): 10 | """Process a claim using the fact-checking model and return results.""" 11 | # Initialize the model 12 | fc = ChatUpstage( 13 | model="solar-google-fc", 14 | api_key=st.secrets["UPSTAGE_API_KEY"], 15 | base_url="https://fc.toy.x.upstage.ai/", 16 | model_kwargs={"stream": True}, 17 | ) 18 | 19 | results = [] 20 | chain = fc | StrOutputParser() 21 | claim_count = 0 22 | 23 | # Create placeholder for claims list 24 | claims_placeholder = st.empty() 25 | results_container = st.container() 26 | 27 | # Stream and accumulate responses 28 | for idx, chunk in enumerate(chain.stream(claim)): 29 | if not chunk: 30 | continue 31 | 32 | try: 33 | json_chunk = json.loads(chunk) 34 | results.append(json_chunk) 35 | 36 | # Handle claims list 37 | if 'claims' in json_chunk: 38 | claim_count = len(json_chunk['claims']) 39 | claims_placeholder.markdown("### Claims to be verified:") 40 | claims_text = "\n".join([f"{i+1}. {c}" for i, c in enumerate(json_chunk['claims'])]) 41 | claims_placeholder.markdown(claims_text) 42 | 43 | # Handle verdict display 44 | if 'verdict' in json_chunk: 45 | with results_container: 46 | verdict_class = get_verdict_class(json_chunk.get('verdict', '')) 47 | 48 | st.markdown(f""" 49 |
                        <div class="claim-card {verdict_class}">
                            <div class="claim-text">[{idx}/{claim_count}] {json_chunk.get('claim', '')}</div>
                            <div class="claim-verdict">{display_verdict(json_chunk.get('verdict', ''))}</div>
                            <div class="claim-explanation">Analysis: {json_chunk.get('explanation', '')}</div>
                        </div>
56 | """, unsafe_allow_html=True) 57 | 58 | if json_chunk.get('sources'): 59 | display_sources(json_chunk['sources']) 60 | except json.JSONDecodeError: 61 | st.error(f"Error parsing JSON from chunk: {chunk}") 62 | continue 63 | 64 | return results 65 | 66 | def get_verdict_class(verdict: str) -> str: 67 | """Return the CSS class based on verdict.""" 68 | verdict_map = { 69 | "TRUE": "claim-true", 70 | "FALSE": "claim-false" 71 | } 72 | return verdict_map.get(verdict, "claim-uncertain") 73 | 74 | def display_verdict(verdict: str) -> str: 75 | """Return formatted verdict text.""" 76 | verdict_map = { 77 | "TRUE": "✅ VERIFIED", 78 | "FALSE": "❌ FALSE" 79 | } 80 | return verdict_map.get(verdict, "⚠️ UNCERTAIN") 81 | 82 | def display_sources(sources: List[Dict[str, str]]) -> None: 83 | """Display sources in an expander.""" 84 | with st.expander("📚 View Sources", expanded=False): 85 | for source in sources: 86 | st.markdown(f""" 87 |
88 |

{source.get('title', 'Source')}

89 |
90 | {source.get('snippet', 'No snippet available')} 91 |
92 |
93 | """, unsafe_allow_html=True) 94 | 95 | def main(): 96 | """Main function to run the Streamlit app""" 97 | st.set_page_config( 98 | page_title="Fact Checker", 99 | layout="wide", 100 | initial_sidebar_state="collapsed" 101 | ) 102 | 103 | # Custom CSS 104 | st.markdown(""" 105 | 130 | """, unsafe_allow_html=True) 131 | 132 | st.title("✓ Solar-Google Fact Checker") 133 | st.markdown(""" 134 |

    <div class="description">
        Enter a statement to verify its accuracy. Our AI-powered system will analyze and fact-check each claim.
    </div>

137 | """, unsafe_allow_html=True) 138 | 139 | with st.container(): 140 | # Powered by Upstage AI 141 | st.code(""" 142 | # Powered by Upstage AI 143 | from langchain_upstage import ChatUpstage 144 | fc = ChatUpstage( 145 | model="solar-google-fc", 146 | api_key=st.secrets["UPSTAGE_API_KEY"], # Get your API key from https://console.upstage.ai/ 147 | base_url="https://fc.toy.x.upstage.ai/", 148 | ) 149 | 150 | result = fc.invoke(claim) 151 | """, language="python") 152 | 153 | st.warning( 154 | "**Disclaimer**: This is an experimental tool and results may not be 100% accurate. " 155 | "Please verify the information independently and use the provided sources to draw your own conclusions.", 156 | icon="⚠️" 157 | ) 158 | 159 | # Input area with a check button 160 | col1, col2 = st.columns([4, 1]) 161 | with col1: 162 | claim = st.text_area( 163 | "Enter your statement:", 164 | value="Upstage AI is founded in 2022 and it's CEO is Sung Kim and CTO is Elon Musk", 165 | key="claim_input", 166 | placeholder="Enter a statement to fact-check...", 167 | height=100 168 | ) 169 | with col2: 170 | check_button = st.button("🔍 Verify Facts", type="primary", use_container_width=True) 171 | 172 | if check_button and claim: 173 | with st.spinner("🔄 Analyzing statement... Please allow a few moments while we search and verify the information"): 174 | try: 175 | result = get_fc(claim) 176 | 177 | # Let's show it's done 178 | st.success("🔍 Analysis complete!") 179 | # Show raw JSON in a collapsible section 180 | with st.expander("🔍 View Raw Response", expanded=False): 181 | st.json(result) 182 | except Exception as e: 183 | st.error(f"An error occurred: {str(e)}") 184 | 185 | if __name__ == "__main__": 186 | main() -------------------------------------------------------------------------------- /chatpdfemb.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ( 5 | UpstageLayoutAnalysisLoader, 6 | UpstageGroundednessCheck, 7 | ChatUpstage, 8 | UpstageEmbeddings, 9 | ) 10 | from langchain_core.output_parsers import StrOutputParser 11 | from langchain_core.messages import AIMessage, HumanMessage 12 | from langchain.text_splitter import RecursiveCharacterTextSplitter 13 | from langchain_community.vectorstores import FAISS 14 | from langchain.prompts import ChatPromptTemplate 15 | from langchain.load import dumps, loads 16 | 17 | import tempfile, os 18 | 19 | from langchain import hub 20 | 21 | from solar_util import initialize_solar_llm 22 | 23 | st.title("LangChain Upstage Solar ChatDoc") 24 | st.write( 25 | "This is a conversational AI that can chat with you about your documents! Get your KEY at https://console.upstage.ai/" 26 | ) 27 | 28 | llm = initialize_solar_llm() 29 | # https://smith.langchain.com/hub/hunkim/rag-qa-with-history 30 | chat_with_history_prompt = hub.pull("hunkim/rag-qa-with-history") 31 | 32 | groundedness_check = UpstageGroundednessCheck() 33 | 34 | 35 | def get_response(user_query, chat_history, retrieved_docs): 36 | chain = chat_with_history_prompt | llm | StrOutputParser() 37 | 38 | return chain.stream( 39 | { 40 | "chat_history": chat_history, 41 | "context": retrieved_docs, 42 | "question": user_query, 43 | } 44 | ) 45 | 46 | 47 | def query_expander(query): 48 | # Multi Query: Different Perspectives 49 | multi_query_template = """You are an AI language model assistant. 
Your task is to generate five 50 | different versions of the given user question to retrieve relevant documents from a vector 51 | database. By generating multiple perspectives on the user question, your goal is to help 52 | the user overcome some of the limitations of the distance-based similarity search. 53 | Provide these alternative questions separated by newlines. Original question: {query}""" 54 | 55 | # RAG-Fusion: Related 56 | rag_fusion_template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n 57 | Generate multiple search queries related to: {query} \n 58 | Output (3 queries):""" 59 | 60 | # Decomposition 61 | decomposition_template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n 62 | The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n 63 | Generate multiple search queries related to: {query} \n 64 | Output (3 queries):""" 65 | 66 | query_expander_templates = [ 67 | multi_query_template, 68 | rag_fusion_template, 69 | decomposition_template, 70 | ] 71 | 72 | expanded_queries = [] 73 | for template in query_expander_templates: 74 | prompt_perspectives = ChatPromptTemplate.from_template(template) 75 | 76 | generate_queries = ( 77 | prompt_perspectives 78 | | ChatUpstage(temperature=0) 79 | | StrOutputParser() 80 | | (lambda x: x.split("\n")) 81 | ) 82 | expanded_queries += generate_queries.invoke({"query": query}) 83 | 84 | return expanded_queries 85 | 86 | 87 | def get_unique_union(documents: list[list]): 88 | """Unique union of retrieved docs""" 89 | # Flatten list of lists, and convert each Document to string 90 | flattened_docs = [dumps(doc) for sublist in documents for doc in sublist] 91 | # Get unique documents 92 | unique_docs = list(set(flattened_docs)) 93 | # Return 94 | return [loads(doc) for doc in unique_docs] 95 | 96 | 97 | def retrieve_multiple_queries(retriever, queries): 98 | all_docs = [] 99 | for query in queries: 100 | st.write(f"Retrieving for query: {query}") 101 | docs = retriever.invoke(query) 102 | all_docs.append(docs) 103 | 104 | unique_docs = get_unique_union(all_docs) 105 | return unique_docs 106 | 107 | 108 | if "messages" not in st.session_state: 109 | st.session_state.messages = [] 110 | 111 | if "retriever" not in st.session_state: 112 | st.session_state.retriever = None 113 | 114 | with st.sidebar: 115 | st.header(f"Add your PDF!") 116 | 117 | uploaded_file = st.file_uploader("Choose your `.pdf` file", type="pdf") 118 | 119 | if uploaded_file and not uploaded_file.name in st.session_state: 120 | with tempfile.TemporaryDirectory() as temp_dir: 121 | file_path = os.path.join(temp_dir, uploaded_file.name) 122 | 123 | with open(file_path, "wb") as f: 124 | f.write(uploaded_file.getvalue()) 125 | 126 | with st.status("Document Parsing ..."): 127 | layzer = UpstageLayoutAnalysisLoader(file_path, split="page") 128 | # For improved memory efficiency, consider using the lazy_load method to load documents page by page. 
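                # A streaming sketch (assumes the text splitter below is constructed first):
                #   splits = []
                #   for page in layzer.lazy_load():
                #       splits.extend(text_splitter.split_documents([page]))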
129 |                 docs = loader.load()  # or loader.lazy_load()
130 | 
131 |             # Split
132 |             text_splitter = RecursiveCharacterTextSplitter(
133 |                 chunk_size=500, chunk_overlap=100
134 |             )
135 |             splits = text_splitter.split_documents(docs)
136 | 
137 |             st.write(f"Number of splits: {len(splits)}")
138 | 
139 |             with st.status(f"Vectorizing {len(splits)} splits ..."):
140 |                 # Embed
141 |                 vectorstore = FAISS.from_documents(
142 |                     documents=splits, embedding=UpstageEmbeddings(model="solar-embedding-1-large")
143 |                 )
144 | 
145 |                 st.write("Vectorizing the document done!")
146 | 
147 |             st.session_state.retriever = vectorstore.as_retriever(search_kwargs={"k": 10})  # retrieve the top-10 chunks per query
148 | 
149 |         # processed
150 |         st.session_state[uploaded_file.name] = True
151 | 
152 |         st.success("Ready to Chat!")
153 | 
154 | 
155 | for message in st.session_state.messages:
156 |     role = "AI" if isinstance(message, AIMessage) else "Human"
157 |     with st.chat_message(role):
158 |         # if message.response_metadata.get("context"):
159 |         #     with st.status("Got Context"):
160 |         #         st.write(message.response_metadata.get("context"))
161 |         st.markdown(message.content)
162 | 
163 | if prompt := st.chat_input("What is up?", disabled=not st.session_state.retriever):
164 |     st.session_state.messages.append(
165 |         HumanMessage(
166 |             content=prompt,
167 |         )
168 |     )
169 |     with st.chat_message("user"):
170 |         st.markdown(prompt)
171 | 
172 |     with st.chat_message("assistant"):
173 |         with st.status("Expanding queries..."):
174 |             expanded_queries = query_expander(prompt)
175 |             st.write(expanded_queries)
176 |         with st.status("Getting context..."):
177 |             st.write("Retrieving...")
178 |             retrieved_docs = retrieve_multiple_queries(
179 |                 st.session_state.retriever, expanded_queries
180 |             )
181 |             # retrieved_docs = st.session_state.retriever.invoke(prompt)
182 |             st.write(retrieved_docs)
183 | 
184 |         response = st.write_stream(
185 |             get_response(prompt, st.session_state.messages, retrieved_docs)
186 |         )
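    |         # Groundedness check: a separate Upstage model judges whether the
    |         # streamed answer is actually supported by the retrieved context.
    |         # It returns a verdict string; only "grounded" counts as a pass below.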
187 |         gc_result = groundedness_check.run(
188 |             {
189 |                 "context": f"Context: {retrieved_docs}\n\nQuestion: {prompt}",
190 |                 "answer": response,
191 |             }
192 |         )
193 | 
194 |         if gc_result == "grounded":
195 |             gc_mark = "✅"
196 |             st.success("✅ Groundedness check passed!")
197 |         else:
198 |             gc_mark = "❌"
199 |             st.error("❌ Groundedness check failed!")
200 | 
201 |     st.session_state.messages.append(
202 |         AIMessage(content=f"{gc_mark} {response}"),
203 |     )
204 | 
--------------------------------------------------------------------------------
/chatsearch.py:
--------------------------------------------------------------------------------
1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps
2 | 
3 | import streamlit as st
4 | 
5 | from pydantic import BaseModel, Field
6 | 
7 | from langchain_upstage import ChatUpstage as Chat
8 | from solar_util import initialize_solar_llm
9 | 
10 | from langchain_community.document_loaders import BraveSearchLoader
11 | 
12 | 
13 | from langchain_core.output_parsers import StrOutputParser
14 | from langchain_core.prompts import (
15 |     ChatPromptTemplate,
16 |     MessagesPlaceholder,
17 |     PromptTemplate,
18 | )
19 | from langchain_core.output_parsers import JsonOutputParser
20 | from langchain_core.messages import AIMessage, HumanMessage
21 | 
22 | MAX_TOKENS = 4000
23 | MAX_SEARCH_RESULTS = 5
24 | 
25 | 
26 | llm = initialize_solar_llm()
27 | st.set_page_config(page_title="Search and Chat", page_icon="🔍")
28 | st.title("SolarLLM Search")
29 | 
30 | short_answer_prompt = ChatPromptTemplate.from_messages(
31 |     [
32 |         (
33 |             "system",
34 |             """You are Solar, a smart search engine by Upstage, loved by many people.
35 | 
36 | Write a one-word answer when "yes", "no", or a short direct answer will do.
37 | Otherwise give a one- or two-sentence short answer to the query from the given context.
38 | Try to understand the user's intention and provide a quick answer.
39 | If the answer is not in context, please say you don't know and ask to clarify the question.
40 | 
41 | If the user's query is in a specific language (e.g., Korean, Japanese, Chinese),
42 | respond in the same language. Match the language of your response to the user's input language.
43 | """,
44 |         ),
45 |         MessagesPlaceholder("chat_history"),
46 |         (
47 |             "human",
48 |             """Query: {user_query}
49 | ----
50 | Context: {context}""",
51 |         ),
52 |     ]
53 | )
54 | 
55 | search_prompt = ChatPromptTemplate.from_messages(
56 |     [
57 |         (
58 |             "system",
59 |             """You are Solar, a smart search engine by Upstage, loved by many people.
60 | 
61 | See the original query, context, and quick answer, and then provide a detailed explanation.
62 | 
63 | Try to understand the user's intention and provide the relevant information in detail.
64 | If the answer is not in context, please say you don't know and ask to clarify the question.
65 | Do not repeat the short answer.
66 | 
67 | If the user's query is in a specific language (e.g., Korean, Japanese, Chinese),
68 | respond in the same language. Match the language of your response to the user's input language.
69 | 
70 | CRITICAL - CITATION REQUIREMENTS:
71 | You MUST cite EVERY piece of information using [X] notation. No statement should be made without a citation.
72 | 
73 | IMPORTANT: Citation and Reference Rules:
74 | 1. EVERY sentence must end with a citation [X]
75 | 2. Multiple citations in one sentence should be listed like [1,2,3]
76 | 3. Always include a "References:" section at the end
77 | 4. List all references in order
78 | 5. Each reference must include both title and URL
79 | 
80 | ✅ CORRECT Example:
81 | "Palo Alto requires residential parking permits in downtown areas [1]. The annual permit fee is $50 for residents [2],
82 | and applications can be submitted online or in person at City Hall [2,3]."
83 | 
84 | Another example in Korean:
85 | "서울의 인구는 약 970만 명입니다 [1]. 최근 대중교통 이용률이 증가하고 있으며 [2],
86 | 특히 지하철 이용객이 20% 증가했습니다 [3]."
87 | 
88 | References:
89 | [1] 서울시 인구통계 2023 - https://seoul.go.kr/statistics
90 | [2] 서울 교통현황 보고서 - https://seoul.go.kr/transport
91 | [3] 대중교통 이용분석 - https://seoul.go.kr/metro
92 | 
93 | If you cannot find a specific reference in the context, indicate this clearly
94 | with "[Source not found in context]" but still try to provide the information.
95 | """,
96 |         ),
97 |         MessagesPlaceholder("chat_history"),
98 |         (
99 |             "human",
100 |             """Query: {user_query}
101 | ----
102 | Short answer: {short_answer}
103 | ----
104 | Context: {context}""",
105 |         ),
106 |     ]
107 | )
108 | 
109 | 
110 | query_context_expansion_prompt = """
111 | You are a search query expansion expert. For a given query, generate related search queries that will help find comprehensive information.
112 | 
113 | IMPORTANT RULES:
114 | 1. Match the language of the expanded queries to the original query's language
115 | 2. Generate 2-3 alternative phrasings or related aspects of the query
116 | 3. Keep queries concise and search-engine friendly
117 | 4. Focus on different aspects or synonyms of the original query
118 | 5. If the query is in a non-English language (e.g., Korean, Japanese, Chinese), all expanded queries should be in that same language
119 | 
120 | Examples:
121 | 
122 | English query: "how to get parking permit in boston"
123 | ["boston residential parking permit application", "boston parking permit cost", "how to apply for boston street parking permit"]
124 | 
125 | Korean query: "서울 주차 등록하는 방법"
126 | ["서울시 주차등록증 신청", "서울 거주자 주차등록 절차", "서울시 주차허가증 발급"]
127 | 
128 | Japanese query: "東京都 運転免許 更新"
129 | ["東京都 運転免許更新手続き", "運転免許センター 更新方法", "東京 免許更新 必要書類"]
130 | 
131 | Thai query: "วิธีขอใบอนุญาตจอดรถในกทม"
132 | ["ใบอนุญาตจอดรถ กทม การสมัคร", "ขั้นตอนขอใบอนุญาตจอดรถ กรุงเทพ", "ใบอนุญาตจอดรถผู้พักอาศัย กทม"]
133 | 
134 | Please write in Python LIST format.
135 | 
136 | ---
137 | Context: {context}
138 | ----
139 | History: {chat_history}
140 | ---
141 | Original query: {query}
142 | """
143 | 
144 | 
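    | # The expansion prompt asks for a bare list such as
    | #   ["query", "related question 1", "related question 2"]
    | # and JsonOutputParser can parse that directly. The pydantic model below only
    | # documents the expected shape; it is not enforced at parse time.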
145 | # Define your desired data structure.
146 | class SearchQueries(BaseModel):
147 |     queries: list[str] = Field(description="expanded search queries")
148 | 
149 | 
150 | def query_context_expansion(query, chat_history, context=None):
151 |     # Set up a JSON parser for the list of expanded queries.
152 |     parser = JsonOutputParser(pydantic_object=SearchQueries)
153 | 
154 |     prompt = PromptTemplate(
155 |         template=query_context_expansion_prompt,
156 |         input_variables=["query", "chat_history", "context"],
157 |     )
158 | 
159 |     chain = prompt | llm | parser
160 |     # Invoke the chain, retrying in case the model returns malformed JSON.
161 | 
162 |     for attempt in range(3):
163 |         try:
164 |             parsed_output = chain.invoke(
165 |                 {"query": query, "chat_history": chat_history, "context": context}
166 |             )
167 |             return parsed_output
168 |         except Exception:
169 |             st.warning(f"Attempt {attempt + 1} failed. Retrying...")
170 | 
171 |     st.error("All attempts failed. Returning empty list.")
172 |     return []
173 | 
174 | 
175 | def get_short_search(user_query, context, chat_history):
176 |     chain = short_answer_prompt | llm | StrOutputParser()
177 | 
178 |     return chain.stream(
179 |         {
180 |             "context": context,
181 |             "chat_history": chat_history,
182 |             "user_query": user_query,
183 |         }
184 |     )
185 | 
186 | 
187 | def get_search_desc(user_query, short_answer, context, chat_history):
188 |     chain = search_prompt | llm | StrOutputParser()
189 | 
190 |     return chain.stream(
191 |         {
192 |             "context": context,
193 |             "chat_history": chat_history,
194 |             "user_query": user_query,
195 |             "short_answer": short_answer,
196 |         }
197 |     )
198 | 
199 | 
200 | def search(query, chat_history, context=None):
201 |     with st.status("Expanding the query into related questions..."):
202 |         q_list = query_context_expansion(query, chat_history, context)
203 |         st.write(q_list)
204 | 
205 |     if not q_list:
206 |         return []
207 | 
208 |     # combine all queries with "OR" operator
209 |     or_merged_search_query = " OR ".join(q_list)
210 |     with st.spinner(f"Searching for '{or_merged_search_query}'..."):
211 |         loader = BraveSearchLoader(
212 |             api_key=st.secrets["BRAVE_API_KEY"],
213 |             query=or_merged_search_query, search_kwargs={"count": 3}
214 |         )
215 |         return loader.load()
216 | 
217 | 
218 | 
219 | if "messages" not in st.session_state:
220 |     st.session_state.messages = []
221 | 
222 | for message in st.session_state.messages:
223 |     role = "AI" if isinstance(message, AIMessage) else "Human"
224 |     with st.chat_message(role):
225 |         st.markdown(message.content)
226 | 
227 | q = "How to get a residential parking permit in Palo Alto?"
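    | # Two-pass retrieval below: search once on the raw prompt, then search again
    | # with the first batch of results passed in as context, so the second round
    | # of query expansion can build on what was already found.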
228 | 229 | if prompt := st.chat_input(q): 230 | st.session_state.messages.append(HumanMessage(content=prompt)) 231 | with st.chat_message("user"): 232 | st.markdown(prompt) 233 | 234 | r1 = search(prompt, st.session_state.messages) 235 | result1_summary = str(r1) 236 | 237 | r2 = search(prompt, st.session_state.messages, result1_summary[:MAX_TOKENS]) 238 | 239 | context = str(r1 + r2) 240 | context = context[:MAX_TOKENS] 241 | 242 | with st.status("Search Results:"): 243 | st.write(context) 244 | 245 | with st.chat_message("assistant"): 246 | short_answer = st.write_stream( 247 | get_short_search(prompt, context, st.session_state.messages) 248 | ) 249 | desc = st.write_stream( 250 | get_search_desc(prompt, short_answer, context, st.session_state.messages) 251 | ) 252 | st.session_state.messages.append(AIMessage(content=short_answer + desc)) 253 | -------------------------------------------------------------------------------- /data/target_companies.json: -------------------------------------------------------------------------------- 1 | { 2 | "target_companies": [ 3 | { 4 | "company_name": "GreenTech Solutions", 5 | "main_business": "Industry leader in sustainable building technologies and smart energy management systems. Their flagship product, EcoSmart Building Suite, integrates solar-powered building materials with IoT sensors to reduce energy consumption by up to 40%. Key offerings include:\n\n1. Solar-integrated roofing tiles with 25-year warranty and 98% efficiency rating\n2. Smart window systems that automatically adjust tint based on sunlight exposure\n3. AI-powered HVAC optimization that learns from occupancy patterns\n4. Real-time energy monitoring dashboard for facility managers\n5. Blockchain-based energy trading platform for excess solar power\n\nServing over 500 commercial buildings across North America, with particular expertise in retrofitting historical buildings with sustainable technologies while preserving architectural integrity.", 6 | "contact_email": "partnerships@greentechsolutions.com" 7 | }, 8 | { 9 | "company_name": "DataFlow Analytics", 10 | "main_business": "Enterprise-scale data analytics provider specializing in manufacturing and industrial IoT solutions. Their platform processes over 1 petabyte of sensor data daily, serving Fortune 500 manufacturers. Core capabilities include:\n\n1. Real-time anomaly detection using advanced ML algorithms\n2. Predictive maintenance systems with 95% accuracy in failure prediction\n3. Custom-built neural networks for quality control in production lines\n4. Digital twin technology for equipment simulation and optimization\n5. Automated reporting and KPI tracking with natural language insights\n\nTheir solutions have helped clients achieve:\n- 30% reduction in unexpected downtime\n- 25% decrease in maintenance costs\n- 15% improvement in overall equipment effectiveness (OEE)\n\nCompliant with ISO 27001 and SOC 2 Type II standards.", 11 | "contact_email": "business@dataflowanalytics.com" 12 | }, 13 | { 14 | "company_name": "HealthBridge Systems", 15 | "main_business": "Comprehensive healthcare technology provider transforming patient care through innovative digital solutions. Their integrated platform handles over 10 million patient interactions annually. Key systems include:\n\n1. AI-Powered Diagnostic Support:\n - Machine learning algorithms trained on 50+ million anonymized medical records\n - Real-time analysis of medical imaging with 99.7% accuracy\n - Drug interaction prediction and automated alerts\n\n2. 
Telemedicine Platform:\n - HD video consultations with end-to-end encryption\n - Remote patient monitoring integration\n - Virtual waiting room and automated scheduling\n - Multi-language support for 12 languages\n\n3. Electronic Health Records:\n - HIPAA and GDPR compliant cloud storage\n - Natural language processing for medical documentation\n - Interoperable with major healthcare systems\n - Mobile-first design for healthcare providers\n\nServing 350+ hospitals and 10,000+ healthcare professionals across 3 continents.", 16 | "contact_email": "sales@healthbridge.com" 17 | }, 18 | { 19 | "company_name": "LogisticsPro Global", 20 | "main_business": "End-to-end supply chain optimization platform managing $50B+ in annual freight value. Their comprehensive system includes:\n\n1. Transportation Management:\n - Real-time fleet tracking with 99.9% accuracy\n - Dynamic route optimization using weather and traffic data\n - Fuel efficiency monitoring and carbon footprint tracking\n - Automated carrier selection and rate negotiation\n\n2. Warehouse Management:\n - AI-powered inventory forecasting\n - Robotic process automation for picking and packing\n - 3D warehouse mapping and space optimization\n - Voice-controlled inventory management\n\n3. International Shipping:\n - Automated customs documentation\n - Real-time container tracking across 200+ ports\n - Multi-modal transportation optimization\n - Blockchain-based chain of custody\n\n4. Analytics and Reporting:\n - Predictive analytics for demand forecasting\n - Real-time KPI dashboards\n - Custom report generation\n - Machine learning for continuous improvement\n\nIntegrated with major ERP systems and serving clients in 45 countries.", 21 | "contact_email": "inquiries@logisticspro.com" 22 | }, 23 | { 24 | "company_name": "CyberShield Defense", 25 | "main_business": "Next-generation cybersecurity provider protecting over $1 trillion in digital assets. Their comprehensive security suite includes:\n\n1. Threat Detection and Response:\n - AI-powered threat hunting with sub-second response time\n - Zero-day vulnerability detection\n - Behavioral analytics and anomaly detection\n - Automated incident response playbooks\n\n2. Network Security:\n - Zero-trust architecture implementation\n - Quantum-resistant encryption\n - Software-defined perimeter solutions\n - Cloud-native security controls\n\n3. Compliance Management:\n - Automated compliance monitoring for GDPR, HIPAA, PCI-DSS\n - Real-time policy enforcement\n - Audit trail generation\n - Risk assessment and scoring\n\n4. Security Operations Center:\n - 24/7 monitoring and response\n - Threat intelligence integration\n - Advanced forensics capabilities\n - Incident investigation and reporting\n\nProtecting 200+ financial institutions and healthcare organizations with 99.999% uptime.", 26 | "contact_email": "enterprise@cybershield.com" 27 | }, 28 | { 29 | "company_name": "AgriTech Innovations", 30 | "main_business": "Revolutionary agricultural technology provider serving 50,000+ farms globally. Their integrated farming platform includes:\n\n1. Precision Agriculture:\n - High-resolution satellite imagery analysis\n - Drone-based crop health monitoring\n - Variable rate application technology\n - Soil composition mapping and analysis\n\n2. Smart Irrigation:\n - AI-powered water management\n - Soil moisture sensor networks\n - Weather-based irrigation scheduling\n - Water usage optimization algorithms\n\n3. 
Crop Management:\n - Machine learning for yield prediction\n - Disease and pest detection\n - Nutrient deficiency analysis\n - Harvest timing optimization\n\n4. Farm Operations:\n - Equipment fleet management\n - Labor scheduling and management\n - Supply chain optimization\n - Financial planning and analytics\n\nProviding average yield improvements of 35% while reducing water usage by 40%.", 31 | "contact_email": "contact@agritech-innovations.com" 32 | }, 33 | { 34 | "company_name": "RetailSmart Solutions", 35 | "main_business": "Comprehensive retail technology platform processing over $10B in annual transactions. Their integrated solution includes:\n\n1. Point-of-Sale System:\n - Cloud-based architecture with offline capabilities\n - Multi-channel integration (in-store, online, mobile)\n - Advanced payment processing (cryptocurrencies supported)\n - Customer relationship management\n\n2. Inventory Management:\n - Real-time stock tracking across locations\n - Predictive inventory optimization\n - Automated reordering system\n - Loss prevention analytics\n\n3. Customer Analytics:\n - AI-powered customer segmentation\n - Personalized marketing automation\n - Customer journey tracking\n - Loyalty program management\n\n4. Business Intelligence:\n - Real-time sales analytics\n - Staff performance tracking\n - Competitive price monitoring\n - Custom reporting tools\n\nServing 15,000+ retail locations with 99.99% system reliability.", 36 | "contact_email": "sales@retailsmart.com" 37 | }, 38 | { 39 | "company_name": "EduTech Platforms", 40 | "main_business": "Comprehensive educational technology provider serving 5 million+ students globally. Their platform includes:\n\n1. Learning Management System:\n - Adaptive learning pathways\n - Multi-format content delivery\n - Real-time progress tracking\n - Gamification elements\n\n2. Virtual Classroom Solution:\n - HD video conferencing with AI-powered engagement tracking\n - Interactive whiteboard with real-time collaboration\n - Breakout room functionality\n - Automated attendance and participation tracking\n\n3. Assessment Tools:\n - AI-powered grading system\n - Plagiarism detection\n - Custom quiz generation\n - Performance analytics\n\n4. Parent-Teacher Communication:\n - Real-time progress updates\n - Automated scheduling system\n - Multi-language support\n - Mobile app integration\n\nImplemented in 1,000+ educational institutions with 40% improvement in student engagement.", 41 | "contact_email": "partnerships@edutech.com" 42 | }, 43 | { 44 | "company_name": "FinanceCloud Services", 45 | "main_business": "Enterprise-grade financial management platform serving 100,000+ businesses. Their comprehensive suite includes:\n\n1. Accounting System:\n - Real-time financial reporting\n - Multi-currency support\n - Automated bank reconciliation\n - Custom chart of accounts\n\n2. Payroll Management:\n - Automated tax calculations\n - Multi-state compliance\n - Employee self-service portal\n - Time and attendance tracking\n\n3. Expense Management:\n - Receipt scanning with OCR\n - Automated expense categorization\n - Corporate card integration\n - Policy compliance checking\n\n4. 
Financial Planning:\n - Cash flow forecasting\n - Budgeting tools\n - Scenario planning\n - Investment tracking\n\nProcessing over $50B in transactions annually with 99.99% accuracy.",
46 |       "contact_email": "business@financecloud.com"
47 |     },
48 |     {
49 |       "company_name": "SmartCity Technologies",
50 |       "main_business": "Comprehensive urban infrastructure management platform deployed in 50+ cities worldwide. Their integrated solutions include:\n\n1. Traffic Management:\n - AI-powered traffic flow optimization\n - Smart traffic light control\n - Emergency vehicle prioritization\n - Parking space management\n\n2. Public Safety:\n - Video analytics for crowd management\n - Emergency response coordination\n - Predictive policing algorithms\n - Environmental monitoring\n\n3. Waste Management:\n - Smart bin sensors and route optimization\n - Recycling analytics\n - Resource allocation optimization\n - Environmental impact tracking\n\n4. Urban Planning:\n - Digital twin city modeling\n - Infrastructure maintenance prediction\n - Energy usage optimization\n - Public transportation integration\n\nAchieving average 30% reduction in traffic congestion and 25% improvement in emergency response times.",
51 |       "contact_email": "solutions@smartcity-tech.com"
52 |     }
53 |   ]
54 | }
--------------------------------------------------------------------------------
/namecard.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from PIL import Image
3 | import re
4 | import io
5 | import os
6 | import qrcode
7 | import base64
8 | from openai import OpenAI
9 | import json
10 | 
11 | # Set page title
12 | st.set_page_config(page_title="Business Card OCR", layout="wide")
13 | st.title("Business Card Reader")
14 | 
15 | # Function to extract text from image using Upstage API
16 | def extract_info_from_image(image):
17 |     try:
18 |         # Convert PIL Image to base64
19 |         buffered = io.BytesIO()
20 |         image.save(buffered, format="JPEG")
21 |         base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
22 | 
23 |         # Initialize Upstage API client
24 |         client = OpenAI(
25 |             api_key=st.secrets.get("UPSTAGE_API_KEY") or st.session_state.get("UPSTAGE_API_KEY", ""),  # from secrets.toml or the sidebar input
26 |             base_url="https://api.upstage.ai/v1/information-extract/"
27 |         )
28 | 
29 |         # Call the API with the exact structure provided
30 |         response = client.chat.completions.create(
31 |             model="information-extraction",
32 |             messages=[
33 |                 {
34 |                     "role": "user",
35 |                     "content": [
36 |                         {
37 |                             "type": "image_url",
38 |                             "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
39 |                         }
40 |                     ]
41 |                 }
42 |             ],
43 |             response_format={
44 |                 "type": "json_schema",
45 |                 "json_schema": {
46 |                     "name": "document_schema",
47 |                     "schema": {
48 |                         "type": "object",
49 |                         "properties": {
50 |                             "companyName": {
51 |                                 "type": "string",
52 |                                 "description": "The name of the company on the business card."
53 |                             },
54 |                             "address": {
55 |                                 "type": "string",
56 |                                 "description": "The physical address of the company."
57 |                             },
58 |                             "phone": {
59 |                                 "type": "string",
60 |                                 "description": "The telephone number of the company."
61 |                             },
62 |                             "mobile": {
63 |                                 "type": "string",
64 |                                 "description": "The mobile phone number of the contact person."
65 |                             },
66 |                             "email": {
67 |                                 "type": "string",
68 |                                 "description": "The email address of the contact person."
69 |                             },
70 |                             "fullname": {
71 |                                 "type": "string",
72 |                                 "description": "The full name of the person on the business card."
73 |                             },
74 |                             "title": {
75 |                                 "type": "string",
76 |                                 "description": "The job title of the person on the business card."
77 |                             }
78 |                         },
79 |                         "required": [
80 |                             "companyName",
81 |                             "address",
82 |                             "phone",
83 |                             "mobile",
84 |                             "email",
85 |                             "fullname",
86 |                             "title"
87 |                         ]
88 |                     }
89 |                 }
90 |             }
91 |         )
92 | 
93 |         # Extract the structured data from the response
94 |         extracted_info = json.loads(response.choices[0].message.content)
95 |         return extracted_info
96 |     except Exception as e:
97 |         st.error(f"Error during information extraction: {e}")
98 |         return None
99 | 
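    | # vCard 3.0 is a plain-text contact format; the helper below maps the fields
    | # extracted above onto standard vCard properties (FN = full name, ORG = company,
    | # TEL = phone numbers, ADR = address), so phones can import the contact directly.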
100 | def create_vcard(company_name, name, title, phone, mobile, email, address, website=""):
101 |     """Create a vCard file from the extracted information"""
102 |     vcard_content = f"""BEGIN:VCARD
103 | VERSION:3.0
104 | FN:{name}
105 | ORG:{company_name}
106 | TITLE:{title}
107 | TEL;TYPE=WORK:{phone}
108 | TEL;TYPE=CELL:{mobile}
109 | EMAIL:{email}
110 | ADR;TYPE=WORK:;;{address}
111 | URL:{website}
112 | END:VCARD
113 | """
114 |     # Save vCard to file
115 |     filename = f"{name.replace(' ', '_')}.vcf"
116 |     with open(filename, "w") as f:
117 |         f.write(vcard_content)
118 |     return filename
119 | 
120 | def create_qr_code_for_vcard(vcard_file):
121 |     """Create a QR code for the vCard file"""
122 |     try:
123 |         # Read vCard content
124 |         with open(vcard_file, "r") as f:
125 |             vcard_content = f.read()
126 | 
127 |         # Generate QR code
128 |         qr = qrcode.QRCode(
129 |             version=1,
130 |             error_correction=qrcode.constants.ERROR_CORRECT_L,
131 |             box_size=10,
132 |             border=4,
133 |         )
134 |         qr.add_data(vcard_content)
135 |         qr.make(fit=True)
136 | 
137 |         img = qr.make_image(fill_color="black", back_color="white")
138 | 
139 |         # Save QR code image
140 |         qr_filename = vcard_file.replace(".vcf", "_qr.png")
141 |         img.save(qr_filename)
142 |         return qr_filename
143 |     except Exception as e:
144 |         st.error(f"Error generating QR code: {e}")
145 |         return None
146 | 
147 | # --- Main App Section ---
148 | 
149 | # Add API key input in sidebar
150 | if "UPSTAGE_API_KEY" not in st.secrets:
151 |     st.sidebar.title("API Configuration")
152 |     api_key = st.sidebar.text_input("Enter Upstage API Key", type="password")
153 |     if api_key:
154 |         st.session_state["UPSTAGE_API_KEY"] = api_key  # st.secrets is read-only at runtime
155 |     else:
156 |         st.warning("Please enter your Upstage API key in the sidebar to continue.")
157 | 
158 | st.header("Upload Business Card")
159 | 
160 | # Simple file uploader without options
161 | uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
162 | 
163 | if uploaded_file is not None and (st.secrets.get("UPSTAGE_API_KEY") or st.session_state.get("UPSTAGE_API_KEY")):
164 |     # Display the uploaded image
165 |     image = Image.open(uploaded_file)
166 |     img_col, info_col = st.columns(2)
167 | 
168 |     with img_col:
169 |         st.image(image, caption="Uploaded Business Card", use_container_width=True)
170 | 
171 |     # Extract information using Upstage API
172 |     with st.spinner("Extracting information from business card..."):
173 |         extracted_info = extract_info_from_image(image)
174 | 
175 |     if extracted_info:
176 |         # Create form for editing extracted information
177 |         st.subheader("Edit Contact Information")
178 | 
179 |         # Auto-generate contact initially
180 |         if "contact_generated" not in st.session_state:
181 |             name = extracted_info.get("fullname", "")
182 |             title = extracted_info.get("title", "")
183 |             company_name = extracted_info.get("companyName", "")
184 |             address = extracted_info.get("address", "")
185 |             phone = extracted_info.get("phone", "")
186 |             mobile = extracted_info.get("mobile", "")
187 |             email = extracted_info.get("email", "")
188 |             website = ""
189 | 
190 |             if name:
191 |                 # Create vCard
192 |                 vcard_file = create_vcard(company_name, name, title, phone, mobile, email, address, website)
193 | 
194 |                 # Create QR code
195 |                 qr_file = create_qr_code_for_vcard(vcard_file)
196 | 
197 |                 st.session_state.contact_generated = True
198 |                 st.session_state.vcard_file = vcard_file
199 |                 st.session_state.qr_file = qr_file
200 | 
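    |         # The form below lets the user correct any OCR mistakes; submitting it
    |         # regenerates the vCard and QR code that were auto-created on first load.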
201 |         with st.form("contact_form"):
202 |             col1, col2 = st.columns(2)
203 | 
204 |             with col1:
205 |                 name = st.text_input("Full Name", extracted_info.get("fullname", ""))
206 |                 title = st.text_input("Job Title", extracted_info.get("title", ""))
207 |                 company_name = st.text_input("Company Name", extracted_info.get("companyName", ""))
208 |                 address = st.text_input("Address", extracted_info.get("address", ""))
209 | 
210 |             with col2:
211 |                 phone = st.text_input("Phone Number", extracted_info.get("phone", ""))
212 |                 mobile = st.text_input("Mobile Number", extracted_info.get("mobile", ""))
213 |                 email = st.text_input("Email Address", extracted_info.get("email", ""))
214 |                 website = st.text_input("Website/Social Media", "")
215 | 
216 |             regenerate_button = st.form_submit_button("Regenerate Contact")
217 | 
218 |             if regenerate_button:
219 |                 if not name:
220 |                     st.error("Please provide at least a name.")
221 |                 else:
222 |                     # Create vCard
223 |                     vcard_file = create_vcard(company_name, name, title, phone, mobile, email, address, website)
224 | 
225 |                     # Create QR code
226 |                     qr_file = create_qr_code_for_vcard(vcard_file)
227 | 
228 |                     st.session_state.vcard_file = vcard_file
229 |                     st.session_state.qr_file = qr_file
230 | 
231 |         # Display contact information if available
232 |         if "contact_generated" in st.session_state:
233 |             vcard_file = st.session_state.get("vcard_file")
234 |             qr_file = st.session_state.get("qr_file")
235 | 
236 |             # Display success message and QR code
237 |             st.success(f"Contact information saved to {vcard_file}")
238 | 
239 |             if qr_file and os.path.exists(qr_file):
240 |                 qr_image = Image.open(qr_file)
241 |                 st.image(qr_image, caption="Scan this QR code to add contact", width=300)
242 | 
243 |             # Provide download buttons
244 |             if vcard_file and os.path.exists(vcard_file):
245 |                 with open(vcard_file, "rb") as file:
246 |                     vcf_contents = file.read()
247 |                 st.download_button(
248 |                     label="Download vCard File",
249 |                     data=vcf_contents,
250 |                     file_name=vcard_file,
251 |                     mime="text/vcard"
252 |                 )
253 | 
254 |             if qr_file and os.path.exists(qr_file):
255 |                 with open(qr_file, "rb") as file:
256 |                     qr_contents = file.read()
257 |                 st.download_button(
258 |                     label="Download QR Code",
259 |                     data=qr_contents,
260 |                     file_name=qr_file,
261 |                     mime="image/png"
262 |                 )
--------------------------------------------------------------------------------
/reasoning.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import time
3 | from pydantic import BaseModel, Field
4 | 
5 | from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
6 | from langchain_core.prompts import (
7 |     ChatPromptTemplate,
8 |     MessagesPlaceholder,
9 |     PromptTemplate,
10 | )
11 | from langchain_core.messages import AIMessage, HumanMessage
12 | 
13 | from langchain_upstage import ChatUpstage as Chat
14 | from langchain_community.tools import DuckDuckGoSearchResults
15 | 
16 | from solar_util import num_of_tokens
17 | 
18 | MAX_TOKENS = 2500
19 | MAX_SEARCH_TOKENS = 700
20 | MAX_SEARCH_RESULTS = 5
21 | 
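    | # Token budgets: MAX_TOKENS caps how much chat history is replayed to the
    | # model, and MAX_SEARCH_TOKENS caps how much raw web-search text is injected
    | # as context below.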
22 | llm = Chat(model="solar-pro")
23 | ddg_search = DuckDuckGoSearchResults()
24 | 
25 | st.set_page_config(page_title="Solar Reasoning", page_icon="🤔")
26 | st.title("Solar Reasoning")
27 | 
28 | reasoning_examples = """
29 | ---
30 | Example 1:
31 | 
32 | User Query: If a die is rolled three times, what is the probability of getting a sum of 11?
33 | 
34 | Reasoning: 1. Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.
35 | 2. Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.
36 | 3. Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 27 such combinations.
37 | 4. Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 27 / 216 = 1/8.
38 | 5. Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/8.
39 | 
40 | Reasoning Chains: [{'step': 1, 'thought': 'Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.'}, {'step': 2, 'thought': 'Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.'}, {'step': 3, 'thought': 'Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 27 such combinations.'}, {'step': 4, 'thought': 'Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 27 / 216 = 1/8.'}, {'step': 5, 'thought': 'Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/8.'}]
41 | ----
42 | Example 2:
43 | 
44 | User Query: The interactions will be about the science behind culinary techniques. The setting is a cooking class where three friends are discussing various aspects of cooking and sharing their knowledge.
45 | - USER/Jane: A curious learner who wants to understand the science behind cooking
46 | - Mike: An experienced home cook with a passion for experimenting in the kitchen
47 | - Sarah: A food scientist who loves explaining the chemistry behind different cooking processes
48 | 
49 | Reasoning: 1. Start with the given setting: a cooking class with three friends discussing the science behind culinary techniques.
50 | 2. Introduce the topic of resting meat after cooking, with Mike asking Jane if she's ever wondered about it.
51 | 3. Have Sarah explain the science behind resting meat, mentioning denatured proteins and juice redistribution.
52 | 4. Address the user's question about resting meat, with Sarah confirming that it allows juices to redistribute.
53 | 5. Move on to the topic of adding salt to water, with Mike mentioning its effect on boiling point.
54 | 6. Have Sarah explain the science behind salt's effect on boiling point, mentioning the higher temperature required for boiling.
55 | 7. Address the user's question about cooking speed, with Sarah explaining that it's slightly faster due to the hotter water.
56 | 8. Introduce the topic of acids in cooking, with Mike mentioning their use in brightening dishes.
57 | 9. Have Sarah explain the science behind acids' effects on flavor and tenderizing meats.
58 | 10. Address the user's question about baking, with Mike mentioning the science involved in baking and Sarah explaining the role of gluten and leavening agents.
59 | 11. Conclude the conversation with the characters expressing their fascination with the science behind cooking and their excitement to continue learning and experimenting.
60 | 
61 | Reasoning Chains: [{'step': 1, 'thought': 'Start with the given setting: a cooking class with three friends discussing the science behind culinary techniques.'}, {'step': 2, 'thought': "Introduce the topic of resting meat after cooking, with Mike asking Jane if she's ever wondered about it."}, {'step': 3, 'thought': 'Have Sarah explain the science behind resting meat, mentioning denatured proteins and juice redistribution.'}, {'step': 4, 'thought': "Address the user's question about resting meat, with Sarah confirming that it allows juices to redistribute."}, {'step': 5, 'thought': 'Move on to the topic of adding salt to water, with Mike mentioning its effect on boiling point.'}, {'step': 6, 'thought': "Have Sarah explain the science behind salt's effect on boiling point, mentioning the higher temperature required for boiling."}, {'step': 7, 'thought': "Address the user's question about cooking speed, with Sarah explaining that it's slightly faster due to the hotter water."}, {'step': 8, 'thought': 'Introduce the topic of acids in cooking, with Mike mentioning their use in brightening dishes.'}, {'step': 9, 'thought': "Have Sarah explain the science behind acids' effects on flavor and tenderizing meats."}, {'step': 10, 'thought': "Address the user's question about baking, with Mike mentioning the science involved in baking and Sarah explaining the role of gluten and leavening agents."}, {'step': 11, 'thought': 'Conclude the conversation with the characters expressing their fascination with the science behind cooking and their excitement to continue learning and experimenting.'}]
62 | ----
63 | """
64 | 
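    | # One prompt template drives all three stages below; the {task} variable
    | # selects whether the model emits free-form reasoning, structured reasoning
    | # chains, or the final answer.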
65 | reasoning_prompt = ChatPromptTemplate.from_messages(
66 |     [
67 |         (
68 |             "system",
69 |             """You are Solar, a smart reasoning and answer engine by Upstage, loved by many people.
70 | 
71 | For the given query, please provide the best answer using a step-by-step explanation.
72 | Your response should demonstrate a Chain of Thought (CoT) technique,
73 | where you think aloud and describe the steps you take to reach a conclusion.
74 | 
75 | Please make the best use of the provided reasoning examples and context.
76 | ---
77 | {reasoning_examples}
78 | """,
79 |         ),
80 |         MessagesPlaceholder("chat_history"),
81 |         (
82 |             "human",
83 |             """For the given query, please provide only the "{task}"
84 | and ensure your response is consistent with the user's request,
85 | previous chat history, and provided reasoning if any.
86 | Remember to use the self-consistency technique to maintain a consistent character of a helpful assistant.
87 | Think step by step and provide the best answer for the query.
88 | ---
89 | User Query:
90 | {prompt}
91 | ---
92 | {Reasoning}
93 | ---
94 | {ReasoningChains}""",
95 |         ),
96 |     ]
97 | )
98 | 
99 | 
100 | query_context_expansion_prompt = """Given a query and context(if provided),
101 | generate up to three related questions to help answer the original query.
102 | Ensure the questions are short, concise, and keyword-based for search engines.
103 | 
104 | Write your response in Python LIST format.
105 | 
106 | For example:
107 | ["original query", "related question 1", "related question 2", "related question 3"]
108 | 
109 | ---
110 | Context: {context}
111 | ----
112 | History: {chat_history}
113 | ---
114 | Original query: {query}
115 | """
116 | 
117 | 
118 | # Define your desired data structure.
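    | # Expected expansion output, matching the example in the prompt above:
    | #   ["original query", "related question 1", "related question 2", "related question 3"]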
119 | class SearchQueries(BaseModel):
120 |     queries: list[str] = Field(description="related search queries")
121 | 
122 | 
123 | def query_context_expansion(query, chat_history, context=None):
124 |     # Set up a JSON parser for the list of related questions.
125 |     parser = JsonOutputParser(pydantic_object=SearchQueries)
126 | 
127 |     prompt = PromptTemplate(
128 |         template=query_context_expansion_prompt,
129 |         input_variables=["query", "chat_history", "context"],
130 |     )
131 | 
132 |     chain = prompt | llm | parser
133 |     # Invoke the chain, retrying in case the model returns malformed JSON.
134 | 
135 |     for attempt in range(3):
136 |         try:
137 |             parsed_output = chain.invoke(
138 |                 {"query": query, "chat_history": chat_history, "context": context}
139 |             )
140 |             return parsed_output
141 |         except Exception:
142 |             st.warning(f"Attempt {attempt + 1} failed. Retrying...")
143 | 
144 |     st.error("All attempts failed. Returning empty list.")
145 |     return []
146 | 
147 | 
148 | GlobalTasks = ["Reasoning (No conclusion)", "Reasoning Chains", "Final Answer"]
149 | 
150 | 
151 | def perform_task(user_query, task, task_results, chat_history):
152 |     # Truncate chat history to at most MAX_TOKENS tokens.
153 |     limited_history = []
154 |     total_length = 0
155 |     for message in reversed(chat_history):
156 |         message_length = num_of_tokens(message.content)
157 |         if total_length + message_length > MAX_TOKENS:
158 |             st.warning("Chat history is too long. Truncating.")
159 |             break
160 |         limited_history.insert(0, message)
161 |         total_length += message_length
162 | 
163 |     chain = reasoning_prompt | llm | StrOutputParser()
164 | 
165 |     return chain.stream(
166 |         {
167 |             "chat_history": limited_history,
168 |             "reasoning_examples": reasoning_examples,
169 |             "prompt": user_query,
170 |             "task": task,
171 |             "Reasoning": task_results.get(GlobalTasks[0], ""),
172 |             "ReasoningChains": task_results.get(GlobalTasks[1], ""),
173 |         }
174 |     )
175 | 
176 | 
177 | def search(query, chat_history, context=None):
178 |     with st.status("Extending query..."):
179 |         q_list = query_context_expansion(query, chat_history, context)
180 |         st.write(q_list)
181 | 
182 |     if not q_list:
183 |         st.error("No related questions found. Returning empty list.")
184 |         return []
185 | 
186 |     # Search for each related question and concatenate the results.
187 |     results = ""
188 |     for q in q_list:
189 |         with st.spinner(f"Searching for '{q}'..."):
190 |             results += ddg_search.invoke(q)
191 | 
192 |     return results
193 | 
194 | 
195 | if "messages" not in st.session_state:
196 |     st.session_state.messages = []
197 | 
198 | for message in st.session_state.messages:
199 |     role = "AI" if isinstance(message, AIMessage) else "Human"
200 |     with st.chat_message(role):
201 |         st.markdown(message.content)
202 | 
203 | q = "3.9 vs 3.11. Which one is bigger?"
204 | 
205 | search_on = st.checkbox("Search on the web", value=False)
206 | 
207 | if prompt := st.chat_input(q):
208 |     with st.chat_message("user"):
209 |         st.markdown(prompt)
210 | 
211 |     if search_on:
212 |         search_result = search(prompt, st.session_state.messages)
213 | 
214 |         with st.status("Search Results:"):
215 |             st.write(search_result)
216 | 
217 |         if search_result:
218 |             search_result = str(search_result)[:MAX_SEARCH_TOKENS]
219 |             st.session_state.messages.append(
220 |                 HumanMessage(
221 |                     content=f"FYI search result context: {search_result} for the query, {prompt}"
222 |                 )
223 |             )
224 |             st.session_state.messages.append(
225 |                 AIMessage(
226 |                     content="Thanks for the information! I will keep that in mind. Give me the instruction."
227 |                 )
228 |             )
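    |     # The three tasks run in order; earlier outputs are threaded back in
    |     # through task_results, and only the final answer is rendered as a chat
    |     # message (the intermediate stages appear inside collapsible status boxes).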
229 |     task_results = {}
230 |     current_time = time.time()
231 |     for task in GlobalTasks:
232 |         if task == GlobalTasks[-1]:
233 |             st.info(f"Thinking: {time.time() - current_time:.2f}s")
234 | 
235 |             with st.chat_message("assistant"):
236 |                 response = st.write_stream(
237 |                     perform_task(prompt, task, task_results, st.session_state.messages)
238 |                 )
239 |                 task_results[task] = response
240 |             break
241 | 
242 |         with st.status(f"Performing task: {task}"):
243 |             response = st.write_stream(
244 |                 perform_task(prompt, task, task_results, st.session_state.messages)
245 |             )
246 |             task_results[task] = response
247 |     # Store the last task result for future reference
248 |     st.session_state.messages.append(HumanMessage(content=prompt))
249 |     st.session_state.messages.append(AIMessage(content=task_results[GlobalTasks[-1]]))
--------------------------------------------------------------------------------
/discussion.py:
--------------------------------------------------------------------------------
1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps
2 | 
3 | import streamlit as st
4 | 
5 | from langchain_upstage import ChatUpstage as Chat
6 | from pydantic import BaseModel
7 | 
8 | from langchain_core.output_parsers import StrOutputParser
9 | from langchain_core.prompts import ChatPromptTemplate
10 | from langchain_core.messages import AIMessage, HumanMessage
11 | 
12 | from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
13 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
14 | from langchain_community.tools import DuckDuckGoSearchResults
15 | from langchain_upstage import ChatUpstage
16 | 
17 | 
18 | from solar_util import initialize_solar_llm
19 | 
20 | st.set_page_config(page_title="Discuss", page_icon="🗣️")
21 | st.title("Self-debating Solar Pro Preview")
22 | 
23 | solar = initialize_solar_llm()
24 | deepseek = ChatUpstage(model="deepseek-chat", base_url="https://api.deepseek.com/v1", api_key=st.secrets["DEEPSEEK_API_KEY"])
25 | deepseek_r = ChatUpstage(model="deepseek-reasoner", base_url="https://api.deepseek.com/v1", api_key=st.secrets["DEEPSEEK_API_KEY"])
26 | llms = [deepseek, solar]
27 | 
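    | # Round-robin over the two models: consecutive turns come from different LLMs,
    | # so the "self-debate" actually alternates between DeepSeek and Solar.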
28 | llm_order = 0
29 | def get_llm():
30 |     global llm_order
31 |     llm = llms[llm_order]
32 |     llm_order = (llm_order + 1) % len(llms)
33 |     return llm
34 | 
35 | ddg_search = DuckDuckGoSearchResults()
36 | 
37 | # Define your desired data structure.
38 | class SearchKeyword(BaseModel):
39 |     keywords: list[str]
40 | 
41 | 
42 | search_keyword_extraction = ChatPromptTemplate.from_messages(
43 |     [
44 |         (
45 |             "system",
46 |             """You are Solar-Discussor, a smart discussion chatbot by Upstage, loved by many people.
47 | 
48 | You have already come up with a discussion draft.
49 | Now you can use Google search to find more information about the discussion point.
50 | 
51 | Please come up with 2~3 search keywords that you can use to find more information about the discussion point.
52 | ---
53 | Topic: {topic}
54 | """,
55 |         ),
56 |         (
57 |             "human",
58 |             """Please write search keywords in a python list like ["keyword1", "keyword2", "keyword3"].
59 | ---
60 | Discussion point draft: {discussion_candidate}
61 | 
62 | """,
63 |         ),
64 |     ]
65 | )
66 | 
67 | discussion_prompt_with_search = ChatPromptTemplate.from_messages(
68 |     [
69 |         (
70 |             "system",
71 |             """You are Solar-Discussor, a smart discussion chatbot by Upstage, loved by many people.
72 | 
73 | You are talking about a topic and discussing with a user. Please participate in the discussion and provide engaging answers.
74 | If necessary, ask for more information, clarify the question, or add follow-up questions.
75 | If you find something wrong in others' discussion, correct them in a friendly manner in bold.
76 | Do not talk beyond the topic and do not provide inappropriate language.
77 | 
78 | No need to agree on everything. You can have different opinions and discuss in a friendly manner.
79 | Find contradictions and correct them in a harsh manner. It's OK to say I don't agree with you.
80 | 
81 | Speak shortly and clearly about 2~3 sentences.
82 | Get to the point first and expand if necessary.
83 | 
84 | Count the turns and put [Turn n/10] once, at the very beginning of your response.
85 | 
86 | Important: Match your response language to the topic language:
87 | - If the topic is in Korean, respond in Korean
88 | - If the topic is in English, respond in English
89 | ---
90 | Topic: {topic}
91 | """,
92 |         ),
93 |         MessagesPlaceholder("chat_history"),
94 |         (
95 |             "human",
96 |             """Based on your discussion draft, we did a Google search.
97 | Please use the search results to enhance your original discussion draft if the information is relevant and useful.
98 | If it is important, please add the URL of the search result.
99 | Using all of this, please focus on the discussion and provide engaging answers.
100 | Don't thank or mention the search results. Assume you already know this information.
101 | Fully focus on the discussion with the human. Discuss based on the facts and information you have.
102 | 
103 | Please speak in a friendly and engaging manner. Speak shortly and clearly about 2~3 sentences.
104 | Get to the point first and expand if necessary.
105 | 
106 | Count the turns and put [Turn n/10] once, at the very beginning of your response.
107 | Please do only one discussion turn.
108 | 
109 | ---
110 | Discussion draft: {discussion_candidate}
111 | ----
112 | Search result: {external_information}
113 | """,
114 |         ),
115 |     ]
116 | )
117 | 
118 | 
119 | discussion_prompt = ChatPromptTemplate.from_messages(
120 |     [
121 |         (
122 |             "system",
123 |             """You are Solar-Discussor, a smart discussion chatbot by Upstage, loved by many people.
124 | 
125 | You are talking about a topic and discussing with a user. Please participate in the discussion and provide engaging answers.
126 | If necessary, ask for more information, clarify the question, or add follow-up questions.
127 | If you find something wrong in others' discussion, correct them in a friendly manner in bold.
128 | Do not talk beyond the topic and do not provide inappropriate language.
129 | 
130 | Please speak in a friendly and engaging manner. Speak shortly and clearly about 2~3 sentences.
131 | Get to the point first and expand if necessary.
132 | 
133 | Count the turns and put [Turn n/10] once, at the very beginning of your response.
134 | Please do only one turn.
135 | 
136 | Do not repeat the same point already mentioned.
137 | 
138 | Important: Match your response language to the topic language:
139 | - If the topic is in Korean, respond in Korean
140 | - If the topic is in English, respond in English
141 | ---
142 | Topic: {topic}
143 | """,
144 |         ),
145 |         MessagesPlaceholder("chat_history"),
146 |         ("human", "{discussion}"),
147 |     ]
148 | )
149 | 
150 | 
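    | # The summary pass replays the entire recorded transcript and asks for a
    | # conclusion; it is told to add nothing beyond the discussion itself.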
151 | summary_prompt = ChatPromptTemplate.from_messages(
152 |     [
153 |         MessagesPlaceholder("chat_history"),
154 |         (
155 |             "human",
156 |             """
157 | You are Solar-Discussor, a smart discussion chatbot by Upstage, loved by many people.
158 | 
159 | By reading the discussion, provide a comprehensive summary of the discussion and a conclusion.
160 | Only use the previous discussion and do not add new information.
161 | Highlight several sentences if necessary.
162 | ---
163 | Topic: {topic}
164 | ---
165 | Please summarize the discussion in history.""",
166 |         ),
167 |     ]
168 | )
169 | 
170 | 
171 | def make_human_last_in_history(chat_history):
172 |     if not chat_history:
173 |         return []
174 | 
175 |     # No need to change anything if the last message is already from a human.
176 |     if not isinstance(chat_history[-1], AIMessage):
177 |         return chat_history
178 | 
179 |     # Otherwise swap the AI/Human roles so the history ends with a human turn.
180 |     return [
181 |         (
182 |             HumanMessage(content=chat.content)
183 |             if isinstance(chat, AIMessage)
184 |             else AIMessage(content=chat.content)
185 |         )
186 |         for chat in chat_history
187 |     ]
188 | 
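    | # Search-assisted flow used below: draft a reply, mine the draft for search
    | # keywords, run DuckDuckGo on them, then rewrite the draft with the findings.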
"chat_history": new_chat_history, 256 | "topic": topic, 257 | "discussion_candidate": discussion, 258 | "external_information": search_result_summary, 259 | } 260 | ) 261 | 262 | chain = discussion_prompt | llm | StrOutputParser() 263 | return chain.stream( 264 | { 265 | "chat_history": new_chat_history, 266 | "topic": topic, 267 | "discussion": discussion, 268 | } 269 | ) 270 | 271 | 272 | def get_summary(topic, chat_history, llm): 273 | chain = summary_prompt | llm | StrOutputParser() 274 | return chain.stream( 275 | { 276 | "chat_history": chat_history, 277 | "topic": topic, 278 | } 279 | ) 280 | 281 | 282 | if "messages" not in st.session_state: 283 | st.session_state.messages = [] 284 | 285 | if False: 286 | for message in st.session_state.messages: 287 | role = "AI" if isinstance(message, AIMessage) else "Human" 288 | with st.chat_message(role): 289 | st.markdown(message.content) 290 | default_topic = "Where is the best place to visit in Korea?" 291 | topic = st.text_input("Discussion Topic", default_topic) 292 | use_search = False and st.toggle("Use Search", False) 293 | if st.button("Start Discussion"): 294 | st.session_state.messages = [] 295 | previous_discussion = "" 296 | for i in range(5): 297 | with st.chat_message("user"): 298 | llm = get_llm() 299 | discussion = st.write_stream( 300 | get_discussion( 301 | topic, 302 | previous_discussion, 303 | st.session_state.messages, 304 | llm = llm, 305 | use_search=use_search, 306 | ) 307 | ) 308 | 309 | st.session_state.messages.append(HumanMessage(content=discussion )) 310 | 311 | if discussion.startswith("[Turn 10/10]"): 312 | break 313 | with st.chat_message("assistant"): 314 | llm = get_llm() 315 | previous_discussion = st.write_stream( 316 | get_discussion(topic, discussion, st.session_state.messages, llm = llm, use_search=use_search) 317 | ) 318 | 319 | st.session_state.messages.append(AIMessage(content=previous_discussion )) 320 | 321 | if previous_discussion.startswith("[Turn 10/10]"): 322 | break 323 | 324 | ## summarize the discussion 325 | with st.chat_message("user"): 326 | st.write("## Solar-Summarizer") 327 | st.write_stream(get_summary(topic, st.session_state.messages, llm = solar)) 328 | 329 | st.write("## Deepseek-Summarizer") 330 | st.write_stream(get_summary(topic, st.session_state.messages, llm = deepseek)) 331 | 332 | st.write("## DeepSeek-Reasoner") 333 | st.write_stream(get_summary(topic, st.session_state.messages, llm = deepseek_r)) 334 | -------------------------------------------------------------------------------- /coldmail.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 8 | from langchain_core.messages import AIMessage, HumanMessage 9 | 10 | from solar_util import initialize_solar_llm 11 | 12 | from solar_util import prompt_engineering 13 | 14 | import json 15 | from pathlib import Path 16 | 17 | llm = initialize_solar_llm() 18 | st.set_page_config(page_title="Cold Email Generator", layout="wide") 19 | st.title("B2B Cold Email Generator") 20 | 21 | 22 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 23 | [ 24 | ("human", """You are Solar, a smart chatbot by Upstage, loved by many people. 25 | Be smart, cheerful, and fun. 
22 | chat_with_history_prompt = ChatPromptTemplate.from_messages(
23 |     [
24 |         ("human", """You are Solar, a smart chatbot by Upstage, loved by many people.
25 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language.
26 | Reply in the same language as the user query.
27 | Solar is now being connected with a human.
28 | 
29 | Please put <END> at the end of your answer."""),
30 |         MessagesPlaceholder("chat_history"),
31 |         ("human", "{user_query}"),
32 |     ]
33 | )
34 | 
35 | # Assumed stop sentinel; must match the marker requested in the prompts above and below.
36 | END_TOKEN = "<END>"
37 | def get_response(user_query, chat_history):
38 |     chain = chat_with_history_prompt | llm | StrOutputParser()
39 | 
40 |     buffer = ""
41 |     for chunk in chain.stream(
42 |         {
43 |             "chat_history": chat_history,
44 |             "user_query": user_query,
45 |         }
46 |     ):
47 |         buffer += chunk
48 | 
49 |         if END_TOKEN in buffer:
50 |             # Emit any text before the sentinel, then stop streaming.
51 |             yield buffer.split(END_TOKEN)[0]
52 |             return
53 | 
54 |         # Hold back the last few characters so a sentinel that is split
55 |         # across two chunks is still caught on the next iteration.
56 |         safe, buffer = buffer[: -(len(END_TOKEN) - 1)], buffer[-(len(END_TOKEN) - 1):]
57 | 
58 |         if safe:
59 |             yield safe
60 | 
61 |     yield buffer
62 | 
63 | 
64 | # Add these constants for our cold email structure
65 | COLD_EMAIL_TEMPLATE = """You are a professional cold email writer.
66 | Based on the following information, create a compelling cold email:
67 | - Your Company: {company_name}
68 | - Your Product/Service: {product}
69 | - Company Website: {company_url}
70 | - Your Contact Information: {contact_info}
71 | 
72 | Target Company Information:
73 | - Company Name: {target_company_name}
74 | - Business Description: {target_business}
75 | - Contact Email: {target_email}
76 | 
77 | Use these example emails as reference for tone and structure:
78 | {example_emails}
79 | 
80 | Make the email professional, concise, and persuasive.
81 | Include a clear value proposition and call to action.
82 | Always include both the website and contact information in the signature.
83 | End with an invitation to visit our website for more information and to contact us.
84 | Reply in the same language as the user query.
85 | 
86 | Please put <END> at the end of your answer."""
87 | 
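    | # COLD_EMAIL_TEMPLATE is filled in per target company by generate_emails()
    | # below; every {placeholder} above must be supplied in the invoke() call,
    | # otherwise prompt formatting fails.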
87 |
88 | # Add these to track cold email information
89 | if "cold_email_info" not in st.session_state:
90 | st.session_state.cold_email_info = {
91 | "company_name": "Upstage.AI",
92 | "company_url": "https://upstage.ai",
93 | "product": "We specialize in Document AI and Large Language Models (LLMs), offering cutting-edge solutions that combine both technologies. Our products help businesses automate document processing, enhance information extraction, and leverage advanced AI capabilities for improved efficiency and decision-making.",
94 | "contact_info": "contact@upstage.ai",
95 | "target_companies": "Enterprise companies seeking advanced AI solutions for document processing and natural language understanding",
96 | "cold_email_examples": [
97 | """Subject: Enhancing Coupang's E-commerce Experience with AI Solutions
98 |
99 | Dear Coupang Team,
100 |
101 | I hope this email finds you well. I am reaching out from Upstage.AI, a leading provider of Document AI and Large Language Model solutions, as I believe we could add significant value to Coupang's e-commerce operations.
102 |
103 | Given Coupang's position as South Korea's largest e-commerce platform, I wanted to explore how our AI solutions could enhance your shopping experience. Our technology can help:
104 |
105 | • Improve product search accuracy and recommendations
106 | • Automate product description processing and categorization
107 | • Enhance customer service through advanced AI chatbots
108 | • Streamline document processing for vendor onboarding
109 |
110 | Would you be open to a brief conversation about how these solutions could benefit Coupang's operations?
111 |
112 | To learn more about our solutions, please visit us at https://upstage.ai
113 |
114 | I'm happy to schedule a call or provide more information. You can reach me at contact@upstage.ai.
115 |
116 | Best regards,
117 | Upstage.AI Team""",
118 | """Subject: AI Solutions for Samsung Electronics' Manufacturing Process
119 |
120 | Dear Samsung Electronics Team,
121 |
122 | I'm reaching out from Upstage.AI regarding our advanced AI solutions that could enhance your manufacturing and quality control processes.
123 |
124 | Our Document AI and LLM technologies have helped leading manufacturers:
125 | • Reduce quality inspection time by 60%
126 | • Automate technical documentation processing
127 | • Improve defect detection accuracy by 45%
128 | • Streamline supplier communication and documentation
129 |
130 | Would you be interested in discussing how these solutions could benefit Samsung Electronics' operations?
131 |
132 | Best regards,
133 | Upstage.AI Team""",
134 | """Subject: Revolutionizing Hyundai Motor's Documentation Systems
135 |
136 | Hello Hyundai Motor Team,
137 |
138 | I'm writing from Upstage.AI about our AI-powered document processing solutions that could transform your technical documentation and maintenance manual systems.
139 |
140 | Our technology has demonstrated:
141 | • 75% reduction in manual document processing time
142 | • Enhanced accuracy in multi-language technical documentation
143 | • Automated parts catalog management
144 | • Improved service manual accessibility and searchability
145 |
146 | Could we schedule a brief call to explore how these capabilities align with Hyundai's digital transformation goals?
147 |
148 | Best regards,
149 | Upstage.AI Team"""
150 | ],
151 | "additional_notes": ""
152 | }
153 |
154 | def load_target_companies():
155 | json_path = Path(__file__).parent / "data" / "target_companies.json"
156 | with open(json_path, 'r') as f:
157 | return json.load(f)['target_companies']
158 |
159 | def generate_emails(company_info):
160 | target_companies = load_target_companies()
161 | emails = []
162 |
163 | st.markdown("## Generating Cold Emails")
164 |
165 | for idx, target in enumerate(target_companies, 1):
166 | with st.status(f"📧 Generating email for {target['company_name']} ({idx}/{len(target_companies)})", expanded=True) as status:
167 | status.write("🎯 **Target Company Information**")
168 | status.markdown(f"""
169 | - Company Name: {target['company_name']}
170 | - Main Business: {target['main_business']}
171 | - Contact Email: {target['contact_email']}
172 | """)
173 |
174 | chain = ChatPromptTemplate.from_messages([
175 | ("human", COLD_EMAIL_TEMPLATE)
176 | ]) | llm | StrOutputParser()
177 |
178 | try:
179 | status.write("⚙️ Generating personalized content...")
180 |
181 | # Filter out empty examples, then number and join them
182 | examples = "\n\n".join(
183 | f"EXAMPLE EMAIL #{n}\n{ex}" for n, ex in enumerate((ex for ex in company_info["cold_email_examples"] if ex.strip()), 1)
184 | )
185 |
186 | response = chain.invoke({
187 | "company_name": company_info["company_name"],
188 | "product": company_info["product"],
189 | "company_url": company_info["company_url"],
190 | "contact_info": company_info["contact_info"],
191 | "target_company_name": target["company_name"],
192 | "target_business": target["main_business"],
193 | "target_email": target["contact_email"],
194 | "example_emails": examples
195 | })
196 |
197 | email_content = response.split("<END>")[0].strip()
198 | emails.append({
199 | "target_company": target["company_name"],
200 | "email_content":
email_content, 201 | "status": "success" 202 | }) 203 | 204 | status.update(label=f"✅ Email generated for {target['company_name']}", state="complete") 205 | status.markdown("#### Generated Email") 206 | status.markdown(f""" 207 |
208 | {email_content} 209 |
210 | """, unsafe_allow_html=True) 211 | st.button( 212 | "📋 Copy to Clipboard", 213 | key=f"copy_{target['company_name']}", 214 | on_click=lambda text=email_content: st.write(text) 215 | ) 216 | 217 | except Exception as e: 218 | emails.append({ 219 | "target_company": target["company_name"], 220 | "email_content": f"Error generating email: {str(e)}", 221 | "status": "error" 222 | }) 223 | with col2: 224 | st.write("❌ Error occurred during generation") 225 | st.error(f"Error: {str(e)}") 226 | 227 | # Show summary statistics at the end 228 | st.markdown("## Summary") 229 | col1, col2, col3 = st.columns(3) 230 | 231 | total_emails = len(emails) 232 | successful_emails = sum(1 for email in emails if email["status"] == "success") 233 | failed_emails = total_emails - successful_emails 234 | 235 | col1.metric("Total Emails", total_emails) 236 | col2.metric("Successful", successful_emails) 237 | col3.metric("Failed", failed_emails) 238 | 239 | return emails 240 | 241 | # Remove the sidebar wrapper and organize content in the main area 242 | st.subheader("Email Generator Settings") 243 | 244 | st.session_state.cold_email_info["company_name"] = st.text_input( 245 | "Your Company Name", 246 | st.session_state.cold_email_info["company_name"] 247 | ) 248 | st.session_state.cold_email_info["company_url"] = st.text_input( 249 | "Company Website URL", 250 | st.session_state.cold_email_info["company_url"] 251 | ) 252 | st.session_state.cold_email_info["product"] = st.text_area( 253 | "Product/Service Description", 254 | st.session_state.cold_email_info["product"], 255 | height=100 256 | ) 257 | st.session_state.cold_email_info["contact_info"] = st.text_input( 258 | "Contact Information", 259 | st.session_state.cold_email_info["contact_info"] 260 | ) 261 | 262 | # Simplified text area inputs with pre-populated examples 263 | st.subheader("Example Emails (Up to 3)") 264 | for i in range(3): 265 | st.session_state.cold_email_info["cold_email_examples"][i] = st.text_area( 266 | f"Example Email {i+1}", 267 | value=st.session_state.cold_email_info["cold_email_examples"][i], 268 | height=200, 269 | key=f"example_email_{i}" 270 | ) 271 | 272 | # Generate button 273 | if st.button("Generate Cold Email", type="primary"): 274 | if not st.session_state.cold_email_info["company_name"]: 275 | st.error("Please enter your company name") 276 | else: 277 | generated_emails = generate_emails(st.session_state.cold_email_info) 278 | 279 | # Display generated emails in the main area with better formatting 280 | st.markdown("## Generated Cold Emails") 281 | 282 | # Create three columns for statistics 283 | col1, col2, col3 = st.columns(3) 284 | 285 | # Calculate statistics 286 | total_emails = len(generated_emails) 287 | successful_emails = sum(1 for email in generated_emails if email["status"] == "success") 288 | failed_emails = total_emails - successful_emails 289 | 290 | # Display statistics in metrics 291 | col1.metric("Total Emails", total_emails) 292 | col2.metric("Successful", successful_emails) 293 | col3.metric("Failed", failed_emails) 294 | 295 | # Display emails with better formatting 296 | for email in generated_emails: 297 | with st.expander(f"📧 {email['target_company']}", expanded=False): 298 | if email["status"] == "success": 299 | st.markdown("### Email Content") 300 | st.markdown(f""" 301 |
241 | # Remove the sidebar wrapper and organize content in the main area
242 | st.subheader("Email Generator Settings")
243 |
244 | st.session_state.cold_email_info["company_name"] = st.text_input(
245 | "Your Company Name",
246 | st.session_state.cold_email_info["company_name"]
247 | )
248 | st.session_state.cold_email_info["company_url"] = st.text_input(
249 | "Company Website URL",
250 | st.session_state.cold_email_info["company_url"]
251 | )
252 | st.session_state.cold_email_info["product"] = st.text_area(
253 | "Product/Service Description",
254 | st.session_state.cold_email_info["product"],
255 | height=100
256 | )
257 | st.session_state.cold_email_info["contact_info"] = st.text_input(
258 | "Contact Information",
259 | st.session_state.cold_email_info["contact_info"]
260 | )
261 |
262 | # Simplified text area inputs with pre-populated examples
263 | st.subheader("Example Emails (Up to 3)")
264 | for i in range(3):
265 | st.session_state.cold_email_info["cold_email_examples"][i] = st.text_area(
266 | f"Example Email {i+1}",
267 | value=st.session_state.cold_email_info["cold_email_examples"][i],
268 | height=200,
269 | key=f"example_email_{i}"
270 | )
271 |
272 | # Generate button
273 | if st.button("Generate Cold Email", type="primary"):
274 | if not st.session_state.cold_email_info["company_name"]:
275 | st.error("Please enter your company name")
276 | else:
277 | generated_emails = generate_emails(st.session_state.cold_email_info)
278 |
279 | # Display generated emails in the main area with better formatting
280 | st.markdown("## Generated Cold Emails")
281 |
282 | # Create three columns for statistics
283 | col1, col2, col3 = st.columns(3)
284 |
285 | # Calculate statistics
286 | total_emails = len(generated_emails)
287 | successful_emails = sum(1 for email in generated_emails if email["status"] == "success")
288 | failed_emails = total_emails - successful_emails
289 |
290 | # Display statistics in metrics
291 | col1.metric("Total Emails", total_emails)
292 | col2.metric("Successful", successful_emails)
293 | col3.metric("Failed", failed_emails)
294 |
295 | # Display emails with better formatting
296 | for email in generated_emails:
297 | with st.expander(f"📧 {email['target_company']}", expanded=False):
298 | if email["status"] == "success":
299 | st.markdown("### Email Content")
300 | st.markdown(f"""
301 | <div>
302 | {email["email_content"]}
303 | </div>
304 | """, unsafe_allow_html=True) 305 | 306 | # Add copy button 307 | st.button( 308 | "📋 Copy to Clipboard", 309 | key=f"copy2_{email['target_company']}", 310 | on_click=lambda text=email["email_content"]: st.write(text) 311 | ) 312 | else: 313 | st.error(email["email_content"]) 314 | 315 | 316 | -------------------------------------------------------------------------------- /translate.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | from langchain_upstage import UpstageDocumentParseLoader 6 | import tempfile, os 7 | import hashlib 8 | import json 9 | import time 10 | import logging 11 | from typing import Dict, Optional, Tuple 12 | from langchain.schema import Document 13 | from pdf_util import is_ocr_pdf, PDFType 14 | # Configure logging 15 | logging.basicConfig(level=logging.INFO) 16 | logger = logging.getLogger(__name__) 17 | 18 | # Page configuration 19 | st.set_page_config( 20 | layout="wide", 21 | page_title="Document Translator Pro", 22 | page_icon="📚", 23 | initial_sidebar_state="collapsed" # Start with collapsed sidebar for cleaner look 24 | ) 25 | 26 | # Modern, clean CSS styling 27 | st.markdown(""" 28 | 105 | """, unsafe_allow_html=True) 106 | 107 | # Initialize translation model 108 | #translation_llm = Chat(model="translation-enko") 109 | translation_llm = Chat(model="solar-pro") 110 | 111 | def translate_to_korean(text: str) -> str: 112 | """ 113 | Translate text to Korean using the translation model with streaming output. 114 | """ 115 | max_retries = 3 116 | for attempt in range(max_retries): 117 | try: 118 | system_prompt = """You are a professional translator specializing in Korean translations. 119 | Follow these guidelines strictly: 120 | 1. Translate the text line by line, maintaining the exact structure 121 | 2. Preserve all HTML tags, formatting, and special characters exactly as they appear 122 | 3. Do not translate: 123 | - HTML tags and attributes 124 | - Acronyms (e.g., PDF, HTML, AI) 125 | - Foreign names and proper nouns 126 | - Technical terms when commonly used in English 127 | 4. Ensure the translation is natural and fluent in Korean while maintaining the original meaning and nuance 128 | 5. 
Do not skip or drop any content
129 |
130 | Translate the following text to Korean:"""
131 |
132 | messages = [
133 | {"role": "system", "content": system_prompt},
134 | {"role": "user", "content": text}
135 | ]
136 |
137 | # Change to streaming response
138 | response_stream = translation_llm.stream(messages)
139 | return response_stream
140 | except Exception as e:
141 | logger.error(f"Translation attempt {attempt + 1} failed: {str(e)}")
142 | if attempt == max_retries - 1:
143 | st.error(f"Translation failed after {max_retries} attempts: {str(e)}")
144 | raise  # re-raise so callers that iterate the stream can handle it; returning a string would break them
145 | time.sleep(1)
146 |
147 | class FileCache:
148 | def __init__(self):
149 | self.cache = self._load_cache()
150 | self._cleanup_old_entries()
151 |
152 | def _get_cache_path(self):
153 | return ".file_cache.json"
154 |
155 | def _cleanup_old_entries(self, max_age_days=7):
156 | """Remove cache entries older than specified days"""
157 | current_time = time.time()
158 | entries_to_remove = []
159 | for filename in self.cache:
160 | if 'timestamp' in self.cache[filename]:
161 | age = (current_time - self.cache[filename]['timestamp']) / (24 * 3600)
162 | if age > max_age_days:
163 | entries_to_remove.append(filename)
164 |
165 | for filename in entries_to_remove:
166 | del self.cache[filename]
167 | self._save_cache()
168 |
169 | def _load_cache(self):
170 | try:
171 | if os.path.exists(self._get_cache_path()):
172 | with open(self._get_cache_path(), 'r') as f:
173 | return json.load(f)
174 | except Exception as e:
175 | logger.error(f"Error loading cache: {str(e)}")
176 | return {}
177 |
178 | def _save_cache(self):
179 | try:
180 | with open(self._get_cache_path(), 'w') as f:
181 | json.dump(self.cache, f)
182 | except Exception as e:
183 | logger.error(f"Error saving cache: {str(e)}")
184 |
185 | def get_cached_docs(self, filename: str, content: bytes) -> Optional[list]:
186 | """Check if parsed documents are in cache"""
187 | try:
188 | file_hash = hashlib.sha256(content).hexdigest()
189 |
190 | if filename in self.cache:
191 | cached_data = self.cache[filename]
192 | if cached_data['hash'] == file_hash and 'docs' in cached_data:
193 | return [Document(page_content=doc['page_content'], metadata=doc['metadata'])
194 | for doc in cached_data['docs']]
195 | except Exception as e:
196 | logger.error(f"Error retrieving from cache: {str(e)}")
197 | return None
198 |
199 | def store_docs(self, filename: str, content: bytes, docs: list) -> None:
200 | """Store parsed documents in cache"""
201 | try:
202 | file_hash = hashlib.sha256(content).hexdigest()
203 | serializable_docs = [{'page_content': doc.page_content, 'metadata': doc.metadata}
204 | for doc in docs]
205 |
206 | if filename not in self.cache:
207 | self.cache[filename] = {}
208 |
209 | self.cache[filename].update({
210 | 'hash': file_hash,
211 | 'docs': serializable_docs,
212 | 'translations': {},
213 | 'timestamp': time.time()
214 | })
215 | self._save_cache()
216 | except Exception as e:
217 | logger.error(f"Error storing docs in cache: {str(e)}")
218 |
219 | def get_cached_translation(self, filename: str, page_content: str) -> Optional[str]:
220 | """Get cached translation for a specific page content"""
221 | try:
222 | if filename in self.cache:
223 | page_hash = hashlib.sha256(page_content.encode()).hexdigest()
224 | return self.cache[filename]['translations'].get(page_hash)
225 | except Exception as e:
226 | logger.error(f"Error retrieving translation from cache: {str(e)}")
227 | return None
228 |
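# --- Cache usage sketch (illustrative; not executed by the app) ---
# Parsed pages are stored under the file name plus a SHA-256 of the file
# bytes; translations are keyed by a hash of each page. A hypothetical
# round trip ("sample.pdf" is an assumed local file):
#
#     cache = FileCache()
#     content = open("sample.pdf", "rb").read()
#     docs = cache.get_cached_docs("sample.pdf", content)
#     if docs is None:
#         docs = process_large_document(content)
#         cache.store_docs("sample.pdf", content, docs)
#     cache.store_translation("sample.pdf", docs[0].page_content, "...")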
229 | def store_translation(self, filename: str, page_content: str, translation: str) -> None:
230 | """Store translation for a specific page content"""
231 | try:
232 | if filename not in self.cache:
233 | self.cache[filename] = {'translations': {}}
234 |
235 | page_hash = hashlib.sha256(page_content.encode()).hexdigest()
236 | if 'translations' not in self.cache[filename]:
237 | self.cache[filename]['translations'] = {}
238 |
239 | self.cache[filename]['translations'][page_hash] = translation
240 | self._save_cache()
241 | except Exception as e:
242 | logger.error(f"Error storing translation in cache: {str(e)}")
243 |
244 | def process_large_document(file_content: bytes) -> list:
245 | """Process large documents safely"""
246 | docs = []
247 | with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as temp_file:
248 | try:
249 | temp_file.write(file_content)
250 | temp_file.flush()
251 | ocr = "auto" if is_ocr_pdf(temp_file.name) == PDFType.DIGITAL else "force"
252 | st.info(f"OCR: {ocr}")
253 | loader = UpstageDocumentParseLoader(temp_file.name, split="page", coordinates=False, ocr=ocr)
254 | docs = loader.load()
255 |
256 | return docs
257 | finally:
258 | try:
259 | os.unlink(temp_file.name) # Clean up temp file
260 | except Exception as e:
261 | logger.error(f"Error cleaning up temporary file: {str(e)}")
262 |
263 | def validate_file(uploaded_file) -> Tuple[bool, str]:
264 | """Validate uploaded file"""
265 | if not uploaded_file.name.lower().endswith('.pdf'):
266 | return False, "Please upload a PDF file."
267 |
268 | try:
269 | file_content = uploaded_file.getvalue()
270 | if not file_content.startswith(b'%PDF'):
271 | return False, "Invalid PDF file format."
272 | return True, ""
273 | except Exception as e:
274 | return False, f"File validation error: {str(e)}"
275 |
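# --- Pipeline sketch (illustrative; mirrors the upload flow further below) ---
# Outside Streamlit, the two helpers chain together like this; the file name
# and the SimpleNamespace stand-in for an uploaded file are assumptions:
#
#     from types import SimpleNamespace
#     content = open("sample.pdf", "rb").read()
#     fake_upload = SimpleNamespace(name="sample.pdf", getvalue=lambda: content)
#     ok, err = validate_file(fake_upload)
#     if ok:
#         pages = process_large_document(content)  # OCR mode is auto-detected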
276 | def initialize_session_state():
277 | """Initialize all session state variables"""
278 | initial_state = {
279 | 'file_cache': FileCache(),
280 | 'docs': None,
281 | 'translation_complete': False,
282 | 'translated_text': None,
283 | 'current_file': None,
284 | 'translation_progress': 0
285 | }
286 |
287 | for key, value in initial_state.items():
288 | if key not in st.session_state:
289 | st.session_state[key] = value
290 |
291 | # Initialize session state
292 | initialize_session_state()
293 |
294 | # Main App Header
295 | st.markdown("""
296 | <div>
297 | <h1>Document Translator Pro (Beta)</h1>
298 | <p>Professional-grade document translation powered by AI</p>
299 | <p>⚠️ This is a temporary service and may be discontinued without prior notice.</p>
300 | </div>
301 | """, unsafe_allow_html=True) 302 | 303 | # Main content area 304 | tab1, tab2 = st.tabs(["📤 Upload & Translate", "ℹ️ About"]) 305 | 306 | with tab1: 307 | uploaded_file = st.file_uploader( 308 | "Drop your PDF here or click to upload", 309 | type=["pdf"], 310 | help="Maximum file size: 10MB", 311 | accept_multiple_files=False 312 | ) 313 | 314 | if uploaded_file is not None: 315 | is_valid, error_message = validate_file(uploaded_file) 316 | if not is_valid: 317 | st.error(f"📤 {error_message}") 318 | else: 319 | st.success(f"📤 File '{uploaded_file.name}' uploaded successfully!") 320 | 321 | if uploaded_file and not uploaded_file.name in st.session_state: 322 | with st.status("Processing document...", expanded=True) as status: 323 | try: 324 | file_content = uploaded_file.getvalue() 325 | 326 | # Check document cache first 327 | cached_docs = st.session_state.file_cache.get_cached_docs(uploaded_file.name, file_content) 328 | 329 | if cached_docs is not None: 330 | status.update(label="📑 Loading from cache...") 331 | st.session_state.docs = cached_docs 332 | st.session_state[uploaded_file.name] = True 333 | st.session_state.translation_complete = False 334 | st.success("✅ Document loaded from cache") 335 | else: 336 | status.update(label="📑 Analyzing document structure...") 337 | docs = process_large_document(file_content) 338 | 339 | # Store parsed docs in cache 340 | st.session_state.file_cache.store_docs(uploaded_file.name, file_content, docs) 341 | 342 | st.session_state.docs = docs 343 | st.session_state[uploaded_file.name] = True 344 | st.session_state.translation_complete = False 345 | 346 | st.success("✅ Document ready for translation") 347 | except Exception as e: 348 | st.error(f"Error processing document: {str(e)}") 349 | logger.error(f"Document processing error: {str(e)}") 350 | 351 | # Translation Display 352 | translations = [] 353 | if st.session_state.docs: 354 | if not st.session_state.translation_complete: 355 | st.markdown('
', unsafe_allow_html=True) 356 | progress_bar = st.progress(0) 357 | st.markdown('
', unsafe_allow_html=True) 358 | 359 | for i, doc in enumerate(st.session_state.docs): 360 | st.markdown(f'
', unsafe_allow_html=True) 361 | st.markdown(f'
Page {i+1}
', unsafe_allow_html=True) 362 | 363 | col1, col2 = st.columns(2) 364 | with col1: 365 | st.markdown('
', unsafe_allow_html=True) 366 | st.markdown("**Original Text**") 367 | st.markdown(doc.page_content, unsafe_allow_html=True) 368 | st.markdown('
', unsafe_allow_html=True) 369 | 370 | with col2: 371 | st.markdown('
', unsafe_allow_html=True) 372 | st.markdown("**Translated Text**") 373 | 374 | empty_container = st.empty() 375 | with st.spinner(""): 376 | try: 377 | # Check cache first 378 | cached_translation = st.session_state.file_cache.get_cached_translation( 379 | uploaded_file.name, 380 | doc.page_content 381 | ) 382 | 383 | if cached_translation is not None: 384 | translated_content = cached_translation 385 | st.markdown(translated_content, unsafe_allow_html=True) 386 | else: 387 | # Stream translation if not in cache 388 | response_stream = translate_to_korean(doc.page_content) 389 | translated_content = "" 390 | 391 | # Use write_stream for streaming output 392 | for chunk in response_stream: 393 | if chunk and chunk.content: 394 | translated_content += chunk.content 395 | empty_container.markdown(translated_content, unsafe_allow_html=True) 396 | 397 | # Store complete translation in cache 398 | st.session_state.file_cache.store_translation( 399 | uploaded_file.name, 400 | doc.page_content, 401 | translated_content 402 | ) 403 | 404 | translations.append(translated_content) 405 | except Exception as e: 406 | error_message = f"Translation error on page {i+1}: {str(e)}" 407 | st.error(error_message) 408 | logger.error(error_message) 409 | translations.append(f"Error: {str(e)}") 410 | 411 | st.markdown('
', unsafe_allow_html=True) 412 | 413 | st.markdown('
', unsafe_allow_html=True)
414 |
415 | if not st.session_state.translation_complete:
416 | progress_bar.progress((i + 1) / len(st.session_state.docs))
417 |
418 | st.session_state.translation_complete = True
419 |
420 | # Create HTML content for download
421 | if translations:
422 | html_content = """
423 | <!DOCTYPE html>
424 | <html>
425 | <head><meta charset="utf-8"></head>
431 | <body>
432 |
433 | """
434 |
435 | for i, translation in enumerate(translations):
436 | html_content += f"""
437 | <section>
438 | <h2>Page {i+1}</h2>
439 | <div>{translation}</div>
440 | </section>
441 | """ 442 | 443 | html_content += "" 444 | 445 | # Add download button 446 | download_filename = os.path.splitext(uploaded_file.name)[0] + '.translated.html' 447 | st.download_button( 448 | label="📥 Download Translation as HTML", 449 | data=html_content, 450 | file_name=download_filename, 451 | mime="text/html" 452 | ) 453 | 454 | with tab2: 455 | st.markdown(""" 456 | ### About Document Translator Pro (Beta) 457 | 458 | Our professional document translation service uses state-of-the-art AI technology to provide: 459 | 460 | - ⚡ Fast and accurate translations 461 | - 📄 Support for PDF documents 462 | - 🔒 Secure document handling 463 | - 💯 High-quality output 464 | 465 | > ⚠️ **Please Note**: This is a temporary service and may be discontinued without prior notice. 466 | 467 | ### How to Use 468 | 469 | 1. Upload your PDF document using the upload tab 470 | 2. Wait for the automatic translation process 471 | 3. Review the side-by-side translation 472 | 4. Download the translated document 473 | 474 | ### Limitations 475 | 476 | - Maximum file size: 10MB 477 | - Maximum pages per document: 50 478 | - Supported file format: PDF only 479 | """) 480 | 481 | # Footer 482 | st.markdown(""" 483 | 487 | """, unsafe_allow_html=True) 488 | -------------------------------------------------------------------------------- /gemini.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import re 4 | from typing import Dict, List, Any 5 | from google import genai 6 | from google.genai.types import Tool, GenerateContentConfig, GoogleSearch 7 | import streamlit as st 8 | import json 9 | from langchain_upstage import ChatUpstage 10 | from langchain_core.output_parsers import StrOutputParser 11 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 12 | from langchain_core.messages import AIMessage, HumanMessage 13 | import urllib.parse 14 | from tinydb import TinyDB, Query 15 | from datetime import datetime, timedelta 16 | import hashlib 17 | import time 18 | 19 | 20 | def format_output(): 21 | """Create color formatting functions for console output""" 22 | colors = { 23 | "blue": "\033[34m", 24 | "green": "\033[32m", 25 | "yellow": "\033[33m", 26 | "red": "\033[31m", 27 | "reset": "\033[0m", 28 | } 29 | 30 | return { 31 | "info": lambda text: f"{colors['blue']}{text}{colors['reset']}", 32 | "success": lambda text: f"{colors['green']}{text}{colors['reset']}", 33 | "highlight": lambda text: f"{colors['yellow']}{text}{colors['reset']}", 34 | "error": lambda text: f"{colors['red']}{text}{colors['reset']}", 35 | } 36 | 37 | 38 | def format_response_to_markdown(text: str) -> str: 39 | """Format the AI response into markdown""" 40 | # Ensure consistent newlines 41 | processed_text = text.replace("\r\n", "\n") 42 | 43 | # Process main sections (simplified regex) 44 | processed_text = re.sub( 45 | r"^(\w[^:]+):(\s*)", r"## \1\2", processed_text, flags=re.MULTILINE 46 | ) 47 | 48 | # Process sub-sections (simplified regex without look-behind) 49 | lines = processed_text.split("\n") 50 | processed_lines = [] 51 | for line in lines: 52 | if re.match(r"^(\w[^:]+):(?!\d)", line): 53 | line = "### " + line 54 | processed_lines.append(line) 55 | processed_text = "\n".join(processed_lines) 56 | 57 | # Process bullet points 58 | processed_text = re.sub(r"^[•●○]\s*", "* ", processed_text, flags=re.MULTILINE) 59 | 60 | # Split into paragraphs and process 61 | paragraphs = [p for p in processed_text.split("\n\n") if p] 62 | 
formatted_paragraphs = [] 63 | for p in paragraphs: 64 | if any(p.startswith(prefix) for prefix in ["#", "*", "-"]): 65 | formatted_paragraphs.append(p) 66 | else: 67 | formatted_paragraphs.append(f"{p}\n") 68 | 69 | return "\n\n".join(formatted_paragraphs) 70 | 71 | 72 | def get_cache_db(): 73 | """Initialize TinyDB database for caching with error handling""" 74 | try: 75 | return TinyDB('search_cache.json') 76 | except json.JSONDecodeError: 77 | # If cache is corrupted, delete it and create new 78 | try: 79 | os.remove('search_cache.json') 80 | except OSError: 81 | pass 82 | return TinyDB('search_cache.json') 83 | 84 | 85 | def safe_cache_operation(func): 86 | """Decorator to safely handle cache operations""" 87 | def wrapper(*args, **kwargs): 88 | try: 89 | return func(*args, **kwargs) 90 | except (json.JSONDecodeError, OSError): 91 | # If any cache operation fails, delete cache and retry once 92 | try: 93 | os.remove('search_cache.json') 94 | except OSError: 95 | pass 96 | # Return None to indicate cache miss 97 | return None 98 | return wrapper 99 | 100 | 101 | @safe_cache_operation 102 | def get_cached_result(db, Entry, cache_key): 103 | """Safely get cached result""" 104 | try: 105 | return db.get(Entry.cache_key == cache_key) 106 | except: 107 | return None 108 | 109 | 110 | def generate_cache_key(query: str) -> str: 111 | """Generate a consistent cache key for a query""" 112 | return hashlib.md5(query.encode()).hexdigest() 113 | 114 | 115 | def is_cache_valid(timestamp: str, hours: int = 1) -> bool: 116 | """Check if cached data is still valid""" 117 | cached_time = datetime.fromisoformat(timestamp) 118 | return datetime.now() - cached_time < timedelta(hours=hours) 119 | 120 | 121 | def search(keyword: str, prompt: str="") -> Dict[str, Any]: 122 | """Perform a search using Google's Generative AI with caching""" 123 | # Initialize cache 124 | db = get_cache_db() 125 | cache_key = generate_cache_key(keyword) 126 | Entry = Query() 127 | 128 | # Check cache first with error handling 129 | cached_result = get_cached_result(db, Entry, cache_key) 130 | if cached_result and is_cache_valid(cached_result['timestamp']): 131 | return cached_result['data'] 132 | 133 | # Original search logic 134 | 135 | # Initialize the Google Generative AI client 136 | client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY")) 137 | model_id = "gemini-2.0-flash" 138 | 139 | # Configure Google Search tool 140 | google_search_tool = Tool(google_search=GoogleSearch()) 141 | 142 | # Generate content 143 | response = client.models.generate_content( 144 | model=model_id, 145 | contents=prompt + keyword, 146 | config=GenerateContentConfig( 147 | tools=[google_search_tool], 148 | ), 149 | ) 150 | 151 | # Extract text from the first candidate's content 152 | if response.candidates and response.candidates[0].content.parts: 153 | text = response.candidates[0].content.parts[0].text 154 | else: 155 | raise Exception("No content found in response") 156 | 157 | # Extract sources from grounding metadata 158 | sources = [] 159 | if hasattr(response.candidates[0], "grounding_metadata"): 160 | metadata = response.candidates[0].grounding_metadata 161 | 162 | 163 | # Create a mapping of chunk indices to web sources 164 | web_sources = {} 165 | if metadata.grounding_chunks: 166 | for i, chunk in enumerate(metadata.grounding_chunks): 167 | if chunk.web: 168 | web_sources[i] = { 169 | "title": chunk.web.title, 170 | "url": chunk.web.uri, 171 | "contexts": [], 172 | } 173 | 174 | # st.json(metadata) 175 | 176 | # Add text segments to 
corresponding sources
177 | if metadata.grounding_supports:
178 | for support in metadata.grounding_supports:
179 | for chunk_idx in support.grounding_chunk_indices:
180 | if chunk_idx in web_sources:
181 | web_sources[chunk_idx]["contexts"].append(
182 | {
183 | "text": support.segment.text,
184 | "confidence": support.confidence_scores[0],
185 | }
186 | )
187 |
188 | # Convert to list and filter out sources with no contexts
189 | sources = [source for source in web_sources.values() if source["contexts"]]
190 |
191 | formatted_text = format_response_to_markdown(text)
192 |
193 | # Build the result first so it is always defined, even if caching fails below
194 | result_data = {
195 | "summary": formatted_text,
196 | "sources": sources,
197 | "query": keyword,
198 | "web_search_query": getattr(getattr(response.candidates[0], "grounding_metadata", None), "web_search_queries", []),
199 | }
200 |
201 | # Store result in cache with error handling
202 | try:
203 | db.upsert({
204 | 'cache_key': cache_key,
205 | 'data': result_data,
206 | 'timestamp': datetime.now().isoformat()
207 | }, Entry.cache_key == cache_key)
208 | except Exception:
209 | # If the cache write fails, continue without caching
210 | pass
211 |
212 | return result_data
213 |
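# --- Cache round-trip sketch (illustrative; uses the helpers defined above) ---
#
#     db = get_cache_db()
#     Entry = Query()
#     key = generate_cache_key("nvidia news")          # hypothetical query
#     db.upsert({'cache_key': key,
#                'data': {"summary": "..."},
#                'timestamp': datetime.now().isoformat()},
#               Entry.cache_key == key)
#     hit = get_cached_result(db, Entry, key)
#     fresh = hit is not None and is_cache_valid(hit['timestamp'])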
214 | def generate_search_query(keyword: str, results: str) -> List[str]:
215 | """Generate search queries with caching"""
216 | # Initialize cache
217 | db = get_cache_db()
218 | cache_key = generate_cache_key(f"suggestions_{keyword}")
219 | Entry = Query()
220 |
221 | # Check cache first
222 | cached_result = get_cached_result(db, Entry, cache_key)
223 | if cached_result and is_cache_valid(cached_result['timestamp']):
224 | return cached_result['data']
225 |
226 | # Original suggestion generation logic
227 | try:
228 | llm = ChatUpstage(model="solar-mini")
229 |
230 | prompt = ChatPromptTemplate.from_messages(
231 | [
232 | (
233 | "system",
234 | """You are a helpful assistant that generates search queries based on a user's query and the results of a previous search.
235 | Always return a JSON object with a "suggestions" array containing 3-5 search queries.
236 | IMPORTANT: You must detect the language of the input query and respond STRICTLY in the SAME LANGUAGE.
237 | - If the input query is in Korean, you MUST generate Korean search queries only
238 | - If the input query is in English, you MUST generate English search queries only
239 |
240 | Example 1 (Korean query -> Korean response):
241 | Input: "엔비디아 최신 뉴스"
242 | Output: {{"suggestions": ["엔비디아 주가 현황", "엔비디아 신제품 출시 2024", "엔비디아 AI 개발 현황", "엔비디아 최신 파트너십"]}}
243 |
244 | Example 2 (English query -> English response):
245 | Input: "latest nvidia news"
246 | Output: {{"suggestions": ["nvidia stock price today", "nvidia new product announcements 2024", "nvidia AI developments", "nvidia partnerships latest"]}}
247 |
248 | Remember: The response language MUST MATCH the input query language.""",
249 | ),
250 | ("user", "User query: {keyword}\nPrevious search results: {results}"),
251 | (
252 | "user",
253 | "Generate a JSON array of 3-5 new search queries that would help find more relevant information.",
254 | ),
255 | ]
256 | )
257 | chain = prompt | llm | StrOutputParser()
258 | response = chain.invoke({"keyword": keyword, "results": results})
259 |
260 | # Parse the JSON response; fall back to the original keyword on failure
261 | try:
262 | response_json = json.loads(response)
263 | queries = response_json.get("suggestions", [])
264 | if not isinstance(queries, list):
265 | queries = [keyword]
266 | except json.JSONDecodeError:
267 | return [keyword]
268 |
269 | # Store suggestions in cache before returning
270 | cache_data = {
271 | 'cache_key': cache_key,
272 | 'data': queries,
273 | 'timestamp': datetime.now().isoformat()
274 | }
275 | db.upsert(cache_data, Entry.cache_key == cache_key)
276 |
277 | return queries
278 | except Exception:
279 | return [keyword]
280 |
281 |
282 | def generate_quick_answer(keyword: str, results: str) -> str:
283 | """Generate a one-line quick answer with caching"""
284 | # Initialize cache
285 | db = get_cache_db()
286 | cache_key = generate_cache_key(f"quick_answer_{keyword}")
287 | Entry = Query()
288 |
289 | # Check cache first
290 | cached_result = get_cached_result(db, Entry, cache_key)
291 | if cached_result and is_cache_valid(cached_result['timestamp']):
292 | return cached_result['data']
293 |
294 | try:
295 | llm = ChatUpstage(model="solar-mini")
296 |
297 | prompt = ChatPromptTemplate.from_messages([
298 | (
299 | "system",
300 | """You are a helpful assistant that generates concise, one-line answers based on search results.
301 | Always return a JSON object with a "quick_answer" string containing a direct, factual response.
302 | IMPORTANT: You must detect the language of the input query and respond STRICTLY in the SAME LANGUAGE.
303 | - If the input query is in Korean, respond in Korean
304 | - If the input query is in English, respond in English
305 |
306 | The answer should be:
307 | 1. No more than 20 words
308 | 2. Direct and informative
309 | 3. Based on the most recent/relevant information from results
310 | 4. In the same language as the query
311 |
312 | Example 1 (Korean query -> Korean response):
313 | Input: "User query: 현재 비트코인 가격은?\nSearch results: 비트코인이 최근 강세를 보이며 현재 67,000달러 선에서 거래되고 있습니다. 이는 작년 대비 150% 상승한 수치이며, 전문가들은 연말까지 추가 상승 가능성을 전망하고 있습니다. 특히 최근 비트코인 ETF 승인 이후 기관 투자자들의 관심이 높아지면서 가격 상승세가 지속되고 있습니다."
315 | Output: {{"quick_answer": "비트코인은 현재 67,000달러 선에서 거래되고 있습니다."}} 316 | 317 | Example 2 (English query -> English response): 318 | Input: "User query: What is Bitcoin's price?\nSearch results: Bitcoin continues its bullish trend, currently trading at around $67,000. This represents a 150% increase from last year, with experts predicting further gains by year-end. The recent approval of Bitcoin ETFs has particularly attracted institutional investors, contributing to the sustained price momentum." 319 | Output: {{"quick_answer": "Bitcoin is currently trading at around $67,000."}}""", 320 | ), 321 | ("user", "User query: {keyword}\nSearch results: {results}"), 322 | ("user", "Generate a one-line quick answer based on the search results."), 323 | ]) 324 | 325 | chain = prompt | llm | StrOutputParser() 326 | response = chain.invoke({"keyword": keyword, "results": results}) 327 | 328 | try: 329 | response_json = json.loads(response) 330 | quick_answer = response_json.get("quick_answer", "") 331 | 332 | # Store answer in cache 333 | cache_data = { 334 | 'cache_key': cache_key, 335 | 'data': quick_answer, 336 | 'timestamp': datetime.now().isoformat() 337 | } 338 | db.upsert(cache_data, Entry.cache_key == cache_key) 339 | 340 | return quick_answer 341 | except json.JSONDecodeError: 342 | return "" 343 | 344 | except Exception as e: 345 | print(f"Quick answer generation error: {e}") 346 | return "" 347 | 348 | def show_sources(result:Dict[str, Any]) -> None: 349 | # Sources with improved design 350 | if result.get("sources"): 351 | sources = [s for s in result["sources"] if s.get("title") and s.get("url")] 352 | if sources: 353 | st.markdown("### Sources") 354 | for idx, source in enumerate(sources, 1): 355 | content = " ".join([context["text"] for context in source["contexts"]])[:200] + "..." 356 | st.markdown( 357 | f""" 358 |
359 | <div>
360 | <strong>{idx}</strong>
361 | <a href="{source['url']}" target="_blank">
362 | {source['title']}
363 | </a>
364 | <br/>
365 | <small>
366 | {content}
367 | </small>
368 | </div>
369 | """, 370 | unsafe_allow_html=True 371 | ) 372 | 373 | 374 | def sources_to_citations(result:Dict[str, Any]) -> None: 375 | if result.get("sources"): 376 | sources = [s for s in result["sources"] if s.get("title") and s.get("url")] 377 | if sources: 378 | citations = [] 379 | for idx, source in enumerate(sources, 1): 380 | content = " ".join([context["text"] for context in source["contexts"]]) 381 | citations.append(f"{idx}. {source['title']}: {content}\n\n") 382 | 383 | return "\n\n".join(citations) 384 | 385 | def get_full_sources(search_query: str, result: Dict[str, Any]) -> None: 386 | """ 387 | Query for full list of sources and display them with improved design. 388 | 389 | This function: 390 | 1. Performs a reference search to get the full sources data 391 | 2. Displays the resulting JSON for debugging/visualization 392 | 3. Filters and displays the sources (if available) in a formatted manner 393 | """ 394 | # Generate the reference query that asks for full, unmodified content details. 395 | ref_query = ( 396 | "For a given query and provided search results, analyze and return a JSON object containing the full list of sources.\n" 397 | "The output should be in the following format:\n" 398 | "{\n" 399 | ' "sources": [\n' 400 | " {\n" 401 | ' "url": "source URL",\n' 402 | ' "title": "source title",\n' 403 | ' "content": "full original content without modifications or summaries"\n' 404 | " }\n" 405 | " ]\n" 406 | "}\n\n" 407 | "Important: Return the content exactly as provided in the source, without summarization or modification.\n\n" 408 | "Query: " + search_query 409 | ) 410 | # Perform the reference search using the global search function. 411 | ref_result = search(ref_query) 412 | st.json(ref_result) 413 | 414 | # Check if sources are available in the main result. 415 | if result.get("sources"): 416 | # Filter out any sources that don't have both a title and URL. 417 | sources = [s for s in result["sources"] if s.get("title") and s.get("url")] 418 | if sources: 419 | st.markdown("### Sources") 420 | # Enumerate over the valid sources and display each one. 421 | for idx, source in enumerate(sources, 1): 422 | content = " ".join([context["text"] for context in source["contexts"]])[:200] + "..." 423 | st.markdown( 424 | f""" 425 |
426 | <div>
427 | <strong>{idx}</strong>
428 | <a href="{source['url']}" target="_blank">
429 | {source['title']}
430 | </a>
431 | <br/>
432 | <small>
433 | {content}
434 | </small>
435 | </div>
436 | """, 437 | unsafe_allow_html=True 438 | ) 439 | 440 | 441 | def perform_search_and_display(search_query: str, is_suggestion: bool = False) -> None: 442 | """ 443 | Perform search and display results with enhanced source list design 444 | """ 445 | 446 | # Add share button 447 | share_url = f"?q={urllib.parse.quote(search_query)}" 448 | st.markdown( 449 | f""" 450 | 455 | """, 456 | unsafe_allow_html=True 457 | ) 458 | 459 | # CSS with improved source list styling 460 | st.markdown(""" 461 | 535 | """, unsafe_allow_html=True) 536 | 537 | 538 | web_search_query_spot = st.empty() 539 | summary_spot = st.empty() 540 | result_spot = st.empty() 541 | suggested_queries_spot = st.empty() 542 | 543 | # Main search 544 | with st.spinner("Searching..."): 545 | result = search(search_query) 546 | 547 | # Search queries (only if there are queries) 548 | if result.get("web_search_query"): 549 | with web_search_query_spot.expander("🔍 Search queries used", expanded=False): 550 | st.markdown(""" 551 | 562 | """, unsafe_allow_html=True) 563 | 564 | for query in result["web_search_query"]: 565 | st.markdown(f'
{query}
', unsafe_allow_html=True) 566 | 567 | if result["summary"]: 568 | result_spot.markdown(result["summary"]) 569 | 570 | 571 | show_sources(result) 572 | 573 | citations = sources_to_citations(result) 574 | citation_added_text = fill_citations(result["summary"], citations) 575 | result_spot.markdown(citation_added_text) 576 | 577 | # Quick answer (if available) 578 | quick_answer = generate_quick_answer(search_query, result["summary"]) 579 | if quick_answer: 580 | summary_spot.markdown( 581 | f'
{quick_answer}
', 582 | unsafe_allow_html=True 583 | ) 584 | 585 | 586 | # Related searches (only if there are suggestions) 587 | suggested_queries = generate_search_query(search_query, result["summary"]) 588 | if suggested_queries and len(suggested_queries) > 0: 589 | cols = suggested_queries_spot.columns(min(len(suggested_queries[:3]), 3)) 590 | for col, query in zip(cols, suggested_queries[:3]): 591 | col.markdown( 592 | f'{query}', 593 | unsafe_allow_html=True 594 | ) 595 | 596 | 597 | def fill_citations(text: str, citations: list) -> str: 598 | """Add citation numbers to text based on provided citations list. 599 | 600 | Args: 601 | text: The original text to add citations to 602 | citations: List of citation objects with text content to match 603 | 604 | Returns: 605 | Text with citation numbers added in [n] format 606 | """ 607 | llm = ChatUpstage(model="solar-pro") 608 | prompt = ChatPromptTemplate.from_messages([ 609 | ( 610 | "system", 611 | """You are a citation assistant. Your task is to add citation numbers to text by matching content with provided citations. 612 | 613 | Rules: 614 | 1. Do not modify the original text 615 | 2. Only add citation numbers in [n] format where appropriate 616 | 3. Add citations where text closely matches citation content 617 | 4. Multiple citations can be added to the same statement if relevant [1,2] 618 | 5. Citations should be added at the end of relevant sentences or claims 619 | 620 | Example: 621 | Text: "The sky is blue due to Rayleigh scattering. This effect causes shorter wavelengths to scatter more." 622 | Citations: 623 | 1. "Rayleigh scattering explains the blue color of the sky" 624 | 2. "Short wavelength blue light is scattered more by the atmosphere" 625 | 626 | Output: "The sky is blue due to Rayleigh scattering [1]. This effect causes shorter wavelengths to scatter more [2]." 627 | """, 628 | ), 629 | ("user", "Text: {text}\nCitations: {citations}\nAdd appropriate citation numbers to the text while preserving the original content exactly."), 630 | ]) 631 | 632 | chain = prompt | llm | StrOutputParser() 633 | return chain.invoke({"text": text, "citations": citations}) 634 | 635 | def main(): 636 | """Main function to run the Streamlit app""" 637 | st.set_page_config(page_title="Search Up", layout="wide") 638 | 639 | # Add title and subtitle 640 | st.markdown(""" 641 |

<h1>SearchUp</h1>
642 | <p>
643 | powered by Google, Gemini, and Solar
644 | </p>
645 | """, unsafe_allow_html=True) 646 | 647 | # Custom CSS for the UI, including improved share button styling 648 | st.markdown(""" 649 | 713 | """, unsafe_allow_html=True) 714 | 715 | # Search bar layout - input field and search button 716 | search_col1, search_col2 = st.columns([3, 1]) 717 | with search_col1: 718 | search_input = st.text_input( 719 | "", 720 | st.query_params.get("q", ""), 721 | placeholder="Search anything...", 722 | key="search_input" 723 | ) 724 | # Synchronize the session state with URL parameter "q" 725 | if st.session_state.get("search_input"): 726 | if st.session_state["search_input"] != st.query_params.get("q", ""): 727 | st.query_params["q"] = st.session_state["search_input"] 728 | st.rerun() 729 | 730 | with search_col2: 731 | st.markdown("
", unsafe_allow_html=True) 732 | if st.button("Search"): 733 | st.query_params["q"] = st.session_state["search_input"] 734 | st.rerun() 735 | 736 | # Only perform search if the URL contains a non-empty 'q' parameter 737 | if "q" in st.query_params: 738 | search_query = st.query_params["q"] 739 | if not search_query.strip(): 740 | st.warning("Please enter a search keyword to begin.") 741 | else: 742 | perform_search_and_display(search_query) 743 | 744 | 745 | if __name__ == "__main__": 746 | main() -------------------------------------------------------------------------------- /llama.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | from langchain_groq import ChatGroq as Chat 8 | from langchain_community.tools import DuckDuckGoSearchResults 9 | 10 | 11 | from langchain_core.output_parsers import StrOutputParser 12 | from langchain_core.prompts import ( 13 | ChatPromptTemplate, 14 | MessagesPlaceholder, 15 | PromptTemplate, 16 | ) 17 | from langchain_core.output_parsers import JsonOutputParser 18 | from langchain_core.messages import AIMessage, HumanMessage 19 | 20 | MAX_TOKENS = 40000 21 | MAX_SEAERCH_RESULTS = 5 22 | 23 | MODEL_NAME = "llama-3.1-70b-versatile" 24 | 25 | llm = Chat(model=MODEL_NAME) 26 | 27 | ddg_search = DuckDuckGoSearchResults() 28 | 29 | 30 | st.set_page_config(page_title="Llama Reasoning", page_icon="🤔") 31 | st.title("Llama 3.1 70B Reasoning") 32 | 33 | reasoning_examples = """ 34 | --- 35 | Human: Given Instruction, please generate {what}. Please use the following exampels. 36 | If reasoning and/or reasoning chains are provided, please use them as context to generate the {what}. 37 | Please only generate the {what} and do not include others. 38 | 39 | See the examples below: 40 | ---- 41 | Example 1: 42 | 43 | Instruction: If a die is rolled three times, what is the probability of getting a sum of 11? None 44 | 45 | Reasoning: 1. Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times. 46 | 2. Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216. 47 | 3. Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 18 such combinations. 48 | 4. Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 18 / 216 = 1/12. 49 | 5. Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/12. 50 | 51 | Reasoning Chains: [{'step': 1, 'thought': 'Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.'}, {'step': 2, 'thought': 'Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.'}, {'step': 3, 'thought': 'Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. 
32 | reasoning_examples = """
33 | ---
34 | Human: Given Instruction, please generate {what}. Please use the following examples.
35 | If reasoning and/or reasoning chains are provided, please use them as context to generate the {what}.
36 | Please only generate the {what} and do not include others.
37 |
38 | See the examples below:
39 | ----
40 | Example 1:
41 |
42 | Instruction: If a die is rolled three times, what is the probability of getting a sum of 11? None
43 |
44 | Reasoning: 1. Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.
45 | 2. Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.
46 | 3. Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 27 such combinations.
47 | 4. Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 27 / 216 = 1/8.
48 | 5. Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/8.
49 |
50 | Reasoning Chains: [{'step': 1, 'thought': 'Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.'}, {'step': 2, 'thought': 'Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.'}, {'step': 3, 'thought': 'Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 27 such combinations.'}, {'step': 4, 'thought': 'Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 27 / 216 = 1/8.'}, {'step': 5, 'thought': 'Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/8.'}]
51 | ----
52 | Example 2:
53 |
54 | Instruction: The interactions will be about the science behind culinary techniques. The setting is a cooking class where three friends are discussing various aspects of cooking and sharing their knowledge.
55 | - USER/Jane: A curious learner who wants to understand the science behind cooking
56 | - Mike: An experienced home cook with a passion for experimenting in the kitchen
57 | - Sarah: A food scientist who loves explaining the chemistry behind different cooking processes
58 |
59 | None
60 |
61 | Reasoning: 1. Start with the given setting: a cooking class with three friends discussing the science behind culinary techniques.
62 | 2. Introduce the topic of resting meat after cooking, with Mike asking Jane if she's ever wondered about it.
63 | 3. Have Sarah explain the science behind resting meat, mentioning denatured proteins and juice redistribution.
64 | 4. Address the user's question about resting meat, with Sarah confirming that it allows juices to redistribute.
65 | 5. Move on to the topic of adding salt to water, with Mike mentioning its effect on boiling point.
66 | 6. Have Sarah explain the science behind salt's effect on boiling point, mentioning the higher temperature required for boiling.
67 | 7. Address the user's question about cooking speed, with Sarah explaining that it's slightly faster due to the hotter water.
68 | 8. Introduce the topic of acids in cooking, with Mike mentioning their use in brightening dishes.
69 | 9. Have Sarah explain the science behind acids' effects on flavor and tenderizing meats.
70 | 10. Address the user's question about baking, with Mike mentioning the science involved in baking and Sarah explaining the role of gluten and leavening agents.
71 | 11. Conclude the conversation with the characters expressing their fascination with the science behind cooking and their excitement to continue learning and experimenting.
73 | 74 | Reasoning Chains: [{'step': 1, 'thought': 'Start with the given setting: a cooking class with three friends discussing the science behind culinary techniques.'}, {'step': 2, 'thought': "Introduce the topic of resting meat after cooking, with Mike asking Jane if she's ever wondered about it."}, {'step': 3, 'thought': 'Have Sarah explain the science behind resting meat, mentioning denatured proteins and juice redistribution.'}, {'step': 4, 'thought': "Address the user's question about resting meat, with Sarah confirming that it allows juices to redistribute."}, {'step': 5, 'thought': 'Move on to the topic of adding salt to water, with Mike mentioning its effect on boiling point.'}, {'step': 6, 'thought': "Have Sarah explain the science behind salt's effect on boiling point, mentioning the higher temperature required for boiling."}, {'step': 7, 'thought': "Address the user's question about cooking speed, with Sarah explaining that it's slightly faster due to the hotter water."}, {'step': 8, 'thought': 'Introduce the topic of acids in cooking, with Mike mentioning their use in brightening dishes.'}, {'step': 9, 'thought': "Have Sarah explain the science behind acids' effects on flavor and tenderizing meats."}, {'step': 10, 'thought': "Address the user's question about baking, with Mike mentioning the science involved in baking and Sarah explaining the role of gluten and leavening agents."}, {'step': 11, 'thought': 'Conclude the conversation with the characters expressing their fascination with the science behind cooking and their excitement to continue learning and experimenting.'}] 75 | ---- 76 | Example 3: 77 | 78 | Instruction: Create a C++ program that connects to a Cassandra database and performs basic CRUD operations (Create, Read, Update, Delete) on a table containing employee information (ID, name, department, salary). Use prepared statements for queries and ensure proper error handling. 79 | None 80 | 81 | Reasoning: 1. Understand the instruction: Create a C++ program that connects to a Cassandra database and performs basic CRUD operations on a table containing employee information using prepared statements and proper error handling. 82 | 2. Identify required libraries: Include the necessary libraries for connecting to Cassandra and handling errors. 83 | 3. Establish a connection to the Cassandra cluster: Create a cluster and session object, set the contact points, and connect to the cluster. 84 | 4. Create a keyspace and table for employee information: Write the CQL queries for creating the keyspace and table, execute them, and handle any errors. 85 | 5. Prepare CRUD statements: Write the CQL queries for insert, select, update, and delete operations, and prepare them using the Cassandra session. 86 | 6. Perform basic CRUD operations using prepared statements: 87 | a. Insert an employee record: Generate a UUID for the employee ID, bind the prepared insert statement with the employee data, and execute the query. 88 | b. Read the inserted employee record: Bind the prepared select statement with the employee ID, execute the query, and display the employee information. 89 | c. Update the employee's salary: Bind the prepared update statement with the new salary and employee ID, and execute the query. 90 | d. Delete the employee record: Bind the prepared delete statement with the employee ID, and execute the query. 91 | 7. Handle errors: Check the error codes for each query execution and print error messages if necessary. 92 | 8. 
Clean up and close the connection: Free the prepared statements, UUID generator, and close the session and cluster objects.
93 | 9. Compile and run the program: Provide instructions for installing the DataStax C/C++ driver, compiling the program, and running it with a local Cassandra cluster.
94 |
95 | Reasoning Chains: [{'step': 1, 'thought': 'Understand the instruction: Create a C++ program that connects to a Cassandra database and performs basic CRUD operations on a table containing employee information using prepared statements and proper error handling.'}, {'step': 2, 'thought': 'Identify required libraries: Include the necessary libraries for connecting to Cassandra and handling errors.'}, {'step': 3, 'thought': 'Establish a connection to the Cassandra cluster: Create a cluster and session object, set the contact points, and connect to the cluster.'}, {'step': 4, 'thought': 'Create a keyspace and table for employee information: Write the CQL queries for creating the keyspace and table, execute them, and handle any errors.'}, {'step': 5, 'thought': 'Prepare CRUD statements: Write the CQL queries for insert, select, update, and delete operations, and prepare them using the Cassandra session.'}, {'step': 6, 'thought': "Perform basic CRUD operations using prepared statements:\n a. Insert an employee record: Generate a UUID for the employee ID, bind the prepared insert statement with the employee data, and execute the query.\n b. Read the inserted employee record: Bind the prepared select statement with the employee ID, execute the query, and display the employee information.\n c. Update the employee's salary: Bind the prepared update statement with the new salary and employee ID, and execute the query.\n d. Delete the employee record: Bind the prepared delete statement with the employee ID, and execute the query."}, {'step': 7, 'thought': 'Handle errors: Check the error codes for each query execution and print error messages if necessary.'}, {'step': 8, 'thought': 'Clean up and close the connection: Free the prepared statements, UUID generator, and close the session and cluster objects.'}, {'step': 9, 'thought': 'Compile and run the program: Provide instructions for installing the DataStax C/C++ driver, compiling the program, and running it with a local Cassandra cluster.'}]
96 | ----
97 | Example 4:
98 |
99 | Instruction: BEGININPUT
100 | BEGINCONTEXT
101 | date: August 15, 2022
102 | author: Sarah Johnson
103 | subject: SharePoint Server 2019 Features and Benefits
104 | to: John Smith
105 | ENDCONTEXT
106 | Hi John,
107 |
108 | I hope you're doing well. I wanted to provide you with some information on Microsoft SharePoint Server 2019 and its features and benefits. As you know, our company is considering upgrading our current system, and I believe that SharePoint Server 2019 could be a great fit for us.
109 |
110 | SharePoint Server 2019 comes with several new features that can help improve productivity and collaboration within our organization. Some of these features include:
111 |
112 | 1. Modern Sites: SharePoint Server 2019 introduces modern team sites and communication sites, which offer an improved user experience compared to the classic sites we currently use. These modern sites are responsive by design, making them easily accessible from any device.
113 |
114 | 2. Improved File Sharing: The new version includes OneDrive for Business integration, allowing users to share files more easily both internally and externally. This feature also supports larger file sizes (up to 15 GB) and provides real-time co-authoring capabilities in Office Online.
115 |
116 | 3. Hybrid Scenarios: SharePoint Server 2019 allows for better integration between on-premises and cloud environments. This means we can take advantage of cloud-based services like Power BI, Flow, and Planner while still maintaining control over our sensitive data on-premises.
117 |
118 | 4. Enhanced Search Experience: The search functionality has been significantly improved in this version, providing personalized results based on the user's role and previous searches. This makes it easier for employees to find relevant content quickly.
119 |
120 | 5. Security and Compliance: SharePoint Server 2019 offers advanced security features such as Data Loss Prevention (DLP), eDiscovery, and Multi-Factor Authentication (MFA). These features help protect our sensitive data and ensure compliance with industry regulations.
121 |
122 | 6. Accessibility Improvements: The new version includes several accessibility enhancements, such as improved keyboard navigation and support for screen readers. This makes SharePoint more inclusive for all users.
123 |
124 | In addition to these features, SharePoint Server 2019 also offers better performance and scalability compared to previous versions. It supports up to 250,000 site collections per content database, which is a significant increase from the 100,000 limit in SharePoint Server 2016.
125 |
126 | Overall, I believe that upgrading to SharePoint Server 2019 would greatly benefit our organization by providing us with a modern, user-friendly platform that promotes collaboration and productivity. If you have any questions or concerns about this information, please don't hesitate to reach out.
127 |
128 | Best regards,
129 |
130 | Sarah Johnson
131 | ENDINPUT
132 |
133 | BEGININSTRUCTION
134 | - List three new features of Microsoft SharePoint Server 2019 mentioned in the email.
135 | - What is the maximum file size supported for sharing in SharePoint Server 2019?
136 | - How many site collections per content database does SharePoint Server 2019 support?
137 | Please provide references.
138 | ENDINSTRUCTION None
139 |
140 | Reasoning: 1. Read the instruction and identify the required information: three new features, maximum file size supported, and site collections per content database.
141 | 2. Scan the email for the mentioned features.
142 | 3. Identify the first feature: Modern Sites.
143 | 4. Identify the second feature: Improved File Sharing with OneDrive for Business integration.
144 | 5. Identify the third feature: Enhanced Search Experience.
145 | 6. Locate the information about the maximum file size supported: up to 15 GB.
146 | 7. Locate the information about site collections per content database: up to 250,000.
147 | 8. Compile the answer with the required information and reference(s).
148 |
149 | Reasoning Chains: [{'step': 1, 'thought': 'Read the instruction and identify the required information: three new features, maximum file size supported, and site collections per content database.'}, {'step': 2, 'thought': 'Scan the email for the mentioned features.'}, {'step': 3, 'thought': 'Identify the first feature: Modern Sites.'}, {'step': 4, 'thought': 'Identify the second feature: Improved File Sharing with OneDrive for Business integration.'}, {'step': 5, 'thought': 'Identify the third feature: Enhanced Search Experience.'}, {'step': 6, 'thought': 'Locate the information about the maximum file size supported: up to 15 GB.'}, {'step': 7, 'thought': 'Locate the information about site collections per content database: up to 250,000.'}, {'step': 8, 'thought': 'Compile the answer with the required information and reference(s).'}]
150 | ----
151 | Example 5:
152 |
153 | Instruction: Rewrite the below text (which is a fragment of a longer script for the video). Fix grammar and other errors and make it sound more professional.
154 |
155 | Networks and security, or how to connect the digital world. How can we exchange data quickly and efficiently. There are different types of networks.
156 | A local area network (LAN) is for smaller, more local networks - home, business, school, etc.
157 | A wide area network (WAN) covers larger areas such as cities and even allows computers in different countries to connect.
158 | An intranet is a private enterprise network designed to support an organization's employees in communicating, collaborating, and performing their roles, usually to the exclusion of outside access.
159 | We also have the well-known internet, which is the largest network connecting computers around the world.
160 |
161 | But of course someone could try to intercept that communication. And we want to securely exchange data. We want to find vulnerabilities and defend against hackers.
162 | This is why we have something like cryptography. It is a field of study that focuses on the encoding and decoding of info using special algorithms. So only the sender and receiver can know the content of the message.
163 | There are many ways to protect your data, programs, devices and networks. Besides cryptography, we also use antivirus programs that can check if anything looks suspicious. A very popular trend is also the use of artificial intelligence to defend against hackers. But hackers can also use AI to attack. So it's a double-edged sword. None
164 |
165 | Reasoning: 1. Read and understand the original text.
166 | 2. Identify grammar and other errors.
167 | 3. Determine the main points and concepts.
168 | 4. Rewrite the text to fix errors and improve professionalism.
169 | 5. Ensure all important information is included and clear.
170 | 6. Check for factual accuracy and potential confusion.
171 | 7. Finalize the revised text and present it as the output.
172 |
173 | Reasoning Chains: [{'step': 1, 'thought': 'Read and understand the original text.'}, {'step': 2, 'thought': 'Identify grammar and other errors.'}, {'step': 3, 'thought': 'Determine the main points and concepts.'}, {'step': 4, 'thought': 'Rewrite the text to fix errors and improve professionalism.'}, {'step': 5, 'thought': 'Ensure all important information is included and clear.'}, {'step': 6, 'thought': 'Check for factual accuracy and potential confusion.'}, {'step': 7, 'thought': 'Finalize the revised text and present it as the output.'}]
174 | ----
175 | Example 6:
176 |
177 | Instruction: How many even perfect square factors does $2^4 \cdot 7^9$ have? None
178 |
179 | Reasoning: 1. I need to find the number of factors of $2^4 \cdot 7^9$ that are both even and perfect squares.
180 | 2. A factor of $2^4 \cdot 7^9$ must be of the form $2^a \cdot 7^b$, where $0 \leq a \leq 4$ and $0 \leq b \leq 9$.
181 | 3. To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd.
182 | 4. To be a perfect square, a factor must have both $a$ and $b$ even, since an odd power of a prime is not a perfect square.
183 | 5. I need to count how many ways I can choose $a$ to be even and positive, and $b$ to be even and nonnegative.
184 | 6. For $a$, I have two choices: $2$ or $4$.
185 | 7. For $b$, I have five choices: $0, 2, 4, 6, 8$.
186 | 8. So the total number of choices is $2 \cdot 5 = 10$.
187 |
188 | Reasoning Chains: [{'step': 1, 'thought': 'I need to find the number of factors of $2^4 \\cdot 7^9$ that are both even and perfect squares.'}, {'step': 2, 'thought': 'A factor of $2^4 \\cdot 7^9$ must be of the form $2^a \\cdot 7^b$, where $0 \\leq a \\leq 4$ and $0 \\leq b \\leq 9$.'}, {'step': 3, 'thought': 'To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd.'}, {'step': 4, 'thought': 'To be a perfect square, a factor must have both $a$ and $b$ even, since an odd power of a prime is not a perfect square.'}, {'step': 5, 'thought': 'I need to count how many ways I can choose $a$ to be even and positive, and $b$ to be even and nonnegative.'}, {'step': 6, 'thought': 'For $a$, I have two choices: $2$ or $4$.'}, {'step': 7, 'thought': 'For $b$, I have five choices: $0, 2, 4, 6, 8$.'}, {'step': 8, 'thought': 'So the total number of choices is $2 \\cdot 5 = 10$.'}]
189 | ----
190 | Example 7:
191 |
192 | Instruction: If the city council maintains spending at the same level as this year's, it can be expected to levy a sales tax of 2 percent next year. Thus, if the council levies a higher tax, it will be because the council is increasing its expenditures. Which one of the following exhibits a pattern of reasoning most closely similar to that of the argument above?
193 | A: If the companies in the state do not increase their workers' wages this year, the prices they charge for their goods can be expected to be much the same as they were last year. Thus, if the companies do increase prices, it will be because they have increased wages.
194 | B: If newspaper publishers wish to publish good papers, they should employ good journalists. Thus, if they employ poor journalists, it will not be surprising if their circulation falls as a result.
195 | C: If shops wish to reduce shoplifting, they should employ more store detectives. Thus, if shops do not, they will suffer reduced profits because of their losses from stolen goods.
196 | D: If house-building costs are not now rising, builders cannot be expected to increase the prices of houses. Thus, if they decrease the prices of houses, it will be because that action will enable them to sell a greater number of houses. Choose A, B, C or D as your solution.
197 |
198 | Reasoning: 1. Identify the pattern of reasoning in the given argument: If X remains constant, Y can be expected to be Z. If Y is not Z, it's because X has changed.
199 | 2. Analyze each option to find a similar pattern of reasoning:
200 | A: If X (workers' wages) remains constant, Y (prices) can be expected to be Z (the same as last year). If Y is not Z, it's because X has changed (increased wages).
201 | B: This option discusses a cause and effect relationship (employing good journalists leads to good papers) but does not follow the same pattern of reasoning.
202 | C: This option also discusses a cause and effect relationship (employing more store detectives reduces shoplifting) but does not follow the same pattern of reasoning.
203 | D: This option has a different pattern of reasoning: If X (house-building costs) remains constant, Y (house prices) cannot be expected to increase. If Y decreases, it's because of a different reason (selling more houses).
204 | 3. Option A follows the same pattern of reasoning as the given argument.
205 | 4. Choose A as the solution.
206 |
207 | Reasoning Chains: [{'step': 1, 'thought': "Identify the pattern of reasoning in the given argument: If X remains constant, Y can be expected to be Z. If Y is not Z, it's because X has changed."}, {'step': 2, 'thought': "Analyze each option to find a similar pattern of reasoning:\n A: If X (workers' wages) remains constant, Y (prices) can be expected to be Z (the same as last year). If Y is not Z, it's because X has changed (increased wages).\n B: This option discusses a cause and effect relationship (employing good journalists leads to good papers) but does not follow the same pattern of reasoning.\n C: This option also discusses a cause and effect relationship (employing more store detectives reduces shoplifting) but does not follow the same pattern of reasoning.\n D: This option has a different pattern of reasoning: If X (house-building costs) remains constant, Y (house prices) cannot be expected to increase. If Y decreases, it's because of a different reason (selling more houses)."}, {'step': 3, 'thought': 'Option A follows the same pattern of reasoning as the given argument.'}, {'step': 4, 'thought': 'Choose A as the solution.'}]
208 | ----
209 | Example 9:
210 |
211 | Instruction: If z = arctan(e^{1 + (1 + x)^2}), what's the derivative $\frac{\partial z}{\partial x}$ at x = 0?
212 | Relevant Theorem: The Derivative Chain Rule is a fundamental rule in calculus used to find the derivative of a composite function. A composite function is a function that is formed by combining two or more functions, where the output of one function becomes the input of another function.
213 |
214 | The Chain Rule states that if you have a composite function, say h(x) = f(g(x)), then the derivative of h(x) with respect to x, denoted as h'(x) or dh/dx, can be found by taking the derivative of the outer function f with respect to the inner function g(x), and then multiplying it by the derivative of the inner function g(x) with respect to x.
215 |
216 | Mathematically, the Chain Rule can be expressed as:
217 |
218 | h'(x) = f'(g(x)) * g'(x)
219 |
220 | or
221 |
222 | dh/dx = (df/dg) * (dg/dx)
223 |
224 | The Chain Rule is particularly useful when dealing with complex functions that involve multiple layers of functions, as it allows us to break down the problem into simpler parts and find the derivative step by step. None
225 |
226 | Reasoning: 1. Identify the given function: z = arctan(e^{1 + (1 + x)^2})
227 | 2. Recognize that this is a composite function, with an outer function (arctan) and an inner function (e^{1 + (1 + x)^2}).
228 | 3. Apply the Chain Rule to find the derivative of z with respect to x: dz/dx = (d(arctan)/d(e^{1 + (1 + x)^2})) * (d(e^{1 + (1 + x)^2})/dx)
229 | 4. Find the derivative of the outer function (arctan) with respect to the inner function (e^{1 + (1 + x)^2}): d(arctan)/d(e^{1 + (1 + x)^2}) = 1/(1 + (e^{1 + (1 + x)^2})^2)
230 | 5. Find the derivative of the inner function (e^{1 + (1 + x)^2}) with respect to x: d(e^{1 + (1 + x)^2})/dx = e^{1 + (1 + x)^2} * 2(1 + x)
231 | 6. Combine the derivatives from steps 4 and 5 using the Chain Rule: dz/dx = (1/(1 + (e^{1 + (1 + x)^2})^2)) * (e^{1 + (1 + x)^2} * 2(1 + x))
232 | 7. Simplify the expression: dz/dx = (2(1 + x) * e^{1 + (1 + x)^2}) / (1 + e^{2(1 + (1 + x)^2)})
233 | 8. Evaluate the derivative at x = 0: dz/dx = (2(1 + 0) * e^{1 + (1 + 0)^2}) / (1 + e^{2(1 + (1 + 0)^2)})
234 | 9. Simplify and calculate the numerical value: dz/dx = (2 * e^2) / (1 + e^4) ≈ 0.2658
235 | 10. The derivative of z with respect to x at x = 0 is approximately 0.2658.
236 |
237 | Reasoning Chains: [{'step': 1, 'thought': 'Identify the given function: z = arctan(e^{1 + (1 + x)^2})'}, {'step': 2, 'thought': 'Recognize that this is a composite function, with an outer function (arctan) and an inner function (e^{1 + (1 + x)^2}).'}, {'step': 3, 'thought': 'Apply the Chain Rule to find the derivative of z with respect to x: dz/dx = (d(arctan)/d(e^{1 + (1 + x)^2})) * (d(e^{1 + (1 + x)^2})/dx)'}, {'step': 4, 'thought': 'Find the derivative of the outer function (arctan) with respect to the inner function (e^{1 + (1 + x)^2}): d(arctan)/d(e^{1 + (1 + x)^2}) = 1/(1 + (e^{1 + (1 + x)^2})^2)'}, {'step': 5, 'thought': 'Find the derivative of the inner function (e^{1 + (1 + x)^2}) with respect to x: d(e^{1 + (1 + x)^2})/dx = e^{1 + (1 + x)^2} * 2(1 + x)'}, {'step': 6, 'thought': 'Combine the derivatives from steps 4 and 5 using the Chain Rule: dz/dx = (1/(1 + (e^{1 + (1 + x)^2})^2)) * (e^{1 + (1 + x)^2} * 2(1 + x))'}, {'step': 7, 'thought': 'Simplify the expression: dz/dx = (2(1 + x) * e^{1 + (1 + x)^2}) / (1 + e^{2(1 + (1 + x)^2)})'}, {'step': 8, 'thought': 'Evaluate the derivative at x = 0: dz/dx = (2(1 + 0) * e^{1 + (1 + 0)^2}) / (1 + e^{2(1 + (1 + 0)^2)})'}, {'step': 9, 'thought': 'Simplify and calculate the numerical value: dz/dx = (2 * e^2) / (1 + e^4) ≈ 0.2658'}, {'step': 10, 'thought': 'The derivative of z with respect to x at x = 0 is approximately 0.2658.'}]
238 | ----
239 | Example 10:
240 |
241 | Instruction: What is the largest number $c$ such that $2x^2+5x+c=0$ has at least one real solution? Express your answer as a common fraction. None
242 |
243 | Reasoning: 1. I need to find the largest value of $c$ for which the quadratic equation $2x^2+5x+c=0$ has at least one real solution.
244 | 2. To do this, I'll consider the discriminant of the quadratic equation, which is $b^2-4ac$.
245 | 3. The equation has at least one real solution if and only if the discriminant is non-negative, so I want to maximize $c$ subject to the constraint that $b^2-4ac\geq 0$.
246 | 4. In this case, $a=2$, $b=5$, and $c$ is the unknown, so I have $5^2-4(2)c\geq 0$.
247 | 5. Simplifying, I get $25-8c\geq 0$.
248 | 6. Adding $8c$ to both sides, I get $25\geq 8c$.
249 | 7. Dividing both sides by $8$, I get $\frac{25}{8}\geq c$.
250 | 8. This means that $c$ can be any number less than or equal to $\frac{25}{8}$, but the largest possible value of $c$ is $\frac{25}{8}$ itself.
251 |
252 | Reasoning Chains: [{'step': 1, 'thought': 'I need to find the largest value of $c$ for which the quadratic equation $2x^2+5x+c=0$ has at least one real solution.'}, {'step': 2, 'thought': "To do this, I'll consider the discriminant of the quadratic equation, which is $b^2-4ac$."}, {'step': 3, 'thought': 'The equation has at least one real solution if and only if the discriminant is non-negative, so I want to maximize $c$ subject to the constraint that $b^2-4ac\\geq 0$.'}, {'step': 4, 'thought': 'In this case, $a=2$, $b=5$, and $c$ is the unknown, so I have $5^2-4(2)c\\geq 0$.'}, {'step': 5, 'thought': 'Simplifying, I get $25-8c\\geq 0$.'}, {'step': 6, 'thought': 'Adding $8c$ to both sides, I get $25\\geq 8c$.'}, {'step': 7, 'thought': 'Dividing both sides by $8$, I get $\\frac{25}{8}\\geq c$.'}, {'step': 8, 'thought': 'This means that $c$ can be any number less than or equal to $\\frac{25}{8}$, but the largest possible value of $c$ is $\\frac{25}{8}$ itself.'}]
253 | """
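# Quick numeric check for Example 9 above (a sketch only; this is not executed
# by the app):
#
#     import math
#     2 * math.exp(2) / (1 + math.exp(4))  # ≈ 0.2658
#
# i.e., dz/dx at x = 0 is 2e^2 / (1 + e^4) ≈ 0.2658.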
254 |
255 | reasoning_prompt = ChatPromptTemplate.from_messages(
256 |     [
257 |         (
258 |             "system",
259 |             """You are Solar, a smart search reasoning and answer engine by Upstage, loved by many people.
260 | 
261 | See the reasoning examples and the context provided for the instruction.
262 | Follow the instruction in the user query and provide the best answer for the query, using reasoning techniques and a step-by-step explanation.
263 | ---
264 | {reasoning_examples}
265 | """,
266 |         ),
267 |         MessagesPlaceholder("chat_history"),
268 |     ]
269 | )
270 |
271 | short_answer_prompt = ChatPromptTemplate.from_messages(
272 |     [
273 |         (
274 |             "system",
275 |             """You are Solar, a smart search engine by Upstage, loved by many people.
276 | 
277 | Write a one-word answer if you can reply with "yes", "no", or another direct answer.
278 | Otherwise, write a short answer of one or two sentences for the query from the given context.
279 | Try to understand the user's intention and provide a quick answer.
280 | If the answer is not in context, please say you don't know and ask to clarify the question.
281 | """,
282 |         ),
283 |         MessagesPlaceholder("chat_history"),
284 |         (
285 |             "human",
286 |             """Query: {user_query}
287 | ----
288 | Context: {context}""",
289 |         ),
290 |     ]
291 | )
292 |
293 | search_prompt = ChatPromptTemplate.from_messages(
294 |     [
295 |         (
296 |             "system",
297 |             """You are Solar, a smart search engine by Upstage, loved by many people.
298 | 
299 | See the original query, context, and quick answer, and then provide a detailed explanation.
300 | 
301 | Try to understand the user's intention and provide the relevant information in detail.
302 | If the answer is not in context, please say you don't know and ask to clarify the question.
303 | Do not repeat the short answer.
304 | 
305 | When you write the explanation, please cite the sources like [1], [2] if possible.
306 | Then, put the cited references, including citation number, title, and URL, at the end of the answer.
307 | Each reference should be on a new line in markdown format like this:
308 | 
309 | [1] Title - URL
310 | [2] Title - URL
311 | ...
312 | """,
313 |         ),
314 |         MessagesPlaceholder("chat_history"),
315 |         (
316 |             "human",
317 |             """Query: {user_query}
318 | ----
319 | Short answer: {short_answer}
320 | ----
321 | Context: {context}""",
322 |         ),
323 |     ]
324 | )
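# How the prompts above are consumed (a rough sketch; the actual wiring lives
# in the functions below, with `llm` initialized earlier in this file):
#
#     reasoning_chain = reasoning_prompt | llm | StrOutputParser()   # perform_task()
#     detail_chain = search_prompt | llm | StrOutputParser()         # get_search_desc()
#     short_chain = short_answer_prompt | llm | StrOutputParser()    # quick one-liner
#
# short_chain would be invoked with {"user_query": ..., "context": ...} plus the
# chat history; the pairing shown here is illustrative, not extra app logic.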
325 |
326 |
327 | query_context_expansion_prompt = """
328 | For a given query and context (if provided), expand it with related questions and search the web for answers.
329 | Try to understand the purpose of the query and expand it with up to three related questions
330 | to provide an answer to the original query.
331 | Note that it's for keyword-based search engines, so the questions should be short and concise.
332 | 
333 | Please write in Python LIST format like this:
334 | ["number of people in France?", "How many people in France?", "France population"]
335 | 
336 | ---
337 | Context: {context}
338 | ----
339 | History: {chat_history}
340 | ---
341 | Original query: {query}
342 | """
343 |
344 |
345 | # Expected shape of the expanded-query output: a JSON list of query strings.
346 | class List(BaseModel):
347 |     queries: list[str]
348 |
349 |
350 | def query_context_expansion(query, chat_history, context=None):
351 |     # Set up a JSON parser for the expanded-query list.
352 |     parser = JsonOutputParser(pydantic_object=List)
353 |
354 |     prompt = PromptTemplate(
355 |         template=query_context_expansion_prompt,
356 |         input_variables=["query", "chat_history", "context"],
357 |     )
358 |
359 |     chain = prompt | llm | parser
360 |     # Invoke the chain, retrying since the model may occasionally return malformed JSON.
361 |
362 |     for attempt in range(3):
363 |         try:
364 |             parsed_output = chain.invoke(
365 |                 {"query": query, "chat_history": chat_history, "context": context}
366 |             )
367 |             return parsed_output
368 |         except Exception as e:
369 |             st.warning(f"Attempt {attempt + 1} failed: {e}. Retrying...")
370 |
371 |     st.error("All attempts failed. Returning empty list.")
372 |     return []
373 |
374 |
375 | def perform_task(chat_history):
376 |     # Keep only the most recent messages that fit within MAX_TOKENS characters.
377 |     limited_history = []
378 |     total_length = 0
379 |     for message in reversed(chat_history):
380 |         message_length = len(message.content)
381 |         if total_length + message_length > MAX_TOKENS:
382 |             st.warning("Chat history is too long. Truncating to fit model input.")
383 |             break
384 |         limited_history.insert(0, message)
385 |         total_length += message_length
386 |
387 |
388 |     chain = reasoning_prompt | llm | StrOutputParser()
389 |
390 |     return chain.stream(
391 |         {
392 |             "chat_history": limited_history,
393 |             "reasoning_examples": reasoning_examples,
394 |         }
395 |     )
396 |
397 |
398 | def get_search_desc(user_query, short_answer, context, chat_history):
399 |     chain = search_prompt | llm | StrOutputParser()
400 |
401 |     return chain.stream(
402 |         {
403 |             "context": context,
404 |             "chat_history": chat_history,
405 |             "user_query": user_query,
406 |             "short_answer": short_answer,
407 |         }
408 |     )
409 |
410 |
411 | def search(query, chat_history, context=None):
412 |     with st.status("Extending query..."):
413 |         q_list = query_context_expansion(query, chat_history, context)
414 |         st.write(q_list)
415 |
416 |     if not q_list:
417 |         st.error("No related questions found. Returning an empty result.")
418 |         return ""
419 |
420 |     # Search each expanded query with DuckDuckGo and concatenate the results
421 |     # (each related question is issued as its own keyword search).
422 |     results = ""
423 |     for q in q_list:
424 |         with st.spinner(f"Searching for '{q}'..."):
425 |             results += ddg_search.invoke(q)
426 |
427 |     return results
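# Usage sketch for the search pipeline (hypothetical query; the chat loop below
# does the same thing):
#
#     q_list = query_context_expansion("population of France", chat_history=[])
#     # e.g. ["France population", "number of people in France", ...]
#     context = "".join(ddg_search.invoke(q) for q in q_list)
#
# search() wraps exactly this flow: expand the query, run each variant through
# DuckDuckGo, and return the concatenated text as context for the LLM.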
428 |
429 |
430 | if "messages" not in st.session_state:
431 |     st.session_state.messages = []
432 |
433 | for message in st.session_state.messages:
434 |     role = "AI" if isinstance(message, AIMessage) else "Human"
435 |     with st.chat_message(role):
436 |         st.markdown(message.content)
437 |
438 | q = "3.9 vs 3.11. Which one is bigger?"
439 |
440 | tasks = ["Reasoning (No conclusion)", "Reasoning Chains", "Final Answer"]
441 |
442 | if prompt := st.chat_input(q):
443 |
444 |     search_result = search(prompt, st.session_state.messages)
445 |
446 |     with st.status("Search Results:"):
447 |         st.write(search_result)
448 |
449 |     if search_result:
450 |         search_result = str(search_result)
451 |         st.session_state.messages.append(
452 |             HumanMessage(
453 |                 content=f"FYI search result context: {search_result} for the query, {prompt}"
454 |             )
455 |         )
456 |         st.session_state.messages.append(
457 |             AIMessage(
458 |                 content="Thanks for the information! I will keep it in mind. Give me the instruction."
459 |             )
460 |         )
461 |
462 |     for task in tasks:
463 |         instruction = f"""Please provide {task} for the given query, context, and chat history.
464 | Please only provide the {task}.
465 | ---
466 | User Query:
467 | {prompt}"""
468 |         st.session_state.messages.append(HumanMessage(content=instruction))
469 |         with st.chat_message("user"):
470 |             st.write(instruction)
471 |         with st.chat_message("assistant"):
472 |             response = st.write_stream(perform_task(st.session_state.messages))
473 |         st.session_state.messages.append(AIMessage(content=response))
474 |
--------------------------------------------------------------------------------