├── .gitignore ├── .env_sample ├── requirements.txt ├── README.md ├── chatopenai.py ├── chatgradio.py ├── chat.py ├── Makefile ├── pdf_util.py ├── chatpdf.py ├── jaichat.py ├── solar-r.py ├── longimg.py ├── docv.py ├── solar_util.py ├── hw.py ├── fc.py ├── chatpdfemb.py ├── chatsearch.py ├── data │   └── target_companies.json ├── namecard.py ├── reasoning.py ├── discussion.py ├── coldmail.py ├── translate.py ├── gemini.py └── llama.py /.gitignore: -------------------------------------------------------------------------------- 1 | .env 2 | .DS_Store 3 | local_cache 4 | .venv 5 | __pycache__/ 6 | chroma_db/ 7 | .streamlit/ -------------------------------------------------------------------------------- /.env_sample: -------------------------------------------------------------------------------- 1 | # Fill in the following variables with your own API keys 2 | # and rename this file to .env 3 | UPSTAGE_API_KEY=up_SqRZcwRz... 4 | TAVILY_API_KEY=tvly-n8wiK... 5 | NEWS_API_KEY=e59ce9fc... 6 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | streamlit>=1.30.0 2 | google-genai>=0.3.0 3 | qrcode[pil] 4 | requests 5 | streamlit-extras 6 | streamlit-paste-button 7 | langchain 8 | faiss-cpu 9 | langchainhub 10 | langchain_upstage 11 | langchain-community 12 | watchdog 13 | openpyxl==3.1.5 14 | tokenizers 15 | pydantic 16 | crawl4ai 17 | tinydb 18 | duckduckgo-search 19 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SolarLLM Chat Demo 2 | ## Streamlit and Gradio Chat Demo 3 | * Chat: https://solar-chat.streamlit.app/ 4 | * Search: https://solar-search.streamlit.app/ 5 | * ChatPDF: https://solar-pdf.streamlit.app/ 6 | * Self-Discussion: https://solar-discussion.streamlit.app/ 7 | * DocVision: https://solar-docv.streamlit.app/ 8 | 9 | * Reasoning: https://toy-o1.streamlit.app/, https://toy-o2.streamlit.app/ 10 | 11 | ## SolarLLM Cookbook 12 | For detailed usage examples, please refer to https://github.com/upstageAI/cookbook/. 
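## Quickstart
A minimal local run, assuming Python 3.10+ and `make` are available (the targets are defined in the `Makefile` below):

```bash
cp .env_sample .env   # then fill in your UPSTAGE_API_KEY
make chat             # creates .venv, installs requirements.txt, and runs chat.py
```

Other targets (`make chatpdf`, `make search`, ...) work the same way.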
13 | -------------------------------------------------------------------------------- /chatopenai.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | from openai import OpenAI 4 | import streamlit as st 5 | 6 | st.title("ChatGPT-like clone") 7 | 8 | client = OpenAI(api_key=st.secrets["UPSTAGE_API_KEY"], base_url="https://api.upstage.ai/v1/solar") 9 | 10 | if "messages" not in st.session_state: 11 | st.session_state.messages = [] 12 | 13 | for message in st.session_state.messages: 14 | with st.chat_message(message["role"]): 15 | st.markdown(message["content"]) 16 | 17 | if prompt := st.chat_input("What is up?"): 18 | st.session_state.messages.append({"role": "user", "content": prompt}) 19 | with st.chat_message("user"): 20 | st.markdown(prompt) 21 | 22 | with st.chat_message("assistant"): 23 | stream = client.chat.completions.create( 24 | model="solar-1-mini-chat", 25 | messages=[ 26 | {"role": m["role"], "content": m["content"]} 27 | for m in st.session_state.messages 28 | ], 29 | stream=True, 30 | ) 31 | response = st.write_stream(stream) 32 | st.session_state.messages.append({"role": "assistant", "content": response}) -------------------------------------------------------------------------------- /chatgradio.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | 3 | from langchain_upstage import ChatUpstage 4 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 5 | from langchain_core.output_parsers import StrOutputParser 6 | from langchain.schema import AIMessage, HumanMessage 7 | 8 | 9 | llm = ChatUpstage(streaming=True) 10 | 11 | # More general chat 12 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 13 | [ 14 | ("system", "You are a helpful assistant."), 15 | MessagesPlaceholder(variable_name="history"), 16 | ("human", "{message}"), 17 | ] 18 | ) 19 | 20 | chain = chat_with_history_prompt | llm | StrOutputParser() 21 | 22 | 23 | def chat(message, history): 24 | history_langchain_format = [] 25 | for human, ai in history: 26 | history_langchain_format.append(HumanMessage(content=human)) 27 | history_langchain_format.append(AIMessage(content=ai)) 28 | 29 | generator = chain.stream({"message": message, "history": history_langchain_format}) 30 | 31 | assistant = "" 32 | for gen in generator: 33 | assistant += gen 34 | yield assistant 35 | 36 | 37 | with gr.Blocks() as demo: 38 | chatbot = gr.ChatInterface( 39 | chat, 40 | examples=[ 41 | "How to eat healthy?", 42 | "Best Places in Korea", 43 | "How to make a chatbot?", 44 | ], 45 | title="Solar Chatbot", 46 | description="Upstage Solar Chatbot", 47 | ) 48 | chatbot.chatbot.height = 300 49 | 50 | if __name__ == "__main__": 51 | demo.launch() 52 | -------------------------------------------------------------------------------- /chat.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 8 | from langchain_core.messages import AIMessage, HumanMessage 9 | 10 | from solar_util import initialize_solar_llm 11 | 12 | from solar_util import prompt_engineering 13 | 14 | llm = 
initialize_solar_llm() 15 | st.set_page_config(page_title="Chat") 16 | st.title("SolarLLM") 17 | 18 | 19 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 20 | [ 21 | ("human", """You are Solar, a smart chatbot by Upstage, loved by many people. 22 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language. 23 | reply in the same language of the user query. 24 | Solar is now being connected with a human. 25 | 26 | Please put in the end of your answer."""), 27 | MessagesPlaceholder("chat_history"), 28 | ("human", "{user_query}"), 29 | ] 30 | ) 31 | 32 | 33 | 34 | def get_response(user_query, chat_history): 35 | chain = chat_with_history_prompt | llm | StrOutputParser() 36 | return chain.stream( 37 | { 38 | "chat_history": chat_history, 39 | "user_query": user_query, 40 | } 41 | ) 42 | 43 | 44 | if "messages" not in st.session_state: 45 | st.session_state.messages = [] 46 | 47 | for message in st.session_state.messages: 48 | role = "AI" if isinstance(message, AIMessage) else "Human" 49 | with st.chat_message(role): 50 | st.markdown(message.content) 51 | 52 | enhance_prompt = st.toggle("Enhance prompt", True) 53 | if prompt := st.chat_input("What is up?"): 54 | if enhance_prompt: 55 | with st.status("Prompt engineering..."): 56 | new_prompt = prompt_engineering(prompt, st.session_state.messages) 57 | st.write(new_prompt) 58 | 59 | if 'enhanced_prompt' in new_prompt: 60 | prompt = new_prompt['enhanced_prompt'] 61 | 62 | st.session_state.messages.append(HumanMessage(content=prompt)) 63 | with st.chat_message("user"): 64 | st.markdown(prompt) 65 | 66 | with st.chat_message("assistant"): 67 | response = st.write_stream(get_response(prompt, st.session_state.messages)) 68 | st.session_state.messages.append(AIMessage(content=response)) 69 | 70 | -------------------------------------------------------------------------------- /Makefile: -------------------------------------------------------------------------------- 1 | # Define the two virtual environments 2 | VENV = .venv 3 | BROWSER_VENV = .venv-browser 4 | PYTHON = $(VENV)/bin/python3 5 | BROWSER_PYTHON = $(BROWSER_VENV)/bin/python3 6 | PIP = $(VENV)/bin/pip3 7 | BROWSER_PIP = $(BROWSER_VENV)/bin/pip3 8 | STREAMLIT = $(VENV)/bin/streamlit 9 | BROWSER_STREAMLIT = $(BROWSER_VENV)/bin/streamlit 10 | GRADIO = $(VENV)/bin/gradio 11 | 12 | # Basic venv without browser dependencies 13 | $(VENV)/bin/activate: requirements.txt 14 | python3 -m venv $(VENV) 15 | $(PIP) install -r requirements.txt 16 | 17 | # Browser-enabled venv with playwright 18 | $(BROWSER_VENV)/bin/activate: requirements.txt 19 | python3 -m venv $(BROWSER_VENV) 20 | $(BROWSER_PIP) install -r requirements.txt 21 | $(BROWSER_PIP) install playwright 22 | $(BROWSER_PYTHON) -m playwright install 23 | 24 | chatopenai: $(VENV)/bin/activate 25 | $(STREAMLIT) run chatopenai.py 26 | 27 | coldmail: $(VENV)/bin/activate 28 | $(STREAMLIT) run coldmail.py 29 | 30 | chat: $(VENV)/bin/activate 31 | $(STREAMLIT) run chat.py 32 | 33 | chatpdf: $(VENV)/bin/activate 34 | $(STREAMLIT) run chatpdf.py 35 | 36 | longimg: $(VENV)/bin/activate 37 | $(STREAMLIT) run longimg.py 38 | 39 | chatpdfemb: $(VENV)/bin/activate 40 | $(STREAMLIT) run chatpdfemb.py 41 | 42 | gemini: $(VENV)/bin/activate 43 | $(STREAMLIT) run gemini.py 44 | 45 | chatgradio: $(VENV)/bin/activate 46 | $(GRADIO) chatgradio.py 47 | 48 | docv: $(VENV)/bin/activate 49 | $(STREAMLIT) run docv.py 50 | 51 | search: $(VENV)/bin/activate 52 | $(STREAMLIT) run chatsearch.py 53 | 54 | reasoning: $(VENV)/bin/activate 
55 | $(STREAMLIT) run reasoning.py 56 | 57 | discussion: $(VENV)/bin/activate 58 | $(STREAMLIT) run discussion.py --server.port 9093 59 | 60 | llama: $(VENV)/bin/activate 61 | $(STREAMLIT) run llama.py 62 | 63 | voice: $(VENV)/bin/activate 64 | $(STREAMLIT) run voice.py 65 | 66 | hw: $(VENV)/bin/activate 67 | $(STREAMLIT) run hw.py 68 | 69 | util: $(VENV)/bin/activate 70 | $(PYTHON) solar_util.py 71 | 72 | podcast: $(VENV)/bin/activate 73 | $(STREAMLIT) run podcast.py 74 | 75 | biz_help: $(VENV)/bin/activate 76 | $(STREAMLIT) run biz_help.py 77 | 78 | info_fill: $(BROWSER_VENV)/bin/activate 79 | $(BROWSER_STREAMLIT) run info_fill.py 80 | 81 | jaisearch: $(BROWSER_VENV)/bin/activate 82 | $(STREAMLIT) run jaisearch.py 83 | 84 | jaichat: $(BROWSER_VENV)/bin/activate 85 | $(STREAMLIT) run jaichat.py 86 | 87 | translate: $(BROWSER_VENV)/bin/activate 88 | $(STREAMLIT) run translate.py 89 | 90 | solar-r: $(BROWSER_VENV)/bin/activate 91 | $(STREAMLIT) run solar-r.py 92 | 93 | fc: $(BROWSER_VENV)/bin/activate 94 | $(STREAMLIT) run fc.py 95 | 96 | namecard: $(VENV)/bin/activate 97 | $(STREAMLIT) run namecard.py 98 | 99 | clean: 100 | rm -rf __pycache__ 101 | rm -rf $(VENV) 102 | rm -rf $(BROWSER_VENV) -------------------------------------------------------------------------------- /pdf_util.py: -------------------------------------------------------------------------------- 1 | import os 2 | import re 3 | from enum import Enum 4 | 5 | 6 | class PDFType(Enum): 7 | SCANNED = "Scanned PDF (lacks embedded fonts and text objects)" 8 | DIGITAL = "Digital PDF (text-based and font information present)" 9 | UNCERTAIN = "Uncertain (No strong indicators found)" 10 | 11 | 12 | def is_ocr_pdf(pdf_path): 13 | """ 14 | Determine if a PDF file is digital–born (text-based) or scanned (mostly images) 15 | without relying on any external PDF libraries. 16 | 17 | The function reads the raw PDF file content and decodes it to a string. 18 | It then uses basic heuristics by searching for: 19 | - Embedded fonts (via the '/Font' keyword) 20 | - Text drawing commands (via the 'BT' operator; PDFs typically use 'BT' ... 'ET' 21 | to delimit text blocks) 22 | - Image objects (via the '/Subtype /Image' declaration) 23 | - Keywords (like 'scan', 'ocr', or 'adobe acrobat') which sometimes appear in 24 | scanned PDFs 25 | 26 | If the PDF contains both font definitions and text commands, and the (roughly) 27 | extracted text (from between BT and ET markers) sums up to a significant length, 28 | it is assumed to be digital–born. If the PDF appears dominated by images or has 29 | keywords suggesting scanning, it is classified as scanned. In other cases, the PDF 30 | is marked as uncertain. 31 | 32 | Args: 33 | pdf_path (str): Path to the PDF file. 34 | 35 | Returns: 36 | PDFType: DIGITAL if the PDF appears text–based; SCANNED if it appears to be a scanned image; 37 | UNCERTAIN if the evidence isn't strong enough. 38 | """ 39 | try: 40 | with open(pdf_path, "rb") as f: 41 | content = f.read().decode("latin-1", errors="replace") 42 | except Exception: 43 | # If the file cannot be read or decoded, we return UNCERTAIN. 44 | return PDFType.UNCERTAIN 45 | 46 | # Count indicators of digital PDFs. 47 | font_count = len(re.findall(r'/Font\b', content)) 48 | text_command_count = len(re.findall(r'\bBT\b', content)) 49 | 50 | # Try to extract content from text objects (between 'BT' and 'ET'). 
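    # For reference, a text block in a digital PDF typically looks like the
    # following (a minimal, hypothetical example of the BT/ET syntax this
    # regex targets):
    #   BT /F1 12 Tf 72 720 Td (Hello, world) Tj ET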
51 | text_segments = re.findall(r'BT\s*(.*?)\s*ET', content, re.DOTALL) 52 | extracted_text = " ".join(text_segments) 53 | extracted_text_length = len(extracted_text.strip()) 54 | 55 | # Count indications of images that might suggest a scan. 56 | image_count = len(re.findall(r'/Subtype\s*/Image', content)) 57 | # Look for scanned–related keywords. 58 | scanned_keyword = bool(re.search(r'(?i)\b(?:scan(?:ned)?|ocr|adobe\s+acrobat)\b', content)) 59 | 60 | # Heuristics: 61 | # 1. A digital PDF will usually have embedded fonts and multiple text drawing commands, 62 | # resulting in a reasonable amount of extracted text. 63 | # 2. A scanned PDF may have few (or zero) font markers and often many image objects. 64 | if font_count > 0 and text_command_count > 0 and extracted_text_length > 100: 65 | return PDFType.DIGITAL 66 | elif image_count > font_count or scanned_keyword: 67 | return PDFType.SCANNED 68 | else: 69 | return PDFType.UNCERTAIN 70 | -------------------------------------------------------------------------------- /chatpdf.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | from langchain_upstage import GroundednessCheck 6 | 7 | from langchain_core.output_parsers import StrOutputParser 8 | from langchain_core.messages import AIMessage, HumanMessage 9 | from langchain_upstage import UpstageLayoutAnalysisLoader 10 | import tempfile, os 11 | 12 | from langchain import hub 13 | 14 | st.title("LangChain ChatDoc") 15 | 16 | llm = Chat() 17 | # https://smith.langchain.com/hub/hunkim/rag-qa-with-history 18 | chat_with_history_prompt = hub.pull("hunkim/rag-qa-with-history") 19 | 20 | groundedness_check = GroundednessCheck() 21 | 22 | 23 | def get_response(user_query, chat_history): 24 | chain = chat_with_history_prompt | llm | StrOutputParser() 25 | 26 | return chain.stream( 27 | { 28 | "chat_history": chat_history, 29 | "question": user_query, 30 | "context": st.session_state.docs, 31 | } 32 | ) 33 | 34 | 35 | if "messages" not in st.session_state: 36 | st.session_state.messages = [] 37 | 38 | if "docs" not in st.session_state: 39 | st.session_state.docs = [] 40 | 41 | with st.sidebar: 42 | st.header(f"Add your documents!") 43 | 44 | uploaded_file = st.file_uploader("Choose your `.pdf` file", type="pdf") 45 | 46 | if uploaded_file and not uploaded_file.name in st.session_state: 47 | with st.status("Processing the data ..."): 48 | with tempfile.TemporaryDirectory() as temp_dir: 49 | file_path = os.path.join(temp_dir, uploaded_file.name) 50 | 51 | with open(file_path, "wb") as f: 52 | f.write(uploaded_file.getvalue()) 53 | 54 | st.write("Indexing your document...") 55 | layzer = UpstageLayoutAnalysisLoader(file_path, split="page") 56 | # For improved memory efficiency, consider using the lazy_load method to load documents page by page. 
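                # A lazy-loading sketch (assumes you still want all pages in memory at the end):
                #   docs = []
                #   for page in layzer.lazy_load():
                #       docs.append(page)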
57 | docs = layzer.load() # or layzer.lazy_load() 58 | st.session_state.docs = docs 59 | st.write(docs) 60 | 61 | # processed 62 | st.session_state[uploaded_file.name] = True 63 | 64 | st.success("Ready to Chat!") 65 | 66 | 67 | for message in st.session_state.messages: 68 | role = "AI" if isinstance(message, AIMessage) else "Human" 69 | with st.chat_message(role): 70 | st.markdown(message.content) 71 | 72 | if prompt := st.chat_input("What is up?", disabled=not st.session_state.docs): 73 | st.session_state.messages.append( 74 | HumanMessage( 75 | content=prompt, 76 | ) 77 | ) 78 | with st.chat_message("user"): 79 | st.markdown(prompt) 80 | 81 | with st.chat_message("assistant"): 82 | with st.status("Getting context..."): 83 | st.write(st.session_state.docs) 84 | response = st.write_stream(get_response(prompt, st.session_state.messages)) 85 | gc_result = groundedness_check.run( 86 | { 87 | "context": f"Context:{st.session_state.docs}\n\nQuestion{prompt}", 88 | "query": response, 89 | } 90 | ) 91 | 92 | if gc_result == "grounded": 93 | gc_mark = "✅" 94 | st.success("✅ Groundedness check passed!") 95 | else: 96 | gc_mark = "❌" 97 | st.error("❌ Groundedness check failed!") 98 | 99 | st.session_state.messages.append( 100 | AIMessage(content=f"{gc_mark} {response}"), 101 | ) 102 | -------------------------------------------------------------------------------- /jaichat.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage 5 | 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder, PromptTemplate 8 | from langchain_core.messages import AIMessage, HumanMessage 9 | 10 | from solar_util import initialize_solar_llm 11 | 12 | from solar_util import prompt_engineering 13 | 14 | import re 15 | import json 16 | 17 | jai = ChatUpstage(model=st.secrets["JAI_MODEL_NAME"], base_url=st.secrets["JAI_BASE_URL"], api_key=st.secrets["JAI_API_KEY"]) 18 | solar_pro = ChatUpstage(model="solar-pro") 19 | 20 | st.set_page_config(page_title="Chat") 21 | st.title("SolarLLM") 22 | 23 | 24 | def is_korean(text): 25 | return re.search(r'[가-힣]', text) is not None 26 | 27 | 28 | def korean_to_thai(text): 29 | translate_prompt = PromptTemplate( 30 | template="""You are a language translator. Translate the following text from Korean to Thai. 31 | Here are some examples: 32 | 33 | Korean: 안녕하세요 34 | {{"translation": "สวัสดีครับ/ค่ะ"}} 35 | 36 | Korean: 감사합니다 37 | {{"translation": "ขอบคุณครับ/ค่ะ"}} 38 | 39 | Korean: 맛있어요 40 | {{"translation": "อร่อยครับ/ค่ะ"}} 41 | 42 | Now translate this: 43 | --- 44 | Korean: {text} 45 | --- 46 | Response format: 47 | {{"translation": "Thai translation here"}}""", 48 | input_variables=["text"] 49 | ) 50 | chain = translate_prompt | jai | StrOutputParser() 51 | result = chain.invoke({"text": text}) 52 | try: 53 | return json.loads(result)["translation"] 54 | except json.JSONDecodeError: 55 | st.error("Failed to parse translation response") 56 | return result 57 | 58 | def thai_to_korean(text): 59 | translate_prompt = PromptTemplate( 60 | template="""You are a language translator. Translate the following text from Thai to Korean. 
61 | Here are some examples: 62 | 63 | Thai: สวัสดีครับ/ค่ะ 64 | {{"translation": "안녕하세요"}} 65 | 66 | Thai: ขอบคุณครับ/ค่ะ 67 | {{"translation": "감사합니다"}} 68 | 69 | Thai: อร่อยครับ/ค่ะ 70 | {{"translation": "맛있어요"}} 71 | 72 | Now translate this: 73 | --- 74 | Thai: {text} 75 | --- 76 | Response format: 77 | {{"translation": "Korean translation here"}}""", 78 | input_variables=["text"] 79 | ) 80 | chain = translate_prompt | solar_pro | StrOutputParser() 81 | result = chain.invoke({"text": text}) 82 | try: 83 | return json.loads(result)["translation"] 84 | except json.JSONDecodeError: 85 | st.error("Failed to parse translation response") 86 | return result 87 | 88 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 89 | [ 90 | ("human", """You are Solar, a smart chatbot by Upstage, loved by many people. 91 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language. 92 | reply in the same language of the user query. 93 | Solar is now being connected with a human."""), 94 | MessagesPlaceholder("chat_history"), 95 | ("human", "{user_query}"), 96 | ] 97 | ) 98 | 99 | 100 | 101 | def get_response(user_query, chat_history): 102 | chain = chat_with_history_prompt | jai | StrOutputParser() 103 | 104 | return chain.stream( 105 | { 106 | "chat_history": chat_history, 107 | "user_query": user_query, 108 | } 109 | ) 110 | 111 | 112 | if "messages" not in st.session_state: 113 | st.session_state.messages = [] 114 | 115 | for message in st.session_state.messages: 116 | role = "AI" if isinstance(message, AIMessage) else "Human" 117 | with st.chat_message(role): 118 | st.markdown(message.content) 119 | 120 | enhance_prompt = st.toggle("Enhance prompt", False) 121 | if prompt := st.chat_input("What is up?"): 122 | # Check if input is Korean and translate if needed 123 | if is_korean(prompt): 124 | with st.status("Translating Korean to Thai..."): 125 | prompt = korean_to_thai(prompt) 126 | st.write(f"Translated to Thai: {prompt}") 127 | 128 | if enhance_prompt: 129 | with st.status("Prompt engineering..."): 130 | new_prompt = prompt_engineering(prompt, st.session_state.messages) 131 | st.write(new_prompt) 132 | 133 | if 'enhanced_prompt' in new_prompt: 134 | prompt = new_prompt['enhanced_prompt'] 135 | 136 | with st.chat_message("user"): 137 | st.markdown(prompt) 138 | 139 | with st.chat_message("assistant"): 140 | response = st.write_stream(get_response(prompt, st.session_state.messages)) 141 | korean_response = thai_to_korean(response) 142 | st.write(korean_response) 143 | 144 | 145 | -------------------------------------------------------------------------------- /solar-r.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage 5 | from openai import OpenAI 6 | 7 | from langchain_core.output_parsers import StrOutputParser 8 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 9 | from langchain_core.messages import AIMessage, HumanMessage 10 | 11 | 12 | solar_mini = ChatUpstage(model="solar-mini") 13 | deepseek_r = ChatUpstage( 14 | model="deepseek-reasoner", 15 | base_url="https://api.deepseek.com/v1", 16 | api_key=st.secrets["DEEPSEEK_API_KEY"], 17 | max_tokens=100, 18 | ) 19 | 20 | deepseek_r = OpenAI(api_key=st.secrets["DEEPSEEK_API_KEY"], base_url="https://api.deepseek.com/v1") 21 | 22 | st.set_page_config(page_title="Solar-Online-R", layout="wide") 23 
| st.title("Solar-Online-R") 24 | st.caption("Deepseek-R enhanced Solar-mini: Combining Deepseek's reasoning with Solar's fast inference (Online Distillation)") 25 | 26 | 27 | 28 | def get_reasoning(user_query, chat_history, model="deepseek-reasoner"): 29 | # Convert chat history to OpenAI format 30 | messages = [ 31 | { 32 | "role": "system", 33 | "content": """You are Solar, a smart chatbot by Upstage, loved by many people. 34 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language. 35 | reply in the same language of the user query. 36 | 37 | You will receive input in the following format: 38 | detailed analysis or reasoning about the query 39 | the actual user question 40 | 41 | Use the reasoning provided to give a more informed and thoughtful response to the user query. 42 | Focus on incorporating insights from the reasoning while maintaining a natural, conversational tone. 43 | Solar is now being connected with a human.""" 44 | } 45 | ] 46 | 47 | # Add chat history 48 | for message in chat_history: 49 | role = "assistant" if isinstance(message, AIMessage) else "user" 50 | messages.append({"role": role, "content": message.content}) 51 | 52 | # Add current query 53 | messages.append({"role": "user", "content": user_query}) 54 | 55 | response = deepseek_r.chat.completions.create( 56 | model=model, 57 | messages=messages, 58 | max_tokens=1 59 | ) 60 | if response.choices[0].message.reasoning_content: 61 | return response.choices[0].message.reasoning_content 62 | else: 63 | return response.choices[0].message.content 64 | 65 | 66 | def get_response(user_query, chat_history, llm=solar_mini): 67 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 68 | [ 69 | ( 70 | "system", 71 | """You are Solar, a smart chatbot by Upstage, loved by many people. 72 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language. 73 | reply in the same language of the user query. 
74 | Solar is now being connected with a human.""", 75 | ), 76 | MessagesPlaceholder("chat_history"), 77 | ( 78 | "human", 79 | "{user_query}", 80 | ), 81 | ] 82 | ) 83 | 84 | chain = chat_with_history_prompt | llm | StrOutputParser() 85 | return chain.stream( 86 | { 87 | "chat_history": chat_history, 88 | "user_query": user_query, 89 | } 90 | ) 91 | 92 | 93 | if "messages" not in st.session_state: 94 | st.session_state.messages = [] 95 | 96 | for message in st.session_state.messages: 97 | role = "AI" if isinstance(message, AIMessage) else "Human" 98 | with st.chat_message(role): 99 | if role == "Human" and "" in message.content and "" in message.content: 100 | reasoning = message.content.split("")[1].split("")[0].strip() 101 | user_query = message.content.split("")[1].split("")[0].strip() 102 | with st.expander("Show reasoning"): 103 | st.markdown(reasoning) 104 | st.markdown(user_query) 105 | else: 106 | st.markdown(message.content) 107 | 108 | if prompt := st.chat_input("What is up?"): 109 | with st.chat_message("user"): 110 | st.markdown(prompt) 111 | 112 | with st.chat_message("assistant"): 113 | with st.status("Reasoning..."): 114 | reasoning = get_reasoning(prompt, st.session_state.messages) 115 | st.write(reasoning) 116 | prompt = f"""{reasoning} 117 | 118 | {prompt}""" 119 | response = st.write_stream(get_response(prompt, st.session_state.messages)) 120 | 121 | st.session_state.messages.append(HumanMessage(content=prompt)) 122 | st.session_state.messages.append(AIMessage(content=response)) 123 | -------------------------------------------------------------------------------- /longimg.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.messages import AIMessage, HumanMessage 8 | from langchain_upstage import UpstageDocumentParseLoader 9 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 10 | 11 | import tempfile, os 12 | 13 | from PIL import Image 14 | import math 15 | 16 | if 'all_doc_contents' not in st.session_state: 17 | st.session_state.all_doc_contents = None 18 | 19 | if 'basic_prompt' not in st.session_state: 20 | st.session_state.basic_prompt = """You are processing text extracted from a long image that was split into overlapping sections. Your task is to: 21 | 22 | 1. Analyze multiple sections of text that have ~50% overlap with adjacent sections 23 | 2. Identify and remove redundant content from the overlapping areas 24 | 3. Maintain the correct sequence and flow of the text 25 | 4. Preserve all unique information 26 | 5. Ensure proper paragraph breaks and formatting 27 | 6. Return a single, coherent document that reads naturally 28 | 7. Please keep the original text and do not revise or translate it. 29 | 30 | The following sections contain the extracted text, with overlapping content between them. Please combine them into one complete, non-redundant text while maintaining the original text and flow.""" 31 | 32 | 33 | def split_images(img_file_path, temp_dir): 34 | """ 35 | Split a long image into overlapping square sections. 36 | Returns a list of paths to the split image sections. 
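
    For example, a 1000x5000 px image yields square sections of height 1000
    with a 500 px overlap: ceil((5000-500)/(1000-500)) = 9 sections.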
37 | """ 38 | img = Image.open(img_file_path) 39 | width, height = img.size 40 | 41 | # Make sections square using the width as the height 42 | section_height = width 43 | overlap = section_height // 2 44 | 45 | # Calculate number of sections needed (accounting for overlap) 46 | num_sections = math.ceil((height - overlap) / (section_height - overlap)) 47 | 48 | # Create directory for split images 49 | split_dir = os.path.join(temp_dir, "splits") 50 | os.makedirs(split_dir, exist_ok=True) 51 | 52 | split_image_paths = [] 53 | for i in range(num_sections): 54 | # Calculate section boundaries 55 | top = i * (section_height - overlap) 56 | bottom = min(top + section_height, height) 57 | 58 | # Adjust last section to include remaining pixels 59 | if i == num_sections - 1: 60 | top = height - section_height 61 | 62 | # Crop and save section 63 | section = img.crop((0, top, width, bottom)) 64 | section_path = os.path.join(split_dir, f"section_{i}.png") 65 | section.save(section_path) 66 | split_image_paths.append(section_path) 67 | 68 | return split_image_paths 69 | 70 | def img_to_doc_content(img_file_path): 71 | """Extract text content from an image using Upstage document parser.""" 72 | dp = UpstageDocumentParseLoader(img_file_path, split="page") 73 | docs = dp.load() 74 | return "\n".join([doc.page_content for doc in docs]) 75 | 76 | def combine_doc_contents(all_doc_contents, llm): 77 | """Combine multiple text sections into a coherent document using LLM.""" 78 | eval_prompt = ChatPromptTemplate.from_messages([ 79 | ("human", "{basic_prompt}"), 80 | ("human", "{all_doc_contents}"), 81 | ]) 82 | llm_chain = eval_prompt | llm | StrOutputParser() 83 | return llm_chain.stream({ 84 | "basic_prompt": st.session_state.basic_prompt, 85 | "all_doc_contents": all_doc_contents 86 | }) 87 | 88 | 89 | st.title("Solar Long Image") 90 | st.markdown(""" 91 | This app processes long images by: 92 | 1. Splitting them into overlapping sections 93 | 2. Extracting text from each section 94 | 3. Intelligently combining the text to remove duplicates 95 | 4. Producing a single coherent document 96 | 97 | Upload your image below to get started. 
98 | """) 99 | 100 | llm = Chat(model="solar-pro") 101 | 102 | 103 | uploaded_file = st.file_uploader("Choose your long image file", type=["png", "jpg", "jpeg", "tiff", "bmp", "webp"]) 104 | 105 | if uploaded_file and uploaded_file.name: 106 | with tempfile.TemporaryDirectory() as temp_dir: 107 | file_path = os.path.join(temp_dir, uploaded_file.name) 108 | 109 | with open(file_path, "wb") as f: 110 | f.write(uploaded_file.getvalue()) 111 | 112 | # Process image in steps with status indicators 113 | with st.status("Splitting the image..."): 114 | split_img_paths = split_images(file_path, temp_dir) 115 | st.write(f"Split into {len(split_img_paths)} sections") 116 | 117 | # Process each section 118 | all_doc_contents = [] 119 | for i, path in enumerate(split_img_paths): 120 | with st.status(f"Processing section {i+1}/{len(split_img_paths)}..."): 121 | st.image(path) 122 | doc_content = img_to_doc_content(path) 123 | st.write(doc_content) 124 | all_doc_contents.append(doc_content) 125 | 126 | # Combine all sections 127 | with st.status("Combining sections...", expanded=True): 128 | st.session_state.all_doc_contents = all_doc_contents 129 | combined_doc_content = combine_doc_contents(all_doc_contents, llm) 130 | st.write_stream(combined_doc_content) 131 | 132 | # Cleanup split images 133 | for path in split_img_paths: 134 | os.remove(path) 135 | 136 | 137 | -------------------------------------------------------------------------------- /docv.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | from langchain_upstage import UpstageLayoutAnalysisLoader 6 | 7 | 8 | from langchain_core.output_parsers import StrOutputParser 9 | from langchain_core.prompts import ChatPromptTemplate 10 | from langchain_core.messages import AIMessage, HumanMessage 11 | from streamlit_paste_button import paste_image_button as pbutton 12 | 13 | import base64 14 | import io 15 | import tempfile 16 | 17 | 18 | DOCV_MODEL_NAME = st.secrets["DOCV_MODEL_NAME"] 19 | docv = Chat(model=DOCV_MODEL_NAME) 20 | 21 | 22 | MODEL_NAME = st.secrets["MODEL_NAME"] 23 | solar_pro = Chat(model=MODEL_NAME) 24 | 25 | chat_with_history_prompt = ChatPromptTemplate.from_template( 26 | """ 27 | You are a helpful assistant. 28 | Answer the following questions considering the history of the conversation. 29 | Think step by step and to explain your thought process as you answer. 30 | ---- 31 | Chat history: {chat_history} 32 | ---- 33 | Image context in HTML from OCR: {image_context} 34 | ---- 35 | User question: {user_query} 36 | """ 37 | ) 38 | 39 | 40 | def get_img_context(img_bytes): 41 | image_context = "" 42 | if img_bytes: 43 | with tempfile.NamedTemporaryFile(delete=True) as f: 44 | f.write(img_bytes) 45 | image_path = f.name 46 | 47 | layzer = UpstageLayoutAnalysisLoader(image_path, split="page") 48 | # For improved memory efficiency, consider using the lazy_load method to load documents page by page. 
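        # Note: lazy_load() saves memory only if pages are consumed one at a time,
        # e.g. (sketch): image_context = [page.page_content for page in layzer.lazy_load()]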
49 | docs = layzer.load() # or layzer.lazy_load() 50 | image_context = [doc.page_content for doc in docs] 51 | 52 | return image_context 53 | 54 | 55 | def get_solar_pro_response(user_query, chat_history, image_context: str = None): 56 | chain = chat_with_history_prompt | solar_pro | StrOutputParser() 57 | 58 | return chain.stream( 59 | { 60 | "chat_history": chat_history, 61 | "image_context": image_context, 62 | "user_query": user_query, 63 | } 64 | ) 65 | 66 | 67 | def write_docv_response_stream(human_message): 68 | chain = docv | StrOutputParser() 69 | response = st.write_stream( 70 | chain.stream(st.session_state.messages + [human_message]) 71 | ) 72 | return response 73 | 74 | 75 | def get_human_message(text_data, image_data=None): 76 | if not image_data: 77 | return HumanMessage(content=text_data) 78 | 79 | return HumanMessage( 80 | content=[ 81 | {"type": "text", "text": f"{text_data}"}, 82 | { 83 | "type": "image_url", 84 | "image_url": {"url": f"data:image/jpeg;base64,{image_data}"}, 85 | }, 86 | ], 87 | ) 88 | 89 | 90 | def get_human_message_img_url(text_data, image_url=None): 91 | if not image_url: 92 | return HumanMessage(content=text_data) 93 | 94 | return HumanMessage( 95 | content=[ 96 | {"type": "text", "text": f"{text_data}"}, 97 | { 98 | "type": "image_url", 99 | "image_url": {"url": f"{image_url}"}, 100 | }, 101 | ], 102 | ) 103 | 104 | 105 | if "messages" not in st.session_state: 106 | st.session_state.messages = [] 107 | 108 | 109 | for message in st.session_state.messages: 110 | role = "AI" if isinstance(message, AIMessage) else "Human" 111 | with st.chat_message(role): 112 | if len(message.content) == 2: 113 | st.markdown(message.content[0]["text"]) 114 | else: 115 | st.markdown(message.content) 116 | 117 | 118 | img_file_buffer = st.file_uploader("Upload a image image", type=["png", "jpg", "jpeg"]) 119 | img_bytes = None 120 | if img_file_buffer: 121 | # reset history 122 | st.session_state.messages = [] 123 | st.image(img_file_buffer) 124 | img_bytes = img_file_buffer.read() 125 | 126 | paste_result = pbutton("📋 Paste an image") 127 | if paste_result.image_data is not None: 128 | # reset history 129 | st.session_state.messages = [] 130 | st.write("Pasted image:") 131 | st.image(paste_result.image_data) 132 | img_bytes = io.BytesIO() 133 | paste_result.image_data.save(img_bytes, format="PNG") 134 | img_bytes = img_bytes.getvalue() # Image as bytes 135 | 136 | if prompt := st.chat_input("What is up?"): 137 | human_message = get_human_message(prompt) 138 | if img_bytes: 139 | # remove the image from the buffer 140 | for message in st.session_state.messages: 141 | if isinstance(message, HumanMessage): 142 | if len(message.content) == 2: 143 | if message.content[1]["type"] == "image_url": 144 | st.session_state.messages.remove(message) 145 | break 146 | 147 | img_base64 = base64.b64encode(img_bytes).decode("utf-8") 148 | human_message = get_human_message(prompt, img_base64) 149 | img_file_buffer = None 150 | 151 | with st.chat_message("user"): 152 | st.markdown(prompt) 153 | 154 | with st.chat_message("assistant"): 155 | st.markdown("**Model1:**") 156 | response = write_docv_response_stream(human_message) 157 | 158 | st.markdown("**Model2:**") 159 | img_context = get_img_context(img_bytes) 160 | st.json(img_context, expanded=False) 161 | response2 = st.write_stream( 162 | get_solar_pro_response(prompt, st.session_state.messages, img_context) 163 | ) 164 | 165 | st.session_state.messages.append(human_message) 166 | 
st.session_state.messages.append(AIMessage(content=response)) 167 | -------------------------------------------------------------------------------- /solar_util.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from langchain_upstage import ChatUpstage 3 | 4 | from langchain_core.output_parsers import JsonOutputParser 5 | from langchain_core.prompts import PromptTemplate 6 | from pydantic import BaseModel, Field 7 | 8 | from tokenizers import Tokenizer 9 | 10 | 11 | solar_tokenizer = Tokenizer.from_pretrained("upstage/solar-pro-preview-tokenizer") 12 | 13 | 14 | def truncate_to_token_limit(text: str, max_tokens: int = 15000) -> str: 15 | """ 16 | Truncate text to fit within max token limit using Solar tokenizer. 17 | """ 18 | tokenizer = Tokenizer.from_pretrained("upstage/solar-pro-tokenizer") 19 | encoded = tokenizer.encode(text) 20 | 21 | if len(encoded.ids) <= max_tokens: 22 | return text 23 | 24 | print(f"Truncating text from {len(encoded.ids)} tokens to {max_tokens} tokens") 25 | 26 | # Find the last period within the token limit to avoid cutting mid-sentence 27 | truncated_ids = encoded.ids[:max_tokens] 28 | truncated_text = tokenizer.decode(truncated_ids) 29 | 30 | # Try to find the last complete sentence 31 | last_period = truncated_text.rfind('.') 32 | if last_period > 0: 33 | truncated_text = truncated_text[:last_period + 1] 34 | 35 | return truncated_text 36 | 37 | 38 | def initialize_solar_llm(MODEL_NAME=None): 39 | if MODEL_NAME is None: 40 | MODEL_NAME = st.secrets.get("SOLAR_MODEL_NAME", "solar-pro") 41 | 42 | # Initialize llm with default values 43 | llm_kwargs = {"model": MODEL_NAME} 44 | 45 | # Add base_url if it's set in secrets 46 | if "SOLAR_BASE_URL" in st.secrets: 47 | llm_kwargs["base_url"] = st.secrets["SOLAR_BASE_URL"] 48 | 49 | # Add api_key if it's set in secrets 50 | if "SOLAR_API_KEY" in st.secrets: 51 | llm_kwargs["api_key"] = st.secrets["SOLAR_API_KEY"] 52 | 53 | return ChatUpstage(**llm_kwargs) 54 | 55 | 56 | # Define your desired data structure. 57 | # {"original_prompt": "original prompt", "enhanced_prompt": "enhanced prompt", "techniques": "technique"} 58 | # Define your desired data structure. 59 | class PromptEngineering(BaseModel): 60 | original_prompt: str = Field(description="original prompt") 61 | enhanced_prompt: str = Field( 62 | description="enhanced prompt after applying prompt engineering techniques" 63 | ) 64 | techniques: str = Field( 65 | description="prompt engineering technique used to enhance the prompt" 66 | ) 67 | 68 | 69 | parser = JsonOutputParser(pydantic_object=PromptEngineering) 70 | 71 | prompt = """Use these prompt engineering technique and enhance user prompt to generate more effective prompt. 72 | Consider the chat history for context if available. 73 | Please write the promt in Korean. 74 | ---- 75 | Chat History: 76 | {chat_history} 77 | ---- 78 | Output should be in json format: 79 | \n{format_instructions} 80 | ---- 81 | prompt engineering techniques: 82 | 83 | Chain of Thought (CoT): This technique encourages the model to think aloud, showing the steps it takes to reach a conclusion. Example: "Imagine you're a detective solving a mystery. Describe your thought process as you deduce who the culprit is in this scenario: [insert scenario]." 84 | 85 | Chain of Cause (CoC): This technique focuses on identifying and explaining the causes and effects in a situation. Example: "You're a historian analyzing a historical event. 
Describe the chain of causes that led to this event: [insert event]." 86 | 87 | Program-Aided Language Models (PAL): This technique involves providing a simple program or pseudo-code to guide the model's response. Example: "Write a Python function to calculate the factorial of a number. Then, use this function to find the factorial of 5." 88 | 89 | Tree of Thoughts (ToT): This technique visualizes the thought process as a tree, with branches representing different ideas or possibilities. Example: "You're a marketing strategist brainstorming ideas for a new campaign. Present your ideas as a tree of thoughts, with the main idea at the root and branches representing sub-ideas." 90 | 91 | Least-to-Most: This technique starts with the simplest or most basic explanation and gradually increases complexity. Example: "Explain the concept of machine learning, starting from the most basic definition and gradually adding more details and complexities." 92 | 93 | Self-Consistency: This technique encourages the model to ensure its responses are consistent with previous statements or information. Example: "You're a character in a story. Ensure all your responses are consistent with the character's background and previous statements." 94 | ---- 95 | originalprompt: {original_prompt} 96 | ---- 97 | 98 | """ 99 | 100 | prompt = PromptTemplate( 101 | template=prompt, 102 | input_variables=["original_prompt", "chat_history"], 103 | partial_variables={"format_instructions": parser.get_format_instructions()}, 104 | ) 105 | 106 | 107 | 108 | def prompt_engineering(original_prompt, chat_history=None, llm=None): 109 | if llm is None: 110 | llm = initialize_solar_llm() 111 | chain = prompt | llm | parser 112 | 113 | # Invoke the chain with the joke_query. 114 | parsed_output = chain.invoke( 115 | {"original_prompt": original_prompt, "chat_history": chat_history} 116 | ) 117 | 118 | return parsed_output 119 | 120 | 121 | def result_reference_summary(results): 122 | results.reverse() 123 | result_summary = "" 124 | for i, r in enumerate(results): 125 | result_summary += f"[{i+1}] {r['title']} - URL: {r['url']}\n{r['content']}\n\n" 126 | 127 | return result_summary 128 | 129 | 130 | def num_of_tokens(text): 131 | return len(solar_tokenizer.encode(text).ids) 132 | 133 | 134 | if __name__ == "__main__": 135 | print(num_of_tokens("Hello, world!")) 136 | -------------------------------------------------------------------------------- /hw.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ( 5 | UpstageLayoutAnalysisLoader, 6 | UpstageGroundednessCheck, 7 | ChatUpstage, 8 | ) 9 | from langchain_core.output_parsers import StrOutputParser 10 | from langchain.prompts import ChatPromptTemplate 11 | 12 | from openpyxl import Workbook 13 | 14 | import io 15 | import os 16 | import re 17 | import tempfile 18 | import unicodedata 19 | 20 | 21 | if "processed_files" not in st.session_state: 22 | st.session_state.processed_files = set() 23 | if "students_data" not in st.session_state: 24 | st.session_state.students_data = [] 25 | 26 | 27 | st.title("Solar HW Grader") 28 | st.write( 29 | "This is Solar SNU HW grader demo. Get your KEY at https://console.upstage.ai/" 30 | ) 31 | 32 | llm = ChatUpstage(model="solar-pro") 33 | 34 | hw_prompt = ChatPromptTemplate.from_messages( 35 | [ 36 | ( 37 | "system", 38 | """You are Prof. 
Solar, very nice and smart, loved by many people. 39 | """, 40 | ), 41 | ( 42 | "human", 43 | """For given report, please provide score 1-5 and quick summary of the report and explain your score and provide advice. Format your response as follows: 44 | Score: [score] 45 | Summary: [summary] 46 | Explanation: [explanation] 47 | Advice: [advice] 48 | --- 49 | Student report: {student_report}, 50 | """, 51 | ), 52 | ] 53 | ) 54 | 55 | groundedness_check = UpstageGroundednessCheck() 56 | 57 | 58 | def get_response(retrieved_docs): 59 | chain = hw_prompt | llm | StrOutputParser() 60 | 61 | return chain.stream( 62 | { 63 | "student_report": retrieved_docs, 64 | } 65 | ) 66 | 67 | 68 | def create_excel_grade(students_data): 69 | wb = Workbook() 70 | ws = wb.active 71 | ws.title = "Grades" 72 | 73 | ws["A1"] = "File Name" 74 | ws["B1"] = "Score" 75 | ws["C1"] = "Feedback" 76 | 77 | for row, (name, score, feedback) in enumerate(students_data, start=2): 78 | # Normalize the Korean name to composed form 79 | normalized_name = unicodedata.normalize("NFC", name) 80 | ws[f"A{row}"] = normalized_name 81 | ws[f"B{row}"] = score 82 | ws[f"C{row}"] = feedback 83 | 84 | return wb 85 | 86 | 87 | def process_pdf_file(file_path): 88 | with st.status(f"Document Parsing {file_path}..."): 89 | layzer = UpstageLayoutAnalysisLoader(file_path, split="page") 90 | # For improved memory efficiency, consider using the lazy_load method to load documents page by page. 91 | docs = layzer.load() # or layzer.lazy_load() 92 | 93 | with st.chat_message("user"): 94 | st.markdown(f"Grading {file_path}") 95 | 96 | file_name = os.path.basename(file_path) 97 | student_name_match = re.search(r"^(.*?)(?=\d)", file_name, re.UNICODE) 98 | student_name = ( 99 | student_name_match.group(1).strip() if student_name_match else "Unknown" 100 | ) 101 | 102 | with st.chat_message("assistant"): 103 | full_response = "" 104 | response_placeholder = st.empty() 105 | for chunk in get_response(docs): 106 | full_response += chunk 107 | response_placeholder.markdown(full_response) 108 | 109 | score_match = re.search(r"Score: (\d+)", full_response) 110 | score = score_match.group(1) if score_match else "N/A" 111 | 112 | return student_name, score, full_response 113 | 114 | 115 | uploaded_files = st.file_uploader( 116 | "Choose your `.pdf` file", type=["pdf"], accept_multiple_files=True 117 | ) 118 | 119 | if type(uploaded_files) is not list: 120 | uploaded_files = [uploaded_files] 121 | 122 | MAX_RETRIES = 4 123 | for uploaded_file in uploaded_files: 124 | if uploaded_file and uploaded_file.name not in st.session_state.processed_files: 125 | with tempfile.TemporaryDirectory() as temp_dir: 126 | file_path = os.path.join(temp_dir, uploaded_file.name) 127 | 128 | with open(file_path, "wb") as f: 129 | f.write(uploaded_file.getvalue()) 130 | 131 | if uploaded_file.name.endswith(".pdf"): 132 | retries = 0 133 | while True: 134 | try: 135 | student_name, score, feedback = process_pdf_file(file_path) 136 | st.session_state.students_data.append( 137 | (student_name, score, feedback) 138 | ) 139 | st.session_state.processed_files.add(uploaded_file.name) 140 | break 141 | except Exception as e: 142 | retries += 1 143 | if retries > MAX_RETRIES: 144 | st.error(f"Failed to process {uploaded_file.name}: {e}") 145 | break 146 | 147 | 148 | if st.session_state.students_data: 149 | wb = create_excel_grade(st.session_state.students_data) 150 | 151 | excel_buffer = io.BytesIO() 152 | wb.save(excel_buffer) 153 | excel_buffer.seek(0) 154 | 155 | st.download_button( 156 | 
label="Download Excel Grades", 157 | data=excel_buffer, 158 | file_name="grades.xlsx", 159 | mime="application/vnd.openxmlformats-officedocument.spreadsheetml.sheet", 160 | ) 161 | 162 | if st.session_state.processed_files: 163 | st.write("Processed files:") 164 | for file in st.session_state.processed_files: 165 | st.text(file) 166 | 167 | if st.session_state.students_data: 168 | st.write("Current Grades:") 169 | for name, score, _ in st.session_state.students_data: 170 | st.text(f"{name}: {score}") 171 | 172 | # Add a button to clear the session state 173 | if st.button("Clear All Data"): 174 | st.session_state.processed_files.clear() 175 | st.session_state.students_data.clear() 176 | -------------------------------------------------------------------------------- /fc.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import os 3 | from typing import Dict, List, Any 4 | import json 5 | from langchain_upstage import ChatUpstage 6 | from langchain_core.output_parsers import StrOutputParser 7 | from datetime import datetime 8 | 9 | def get_fc(claim: str): 10 | """Process a claim using the fact-checking model and return results.""" 11 | # Initialize the model 12 | fc = ChatUpstage( 13 | model="solar-google-fc", 14 | api_key=st.secrets["UPSTAGE_API_KEY"], 15 | base_url="https://fc.toy.x.upstage.ai/", 16 | model_kwargs={"stream": True}, 17 | ) 18 | 19 | results = [] 20 | chain = fc | StrOutputParser() 21 | claim_count = 0 22 | 23 | # Create placeholder for claims list 24 | claims_placeholder = st.empty() 25 | results_container = st.container() 26 | 27 | # Stream and accumulate responses 28 | for idx, chunk in enumerate(chain.stream(claim)): 29 | if not chunk: 30 | continue 31 | 32 | try: 33 | json_chunk = json.loads(chunk) 34 | results.append(json_chunk) 35 | 36 | # Handle claims list 37 | if 'claims' in json_chunk: 38 | claim_count = len(json_chunk['claims']) 39 | claims_placeholder.markdown("### Claims to be verified:") 40 | claims_text = "\n".join([f"{i+1}. {c}" for i, c in enumerate(json_chunk['claims'])]) 41 | claims_placeholder.markdown(claims_text) 42 | 43 | # Handle verdict display 44 | if 'verdict' in json_chunk: 45 | with results_container: 46 | verdict_class = get_verdict_class(json_chunk.get('verdict', '')) 47 | 48 | st.markdown(f""" 49 |
                        <div class="claim-card {verdict_class}">
                            <div class="claim-text">[{idx}/{claim_count}] {json_chunk.get('claim', '')}</div>
                            <div class="claim-verdict">{display_verdict(json_chunk.get('verdict', ''))}</div>
                            <div class="claim-explanation">Analysis: {json_chunk.get('explanation', '')}</div>
                        </div>
56 | """, unsafe_allow_html=True) 57 | 58 | if json_chunk.get('sources'): 59 | display_sources(json_chunk['sources']) 60 | except json.JSONDecodeError: 61 | st.error(f"Error parsing JSON from chunk: {chunk}") 62 | continue 63 | 64 | return results 65 | 66 | def get_verdict_class(verdict: str) -> str: 67 | """Return the CSS class based on verdict.""" 68 | verdict_map = { 69 | "TRUE": "claim-true", 70 | "FALSE": "claim-false" 71 | } 72 | return verdict_map.get(verdict, "claim-uncertain") 73 | 74 | def display_verdict(verdict: str) -> str: 75 | """Return formatted verdict text.""" 76 | verdict_map = { 77 | "TRUE": "✅ VERIFIED", 78 | "FALSE": "❌ FALSE" 79 | } 80 | return verdict_map.get(verdict, "⚠️ UNCERTAIN") 81 | 82 | def display_sources(sources: List[Dict[str, str]]) -> None: 83 | """Display sources in an expander.""" 84 | with st.expander("📚 View Sources", expanded=False): 85 | for source in sources: 86 | st.markdown(f""" 87 |
88 |

{source.get('title', 'Source')}

89 |
90 | {source.get('snippet', 'No snippet available')} 91 |
92 |
93 | """, unsafe_allow_html=True) 94 | 95 | def main(): 96 | """Main function to run the Streamlit app""" 97 | st.set_page_config( 98 | page_title="Fact Checker", 99 | layout="wide", 100 | initial_sidebar_state="collapsed" 101 | ) 102 | 103 | # Custom CSS 104 | st.markdown(""" 105 | 130 | """, unsafe_allow_html=True) 131 | 132 | st.title("✓ Solar-Google Fact Checker") 133 | st.markdown(""" 134 |

    <div class="description">
        Enter a statement to verify its accuracy. Our AI-powered system will analyze and fact-check each claim.
    </div>

137 | """, unsafe_allow_html=True) 138 | 139 | with st.container(): 140 | # Powered by Upstage AI 141 | st.code(""" 142 | # Powered by Upstage AI 143 | from langchain_upstage import ChatUpstage 144 | fc = ChatUpstage( 145 | model="solar-google-fc", 146 | api_key=st.secrets["UPSTAGE_API_KEY"], # Get your API key from https://console.upstage.ai/ 147 | base_url="https://fc.toy.x.upstage.ai/", 148 | ) 149 | 150 | result = fc.invoke(claim) 151 | """, language="python") 152 | 153 | st.warning( 154 | "**Disclaimer**: This is an experimental tool and results may not be 100% accurate. " 155 | "Please verify the information independently and use the provided sources to draw your own conclusions.", 156 | icon="⚠️" 157 | ) 158 | 159 | # Input area with a check button 160 | col1, col2 = st.columns([4, 1]) 161 | with col1: 162 | claim = st.text_area( 163 | "Enter your statement:", 164 | value="Upstage AI is founded in 2022 and it's CEO is Sung Kim and CTO is Elon Musk", 165 | key="claim_input", 166 | placeholder="Enter a statement to fact-check...", 167 | height=100 168 | ) 169 | with col2: 170 | check_button = st.button("🔍 Verify Facts", type="primary", use_container_width=True) 171 | 172 | if check_button and claim: 173 | with st.spinner("🔄 Analyzing statement... Please allow a few moments while we search and verify the information"): 174 | try: 175 | result = get_fc(claim) 176 | 177 | # Let's show it's done 178 | st.success("🔍 Analysis complete!") 179 | # Show raw JSON in a collapsible section 180 | with st.expander("🔍 View Raw Response", expanded=False): 181 | st.json(result) 182 | except Exception as e: 183 | st.error(f"An error occurred: {str(e)}") 184 | 185 | if __name__ == "__main__": 186 | main() -------------------------------------------------------------------------------- /chatpdfemb.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ( 5 | UpstageLayoutAnalysisLoader, 6 | UpstageGroundednessCheck, 7 | ChatUpstage, 8 | UpstageEmbeddings, 9 | ) 10 | from langchain_core.output_parsers import StrOutputParser 11 | from langchain_core.messages import AIMessage, HumanMessage 12 | from langchain.text_splitter import RecursiveCharacterTextSplitter 13 | from langchain_community.vectorstores import FAISS 14 | from langchain.prompts import ChatPromptTemplate 15 | from langchain.load import dumps, loads 16 | 17 | import tempfile, os 18 | 19 | from langchain import hub 20 | 21 | from solar_util import initialize_solar_llm 22 | 23 | st.title("LangChain Upstage Solar ChatDoc") 24 | st.write( 25 | "This is a conversational AI that can chat with you about your documents! Get your KEY at https://console.upstage.ai/" 26 | ) 27 | 28 | llm = initialize_solar_llm() 29 | # https://smith.langchain.com/hub/hunkim/rag-qa-with-history 30 | chat_with_history_prompt = hub.pull("hunkim/rag-qa-with-history") 31 | 32 | groundedness_check = UpstageGroundednessCheck() 33 | 34 | 35 | def get_response(user_query, chat_history, retrieved_docs): 36 | chain = chat_with_history_prompt | llm | StrOutputParser() 37 | 38 | return chain.stream( 39 | { 40 | "chat_history": chat_history, 41 | "context": retrieved_docs, 42 | "question": user_query, 43 | } 44 | ) 45 | 46 | 47 | def query_expander(query): 48 | # Multi Query: Different Perspectives 49 | multi_query_template = """You are an AI language model assistant. 
Your task is to generate five 50 | different versions of the given user question to retrieve relevant documents from a vector 51 | database. By generating multiple perspectives on the user question, your goal is to help 52 | the user overcome some of the limitations of the distance-based similarity search. 53 | Provide these alternative questions separated by newlines. Original question: {query}""" 54 | 55 | # RAG-Fusion: Related 56 | rag_fusion_template = """You are a helpful assistant that generates multiple search queries based on a single input query. \n 57 | Generate multiple search queries related to: {query} \n 58 | Output (3 queries):""" 59 | 60 | # Decomposition 61 | decomposition_template = """You are a helpful assistant that generates multiple sub-questions related to an input question. \n 62 | The goal is to break down the input into a set of sub-problems / sub-questions that can be answers in isolation. \n 63 | Generate multiple search queries related to: {query} \n 64 | Output (3 queries):""" 65 | 66 | query_expander_templates = [ 67 | multi_query_template, 68 | rag_fusion_template, 69 | decomposition_template, 70 | ] 71 | 72 | expanded_queries = [] 73 | for template in query_expander_templates: 74 | prompt_perspectives = ChatPromptTemplate.from_template(template) 75 | 76 | generate_queries = ( 77 | prompt_perspectives 78 | | ChatUpstage(temperature=0) 79 | | StrOutputParser() 80 | | (lambda x: x.split("\n")) 81 | ) 82 | expanded_queries += generate_queries.invoke({"query": query}) 83 | 84 | return expanded_queries 85 | 86 | 87 | def get_unique_union(documents: list[list]): 88 | """Unique union of retrieved docs""" 89 | # Flatten list of lists, and convert each Document to string 90 | flattened_docs = [dumps(doc) for sublist in documents for doc in sublist] 91 | # Get unique documents 92 | unique_docs = list(set(flattened_docs)) 93 | # Return 94 | return [loads(doc) for doc in unique_docs] 95 | 96 | 97 | def retrieve_multiple_queries(retriever, queries): 98 | all_docs = [] 99 | for query in queries: 100 | st.write(f"Retrieving for query: {query}") 101 | docs = retriever.invoke(query) 102 | all_docs.append(docs) 103 | 104 | unique_docs = get_unique_union(all_docs) 105 | return unique_docs 106 | 107 | 108 | if "messages" not in st.session_state: 109 | st.session_state.messages = [] 110 | 111 | if "retriever" not in st.session_state: 112 | st.session_state.retriever = None 113 | 114 | with st.sidebar: 115 | st.header(f"Add your PDF!") 116 | 117 | uploaded_file = st.file_uploader("Choose your `.pdf` file", type="pdf") 118 | 119 | if uploaded_file and not uploaded_file.name in st.session_state: 120 | with tempfile.TemporaryDirectory() as temp_dir: 121 | file_path = os.path.join(temp_dir, uploaded_file.name) 122 | 123 | with open(file_path, "wb") as f: 124 | f.write(uploaded_file.getvalue()) 125 | 126 | with st.status("Document Parsing ..."): 127 | layzer = UpstageLayoutAnalysisLoader(file_path, split="page") 128 | # For improved memory efficiency, consider using the lazy_load method to load documents page by page. 
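                # A streaming sketch (assumes the text splitter below is constructed first):
                #   splits = []
                #   for page in layzer.lazy_load():
                #       splits.extend(text_splitter.split_documents([page]))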
129 |                 docs = loader.load()  # or loader.lazy_load()
130 | 
131 |             # Split
132 |             text_splitter = RecursiveCharacterTextSplitter(
133 |                 chunk_size=500, chunk_overlap=100
134 |             )
135 |             splits = text_splitter.split_documents(docs)
136 | 
137 |             st.write(f"Number of splits: {len(splits)}")
138 | 
139 |             with st.status(f"Vectorizing {len(splits)} splits ..."):
140 |                 # Embed
141 |                 vectorstore = FAISS.from_documents(
142 |                     documents=splits, embedding=UpstageEmbeddings(model="solar-embedding-1-large")
143 |                 )
144 | 
145 |                 st.write("Vectorizing the document done!")
146 | 
147 |             st.session_state.retriever = vectorstore.as_retriever(search_kwargs={"k": 10})  # retrieve the top-10 chunks per query
148 | 
149 |         # processed
150 |         st.session_state[uploaded_file.name] = True
151 | 
152 |         st.success("Ready to Chat!")
153 | 
154 | 
155 | for message in st.session_state.messages:
156 |     role = "AI" if isinstance(message, AIMessage) else "Human"
157 |     with st.chat_message(role):
158 |         # if message.response_metadata.get("context"):
159 |         #     with st.status("Got Context"):
160 |         #         st.write(message.response_metadata.get("context"))
161 |         st.markdown(message.content)
162 | 
163 | if prompt := st.chat_input("What is up?", disabled=not st.session_state.retriever):
164 |     st.session_state.messages.append(
165 |         HumanMessage(
166 |             content=prompt,
167 |         )
168 |     )
169 |     with st.chat_message("user"):
170 |         st.markdown(prompt)
171 | 
172 |     with st.chat_message("assistant"):
173 |         with st.status("Expanding queries..."):
174 |             expanded_queries = query_expander(prompt)
175 |             st.write(expanded_queries)
176 |         with st.status("Getting context..."):
177 |             st.write("Retrieving...")
178 |             retrieved_docs = retrieve_multiple_queries(
179 |                 st.session_state.retriever, expanded_queries
180 |             )
181 |             # retrieved_docs = st.session_state.retriever.invoke(prompt)
182 |             st.write(retrieved_docs)
183 | 
184 |         response = st.write_stream(
185 |             get_response(prompt, st.session_state.messages, retrieved_docs)
186 |         )
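    |         # Groundedness check: a separate Upstage model judges whether the
    |         # streamed answer is actually supported by the retrieved context.
    |         # It returns a verdict string; only "grounded" counts as a pass below.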
187 |         gc_result = groundedness_check.run(
188 |             {
189 |                 "context": f"Context: {retrieved_docs}\n\nQuestion: {prompt}",
190 |                 "answer": response,
191 |             }
192 |         )
193 | 
194 |         if gc_result == "grounded":
195 |             gc_mark = "✅"
196 |             st.success("✅ Groundedness check passed!")
197 |         else:
198 |             gc_mark = "❌"
199 |             st.error("❌ Groundedness check failed!")
200 | 
201 |     st.session_state.messages.append(
202 |         AIMessage(content=f"{gc_mark} {response}"),
203 |     )
204 | 
--------------------------------------------------------------------------------
/chatsearch.py:
--------------------------------------------------------------------------------
1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps
2 | 
3 | import streamlit as st
4 | 
5 | from pydantic import BaseModel, Field
6 | 
7 | from langchain_upstage import ChatUpstage as Chat
8 | from solar_util import initialize_solar_llm
9 | 
10 | from langchain_community.document_loaders import BraveSearchLoader
11 | 
12 | 
13 | from langchain_core.output_parsers import StrOutputParser
14 | from langchain_core.prompts import (
15 |     ChatPromptTemplate,
16 |     MessagesPlaceholder,
17 |     PromptTemplate,
18 | )
19 | from langchain_core.output_parsers import JsonOutputParser
20 | from langchain_core.messages import AIMessage, HumanMessage
21 | 
22 | MAX_TOKENS = 4000
23 | MAX_SEARCH_RESULTS = 5
24 | 
25 | 
26 | llm = initialize_solar_llm()
27 | st.set_page_config(page_title="Search and Chat", page_icon="🔍")
28 | st.title("SolarLLM Search")
29 | 
30 | short_answer_prompt = ChatPromptTemplate.from_messages(
31 |     [
32 |         (
33 |             "system",
34 |             """You are Solar, a smart search engine by Upstage, loved by many people.
35 | 
36 | Write a one-word answer when "yes", "no", or a short direct answer will do.
37 | Otherwise give a one- or two-sentence short answer to the query from the given context.
38 | Try to understand the user's intention and provide a quick answer.
39 | If the answer is not in context, please say you don't know and ask to clarify the question.
40 | 
41 | If the user's query is in a specific language (e.g., Korean, Japanese, Chinese),
42 | respond in the same language. Match the language of your response to the user's input language.
43 | """,
44 |         ),
45 |         MessagesPlaceholder("chat_history"),
46 |         (
47 |             "human",
48 |             """Query: {user_query}
49 | ----
50 | Context: {context}""",
51 |         ),
52 |     ]
53 | )
54 | 
55 | search_prompt = ChatPromptTemplate.from_messages(
56 |     [
57 |         (
58 |             "system",
59 |             """You are Solar, a smart search engine by Upstage, loved by many people.
60 | 
61 | See the original query, context, and quick answer, and then provide a detailed explanation.
62 | 
63 | Try to understand the user's intention and provide the relevant information in detail.
64 | If the answer is not in context, please say you don't know and ask to clarify the question.
65 | Do not repeat the short answer.
66 | 
67 | If the user's query is in a specific language (e.g., Korean, Japanese, Chinese),
68 | respond in the same language. Match the language of your response to the user's input language.
69 | 
70 | CRITICAL - CITATION REQUIREMENTS:
71 | You MUST cite EVERY piece of information using [X] notation. No statement should be made without a citation.
72 | 
73 | IMPORTANT: Citation and Reference Rules:
74 | 1. EVERY sentence must end with a citation [X]
75 | 2. Multiple citations in one sentence should be listed like [1,2,3]
76 | 3. Always include a "References:" section at the end
77 | 4. List all references in order
78 | 5. Each reference must include both title and URL
79 | 
80 | ✅ CORRECT Example:
81 | "Palo Alto requires residential parking permits in downtown areas [1]. The annual permit fee is $50 for residents [2],
82 | and applications can be submitted online or in person at City Hall [2,3]."
83 | 
84 | Another example in Korean:
85 | "서울의 인구는 약 970만 명입니다 [1]. 최근 대중교통 이용률이 증가하고 있으며 [2],
86 | 특히 지하철 이용객이 20% 증가했습니다 [3]."
87 | 
88 | References:
89 | [1] 서울시 인구통계 2023 - https://seoul.go.kr/statistics
90 | [2] 서울 교통현황 보고서 - https://seoul.go.kr/transport
91 | [3] 대중교통 이용분석 - https://seoul.go.kr/metro
92 | 
93 | If you cannot find a specific reference in the context, indicate this clearly
94 | with "[Source not found in context]" but still try to provide the information.
95 | """,
96 |         ),
97 |         MessagesPlaceholder("chat_history"),
98 |         (
99 |             "human",
100 |             """Query: {user_query}
101 | ----
102 | Short answer: {short_answer}
103 | ----
104 | Context: {context}""",
105 |         ),
106 |     ]
107 | )
108 | 
109 | 
110 | query_context_expansion_prompt = """
111 | You are a search query expansion expert. For a given query, generate related search queries that will help find comprehensive information.
112 | 
113 | IMPORTANT RULES:
114 | 1. Match the language of the expanded queries to the original query's language
115 | 2. Generate 2-3 alternative phrasings or related aspects of the query
116 | 3. Keep queries concise and search-engine friendly
117 | 4. Focus on different aspects or synonyms of the original query
118 | 5. If the query is in a non-English language (e.g., Korean, Japanese, Chinese), all expanded queries should be in that same language
119 | 
120 | Examples:
121 | 
122 | English query: "how to get parking permit in boston"
123 | ["boston residential parking permit application", "boston parking permit cost", "how to apply for boston street parking permit"]
124 | 
125 | Korean query: "서울 주차 등록하는 방법"
126 | ["서울시 주차등록증 신청", "서울 거주자 주차등록 절차", "서울시 주차허가증 발급"]
127 | 
128 | Japanese query: "東京都 運転免許 更新"
129 | ["東京都 運転免許更新手続き", "運転免許センター 更新方法", "東京 免許更新 必要書類"]
130 | 
131 | Thai query: "วิธีขอใบอนุญาตจอดรถในกทม"
132 | ["ใบอนุญาตจอดรถ กทม การสมัคร", "ขั้นตอนขอใบอนุญาตจอดรถ กรุงเทพ", "ใบอนุญาตจอดรถผู้พักอาศัย กทม"]
133 | 
134 | Please write in Python LIST format.
135 | 
136 | ---
137 | Context: {context}
138 | ----
139 | History: {chat_history}
140 | ---
141 | Original query: {query}
142 | """
143 | 
144 | 
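    | # The expansion prompt asks for a bare list such as
    | #   ["query", "related question 1", "related question 2"]
    | # and JsonOutputParser can parse that directly. The pydantic model below only
    | # documents the expected shape; it is not enforced at parse time.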
145 | # Define your desired data structure.
146 | class SearchQueries(BaseModel):
147 |     queries: list[str] = Field(description="expanded search queries")
148 | 
149 | 
150 | def query_context_expansion(query, chat_history, context=None):
151 |     # Set up a JSON parser for the list of expanded queries.
152 |     parser = JsonOutputParser(pydantic_object=SearchQueries)
153 | 
154 |     prompt = PromptTemplate(
155 |         template=query_context_expansion_prompt,
156 |         input_variables=["query", "chat_history", "context"],
157 |     )
158 | 
159 |     chain = prompt | llm | parser
160 |     # Invoke the chain, retrying in case the model returns malformed JSON.
161 | 
162 |     for attempt in range(3):
163 |         try:
164 |             parsed_output = chain.invoke(
165 |                 {"query": query, "chat_history": chat_history, "context": context}
166 |             )
167 |             return parsed_output
168 |         except Exception:
169 |             st.warning(f"Attempt {attempt + 1} failed. Retrying...")
170 | 
171 |     st.error("All attempts failed. Returning empty list.")
172 |     return []
173 | 
174 | 
175 | def get_short_search(user_query, context, chat_history):
176 |     chain = short_answer_prompt | llm | StrOutputParser()
177 | 
178 |     return chain.stream(
179 |         {
180 |             "context": context,
181 |             "chat_history": chat_history,
182 |             "user_query": user_query,
183 |         }
184 |     )
185 | 
186 | 
187 | def get_search_desc(user_query, short_answer, context, chat_history):
188 |     chain = search_prompt | llm | StrOutputParser()
189 | 
190 |     return chain.stream(
191 |         {
192 |             "context": context,
193 |             "chat_history": chat_history,
194 |             "user_query": user_query,
195 |             "short_answer": short_answer,
196 |         }
197 |     )
198 | 
199 | 
200 | def search(query, chat_history, context=None):
201 |     with st.status("Expanding the query into related questions..."):
202 |         q_list = query_context_expansion(query, chat_history, context)
203 |         st.write(q_list)
204 | 
205 |     if not q_list:
206 |         return []
207 | 
208 |     # combine all queries with "OR" operator
209 |     or_merged_search_query = " OR ".join(q_list)
210 |     with st.spinner(f"Searching for '{or_merged_search_query}'..."):
211 |         loader = BraveSearchLoader(
212 |             api_key=st.secrets["BRAVE_API_KEY"],
213 |             query=or_merged_search_query, search_kwargs={"count": 3}
214 |         )
215 |         return loader.load()
216 | 
217 | 
218 | 
219 | if "messages" not in st.session_state:
220 |     st.session_state.messages = []
221 | 
222 | for message in st.session_state.messages:
223 |     role = "AI" if isinstance(message, AIMessage) else "Human"
224 |     with st.chat_message(role):
225 |         st.markdown(message.content)
226 | 
227 | q = "How to get a residential parking permit in Palo Alto?"
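    | # Two-pass retrieval below: search once on the raw prompt, then search again
    | # with the first batch of results passed in as context, so the second round
    | # of query expansion can build on what was already found.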
228 | 229 | if prompt := st.chat_input(q): 230 | st.session_state.messages.append(HumanMessage(content=prompt)) 231 | with st.chat_message("user"): 232 | st.markdown(prompt) 233 | 234 | r1 = search(prompt, st.session_state.messages) 235 | result1_summary = str(r1) 236 | 237 | r2 = search(prompt, st.session_state.messages, result1_summary[:MAX_TOKENS]) 238 | 239 | context = str(r1 + r2) 240 | context = context[:MAX_TOKENS] 241 | 242 | with st.status("Search Results:"): 243 | st.write(context) 244 | 245 | with st.chat_message("assistant"): 246 | short_answer = st.write_stream( 247 | get_short_search(prompt, context, st.session_state.messages) 248 | ) 249 | desc = st.write_stream( 250 | get_search_desc(prompt, short_answer, context, st.session_state.messages) 251 | ) 252 | st.session_state.messages.append(AIMessage(content=short_answer + desc)) 253 | -------------------------------------------------------------------------------- /data/target_companies.json: -------------------------------------------------------------------------------- 1 | { 2 | "target_companies": [ 3 | { 4 | "company_name": "GreenTech Solutions", 5 | "main_business": "Industry leader in sustainable building technologies and smart energy management systems. Their flagship product, EcoSmart Building Suite, integrates solar-powered building materials with IoT sensors to reduce energy consumption by up to 40%. Key offerings include:\n\n1. Solar-integrated roofing tiles with 25-year warranty and 98% efficiency rating\n2. Smart window systems that automatically adjust tint based on sunlight exposure\n3. AI-powered HVAC optimization that learns from occupancy patterns\n4. Real-time energy monitoring dashboard for facility managers\n5. Blockchain-based energy trading platform for excess solar power\n\nServing over 500 commercial buildings across North America, with particular expertise in retrofitting historical buildings with sustainable technologies while preserving architectural integrity.", 6 | "contact_email": "partnerships@greentechsolutions.com" 7 | }, 8 | { 9 | "company_name": "DataFlow Analytics", 10 | "main_business": "Enterprise-scale data analytics provider specializing in manufacturing and industrial IoT solutions. Their platform processes over 1 petabyte of sensor data daily, serving Fortune 500 manufacturers. Core capabilities include:\n\n1. Real-time anomaly detection using advanced ML algorithms\n2. Predictive maintenance systems with 95% accuracy in failure prediction\n3. Custom-built neural networks for quality control in production lines\n4. Digital twin technology for equipment simulation and optimization\n5. Automated reporting and KPI tracking with natural language insights\n\nTheir solutions have helped clients achieve:\n- 30% reduction in unexpected downtime\n- 25% decrease in maintenance costs\n- 15% improvement in overall equipment effectiveness (OEE)\n\nCompliant with ISO 27001 and SOC 2 Type II standards.", 11 | "contact_email": "business@dataflowanalytics.com" 12 | }, 13 | { 14 | "company_name": "HealthBridge Systems", 15 | "main_business": "Comprehensive healthcare technology provider transforming patient care through innovative digital solutions. Their integrated platform handles over 10 million patient interactions annually. Key systems include:\n\n1. AI-Powered Diagnostic Support:\n - Machine learning algorithms trained on 50+ million anonymized medical records\n - Real-time analysis of medical imaging with 99.7% accuracy\n - Drug interaction prediction and automated alerts\n\n2. 
Telemedicine Platform:\n - HD video consultations with end-to-end encryption\n - Remote patient monitoring integration\n - Virtual waiting room and automated scheduling\n - Multi-language support for 12 languages\n\n3. Electronic Health Records:\n - HIPAA and GDPR compliant cloud storage\n - Natural language processing for medical documentation\n - Interoperable with major healthcare systems\n - Mobile-first design for healthcare providers\n\nServing 350+ hospitals and 10,000+ healthcare professionals across 3 continents.", 16 | "contact_email": "sales@healthbridge.com" 17 | }, 18 | { 19 | "company_name": "LogisticsPro Global", 20 | "main_business": "End-to-end supply chain optimization platform managing $50B+ in annual freight value. Their comprehensive system includes:\n\n1. Transportation Management:\n - Real-time fleet tracking with 99.9% accuracy\n - Dynamic route optimization using weather and traffic data\n - Fuel efficiency monitoring and carbon footprint tracking\n - Automated carrier selection and rate negotiation\n\n2. Warehouse Management:\n - AI-powered inventory forecasting\n - Robotic process automation for picking and packing\n - 3D warehouse mapping and space optimization\n - Voice-controlled inventory management\n\n3. International Shipping:\n - Automated customs documentation\n - Real-time container tracking across 200+ ports\n - Multi-modal transportation optimization\n - Blockchain-based chain of custody\n\n4. Analytics and Reporting:\n - Predictive analytics for demand forecasting\n - Real-time KPI dashboards\n - Custom report generation\n - Machine learning for continuous improvement\n\nIntegrated with major ERP systems and serving clients in 45 countries.", 21 | "contact_email": "inquiries@logisticspro.com" 22 | }, 23 | { 24 | "company_name": "CyberShield Defense", 25 | "main_business": "Next-generation cybersecurity provider protecting over $1 trillion in digital assets. Their comprehensive security suite includes:\n\n1. Threat Detection and Response:\n - AI-powered threat hunting with sub-second response time\n - Zero-day vulnerability detection\n - Behavioral analytics and anomaly detection\n - Automated incident response playbooks\n\n2. Network Security:\n - Zero-trust architecture implementation\n - Quantum-resistant encryption\n - Software-defined perimeter solutions\n - Cloud-native security controls\n\n3. Compliance Management:\n - Automated compliance monitoring for GDPR, HIPAA, PCI-DSS\n - Real-time policy enforcement\n - Audit trail generation\n - Risk assessment and scoring\n\n4. Security Operations Center:\n - 24/7 monitoring and response\n - Threat intelligence integration\n - Advanced forensics capabilities\n - Incident investigation and reporting\n\nProtecting 200+ financial institutions and healthcare organizations with 99.999% uptime.", 26 | "contact_email": "enterprise@cybershield.com" 27 | }, 28 | { 29 | "company_name": "AgriTech Innovations", 30 | "main_business": "Revolutionary agricultural technology provider serving 50,000+ farms globally. Their integrated farming platform includes:\n\n1. Precision Agriculture:\n - High-resolution satellite imagery analysis\n - Drone-based crop health monitoring\n - Variable rate application technology\n - Soil composition mapping and analysis\n\n2. Smart Irrigation:\n - AI-powered water management\n - Soil moisture sensor networks\n - Weather-based irrigation scheduling\n - Water usage optimization algorithms\n\n3. 
Crop Management:\n - Machine learning for yield prediction\n - Disease and pest detection\n - Nutrient deficiency analysis\n - Harvest timing optimization\n\n4. Farm Operations:\n - Equipment fleet management\n - Labor scheduling and management\n - Supply chain optimization\n - Financial planning and analytics\n\nProviding average yield improvements of 35% while reducing water usage by 40%.", 31 | "contact_email": "contact@agritech-innovations.com" 32 | }, 33 | { 34 | "company_name": "RetailSmart Solutions", 35 | "main_business": "Comprehensive retail technology platform processing over $10B in annual transactions. Their integrated solution includes:\n\n1. Point-of-Sale System:\n - Cloud-based architecture with offline capabilities\n - Multi-channel integration (in-store, online, mobile)\n - Advanced payment processing (cryptocurrencies supported)\n - Customer relationship management\n\n2. Inventory Management:\n - Real-time stock tracking across locations\n - Predictive inventory optimization\n - Automated reordering system\n - Loss prevention analytics\n\n3. Customer Analytics:\n - AI-powered customer segmentation\n - Personalized marketing automation\n - Customer journey tracking\n - Loyalty program management\n\n4. Business Intelligence:\n - Real-time sales analytics\n - Staff performance tracking\n - Competitive price monitoring\n - Custom reporting tools\n\nServing 15,000+ retail locations with 99.99% system reliability.", 36 | "contact_email": "sales@retailsmart.com" 37 | }, 38 | { 39 | "company_name": "EduTech Platforms", 40 | "main_business": "Comprehensive educational technology provider serving 5 million+ students globally. Their platform includes:\n\n1. Learning Management System:\n - Adaptive learning pathways\n - Multi-format content delivery\n - Real-time progress tracking\n - Gamification elements\n\n2. Virtual Classroom Solution:\n - HD video conferencing with AI-powered engagement tracking\n - Interactive whiteboard with real-time collaboration\n - Breakout room functionality\n - Automated attendance and participation tracking\n\n3. Assessment Tools:\n - AI-powered grading system\n - Plagiarism detection\n - Custom quiz generation\n - Performance analytics\n\n4. Parent-Teacher Communication:\n - Real-time progress updates\n - Automated scheduling system\n - Multi-language support\n - Mobile app integration\n\nImplemented in 1,000+ educational institutions with 40% improvement in student engagement.", 41 | "contact_email": "partnerships@edutech.com" 42 | }, 43 | { 44 | "company_name": "FinanceCloud Services", 45 | "main_business": "Enterprise-grade financial management platform serving 100,000+ businesses. Their comprehensive suite includes:\n\n1. Accounting System:\n - Real-time financial reporting\n - Multi-currency support\n - Automated bank reconciliation\n - Custom chart of accounts\n\n2. Payroll Management:\n - Automated tax calculations\n - Multi-state compliance\n - Employee self-service portal\n - Time and attendance tracking\n\n3. Expense Management:\n - Receipt scanning with OCR\n - Automated expense categorization\n - Corporate card integration\n - Policy compliance checking\n\n4. 
Financial Planning:\n - Cash flow forecasting\n - Budgeting tools\n - Scenario planning\n - Investment tracking\n\nProcessing over $50B in transactions annually with 99.99% accuracy.",
46 |       "contact_email": "business@financecloud.com"
47 |     },
48 |     {
49 |       "company_name": "SmartCity Technologies",
50 |       "main_business": "Comprehensive urban infrastructure management platform deployed in 50+ cities worldwide. Their integrated solutions include:\n\n1. Traffic Management:\n - AI-powered traffic flow optimization\n - Smart traffic light control\n - Emergency vehicle prioritization\n - Parking space management\n\n2. Public Safety:\n - Video analytics for crowd management\n - Emergency response coordination\n - Predictive policing algorithms\n - Environmental monitoring\n\n3. Waste Management:\n - Smart bin sensors and route optimization\n - Recycling analytics\n - Resource allocation optimization\n - Environmental impact tracking\n\n4. Urban Planning:\n - Digital twin city modeling\n - Infrastructure maintenance prediction\n - Energy usage optimization\n - Public transportation integration\n\nAchieving average 30% reduction in traffic congestion and 25% improvement in emergency response times.",
51 |       "contact_email": "solutions@smartcity-tech.com"
52 |     }
53 |   ]
54 | }
--------------------------------------------------------------------------------
/namecard.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | from PIL import Image
3 | import re
4 | import io
5 | import os
6 | import qrcode
7 | import base64
8 | from openai import OpenAI
9 | import json
10 | 
11 | # Set page title
12 | st.set_page_config(page_title="Business Card OCR", layout="wide")
13 | st.title("Business Card Reader")
14 | 
15 | # Function to extract text from image using Upstage API
16 | def extract_info_from_image(image):
17 |     try:
18 |         # Convert PIL Image to base64
19 |         buffered = io.BytesIO()
20 |         image.save(buffered, format="JPEG")
21 |         base64_image = base64.b64encode(buffered.getvalue()).decode("utf-8")
22 | 
23 |         # Initialize Upstage API client
24 |         client = OpenAI(
25 |             api_key=st.secrets.get("UPSTAGE_API_KEY") or st.session_state.get("UPSTAGE_API_KEY", ""),  # from secrets.toml or the sidebar input
26 |             base_url="https://api.upstage.ai/v1/information-extract/"
27 |         )
28 | 
29 |         # Call the API with the exact structure provided
30 |         response = client.chat.completions.create(
31 |             model="information-extraction",
32 |             messages=[
33 |                 {
34 |                     "role": "user",
35 |                     "content": [
36 |                         {
37 |                             "type": "image_url",
38 |                             "image_url": {"url": f"data:image/jpeg;base64,{base64_image}"}
39 |                         }
40 |                     ]
41 |                 }
42 |             ],
43 |             response_format={
44 |                 "type": "json_schema",
45 |                 "json_schema": {
46 |                     "name": "document_schema",
47 |                     "schema": {
48 |                         "type": "object",
49 |                         "properties": {
50 |                             "companyName": {
51 |                                 "type": "string",
52 |                                 "description": "The name of the company on the business card."
53 |                             },
54 |                             "address": {
55 |                                 "type": "string",
56 |                                 "description": "The physical address of the company."
57 |                             },
58 |                             "phone": {
59 |                                 "type": "string",
60 |                                 "description": "The telephone number of the company."
61 |                             },
62 |                             "mobile": {
63 |                                 "type": "string",
64 |                                 "description": "The mobile phone number of the contact person."
65 |                             },
66 |                             "email": {
67 |                                 "type": "string",
68 |                                 "description": "The email address of the contact person."
69 |                             },
70 |                             "fullname": {
71 |                                 "type": "string",
72 |                                 "description": "The full name of the person on the business card."
73 |                             },
74 |                             "title": {
75 |                                 "type": "string",
76 |                                 "description": "The job title of the person on the business card."
77 |                             }
78 |                         },
79 |                         "required": [
80 |                             "companyName",
81 |                             "address",
82 |                             "phone",
83 |                             "mobile",
84 |                             "email",
85 |                             "fullname",
86 |                             "title"
87 |                         ]
88 |                     }
89 |                 }
90 |             }
91 |         )
92 | 
93 |         # Extract the structured data from the response
94 |         extracted_info = json.loads(response.choices[0].message.content)
95 |         return extracted_info
96 |     except Exception as e:
97 |         st.error(f"Error during information extraction: {e}")
98 |         return None
99 | 
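    | # vCard 3.0 is a plain-text contact format; the helper below maps the fields
    | # extracted above onto standard vCard properties (FN = full name, ORG = company,
    | # TEL = phone numbers, ADR = address), so phones can import the contact directly.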
100 | def create_vcard(company_name, name, title, phone, mobile, email, address, website=""):
101 |     """Create a vCard file from the extracted information"""
102 |     vcard_content = f"""BEGIN:VCARD
103 | VERSION:3.0
104 | FN:{name}
105 | ORG:{company_name}
106 | TITLE:{title}
107 | TEL;TYPE=WORK:{phone}
108 | TEL;TYPE=CELL:{mobile}
109 | EMAIL:{email}
110 | ADR;TYPE=WORK:;;{address}
111 | URL:{website}
112 | END:VCARD
113 | """
114 |     # Save vCard to file
115 |     filename = f"{name.replace(' ', '_')}.vcf"
116 |     with open(filename, "w") as f:
117 |         f.write(vcard_content)
118 |     return filename
119 | 
120 | def create_qr_code_for_vcard(vcard_file):
121 |     """Create a QR code for the vCard file"""
122 |     try:
123 |         # Read vCard content
124 |         with open(vcard_file, "r") as f:
125 |             vcard_content = f.read()
126 | 
127 |         # Generate QR code
128 |         qr = qrcode.QRCode(
129 |             version=1,
130 |             error_correction=qrcode.constants.ERROR_CORRECT_L,
131 |             box_size=10,
132 |             border=4,
133 |         )
134 |         qr.add_data(vcard_content)
135 |         qr.make(fit=True)
136 | 
137 |         img = qr.make_image(fill_color="black", back_color="white")
138 | 
139 |         # Save QR code image
140 |         qr_filename = vcard_file.replace(".vcf", "_qr.png")
141 |         img.save(qr_filename)
142 |         return qr_filename
143 |     except Exception as e:
144 |         st.error(f"Error generating QR code: {e}")
145 |         return None
146 | 
147 | # --- Main App Section ---
148 | 
149 | # Add API key input in sidebar
150 | if "UPSTAGE_API_KEY" not in st.secrets:
151 |     st.sidebar.title("API Configuration")
152 |     api_key = st.sidebar.text_input("Enter Upstage API Key", type="password")
153 |     if api_key:
154 |         st.session_state["UPSTAGE_API_KEY"] = api_key  # st.secrets is read-only at runtime
155 |     else:
156 |         st.warning("Please enter your Upstage API key in the sidebar to continue.")
157 | 
158 | st.header("Upload Business Card")
159 | 
160 | # Simple file uploader without options
161 | uploaded_file = st.file_uploader("Choose an image...", type=["jpg", "jpeg", "png"])
162 | 
163 | if uploaded_file is not None and (st.secrets.get("UPSTAGE_API_KEY") or st.session_state.get("UPSTAGE_API_KEY")):
164 |     # Display the uploaded image
165 |     image = Image.open(uploaded_file)
166 |     img_col, info_col = st.columns(2)
167 | 
168 |     with img_col:
169 |         st.image(image, caption="Uploaded Business Card", use_container_width=True)
170 | 
171 |     # Extract information using Upstage API
172 |     with st.spinner("Extracting information from business card..."):
173 |         extracted_info = extract_info_from_image(image)
174 | 
175 |     if extracted_info:
176 |         # Create form for editing extracted information
177 |         st.subheader("Edit Contact Information")
178 | 
179 |         # Auto-generate contact initially
180 |         if "contact_generated" not in st.session_state:
181 |             name = extracted_info.get("fullname", "")
182 |             title = extracted_info.get("title", "")
183 |             company_name = extracted_info.get("companyName", "")
184 |             address = extracted_info.get("address", "")
185 |             phone = extracted_info.get("phone", "")
186 |             mobile = extracted_info.get("mobile", "")
187 |             email = extracted_info.get("email", "")
188 |             website = ""
189 | 
190 |             if name:
191 |                 # Create vCard
192 |                 vcard_file = create_vcard(company_name, name, title, phone, mobile, email, address, website)
193 | 
194 |                 # Create QR code
195 |                 qr_file = create_qr_code_for_vcard(vcard_file)
196 | 
197 |                 st.session_state.contact_generated = True
198 |                 st.session_state.vcard_file = vcard_file
199 |                 st.session_state.qr_file = qr_file
200 | 
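    |         # The form below lets the user correct any OCR mistakes; submitting it
    |         # regenerates the vCard and QR code that were auto-created on first load.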
201 |         with st.form("contact_form"):
202 |             col1, col2 = st.columns(2)
203 | 
204 |             with col1:
205 |                 name = st.text_input("Full Name", extracted_info.get("fullname", ""))
206 |                 title = st.text_input("Job Title", extracted_info.get("title", ""))
207 |                 company_name = st.text_input("Company Name", extracted_info.get("companyName", ""))
208 |                 address = st.text_input("Address", extracted_info.get("address", ""))
209 | 
210 |             with col2:
211 |                 phone = st.text_input("Phone Number", extracted_info.get("phone", ""))
212 |                 mobile = st.text_input("Mobile Number", extracted_info.get("mobile", ""))
213 |                 email = st.text_input("Email Address", extracted_info.get("email", ""))
214 |                 website = st.text_input("Website/Social Media", "")
215 | 
216 |             regenerate_button = st.form_submit_button("Regenerate Contact")
217 | 
218 |             if regenerate_button:
219 |                 if not name:
220 |                     st.error("Please provide at least a name.")
221 |                 else:
222 |                     # Create vCard
223 |                     vcard_file = create_vcard(company_name, name, title, phone, mobile, email, address, website)
224 | 
225 |                     # Create QR code
226 |                     qr_file = create_qr_code_for_vcard(vcard_file)
227 | 
228 |                     st.session_state.vcard_file = vcard_file
229 |                     st.session_state.qr_file = qr_file
230 | 
231 |         # Display contact information if available
232 |         if "contact_generated" in st.session_state:
233 |             vcard_file = st.session_state.get("vcard_file")
234 |             qr_file = st.session_state.get("qr_file")
235 | 
236 |             # Display success message and QR code
237 |             st.success(f"Contact information saved to {vcard_file}")
238 | 
239 |             if qr_file and os.path.exists(qr_file):
240 |                 qr_image = Image.open(qr_file)
241 |                 st.image(qr_image, caption="Scan this QR code to add contact", width=300)
242 | 
243 |             # Provide download buttons
244 |             if vcard_file and os.path.exists(vcard_file):
245 |                 with open(vcard_file, "rb") as file:
246 |                     vcf_contents = file.read()
247 |                 st.download_button(
248 |                     label="Download vCard File",
249 |                     data=vcf_contents,
250 |                     file_name=vcard_file,
251 |                     mime="text/vcard"
252 |                 )
253 | 
254 |             if qr_file and os.path.exists(qr_file):
255 |                 with open(qr_file, "rb") as file:
256 |                     qr_contents = file.read()
257 |                 st.download_button(
258 |                     label="Download QR Code",
259 |                     data=qr_contents,
260 |                     file_name=qr_file,
261 |                     mime="image/png"
262 |                 )
--------------------------------------------------------------------------------
/reasoning.py:
--------------------------------------------------------------------------------
1 | import streamlit as st
2 | import time
3 | from pydantic import BaseModel, Field
4 | 
5 | from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
6 | from langchain_core.prompts import (
7 |     ChatPromptTemplate,
8 |     MessagesPlaceholder,
9 |     PromptTemplate,
10 | )
11 | from langchain_core.messages import AIMessage, HumanMessage
12 | 
13 | from langchain_upstage import ChatUpstage as Chat
14 | from langchain_community.tools import DuckDuckGoSearchResults
15 | 
16 | from solar_util import num_of_tokens
17 | 
18 | MAX_TOKENS = 2500
19 | MAX_SEARCH_TOKENS = 700
20 | MAX_SEARCH_RESULTS = 5
21 | 
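    | # Token budgets: MAX_TOKENS caps how much chat history is replayed to the
    | # model, and MAX_SEARCH_TOKENS caps how much raw web-search text is injected
    | # as context below.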
22 | llm = Chat(model="solar-pro")
23 | ddg_search = DuckDuckGoSearchResults()
24 | 
25 | st.set_page_config(page_title="Solar Reasoning", page_icon="🤔")
26 | st.title("Solar Reasoning")
27 | 
28 | reasoning_examples = """
29 | ---
30 | Example 1:
31 | 
32 | User Query: If a die is rolled three times, what is the probability of getting a sum of 11?
33 | 
34 | Reasoning: 1. Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.
35 | 2. Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.
36 | 3. Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 27 such combinations.
37 | 4. Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 27 / 216 = 1/8.
38 | 5. Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/8.
39 | 
40 | Reasoning Chains: [{'step': 1, 'thought': 'Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.'}, {'step': 2, 'thought': 'Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.'}, {'step': 3, 'thought': 'Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 27 such combinations.'}, {'step': 4, 'thought': 'Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 27 / 216 = 1/8.'}, {'step': 5, 'thought': 'Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/8.'}]
41 | ----
42 | Example 2:
43 | 
44 | User Query: The interactions will be about the science behind culinary techniques. The setting is a cooking class where three friends are discussing various aspects of cooking and sharing their knowledge.
45 | - USER/Jane: A curious learner who wants to understand the science behind cooking
46 | - Mike: An experienced home cook with a passion for experimenting in the kitchen
47 | - Sarah: A food scientist who loves explaining the chemistry behind different cooking processes
48 | 
49 | Reasoning: 1. Start with the given setting: a cooking class with three friends discussing the science behind culinary techniques.
50 | 2. Introduce the topic of resting meat after cooking, with Mike asking Jane if she's ever wondered about it.
51 | 3. Have Sarah explain the science behind resting meat, mentioning denatured proteins and juice redistribution.
52 | 4. Address the user's question about resting meat, with Sarah confirming that it allows juices to redistribute.
53 | 5. Move on to the topic of adding salt to water, with Mike mentioning its effect on boiling point.
54 | 6. Have Sarah explain the science behind salt's effect on boiling point, mentioning the higher temperature required for boiling.
55 | 7. Address the user's question about cooking speed, with Sarah explaining that it's slightly faster due to the hotter water.
56 | 8. Introduce the topic of acids in cooking, with Mike mentioning their use in brightening dishes.
57 | 9. Have Sarah explain the science behind acids' effects on flavor and tenderizing meats.
58 | 10. Address the user's question about baking, with Mike mentioning the science involved in baking and Sarah explaining the role of gluten and leavening agents.
59 | 11. Conclude the conversation with the characters expressing their fascination with the science behind cooking and their excitement to continue learning and experimenting.
60 | 
61 | Reasoning Chains: [{'step': 1, 'thought': 'Start with the given setting: a cooking class with three friends discussing the science behind culinary techniques.'}, {'step': 2, 'thought': "Introduce the topic of resting meat after cooking, with Mike asking Jane if she's ever wondered about it."}, {'step': 3, 'thought': 'Have Sarah explain the science behind resting meat, mentioning denatured proteins and juice redistribution.'}, {'step': 4, 'thought': "Address the user's question about resting meat, with Sarah confirming that it allows juices to redistribute."}, {'step': 5, 'thought': 'Move on to the topic of adding salt to water, with Mike mentioning its effect on boiling point.'}, {'step': 6, 'thought': "Have Sarah explain the science behind salt's effect on boiling point, mentioning the higher temperature required for boiling."}, {'step': 7, 'thought': "Address the user's question about cooking speed, with Sarah explaining that it's slightly faster due to the hotter water."}, {'step': 8, 'thought': 'Introduce the topic of acids in cooking, with Mike mentioning their use in brightening dishes.'}, {'step': 9, 'thought': "Have Sarah explain the science behind acids' effects on flavor and tenderizing meats."}, {'step': 10, 'thought': "Address the user's question about baking, with Mike mentioning the science involved in baking and Sarah explaining the role of gluten and leavening agents."}, {'step': 11, 'thought': 'Conclude the conversation with the characters expressing their fascination with the science behind cooking and their excitement to continue learning and experimenting.'}]
62 | ----
63 | """
64 | 
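    | # One prompt template drives all three stages below; the {task} variable
    | # selects whether the model emits free-form reasoning, structured reasoning
    | # chains, or the final answer.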
65 | reasoning_prompt = ChatPromptTemplate.from_messages(
66 |     [
67 |         (
68 |             "system",
69 |             """You are Solar, a smart reasoning and answer engine by Upstage, loved by many people.
70 | 
71 | For the given query, please provide the best answer using a step-by-step explanation.
72 | Your response should demonstrate a Chain of Thought (CoT) technique,
73 | where you think aloud and describe the steps you take to reach a conclusion.
74 | 
75 | Please make the best use of the provided reasoning examples and context.
76 | ---
77 | {reasoning_examples}
78 | """,
79 |         ),
80 |         MessagesPlaceholder("chat_history"),
81 |         (
82 |             "human",
83 |             """For the given query, please provide only the "{task}"
84 | and ensure your response is consistent with the user's request,
85 | previous chat history, and provided reasoning if any.
86 | Remember to use the self-consistency technique to maintain a consistent character of a helpful assistant.
87 | Think step by step and provide the best answer for the query.
88 | ---
89 | User Query:
90 | {prompt}
91 | ---
92 | {Reasoning}
93 | ---
94 | {ReasoningChains}""",
95 |         ),
96 |     ]
97 | )
98 | 
99 | 
100 | query_context_expansion_prompt = """Given a query and context(if provided),
101 | generate up to three related questions to help answer the original query.
102 | Ensure the questions are short, concise, and keyword-based for search engines.
103 | 
104 | Write your response in Python LIST format.
105 | 
106 | For example:
107 | ["original query", "related question 1", "related question 2", "related question 3"]
108 | 
109 | ---
110 | Context: {context}
111 | ----
112 | History: {chat_history}
113 | ---
114 | Original query: {query}
115 | """
116 | 
117 | 
118 | # Define your desired data structure.
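    | # Expected expansion output, matching the example in the prompt above:
    | #   ["original query", "related question 1", "related question 2", "related question 3"]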
119 | class SearchQueries(BaseModel):
120 |     queries: list[str] = Field(description="related search queries")
121 | 
122 | 
123 | def query_context_expansion(query, chat_history, context=None):
124 |     # Set up a JSON parser for the list of related questions.
125 |     parser = JsonOutputParser(pydantic_object=SearchQueries)
126 | 
127 |     prompt = PromptTemplate(
128 |         template=query_context_expansion_prompt,
129 |         input_variables=["query", "chat_history", "context"],
130 |     )
131 | 
132 |     chain = prompt | llm | parser
133 |     # Invoke the chain, retrying in case the model returns malformed JSON.
134 | 
135 |     for attempt in range(3):
136 |         try:
137 |             parsed_output = chain.invoke(
138 |                 {"query": query, "chat_history": chat_history, "context": context}
139 |             )
140 |             return parsed_output
141 |         except Exception:
142 |             st.warning(f"Attempt {attempt + 1} failed. Retrying...")
143 | 
144 |     st.error("All attempts failed. Returning empty list.")
145 |     return []
146 | 
147 | 
148 | GlobalTasks = ["Reasoning (No conclusion)", "Reasoning Chains", "Final Answer"]
149 | 
150 | 
151 | def perform_task(user_query, task, task_results, chat_history):
152 |     # Truncate chat history to at most MAX_TOKENS tokens.
153 |     limited_history = []
154 |     total_length = 0
155 |     for message in reversed(chat_history):
156 |         message_length = num_of_tokens(message.content)
157 |         if total_length + message_length > MAX_TOKENS:
158 |             st.warning("Chat history is too long. Truncating.")
159 |             break
160 |         limited_history.insert(0, message)
161 |         total_length += message_length
162 | 
163 |     chain = reasoning_prompt | llm | StrOutputParser()
164 | 
165 |     return chain.stream(
166 |         {
167 |             "chat_history": limited_history,
168 |             "reasoning_examples": reasoning_examples,
169 |             "prompt": user_query,
170 |             "task": task,
171 |             "Reasoning": task_results.get(GlobalTasks[0], ""),
172 |             "ReasoningChains": task_results.get(GlobalTasks[1], ""),
173 |         }
174 |     )
175 | 
176 | 
177 | def search(query, chat_history, context=None):
178 |     with st.status("Extending query..."):
179 |         q_list = query_context_expansion(query, chat_history, context)
180 |         st.write(q_list)
181 | 
182 |     if not q_list:
183 |         st.error("No related questions found. Returning empty list.")
184 |         return []
185 | 
186 |     # Search for each related question and concatenate the results.
187 |     results = ""
188 |     for q in q_list:
189 |         with st.spinner(f"Searching for '{q}'..."):
190 |             results += ddg_search.invoke(q)
191 | 
192 |     return results
193 | 
194 | 
195 | if "messages" not in st.session_state:
196 |     st.session_state.messages = []
197 | 
198 | for message in st.session_state.messages:
199 |     role = "AI" if isinstance(message, AIMessage) else "Human"
200 |     with st.chat_message(role):
201 |         st.markdown(message.content)
202 | 
203 | q = "3.9 vs 3.11. Which one is bigger?"
204 | 
205 | search_on = st.checkbox("Search on the web", value=False)
206 | 
207 | if prompt := st.chat_input(q):
208 |     with st.chat_message("user"):
209 |         st.markdown(prompt)
210 | 
211 |     if search_on:
212 |         search_result = search(prompt, st.session_state.messages)
213 | 
214 |         with st.status("Search Results:"):
215 |             st.write(search_result)
216 | 
217 |         if search_result:
218 |             search_result = str(search_result)[:MAX_SEARCH_TOKENS]
219 |             st.session_state.messages.append(
220 |                 HumanMessage(
221 |                     content=f"FYI search result context: {search_result} for the query, {prompt}"
222 |                 )
223 |             )
224 |             st.session_state.messages.append(
225 |                 AIMessage(
226 |                     content="Thanks for the information! I will keep that in mind. Give me the instruction."
227 |                 )
228 |             )
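    |     # The three tasks run in order; earlier outputs are threaded back in
    |     # through task_results, and only the final answer is rendered as a chat
    |     # message (the intermediate stages appear inside collapsible status boxes).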
229 |     task_results = {}
230 |     current_time = time.time()
231 |     for task in GlobalTasks:
232 |         if task == GlobalTasks[-1]:
233 |             st.info(f"Thinking: {time.time() - current_time:.2f}s")
234 | 
235 |             with st.chat_message("assistant"):
236 |                 response = st.write_stream(
237 |                     perform_task(prompt, task, task_results, st.session_state.messages)
238 |                 )
239 |                 task_results[task] = response
240 |             break
241 | 
242 |         with st.status(f"Performing task: {task}"):
243 |             response = st.write_stream(
244 |                 perform_task(prompt, task, task_results, st.session_state.messages)
245 |             )
246 |             task_results[task] = response
247 |     # Store the last task result for future reference
248 |     st.session_state.messages.append(HumanMessage(content=prompt))
249 |     st.session_state.messages.append(AIMessage(content=task_results[GlobalTasks[-1]]))
--------------------------------------------------------------------------------
/discussion.py:
--------------------------------------------------------------------------------
1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps
2 | 
3 | import streamlit as st
4 | 
5 | from langchain_upstage import ChatUpstage as Chat
6 | from pydantic import BaseModel
7 | 
8 | from langchain_core.output_parsers import StrOutputParser
9 | from langchain_core.prompts import ChatPromptTemplate
10 | from langchain_core.messages import AIMessage, HumanMessage
11 | 
12 | from langchain_core.output_parsers import StrOutputParser, JsonOutputParser
13 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder
14 | from langchain_community.tools import DuckDuckGoSearchResults
15 | from langchain_upstage import ChatUpstage
16 | 
17 | 
18 | from solar_util import initialize_solar_llm
19 | 
20 | st.set_page_config(page_title="Discuss", page_icon="🗣️")
21 | st.title("Self-debating Solar Pro Preview")
22 | 
23 | solar = initialize_solar_llm()
24 | deepseek = ChatUpstage(model="deepseek-chat", base_url="https://api.deepseek.com/v1", api_key=st.secrets["DEEPSEEK_API_KEY"])
25 | deepseek_r = ChatUpstage(model="deepseek-reasoner", base_url="https://api.deepseek.com/v1", api_key=st.secrets["DEEPSEEK_API_KEY"])
26 | llms = [deepseek, solar]
27 | 
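    | # Round-robin over the two models: consecutive turns come from different LLMs,
    | # so the "self-debate" actually alternates between DeepSeek and Solar.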
28 | llm_order = 0
29 | def get_llm():
30 |     global llm_order
31 |     llm = llms[llm_order]
32 |     llm_order = (llm_order + 1) % len(llms)
33 |     return llm
34 | 
35 | ddg_search = DuckDuckGoSearchResults()
36 | 
37 | # Define your desired data structure.
38 | class SearchKeyword(BaseModel):
39 |     keywords: list[str]
40 | 
41 | 
42 | search_keyword_extraction = ChatPromptTemplate.from_messages(
43 |     [
44 |         (
45 |             "system",
46 |             """You are Solar-Discussor, a smart discussion chatbot by Upstage, loved by many people.
47 | 
48 | You have already come up with a discussion draft.
49 | Now you can use Google search to find more information about the discussion point.
50 | 
51 | Please come up with 2~3 search keywords that you can use to find more information about the discussion point.
52 | ---
53 | Topic: {topic}
54 | """,
55 |         ),
56 |         (
57 |             "human",
58 |             """Please write search keywords in a python list like ["keyword1", "keyword2", "keyword3"].
59 | ---
60 | Discussion point draft: {discussion_candidate}
61 | 
62 | """,
63 |         ),
64 |     ]
65 | )
66 | 
67 | discussion_prompt_with_search = ChatPromptTemplate.from_messages(
68 |     [
69 |         (
70 |             "system",
71 |             """You are Solar-Discussor, a smart discussion chatbot by Upstage, loved by many people.
72 | 
73 | You are talking about a topic and discussing with a user. Please participate in the discussion and provide engaging answers.
74 | If necessary, ask for more information, clarify the question, or add follow-up questions.
75 | If you find something wrong in others' discussion, correct them in a friendly manner in bold.
76 | Do not talk beyond the topic and do not provide inappropriate language.
77 | 
78 | No need to agree on everything. You can have different opinions and discuss in a friendly manner.
79 | Find contradictions and correct them in a harsh manner. It's OK to say I don't agree with you.
80 | 
81 | Speak shortly and clearly about 2~3 sentences.
82 | Get to the point first and expand if necessary.
83 | 
84 | Count the turns and put [Turn n/10] once, at the very beginning of your response.
85 | 
86 | Important: Match your response language to the topic language:
87 | - If the topic is in Korean, respond in Korean
88 | - If the topic is in English, respond in English
89 | ---
90 | Topic: {topic}
91 | """,
92 |         ),
93 |         MessagesPlaceholder("chat_history"),
94 |         (
95 |             "human",
96 |             """Based on your discussion draft, we did a Google search.
97 | Please use the search results to enhance your original discussion draft if the information is relevant and useful.
98 | If it is important, please add the URL of the search result.
99 | Using all of this, please focus on the discussion and provide engaging answers.
100 | Don't thank or mention the search results. Assume you already know this information.
101 | Fully focus on the discussion with the human. Discuss based on the facts and information you have.
102 | 
103 | Please speak in a friendly and engaging manner. Speak shortly and clearly about 2~3 sentences.
104 | Get to the point first and expand if necessary.
105 | 
106 | Count the turns and put [Turn n/10] once, at the very beginning of your response.
107 | Please do only one discussion turn.
108 | 
109 | ---
110 | Discussion draft: {discussion_candidate}
111 | ----
112 | Search result: {external_information}
113 | """,
114 |         ),
115 |     ]
116 | )
117 | 
118 | 
119 | discussion_prompt = ChatPromptTemplate.from_messages(
120 |     [
121 |         (
122 |             "system",
123 |             """You are Solar-Discussor, a smart discussion chatbot by Upstage, loved by many people.
124 | 
125 | You are talking about a topic and discussing with a user. Please participate in the discussion and provide engaging answers.
126 | If necessary, ask for more information, clarify the question, or add follow-up questions.
127 | If you find something wrong in others' discussion, correct them in a friendly manner in bold.
128 | Do not talk beyond the topic and do not provide inappropriate language.
129 | 
130 | Please speak in a friendly and engaging manner. Speak shortly and clearly about 2~3 sentences.
131 | Get to the point first and expand if necessary.
132 | 
133 | Count the turns and put [Turn n/10] once, at the very beginning of your response.
134 | Please do only one turn.
135 | 
136 | Do not repeat the same point already mentioned.
137 | 
138 | Important: Match your response language to the topic language:
139 | - If the topic is in Korean, respond in Korean
140 | - If the topic is in English, respond in English
141 | ---
142 | Topic: {topic}
143 | """,
144 |         ),
145 |         MessagesPlaceholder("chat_history"),
146 |         ("human", "{discussion}"),
147 |     ]
148 | )
149 | 
150 | 
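    | # The summary pass replays the entire recorded transcript and asks for a
    | # conclusion; it is told to add nothing beyond the discussion itself.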
151 | summary_prompt = ChatPromptTemplate.from_messages(
152 |     [
153 |         MessagesPlaceholder("chat_history"),
154 |         (
155 |             "human",
156 |             """
157 | You are Solar-Discussor, a smart discussion chatbot by Upstage, loved by many people.
158 | 
159 | By reading the discussion, provide a comprehensive summary of the discussion and a conclusion.
160 | Only use the previous discussion and do not add new information.
161 | Highlight several sentences if necessary.
162 | ---
163 | Topic: {topic}
164 | ---
165 | Please summarize the discussion in history.""",
166 |         ),
167 |     ]
168 | )
169 | 
170 | 
171 | def make_human_last_in_history(chat_history):
172 |     if not chat_history:
173 |         return []
174 | 
175 |     # No need to change anything if the last message is already from a human.
176 |     if not isinstance(chat_history[-1], AIMessage):
177 |         return chat_history
178 | 
179 |     # Otherwise swap the AI/Human roles so the history ends with a human turn.
180 |     return [
181 |         (
182 |             HumanMessage(content=chat.content)
183 |             if isinstance(chat, AIMessage)
184 |             else AIMessage(content=chat.content)
185 |         )
186 |         for chat in chat_history
187 |     ]
188 | 
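    | # Search-assisted flow used below: draft a reply, mine the draft for search
    | # keywords, run DuckDuckGo on them, then rewrite the draft with the findings.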
"chat_history": new_chat_history, 256 | "topic": topic, 257 | "discussion_candidate": discussion, 258 | "external_information": search_result_summary, 259 | } 260 | ) 261 | 262 | chain = discussion_prompt | llm | StrOutputParser() 263 | return chain.stream( 264 | { 265 | "chat_history": new_chat_history, 266 | "topic": topic, 267 | "discussion": discussion, 268 | } 269 | ) 270 | 271 | 272 | def get_summary(topic, chat_history, llm): 273 | chain = summary_prompt | llm | StrOutputParser() 274 | return chain.stream( 275 | { 276 | "chat_history": chat_history, 277 | "topic": topic, 278 | } 279 | ) 280 | 281 | 282 | if "messages" not in st.session_state: 283 | st.session_state.messages = [] 284 | 285 | if False: 286 | for message in st.session_state.messages: 287 | role = "AI" if isinstance(message, AIMessage) else "Human" 288 | with st.chat_message(role): 289 | st.markdown(message.content) 290 | default_topic = "Where is the best place to visit in Korea?" 291 | topic = st.text_input("Discussion Topic", default_topic) 292 | use_search = False and st.toggle("Use Search", False) 293 | if st.button("Start Discussion"): 294 | st.session_state.messages = [] 295 | previous_discussion = "" 296 | for i in range(5): 297 | with st.chat_message("user"): 298 | llm = get_llm() 299 | discussion = st.write_stream( 300 | get_discussion( 301 | topic, 302 | previous_discussion, 303 | st.session_state.messages, 304 | llm = llm, 305 | use_search=use_search, 306 | ) 307 | ) 308 | 309 | st.session_state.messages.append(HumanMessage(content=discussion )) 310 | 311 | if discussion.startswith("[Turn 10/10]"): 312 | break 313 | with st.chat_message("assistant"): 314 | llm = get_llm() 315 | previous_discussion = st.write_stream( 316 | get_discussion(topic, discussion, st.session_state.messages, llm = llm, use_search=use_search) 317 | ) 318 | 319 | st.session_state.messages.append(AIMessage(content=previous_discussion )) 320 | 321 | if previous_discussion.startswith("[Turn 10/10]"): 322 | break 323 | 324 | ## summarize the discussion 325 | with st.chat_message("user"): 326 | st.write("## Solar-Summarizer") 327 | st.write_stream(get_summary(topic, st.session_state.messages, llm = solar)) 328 | 329 | st.write("## Deepseek-Summarizer") 330 | st.write_stream(get_summary(topic, st.session_state.messages, llm = deepseek)) 331 | 332 | st.write("## DeepSeek-Reasoner") 333 | st.write_stream(get_summary(topic, st.session_state.messages, llm = deepseek_r)) 334 | -------------------------------------------------------------------------------- /coldmail.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | 6 | from langchain_core.output_parsers import StrOutputParser 7 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 8 | from langchain_core.messages import AIMessage, HumanMessage 9 | 10 | from solar_util import initialize_solar_llm 11 | 12 | from solar_util import prompt_engineering 13 | 14 | import json 15 | from pathlib import Path 16 | 17 | llm = initialize_solar_llm() 18 | st.set_page_config(page_title="Cold Email Generator", layout="wide") 19 | st.title("B2B Cold Email Generator") 20 | 21 | 22 | chat_with_history_prompt = ChatPromptTemplate.from_messages( 23 | [ 24 | ("human", """You are Solar, a smart chatbot by Upstage, loved by many people. 25 | Be smart, cheerful, and fun. 
22 | chat_with_history_prompt = ChatPromptTemplate.from_messages(
23 |     [
24 |         ("human", """You are Solar, a smart chatbot by Upstage, loved by many people.
25 | Be smart, cheerful, and fun. Give engaging answers and avoid inappropriate language.
26 | Reply in the same language as the user query.
27 | Solar is now being connected with a human.
28 | 
29 | Please put <END> at the end of your answer."""),
30 |         MessagesPlaceholder("chat_history"),
31 |         ("human", "{user_query}"),
32 |     ]
33 | )
34 | 
35 | # Assumed stop sentinel; must match the marker requested in the prompts above and below.
36 | END_TOKEN = "<END>"
37 | def get_response(user_query, chat_history):
38 |     chain = chat_with_history_prompt | llm | StrOutputParser()
39 | 
40 |     buffer = ""
41 |     for chunk in chain.stream(
42 |         {
43 |             "chat_history": chat_history,
44 |             "user_query": user_query,
45 |         }
46 |     ):
47 |         buffer += chunk
48 | 
49 |         if END_TOKEN in buffer:
50 |             # Emit any text before the sentinel, then stop streaming.
51 |             yield buffer.split(END_TOKEN)[0]
52 |             return
53 | 
54 |         # Hold back the last few characters so a sentinel that is split
55 |         # across two chunks is still caught on the next iteration.
56 |         safe, buffer = buffer[: -(len(END_TOKEN) - 1)], buffer[-(len(END_TOKEN) - 1):]
57 | 
58 |         if safe:
59 |             yield safe
60 | 
61 |     yield buffer
62 | 
63 | 
64 | # Add these constants for our cold email structure
65 | COLD_EMAIL_TEMPLATE = """You are a professional cold email writer.
66 | Based on the following information, create a compelling cold email:
67 | - Your Company: {company_name}
68 | - Your Product/Service: {product}
69 | - Company Website: {company_url}
70 | - Your Contact Information: {contact_info}
71 | 
72 | Target Company Information:
73 | - Company Name: {target_company_name}
74 | - Business Description: {target_business}
75 | - Contact Email: {target_email}
76 | 
77 | Use these example emails as reference for tone and structure:
78 | {example_emails}
79 | 
80 | Make the email professional, concise, and persuasive.
81 | Include a clear value proposition and call to action.
82 | Always include both the website and contact information in the signature.
83 | End with an invitation to visit our website for more information and to contact us.
84 | Reply in the same language as the user query.
85 | 
86 | Please put <END> at the end of your answer."""
87 | 
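    | # COLD_EMAIL_TEMPLATE is filled in per target company by generate_emails()
    | # below; every {placeholder} above must be supplied in the invoke() call,
    | # otherwise prompt formatting fails.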
87 |
88 | # Add these to track cold email information
89 | if "cold_email_info" not in st.session_state:
90 | st.session_state.cold_email_info = {
91 | "company_name": "Upstage.AI",
92 | "company_url": "https://upstage.ai",
93 | "product": "We specialize in Document AI and Large Language Models (LLMs), offering cutting-edge solutions that combine both technologies. Our products help businesses automate document processing, enhance information extraction, and leverage advanced AI capabilities for improved efficiency and decision-making.",
94 | "contact_info": "contact@upstage.ai",
95 | "target_companies": "Enterprise companies seeking advanced AI solutions for document processing and natural language understanding",
96 | "cold_email_examples": [
97 | """Subject: Enhancing Coupang's E-commerce Experience with AI Solutions
98 |
99 | Dear Coupang Team,
100 |
101 | I hope this email finds you well. I am reaching out from Upstage.AI, a leading provider of Document AI and Large Language Model solutions, as I believe we could add significant value to Coupang's e-commerce operations.
102 |
103 | Given Coupang's position as South Korea's largest e-commerce platform, I wanted to explore how our AI solutions could enhance your shopping experience. Our technology can help:
104 |
105 | • Improve product search accuracy and recommendations
106 | • Automate product description processing and categorization
107 | • Enhance customer service through advanced AI chatbots
108 | • Streamline document processing for vendor onboarding
109 |
110 | Would you be open to a brief conversation about how these solutions could benefit Coupang's operations?
111 |
112 | To learn more about our solutions, please visit us at https://upstage.ai
113 |
114 | I'm happy to schedule a call or provide more information. You can reach me at contact@upstage.ai.
115 |
116 | Best regards,
117 | Upstage.AI Team""",
118 | """Subject: AI Solutions for Samsung Electronics' Manufacturing Process
119 |
120 | Dear Samsung Electronics Team,
121 |
122 | I'm reaching out from Upstage.AI regarding our advanced AI solutions that could enhance your manufacturing and quality control processes.
123 |
124 | Our Document AI and LLM technologies have helped leading manufacturers:
125 | • Reduce quality inspection time by 60%
126 | • Automate technical documentation processing
127 | • Improve defect detection accuracy by 45%
128 | • Streamline supplier communication and documentation
129 |
130 | Would you be interested in discussing how these solutions could benefit Samsung Electronics' operations?
131 |
132 | Best regards,
133 | Upstage.AI Team""",
134 | """Subject: Revolutionizing Hyundai Motor's Documentation Systems
135 |
136 | Hello Hyundai Motor Team,
137 |
138 | I'm writing from Upstage.AI about our AI-powered document processing solutions that could transform your technical documentation and maintenance manual systems.
139 |
140 | Our technology has demonstrated:
141 | • 75% reduction in manual document processing time
142 | • Enhanced accuracy in multi-language technical documentation
143 | • Automated parts catalog management
144 | • Improved service manual accessibility and searchability
145 |
146 | Could we schedule a brief call to explore how these capabilities align with Hyundai's digital transformation goals?
147 |
148 | Best regards,
149 | Upstage.AI Team"""
150 | ],
151 | "additional_notes": ""
152 | }
153 |
154 | def load_target_companies():
155 | json_path = Path(__file__).parent / "data" / "target_companies.json"
156 | with open(json_path, 'r') as f:
157 | return json.load(f)['target_companies']
158 |
159 | def generate_emails(company_info):
160 | target_companies = load_target_companies()
161 | emails = []
162 |
163 | st.markdown("## Generating Cold Emails")
164 |
165 | for idx, target in enumerate(target_companies, 1):
166 | with st.status(f"📧 Generating email for {target['company_name']} ({idx}/{len(target_companies)})", expanded=True) as status:
167 | status.write("🎯 **Target Company Information**")
168 | status.markdown(f"""
169 | - Company Name: {target['company_name']}
170 | - Main Business: {target['main_business']}
171 | - Contact Email: {target['contact_email']}
172 | """)
173 |
174 | chain = ChatPromptTemplate.from_messages([
175 | ("human", COLD_EMAIL_TEMPLATE)
176 | ]) | llm | StrOutputParser()
177 |
178 | try:
179 | status.write("⚙️ Generating personalized content...")
180 |
181 | # Filter out empty examples, then number and join them
182 | examples = "\n\n".join(
183 | f"EXAMPLE EMAIL #{n}\n{ex}" for n, ex in enumerate((ex for ex in company_info["cold_email_examples"] if ex.strip()), 1)
184 | )
185 |
186 | response = chain.invoke({
187 | "company_name": company_info["company_name"],
188 | "product": company_info["product"],
189 | "company_url": company_info["company_url"],
190 | "contact_info": company_info["contact_info"],
191 | "target_company_name": target["company_name"],
192 | "target_business": target["main_business"],
193 | "target_email": target["contact_email"],
194 | "example_emails": examples
195 | })
196 |
197 | email_content = response.split("<END>")[0].strip()
198 | emails.append({
199 | "target_company": target["company_name"],
200 | "email_content":
email_content, 201 | "status": "success" 202 | }) 203 | 204 | status.update(label=f"✅ Email generated for {target['company_name']}", state="complete") 205 | status.markdown("#### Generated Email") 206 | status.markdown(f""" 207 |
208 | {email_content} 209 |
210 | """, unsafe_allow_html=True) 211 | st.button( 212 | "📋 Copy to Clipboard", 213 | key=f"copy_{target['company_name']}", 214 | on_click=lambda text=email_content: st.write(text) 215 | ) 216 | 217 | except Exception as e: 218 | emails.append({ 219 | "target_company": target["company_name"], 220 | "email_content": f"Error generating email: {str(e)}", 221 | "status": "error" 222 | }) 223 | with col2: 224 | st.write("❌ Error occurred during generation") 225 | st.error(f"Error: {str(e)}") 226 | 227 | # Show summary statistics at the end 228 | st.markdown("## Summary") 229 | col1, col2, col3 = st.columns(3) 230 | 231 | total_emails = len(emails) 232 | successful_emails = sum(1 for email in emails if email["status"] == "success") 233 | failed_emails = total_emails - successful_emails 234 | 235 | col1.metric("Total Emails", total_emails) 236 | col2.metric("Successful", successful_emails) 237 | col3.metric("Failed", failed_emails) 238 | 239 | return emails 240 | 241 | # Remove the sidebar wrapper and organize content in the main area 242 | st.subheader("Email Generator Settings") 243 | 244 | st.session_state.cold_email_info["company_name"] = st.text_input( 245 | "Your Company Name", 246 | st.session_state.cold_email_info["company_name"] 247 | ) 248 | st.session_state.cold_email_info["company_url"] = st.text_input( 249 | "Company Website URL", 250 | st.session_state.cold_email_info["company_url"] 251 | ) 252 | st.session_state.cold_email_info["product"] = st.text_area( 253 | "Product/Service Description", 254 | st.session_state.cold_email_info["product"], 255 | height=100 256 | ) 257 | st.session_state.cold_email_info["contact_info"] = st.text_input( 258 | "Contact Information", 259 | st.session_state.cold_email_info["contact_info"] 260 | ) 261 | 262 | # Simplified text area inputs with pre-populated examples 263 | st.subheader("Example Emails (Up to 3)") 264 | for i in range(3): 265 | st.session_state.cold_email_info["cold_email_examples"][i] = st.text_area( 266 | f"Example Email {i+1}", 267 | value=st.session_state.cold_email_info["cold_email_examples"][i], 268 | height=200, 269 | key=f"example_email_{i}" 270 | ) 271 | 272 | # Generate button 273 | if st.button("Generate Cold Email", type="primary"): 274 | if not st.session_state.cold_email_info["company_name"]: 275 | st.error("Please enter your company name") 276 | else: 277 | generated_emails = generate_emails(st.session_state.cold_email_info) 278 | 279 | # Display generated emails in the main area with better formatting 280 | st.markdown("## Generated Cold Emails") 281 | 282 | # Create three columns for statistics 283 | col1, col2, col3 = st.columns(3) 284 | 285 | # Calculate statistics 286 | total_emails = len(generated_emails) 287 | successful_emails = sum(1 for email in generated_emails if email["status"] == "success") 288 | failed_emails = total_emails - successful_emails 289 | 290 | # Display statistics in metrics 291 | col1.metric("Total Emails", total_emails) 292 | col2.metric("Successful", successful_emails) 293 | col3.metric("Failed", failed_emails) 294 | 295 | # Display emails with better formatting 296 | for email in generated_emails: 297 | with st.expander(f"📧 {email['target_company']}", expanded=False): 298 | if email["status"] == "success": 299 | st.markdown("### Email Content") 300 | st.markdown(f""" 301 |
241 | # Remove the sidebar wrapper and organize content in the main area
242 | st.subheader("Email Generator Settings")
243 |
244 | st.session_state.cold_email_info["company_name"] = st.text_input(
245 | "Your Company Name",
246 | st.session_state.cold_email_info["company_name"]
247 | )
248 | st.session_state.cold_email_info["company_url"] = st.text_input(
249 | "Company Website URL",
250 | st.session_state.cold_email_info["company_url"]
251 | )
252 | st.session_state.cold_email_info["product"] = st.text_area(
253 | "Product/Service Description",
254 | st.session_state.cold_email_info["product"],
255 | height=100
256 | )
257 | st.session_state.cold_email_info["contact_info"] = st.text_input(
258 | "Contact Information",
259 | st.session_state.cold_email_info["contact_info"]
260 | )
261 |
262 | # Simplified text area inputs with pre-populated examples
263 | st.subheader("Example Emails (Up to 3)")
264 | for i in range(3):
265 | st.session_state.cold_email_info["cold_email_examples"][i] = st.text_area(
266 | f"Example Email {i+1}",
267 | value=st.session_state.cold_email_info["cold_email_examples"][i],
268 | height=200,
269 | key=f"example_email_{i}"
270 | )
271 |
272 | # Generate button
273 | if st.button("Generate Cold Email", type="primary"):
274 | if not st.session_state.cold_email_info["company_name"]:
275 | st.error("Please enter your company name")
276 | else:
277 | generated_emails = generate_emails(st.session_state.cold_email_info)
278 |
279 | # Display generated emails in the main area with better formatting
280 | st.markdown("## Generated Cold Emails")
281 |
282 | # Create three columns for statistics
283 | col1, col2, col3 = st.columns(3)
284 |
285 | # Calculate statistics
286 | total_emails = len(generated_emails)
287 | successful_emails = sum(1 for email in generated_emails if email["status"] == "success")
288 | failed_emails = total_emails - successful_emails
289 |
290 | # Display statistics in metrics
291 | col1.metric("Total Emails", total_emails)
292 | col2.metric("Successful", successful_emails)
293 | col3.metric("Failed", failed_emails)
294 |
295 | # Display emails with better formatting
296 | for email in generated_emails:
297 | with st.expander(f"📧 {email['target_company']}", expanded=False):
298 | if email["status"] == "success":
299 | st.markdown("### Email Content")
300 | st.markdown(f"""
301 | <div>
302 | {email["email_content"]}
303 | </div>
304 | """, unsafe_allow_html=True) 305 | 306 | # Add copy button 307 | st.button( 308 | "📋 Copy to Clipboard", 309 | key=f"copy2_{email['target_company']}", 310 | on_click=lambda text=email["email_content"]: st.write(text) 311 | ) 312 | else: 313 | st.error(email["email_content"]) 314 | 315 | 316 | -------------------------------------------------------------------------------- /translate.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | from langchain_upstage import ChatUpstage as Chat 5 | from langchain_upstage import UpstageDocumentParseLoader 6 | import tempfile, os 7 | import hashlib 8 | import json 9 | import time 10 | import logging 11 | from typing import Dict, Optional, Tuple 12 | from langchain.schema import Document 13 | from pdf_util import is_ocr_pdf, PDFType 14 | # Configure logging 15 | logging.basicConfig(level=logging.INFO) 16 | logger = logging.getLogger(__name__) 17 | 18 | # Page configuration 19 | st.set_page_config( 20 | layout="wide", 21 | page_title="Document Translator Pro", 22 | page_icon="📚", 23 | initial_sidebar_state="collapsed" # Start with collapsed sidebar for cleaner look 24 | ) 25 | 26 | # Modern, clean CSS styling 27 | st.markdown(""" 28 | 105 | """, unsafe_allow_html=True) 106 | 107 | # Initialize translation model 108 | #translation_llm = Chat(model="translation-enko") 109 | translation_llm = Chat(model="solar-pro") 110 | 111 | def translate_to_korean(text: str) -> str: 112 | """ 113 | Translate text to Korean using the translation model with streaming output. 114 | """ 115 | max_retries = 3 116 | for attempt in range(max_retries): 117 | try: 118 | system_prompt = """You are a professional translator specializing in Korean translations. 119 | Follow these guidelines strictly: 120 | 1. Translate the text line by line, maintaining the exact structure 121 | 2. Preserve all HTML tags, formatting, and special characters exactly as they appear 122 | 3. Do not translate: 123 | - HTML tags and attributes 124 | - Acronyms (e.g., PDF, HTML, AI) 125 | - Foreign names and proper nouns 126 | - Technical terms when commonly used in English 127 | 4. Ensure the translation is natural and fluent in Korean while maintaining the original meaning and nuance 128 | 5. 
Do not skip or drop any content
129 |
130 | Translate the following text to Korean:"""
131 |
132 | messages = [
133 | {"role": "system", "content": system_prompt},
134 | {"role": "user", "content": text}
135 | ]
136 |
137 | # Change to streaming response
138 | response_stream = translation_llm.stream(messages)
139 | return response_stream
140 | except Exception as e:
141 | logger.error(f"Translation attempt {attempt + 1} failed: {str(e)}")
142 | if attempt == max_retries - 1:
143 | st.error(f"Translation failed after {max_retries} attempts: {str(e)}")
144 | raise  # re-raise so callers that iterate the stream can handle it; returning a string would break them
145 | time.sleep(1)
146 |
147 | class FileCache:
148 | def __init__(self):
149 | self.cache = self._load_cache()
150 | self._cleanup_old_entries()
151 |
152 | def _get_cache_path(self):
153 | return ".file_cache.json"
154 |
155 | def _cleanup_old_entries(self, max_age_days=7):
156 | """Remove cache entries older than specified days"""
157 | current_time = time.time()
158 | entries_to_remove = []
159 | for filename in self.cache:
160 | if 'timestamp' in self.cache[filename]:
161 | age = (current_time - self.cache[filename]['timestamp']) / (24 * 3600)
162 | if age > max_age_days:
163 | entries_to_remove.append(filename)
164 |
165 | for filename in entries_to_remove:
166 | del self.cache[filename]
167 | self._save_cache()
168 |
169 | def _load_cache(self):
170 | try:
171 | if os.path.exists(self._get_cache_path()):
172 | with open(self._get_cache_path(), 'r') as f:
173 | return json.load(f)
174 | except Exception as e:
175 | logger.error(f"Error loading cache: {str(e)}")
176 | return {}
177 |
178 | def _save_cache(self):
179 | try:
180 | with open(self._get_cache_path(), 'w') as f:
181 | json.dump(self.cache, f)
182 | except Exception as e:
183 | logger.error(f"Error saving cache: {str(e)}")
184 |
185 | def get_cached_docs(self, filename: str, content: bytes) -> Optional[list]:
186 | """Check if parsed documents are in cache"""
187 | try:
188 | file_hash = hashlib.sha256(content).hexdigest()
189 |
190 | if filename in self.cache:
191 | cached_data = self.cache[filename]
192 | if cached_data['hash'] == file_hash and 'docs' in cached_data:
193 | return [Document(page_content=doc['page_content'], metadata=doc['metadata'])
194 | for doc in cached_data['docs']]
195 | except Exception as e:
196 | logger.error(f"Error retrieving from cache: {str(e)}")
197 | return None
198 |
199 | def store_docs(self, filename: str, content: bytes, docs: list) -> None:
200 | """Store parsed documents in cache"""
201 | try:
202 | file_hash = hashlib.sha256(content).hexdigest()
203 | serializable_docs = [{'page_content': doc.page_content, 'metadata': doc.metadata}
204 | for doc in docs]
205 |
206 | if filename not in self.cache:
207 | self.cache[filename] = {}
208 |
209 | self.cache[filename].update({
210 | 'hash': file_hash,
211 | 'docs': serializable_docs,
212 | 'translations': {},
213 | 'timestamp': time.time()
214 | })
215 | self._save_cache()
216 | except Exception as e:
217 | logger.error(f"Error storing docs in cache: {str(e)}")
218 |
219 | def get_cached_translation(self, filename: str, page_content: str) -> Optional[str]:
220 | """Get cached translation for a specific page content"""
221 | try:
222 | if filename in self.cache:
223 | page_hash = hashlib.sha256(page_content.encode()).hexdigest()
224 | return self.cache[filename]['translations'].get(page_hash)
225 | except Exception as e:
226 | logger.error(f"Error retrieving translation from cache: {str(e)}")
227 | return None
228 |
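# --- Cache usage sketch (illustrative; not executed by the app) ---
# Parsed pages are stored under the file name plus a SHA-256 of the file
# bytes; translations are keyed by a hash of each page. A hypothetical
# round trip ("sample.pdf" is an assumed local file):
#
#     cache = FileCache()
#     content = open("sample.pdf", "rb").read()
#     docs = cache.get_cached_docs("sample.pdf", content)
#     if docs is None:
#         docs = process_large_document(content)
#         cache.store_docs("sample.pdf", content, docs)
#     cache.store_translation("sample.pdf", docs[0].page_content, "...")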
229 | def store_translation(self, filename: str, page_content: str, translation: str) -> None:
230 | """Store translation for a specific page content"""
231 | try:
232 | if filename not in self.cache:
233 | self.cache[filename] = {'translations': {}}
234 |
235 | page_hash = hashlib.sha256(page_content.encode()).hexdigest()
236 | if 'translations' not in self.cache[filename]:
237 | self.cache[filename]['translations'] = {}
238 |
239 | self.cache[filename]['translations'][page_hash] = translation
240 | self._save_cache()
241 | except Exception as e:
242 | logger.error(f"Error storing translation in cache: {str(e)}")
243 |
244 | def process_large_document(file_content: bytes) -> list:
245 | """Process large documents safely"""
246 | docs = []
247 | with tempfile.NamedTemporaryFile(suffix='.pdf', delete=False) as temp_file:
248 | try:
249 | temp_file.write(file_content)
250 | temp_file.flush()
251 | ocr = "auto" if is_ocr_pdf(temp_file.name) == PDFType.DIGITAL else "force"
252 | st.info(f"OCR: {ocr}")
253 | loader = UpstageDocumentParseLoader(temp_file.name, split="page", coordinates=False, ocr=ocr)
254 | docs = loader.load()
255 |
256 | return docs
257 | finally:
258 | try:
259 | os.unlink(temp_file.name) # Clean up temp file
260 | except Exception as e:
261 | logger.error(f"Error cleaning up temporary file: {str(e)}")
262 |
263 | def validate_file(uploaded_file) -> Tuple[bool, str]:
264 | """Validate uploaded file"""
265 | if not uploaded_file.name.lower().endswith('.pdf'):
266 | return False, "Please upload a PDF file."
267 |
268 | try:
269 | file_content = uploaded_file.getvalue()
270 | if not file_content.startswith(b'%PDF'):
271 | return False, "Invalid PDF file format."
272 | return True, ""
273 | except Exception as e:
274 | return False, f"File validation error: {str(e)}"
275 |
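# --- Pipeline sketch (illustrative; mirrors the upload flow further below) ---
# Outside Streamlit, the two helpers chain together like this; the file name
# and the SimpleNamespace stand-in for an uploaded file are assumptions:
#
#     from types import SimpleNamespace
#     content = open("sample.pdf", "rb").read()
#     fake_upload = SimpleNamespace(name="sample.pdf", getvalue=lambda: content)
#     ok, err = validate_file(fake_upload)
#     if ok:
#         pages = process_large_document(content)  # OCR mode is auto-detected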
276 | def initialize_session_state():
277 | """Initialize all session state variables"""
278 | initial_state = {
279 | 'file_cache': FileCache(),
280 | 'docs': None,
281 | 'translation_complete': False,
282 | 'translated_text': None,
283 | 'current_file': None,
284 | 'translation_progress': 0
285 | }
286 |
287 | for key, value in initial_state.items():
288 | if key not in st.session_state:
289 | st.session_state[key] = value
290 |
291 | # Initialize session state
292 | initialize_session_state()
293 |
294 | # Main App Header
295 | st.markdown("""
296 | <div>
297 | <h1>Document Translator Pro (Beta)</h1>
298 | <p>Professional-grade document translation powered by AI</p>
299 | <p>⚠️ This is a temporary service and may be discontinued without prior notice.</p>
300 | </div>
301 | """, unsafe_allow_html=True) 302 | 303 | # Main content area 304 | tab1, tab2 = st.tabs(["📤 Upload & Translate", "ℹ️ About"]) 305 | 306 | with tab1: 307 | uploaded_file = st.file_uploader( 308 | "Drop your PDF here or click to upload", 309 | type=["pdf"], 310 | help="Maximum file size: 10MB", 311 | accept_multiple_files=False 312 | ) 313 | 314 | if uploaded_file is not None: 315 | is_valid, error_message = validate_file(uploaded_file) 316 | if not is_valid: 317 | st.error(f"📤 {error_message}") 318 | else: 319 | st.success(f"📤 File '{uploaded_file.name}' uploaded successfully!") 320 | 321 | if uploaded_file and not uploaded_file.name in st.session_state: 322 | with st.status("Processing document...", expanded=True) as status: 323 | try: 324 | file_content = uploaded_file.getvalue() 325 | 326 | # Check document cache first 327 | cached_docs = st.session_state.file_cache.get_cached_docs(uploaded_file.name, file_content) 328 | 329 | if cached_docs is not None: 330 | status.update(label="📑 Loading from cache...") 331 | st.session_state.docs = cached_docs 332 | st.session_state[uploaded_file.name] = True 333 | st.session_state.translation_complete = False 334 | st.success("✅ Document loaded from cache") 335 | else: 336 | status.update(label="📑 Analyzing document structure...") 337 | docs = process_large_document(file_content) 338 | 339 | # Store parsed docs in cache 340 | st.session_state.file_cache.store_docs(uploaded_file.name, file_content, docs) 341 | 342 | st.session_state.docs = docs 343 | st.session_state[uploaded_file.name] = True 344 | st.session_state.translation_complete = False 345 | 346 | st.success("✅ Document ready for translation") 347 | except Exception as e: 348 | st.error(f"Error processing document: {str(e)}") 349 | logger.error(f"Document processing error: {str(e)}") 350 | 351 | # Translation Display 352 | translations = [] 353 | if st.session_state.docs: 354 | if not st.session_state.translation_complete: 355 | st.markdown('
', unsafe_allow_html=True) 356 | progress_bar = st.progress(0) 357 | st.markdown('
', unsafe_allow_html=True) 358 | 359 | for i, doc in enumerate(st.session_state.docs): 360 | st.markdown(f'
', unsafe_allow_html=True) 361 | st.markdown(f'
Page {i+1}
', unsafe_allow_html=True) 362 | 363 | col1, col2 = st.columns(2) 364 | with col1: 365 | st.markdown('
', unsafe_allow_html=True) 366 | st.markdown("**Original Text**") 367 | st.markdown(doc.page_content, unsafe_allow_html=True) 368 | st.markdown('
', unsafe_allow_html=True) 369 | 370 | with col2: 371 | st.markdown('
', unsafe_allow_html=True) 372 | st.markdown("**Translated Text**") 373 | 374 | empty_container = st.empty() 375 | with st.spinner(""): 376 | try: 377 | # Check cache first 378 | cached_translation = st.session_state.file_cache.get_cached_translation( 379 | uploaded_file.name, 380 | doc.page_content 381 | ) 382 | 383 | if cached_translation is not None: 384 | translated_content = cached_translation 385 | st.markdown(translated_content, unsafe_allow_html=True) 386 | else: 387 | # Stream translation if not in cache 388 | response_stream = translate_to_korean(doc.page_content) 389 | translated_content = "" 390 | 391 | # Use write_stream for streaming output 392 | for chunk in response_stream: 393 | if chunk and chunk.content: 394 | translated_content += chunk.content 395 | empty_container.markdown(translated_content, unsafe_allow_html=True) 396 | 397 | # Store complete translation in cache 398 | st.session_state.file_cache.store_translation( 399 | uploaded_file.name, 400 | doc.page_content, 401 | translated_content 402 | ) 403 | 404 | translations.append(translated_content) 405 | except Exception as e: 406 | error_message = f"Translation error on page {i+1}: {str(e)}" 407 | st.error(error_message) 408 | logger.error(error_message) 409 | translations.append(f"Error: {str(e)}") 410 | 411 | st.markdown('
', unsafe_allow_html=True) 412 | 413 | st.markdown('
', unsafe_allow_html=True)
414 |
415 | if not st.session_state.translation_complete:
416 | progress_bar.progress((i + 1) / len(st.session_state.docs))
417 |
418 | st.session_state.translation_complete = True
419 |
420 | # Create HTML content for download
421 | if translations:
422 | html_content = """
423 | <!DOCTYPE html>
424 | <html>
425 | <head><meta charset="utf-8"></head>
431 | <body>
432 |
433 | """
434 |
435 | for i, translation in enumerate(translations):
436 | html_content += f"""
437 | <section>
438 | <h2>Page {i+1}</h2>
439 | <div>{translation}</div>
440 | </section>
441 | """ 442 | 443 | html_content += "" 444 | 445 | # Add download button 446 | download_filename = os.path.splitext(uploaded_file.name)[0] + '.translated.html' 447 | st.download_button( 448 | label="📥 Download Translation as HTML", 449 | data=html_content, 450 | file_name=download_filename, 451 | mime="text/html" 452 | ) 453 | 454 | with tab2: 455 | st.markdown(""" 456 | ### About Document Translator Pro (Beta) 457 | 458 | Our professional document translation service uses state-of-the-art AI technology to provide: 459 | 460 | - ⚡ Fast and accurate translations 461 | - 📄 Support for PDF documents 462 | - 🔒 Secure document handling 463 | - 💯 High-quality output 464 | 465 | > ⚠️ **Please Note**: This is a temporary service and may be discontinued without prior notice. 466 | 467 | ### How to Use 468 | 469 | 1. Upload your PDF document using the upload tab 470 | 2. Wait for the automatic translation process 471 | 3. Review the side-by-side translation 472 | 4. Download the translated document 473 | 474 | ### Limitations 475 | 476 | - Maximum file size: 10MB 477 | - Maximum pages per document: 50 478 | - Supported file format: PDF only 479 | """) 480 | 481 | # Footer 482 | st.markdown(""" 483 | 487 | """, unsafe_allow_html=True) 488 | -------------------------------------------------------------------------------- /gemini.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | import re 4 | from typing import Dict, List, Any 5 | from google import genai 6 | from google.genai.types import Tool, GenerateContentConfig, GoogleSearch 7 | import streamlit as st 8 | import json 9 | from langchain_upstage import ChatUpstage 10 | from langchain_core.output_parsers import StrOutputParser 11 | from langchain_core.prompts import ChatPromptTemplate, MessagesPlaceholder 12 | from langchain_core.messages import AIMessage, HumanMessage 13 | import urllib.parse 14 | from tinydb import TinyDB, Query 15 | from datetime import datetime, timedelta 16 | import hashlib 17 | import time 18 | 19 | 20 | def format_output(): 21 | """Create color formatting functions for console output""" 22 | colors = { 23 | "blue": "\033[34m", 24 | "green": "\033[32m", 25 | "yellow": "\033[33m", 26 | "red": "\033[31m", 27 | "reset": "\033[0m", 28 | } 29 | 30 | return { 31 | "info": lambda text: f"{colors['blue']}{text}{colors['reset']}", 32 | "success": lambda text: f"{colors['green']}{text}{colors['reset']}", 33 | "highlight": lambda text: f"{colors['yellow']}{text}{colors['reset']}", 34 | "error": lambda text: f"{colors['red']}{text}{colors['reset']}", 35 | } 36 | 37 | 38 | def format_response_to_markdown(text: str) -> str: 39 | """Format the AI response into markdown""" 40 | # Ensure consistent newlines 41 | processed_text = text.replace("\r\n", "\n") 42 | 43 | # Process main sections (simplified regex) 44 | processed_text = re.sub( 45 | r"^(\w[^:]+):(\s*)", r"## \1\2", processed_text, flags=re.MULTILINE 46 | ) 47 | 48 | # Process sub-sections (simplified regex without look-behind) 49 | lines = processed_text.split("\n") 50 | processed_lines = [] 51 | for line in lines: 52 | if re.match(r"^(\w[^:]+):(?!\d)", line): 53 | line = "### " + line 54 | processed_lines.append(line) 55 | processed_text = "\n".join(processed_lines) 56 | 57 | # Process bullet points 58 | processed_text = re.sub(r"^[•●○]\s*", "* ", processed_text, flags=re.MULTILINE) 59 | 60 | # Split into paragraphs and process 61 | paragraphs = [p for p in processed_text.split("\n\n") if p] 62 | 
formatted_paragraphs = [] 63 | for p in paragraphs: 64 | if any(p.startswith(prefix) for prefix in ["#", "*", "-"]): 65 | formatted_paragraphs.append(p) 66 | else: 67 | formatted_paragraphs.append(f"{p}\n") 68 | 69 | return "\n\n".join(formatted_paragraphs) 70 | 71 | 72 | def get_cache_db(): 73 | """Initialize TinyDB database for caching with error handling""" 74 | try: 75 | return TinyDB('search_cache.json') 76 | except json.JSONDecodeError: 77 | # If cache is corrupted, delete it and create new 78 | try: 79 | os.remove('search_cache.json') 80 | except OSError: 81 | pass 82 | return TinyDB('search_cache.json') 83 | 84 | 85 | def safe_cache_operation(func): 86 | """Decorator to safely handle cache operations""" 87 | def wrapper(*args, **kwargs): 88 | try: 89 | return func(*args, **kwargs) 90 | except (json.JSONDecodeError, OSError): 91 | # If any cache operation fails, delete cache and retry once 92 | try: 93 | os.remove('search_cache.json') 94 | except OSError: 95 | pass 96 | # Return None to indicate cache miss 97 | return None 98 | return wrapper 99 | 100 | 101 | @safe_cache_operation 102 | def get_cached_result(db, Entry, cache_key): 103 | """Safely get cached result""" 104 | try: 105 | return db.get(Entry.cache_key == cache_key) 106 | except: 107 | return None 108 | 109 | 110 | def generate_cache_key(query: str) -> str: 111 | """Generate a consistent cache key for a query""" 112 | return hashlib.md5(query.encode()).hexdigest() 113 | 114 | 115 | def is_cache_valid(timestamp: str, hours: int = 1) -> bool: 116 | """Check if cached data is still valid""" 117 | cached_time = datetime.fromisoformat(timestamp) 118 | return datetime.now() - cached_time < timedelta(hours=hours) 119 | 120 | 121 | def search(keyword: str, prompt: str="") -> Dict[str, Any]: 122 | """Perform a search using Google's Generative AI with caching""" 123 | # Initialize cache 124 | db = get_cache_db() 125 | cache_key = generate_cache_key(keyword) 126 | Entry = Query() 127 | 128 | # Check cache first with error handling 129 | cached_result = get_cached_result(db, Entry, cache_key) 130 | if cached_result and is_cache_valid(cached_result['timestamp']): 131 | return cached_result['data'] 132 | 133 | # Original search logic 134 | 135 | # Initialize the Google Generative AI client 136 | client = genai.Client(api_key=os.getenv("GOOGLE_API_KEY")) 137 | model_id = "gemini-2.0-flash" 138 | 139 | # Configure Google Search tool 140 | google_search_tool = Tool(google_search=GoogleSearch()) 141 | 142 | # Generate content 143 | response = client.models.generate_content( 144 | model=model_id, 145 | contents=prompt + keyword, 146 | config=GenerateContentConfig( 147 | tools=[google_search_tool], 148 | ), 149 | ) 150 | 151 | # Extract text from the first candidate's content 152 | if response.candidates and response.candidates[0].content.parts: 153 | text = response.candidates[0].content.parts[0].text 154 | else: 155 | raise Exception("No content found in response") 156 | 157 | # Extract sources from grounding metadata 158 | sources = [] 159 | if hasattr(response.candidates[0], "grounding_metadata"): 160 | metadata = response.candidates[0].grounding_metadata 161 | 162 | 163 | # Create a mapping of chunk indices to web sources 164 | web_sources = {} 165 | if metadata.grounding_chunks: 166 | for i, chunk in enumerate(metadata.grounding_chunks): 167 | if chunk.web: 168 | web_sources[i] = { 169 | "title": chunk.web.title, 170 | "url": chunk.web.uri, 171 | "contexts": [], 172 | } 173 | 174 | # st.json(metadata) 175 | 176 | # Add text segments to 
corresponding sources
177 | if metadata.grounding_supports:
178 | for support in metadata.grounding_supports:
179 | for chunk_idx in support.grounding_chunk_indices:
180 | if chunk_idx in web_sources:
181 | web_sources[chunk_idx]["contexts"].append(
182 | {
183 | "text": support.segment.text,
184 | "confidence": support.confidence_scores[0],
185 | }
186 | )
187 |
188 | # Convert to list and filter out sources with no contexts
189 | sources = [source for source in web_sources.values() if source["contexts"]]
190 |
191 | formatted_text = format_response_to_markdown(text)
192 |
193 | # Build the result first so it is always defined, even if caching fails below
194 | result_data = {
195 | "summary": formatted_text,
196 | "sources": sources,
197 | "query": keyword,
198 | "web_search_query": getattr(getattr(response.candidates[0], "grounding_metadata", None), "web_search_queries", []),
199 | }
200 |
201 | # Store result in cache with error handling
202 | try:
203 | db.upsert({
204 | 'cache_key': cache_key,
205 | 'data': result_data,
206 | 'timestamp': datetime.now().isoformat()
207 | }, Entry.cache_key == cache_key)
208 | except Exception:
209 | # If the cache write fails, continue without caching
210 | pass
211 |
212 | return result_data
213 |
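# --- Cache round-trip sketch (illustrative; uses the helpers defined above) ---
#
#     db = get_cache_db()
#     Entry = Query()
#     key = generate_cache_key("nvidia news")          # hypothetical query
#     db.upsert({'cache_key': key,
#                'data': {"summary": "..."},
#                'timestamp': datetime.now().isoformat()},
#               Entry.cache_key == key)
#     hit = get_cached_result(db, Entry, key)
#     fresh = hit is not None and is_cache_valid(hit['timestamp'])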
214 | def generate_search_query(keyword: str, results: str) -> List[str]:
215 | """Generate search queries with caching"""
216 | # Initialize cache
217 | db = get_cache_db()
218 | cache_key = generate_cache_key(f"suggestions_{keyword}")
219 | Entry = Query()
220 |
221 | # Check cache first
222 | cached_result = get_cached_result(db, Entry, cache_key)
223 | if cached_result and is_cache_valid(cached_result['timestamp']):
224 | return cached_result['data']
225 |
226 | # Original suggestion generation logic
227 | try:
228 | llm = ChatUpstage(model="solar-mini")
229 |
230 | prompt = ChatPromptTemplate.from_messages(
231 | [
232 | (
233 | "system",
234 | """You are a helpful assistant that generates search queries based on a user's query and the results of a previous search.
235 | Always return a JSON object with a "suggestions" array containing 3-5 search queries.
236 | IMPORTANT: You must detect the language of the input query and respond STRICTLY in the SAME LANGUAGE.
237 | - If the input query is in Korean, you MUST generate Korean search queries only
238 | - If the input query is in English, you MUST generate English search queries only
239 |
240 | Example 1 (Korean query -> Korean response):
241 | Input: "엔비디아 최신 뉴스"
242 | Output: {{"suggestions": ["엔비디아 주가 현황", "엔비디아 신제품 출시 2024", "엔비디아 AI 개발 현황", "엔비디아 최신 파트너십"]}}
243 |
244 | Example 2 (English query -> English response):
245 | Input: "latest nvidia news"
246 | Output: {{"suggestions": ["nvidia stock price today", "nvidia new product announcements 2024", "nvidia AI developments", "nvidia partnerships latest"]}}
247 |
248 | Remember: The response language MUST MATCH the input query language.""",
249 | ),
250 | ("user", "User query: {keyword}\nPrevious search results: {results}"),
251 | (
252 | "user",
253 | "Generate a JSON array of 3-5 new search queries that would help find more relevant information.",
254 | ),
255 | ]
256 | )
257 | chain = prompt | llm | StrOutputParser()
258 | response = chain.invoke({"keyword": keyword, "results": results})
259 |
260 | # Parse the JSON response; fall back to the original keyword on failure
261 | try:
262 | response_json = json.loads(response)
263 | queries = response_json.get("suggestions", [])
264 | if not isinstance(queries, list):
265 | queries = [keyword]
266 | except json.JSONDecodeError:
267 | return [keyword]
268 |
269 | # Store suggestions in cache before returning
270 | cache_data = {
271 | 'cache_key': cache_key,
272 | 'data': queries,
273 | 'timestamp': datetime.now().isoformat()
274 | }
275 | db.upsert(cache_data, Entry.cache_key == cache_key)
276 |
277 | return queries
278 | except Exception:
279 | return [keyword]
280 |
281 |
282 | def generate_quick_answer(keyword: str, results: str) -> str:
283 | """Generate a one-line quick answer with caching"""
284 | # Initialize cache
285 | db = get_cache_db()
286 | cache_key = generate_cache_key(f"quick_answer_{keyword}")
287 | Entry = Query()
288 |
289 | # Check cache first
290 | cached_result = get_cached_result(db, Entry, cache_key)
291 | if cached_result and is_cache_valid(cached_result['timestamp']):
292 | return cached_result['data']
293 |
294 | try:
295 | llm = ChatUpstage(model="solar-mini")
296 |
297 | prompt = ChatPromptTemplate.from_messages([
298 | (
299 | "system",
300 | """You are a helpful assistant that generates concise, one-line answers based on search results.
301 | Always return a JSON object with a "quick_answer" string containing a direct, factual response.
302 | IMPORTANT: You must detect the language of the input query and respond STRICTLY in the SAME LANGUAGE.
303 | - If the input query is in Korean, respond in Korean
304 | - If the input query is in English, respond in English
305 |
306 | The answer should be:
307 | 1. No more than 20 words
308 | 2. Direct and informative
309 | 3. Based on the most recent/relevant information from results
310 | 4. In the same language as the query
311 |
312 | Example 1 (Korean query -> Korean response):
313 | Input: "User query: 현재 비트코인 가격은?\nSearch results: 비트코인이 최근 강세를 보이며 현재 67,000달러 선에서 거래되고 있습니다. 이는 작년 대비 150% 상승한 수치이며, 전문가들은 연말까지 추가 상승 가능성을 전망하고 있습니다. 특히 최근 비트코인 ETF 승인 이후 기관 투자자들의 관심이 높아지면서 가격 상승세가 지속되고 있습니다."
315 | Output: {{"quick_answer": "비트코인은 현재 67,000달러 선에서 거래되고 있습니다."}} 316 | 317 | Example 2 (English query -> English response): 318 | Input: "User query: What is Bitcoin's price?\nSearch results: Bitcoin continues its bullish trend, currently trading at around $67,000. This represents a 150% increase from last year, with experts predicting further gains by year-end. The recent approval of Bitcoin ETFs has particularly attracted institutional investors, contributing to the sustained price momentum." 319 | Output: {{"quick_answer": "Bitcoin is currently trading at around $67,000."}}""", 320 | ), 321 | ("user", "User query: {keyword}\nSearch results: {results}"), 322 | ("user", "Generate a one-line quick answer based on the search results."), 323 | ]) 324 | 325 | chain = prompt | llm | StrOutputParser() 326 | response = chain.invoke({"keyword": keyword, "results": results}) 327 | 328 | try: 329 | response_json = json.loads(response) 330 | quick_answer = response_json.get("quick_answer", "") 331 | 332 | # Store answer in cache 333 | cache_data = { 334 | 'cache_key': cache_key, 335 | 'data': quick_answer, 336 | 'timestamp': datetime.now().isoformat() 337 | } 338 | db.upsert(cache_data, Entry.cache_key == cache_key) 339 | 340 | return quick_answer 341 | except json.JSONDecodeError: 342 | return "" 343 | 344 | except Exception as e: 345 | print(f"Quick answer generation error: {e}") 346 | return "" 347 | 348 | def show_sources(result:Dict[str, Any]) -> None: 349 | # Sources with improved design 350 | if result.get("sources"): 351 | sources = [s for s in result["sources"] if s.get("title") and s.get("url")] 352 | if sources: 353 | st.markdown("### Sources") 354 | for idx, source in enumerate(sources, 1): 355 | content = " ".join([context["text"] for context in source["contexts"]])[:200] + "..." 356 | st.markdown( 357 | f""" 358 |
359 | <div>
360 | <strong>{idx}</strong>
361 | <a href="{source['url']}" target="_blank">
362 | {source['title']}
363 | </a>
364 | <br/>
365 | <small>
366 | {content}
367 | </small>
368 | </div>
369 | """, 370 | unsafe_allow_html=True 371 | ) 372 | 373 | 374 | def sources_to_citations(result:Dict[str, Any]) -> None: 375 | if result.get("sources"): 376 | sources = [s for s in result["sources"] if s.get("title") and s.get("url")] 377 | if sources: 378 | citations = [] 379 | for idx, source in enumerate(sources, 1): 380 | content = " ".join([context["text"] for context in source["contexts"]]) 381 | citations.append(f"{idx}. {source['title']}: {content}\n\n") 382 | 383 | return "\n\n".join(citations) 384 | 385 | def get_full_sources(search_query: str, result: Dict[str, Any]) -> None: 386 | """ 387 | Query for full list of sources and display them with improved design. 388 | 389 | This function: 390 | 1. Performs a reference search to get the full sources data 391 | 2. Displays the resulting JSON for debugging/visualization 392 | 3. Filters and displays the sources (if available) in a formatted manner 393 | """ 394 | # Generate the reference query that asks for full, unmodified content details. 395 | ref_query = ( 396 | "For a given query and provided search results, analyze and return a JSON object containing the full list of sources.\n" 397 | "The output should be in the following format:\n" 398 | "{\n" 399 | ' "sources": [\n' 400 | " {\n" 401 | ' "url": "source URL",\n' 402 | ' "title": "source title",\n' 403 | ' "content": "full original content without modifications or summaries"\n' 404 | " }\n" 405 | " ]\n" 406 | "}\n\n" 407 | "Important: Return the content exactly as provided in the source, without summarization or modification.\n\n" 408 | "Query: " + search_query 409 | ) 410 | # Perform the reference search using the global search function. 411 | ref_result = search(ref_query) 412 | st.json(ref_result) 413 | 414 | # Check if sources are available in the main result. 415 | if result.get("sources"): 416 | # Filter out any sources that don't have both a title and URL. 417 | sources = [s for s in result["sources"] if s.get("title") and s.get("url")] 418 | if sources: 419 | st.markdown("### Sources") 420 | # Enumerate over the valid sources and display each one. 421 | for idx, source in enumerate(sources, 1): 422 | content = " ".join([context["text"] for context in source["contexts"]])[:200] + "..." 423 | st.markdown( 424 | f""" 425 |
426 | <div>
427 | <strong>{idx}</strong>
428 | <a href="{source['url']}" target="_blank">
429 | {source['title']}
430 | </a>
431 | <br/>
432 | <small>
433 | {content}
434 | </small>
435 | </div>
436 | """, 437 | unsafe_allow_html=True 438 | ) 439 | 440 | 441 | def perform_search_and_display(search_query: str, is_suggestion: bool = False) -> None: 442 | """ 443 | Perform search and display results with enhanced source list design 444 | """ 445 | 446 | # Add share button 447 | share_url = f"?q={urllib.parse.quote(search_query)}" 448 | st.markdown( 449 | f""" 450 | 455 | """, 456 | unsafe_allow_html=True 457 | ) 458 | 459 | # CSS with improved source list styling 460 | st.markdown(""" 461 | 535 | """, unsafe_allow_html=True) 536 | 537 | 538 | web_search_query_spot = st.empty() 539 | summary_spot = st.empty() 540 | result_spot = st.empty() 541 | suggested_queries_spot = st.empty() 542 | 543 | # Main search 544 | with st.spinner("Searching..."): 545 | result = search(search_query) 546 | 547 | # Search queries (only if there are queries) 548 | if result.get("web_search_query"): 549 | with web_search_query_spot.expander("🔍 Search queries used", expanded=False): 550 | st.markdown(""" 551 | 562 | """, unsafe_allow_html=True) 563 | 564 | for query in result["web_search_query"]: 565 | st.markdown(f'
{query}
', unsafe_allow_html=True) 566 | 567 | if result["summary"]: 568 | result_spot.markdown(result["summary"]) 569 | 570 | 571 | show_sources(result) 572 | 573 | citations = sources_to_citations(result) 574 | citation_added_text = fill_citations(result["summary"], citations) 575 | result_spot.markdown(citation_added_text) 576 | 577 | # Quick answer (if available) 578 | quick_answer = generate_quick_answer(search_query, result["summary"]) 579 | if quick_answer: 580 | summary_spot.markdown( 581 | f'
{quick_answer}
', 582 | unsafe_allow_html=True 583 | ) 584 | 585 | 586 | # Related searches (only if there are suggestions) 587 | suggested_queries = generate_search_query(search_query, result["summary"]) 588 | if suggested_queries and len(suggested_queries) > 0: 589 | cols = suggested_queries_spot.columns(min(len(suggested_queries[:3]), 3)) 590 | for col, query in zip(cols, suggested_queries[:3]): 591 | col.markdown( 592 | f'{query}', 593 | unsafe_allow_html=True 594 | ) 595 | 596 | 597 | def fill_citations(text: str, citations: list) -> str: 598 | """Add citation numbers to text based on provided citations list. 599 | 600 | Args: 601 | text: The original text to add citations to 602 | citations: List of citation objects with text content to match 603 | 604 | Returns: 605 | Text with citation numbers added in [n] format 606 | """ 607 | llm = ChatUpstage(model="solar-pro") 608 | prompt = ChatPromptTemplate.from_messages([ 609 | ( 610 | "system", 611 | """You are a citation assistant. Your task is to add citation numbers to text by matching content with provided citations. 612 | 613 | Rules: 614 | 1. Do not modify the original text 615 | 2. Only add citation numbers in [n] format where appropriate 616 | 3. Add citations where text closely matches citation content 617 | 4. Multiple citations can be added to the same statement if relevant [1,2] 618 | 5. Citations should be added at the end of relevant sentences or claims 619 | 620 | Example: 621 | Text: "The sky is blue due to Rayleigh scattering. This effect causes shorter wavelengths to scatter more." 622 | Citations: 623 | 1. "Rayleigh scattering explains the blue color of the sky" 624 | 2. "Short wavelength blue light is scattered more by the atmosphere" 625 | 626 | Output: "The sky is blue due to Rayleigh scattering [1]. This effect causes shorter wavelengths to scatter more [2]." 627 | """, 628 | ), 629 | ("user", "Text: {text}\nCitations: {citations}\nAdd appropriate citation numbers to the text while preserving the original content exactly."), 630 | ]) 631 | 632 | chain = prompt | llm | StrOutputParser() 633 | return chain.invoke({"text": text, "citations": citations}) 634 | 635 | def main(): 636 | """Main function to run the Streamlit app""" 637 | st.set_page_config(page_title="Search Up", layout="wide") 638 | 639 | # Add title and subtitle 640 | st.markdown(""" 641 |

<h1>SearchUp</h1>
642 | <p>
643 | powered by Google, Gemini, and Solar
644 | </p>
645 | """, unsafe_allow_html=True) 646 | 647 | # Custom CSS for the UI, including improved share button styling 648 | st.markdown(""" 649 | 713 | """, unsafe_allow_html=True) 714 | 715 | # Search bar layout - input field and search button 716 | search_col1, search_col2 = st.columns([3, 1]) 717 | with search_col1: 718 | search_input = st.text_input( 719 | "", 720 | st.query_params.get("q", ""), 721 | placeholder="Search anything...", 722 | key="search_input" 723 | ) 724 | # Synchronize the session state with URL parameter "q" 725 | if st.session_state.get("search_input"): 726 | if st.session_state["search_input"] != st.query_params.get("q", ""): 727 | st.query_params["q"] = st.session_state["search_input"] 728 | st.rerun() 729 | 730 | with search_col2: 731 | st.markdown("
", unsafe_allow_html=True) 732 | if st.button("Search"): 733 | st.query_params["q"] = st.session_state["search_input"] 734 | st.rerun() 735 | 736 | # Only perform search if the URL contains a non-empty 'q' parameter 737 | if "q" in st.query_params: 738 | search_query = st.query_params["q"] 739 | if not search_query.strip(): 740 | st.warning("Please enter a search keyword to begin.") 741 | else: 742 | perform_search_and_display(search_query) 743 | 744 | 745 | if __name__ == "__main__": 746 | main() -------------------------------------------------------------------------------- /llama.py: -------------------------------------------------------------------------------- 1 | # from https://docs.streamlit.io/develop/tutorials/llms/build-conversational-apps 2 | 3 | import streamlit as st 4 | 5 | from pydantic import BaseModel, Field 6 | 7 | from langchain_groq import ChatGroq as Chat 8 | from langchain_community.tools import DuckDuckGoSearchResults 9 | 10 | 11 | from langchain_core.output_parsers import StrOutputParser 12 | from langchain_core.prompts import ( 13 | ChatPromptTemplate, 14 | MessagesPlaceholder, 15 | PromptTemplate, 16 | ) 17 | from langchain_core.output_parsers import JsonOutputParser 18 | from langchain_core.messages import AIMessage, HumanMessage 19 | 20 | MAX_TOKENS = 40000 21 | MAX_SEAERCH_RESULTS = 5 22 | 23 | MODEL_NAME = "llama-3.1-70b-versatile" 24 | 25 | llm = Chat(model=MODEL_NAME) 26 | 27 | ddg_search = DuckDuckGoSearchResults() 28 | 29 | 30 | st.set_page_config(page_title="Llama Reasoning", page_icon="🤔") 31 | st.title("Llama 3.1 70B Reasoning") 32 | 33 | reasoning_examples = """ 34 | --- 35 | Human: Given Instruction, please generate {what}. Please use the following exampels. 36 | If reasoning and/or reasoning chains are provided, please use them as context to generate the {what}. 37 | Please only generate the {what} and do not include others. 38 | 39 | See the examples below: 40 | ---- 41 | Example 1: 42 | 43 | Instruction: If a die is rolled three times, what is the probability of getting a sum of 11? None 44 | 45 | Reasoning: 1. Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times. 46 | 2. Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216. 47 | 3. Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 18 such combinations. 48 | 4. Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 18 / 216 = 1/12. 49 | 5. Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/12. 50 | 51 | Reasoning Chains: [{'step': 1, 'thought': 'Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.'}, {'step': 2, 'thought': 'Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.'}, {'step': 3, 'thought': 'Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. 
32 | reasoning_examples = """
33 | ---
34 | Human: Given Instruction, please generate {what}. Please use the following examples.
35 | If reasoning and/or reasoning chains are provided, please use them as context to generate the {what}.
36 | Please only generate the {what} and do not include others.
37 |
38 | See the examples below:
39 | ----
40 | Example 1:
41 |
42 | Instruction: If a die is rolled three times, what is the probability of getting a sum of 11? None
43 |
44 | Reasoning: 1. Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.
45 | 2. Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.
46 | 3. Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 27 such combinations.
47 | 4. Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 27 / 216 = 1/8.
48 | 5. Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/8.
49 |
50 | Reasoning Chains: [{'step': 1, 'thought': 'Understand the problem: We need to find the probability of getting a sum of 11 when rolling a die three times.'}, {'step': 2, 'thought': 'Calculate total possible outcomes: A die has 6 faces, so for each roll, there are 6 possibilities. For three rolls, the total possible outcomes are 6^3 = 216.'}, {'step': 3, 'thought': 'Identify favorable outcomes: List all combinations of rolls that result in a sum of 11. There are 27 such combinations.'}, {'step': 4, 'thought': 'Calculate probability: Divide the number of favorable outcomes by the total possible outcomes: 27 / 216 = 1/8.'}, {'step': 5, 'thought': 'Conclusion: The probability of getting a sum of 11 when rolling a die three times is 1/8.'}]
51 | ----
52 | Example 2:
53 |
54 | Instruction: The interactions will be about the science behind culinary techniques. The setting is a cooking class where three friends are discussing various aspects of cooking and sharing their knowledge.
55 | - USER/Jane: A curious learner who wants to understand the science behind cooking
56 | - Mike: An experienced home cook with a passion for experimenting in the kitchen
57 | - Sarah: A food scientist who loves explaining the chemistry behind different cooking processes
58 |
59 | None
60 |
61 | Reasoning: 1. Start with the given setting: a cooking class with three friends discussing the science behind culinary techniques.
62 | 2. Introduce the topic of resting meat after cooking, with Mike asking Jane if she's ever wondered about it.
63 | 3. Have Sarah explain the science behind resting meat, mentioning denatured proteins and juice redistribution.
64 | 4. Address the user's question about resting meat, with Sarah confirming that it allows juices to redistribute.
65 | 5. Move on to the topic of adding salt to water, with Mike mentioning its effect on boiling point.
66 | 6. Have Sarah explain the science behind salt's effect on boiling point, mentioning the higher temperature required for boiling.
67 | 7. Address the user's question about cooking speed, with Sarah explaining that it's slightly faster due to the hotter water.
68 | 8. Introduce the topic of acids in cooking, with Mike mentioning their use in brightening dishes.
69 | 9. Have Sarah explain the science behind acids' effects on flavor and tenderizing meats.
70 | 10. Address the user's question about baking, with Mike mentioning the science involved in baking and Sarah explaining the role of gluten and leavening agents.
71 | 11. Conclude the conversation with the characters expressing their fascination with the science behind cooking and their excitement to continue learning and experimenting.
73 | 74 | Reasoning Chains: [{'step': 1, 'thought': 'Start with the given setting: a cooking class with three friends discussing the science behind culinary techniques.'}, {'step': 2, 'thought': "Introduce the topic of resting meat after cooking, with Mike asking Jane if she's ever wondered about it."}, {'step': 3, 'thought': 'Have Sarah explain the science behind resting meat, mentioning denatured proteins and juice redistribution.'}, {'step': 4, 'thought': "Address the user's question about resting meat, with Sarah confirming that it allows juices to redistribute."}, {'step': 5, 'thought': 'Move on to the topic of adding salt to water, with Mike mentioning its effect on boiling point.'}, {'step': 6, 'thought': "Have Sarah explain the science behind salt's effect on boiling point, mentioning the higher temperature required for boiling."}, {'step': 7, 'thought': "Address the user's question about cooking speed, with Sarah explaining that it's slightly faster due to the hotter water."}, {'step': 8, 'thought': 'Introduce the topic of acids in cooking, with Mike mentioning their use in brightening dishes.'}, {'step': 9, 'thought': "Have Sarah explain the science behind acids' effects on flavor and tenderizing meats."}, {'step': 10, 'thought': "Address the user's question about baking, with Mike mentioning the science involved in baking and Sarah explaining the role of gluten and leavening agents."}, {'step': 11, 'thought': 'Conclude the conversation with the characters expressing their fascination with the science behind cooking and their excitement to continue learning and experimenting.'}] 75 | ---- 76 | Example 3: 77 | 78 | Instruction: Create a C++ program that connects to a Cassandra database and performs basic CRUD operations (Create, Read, Update, Delete) on a table containing employee information (ID, name, department, salary). Use prepared statements for queries and ensure proper error handling. 79 | None 80 | 81 | Reasoning: 1. Understand the instruction: Create a C++ program that connects to a Cassandra database and performs basic CRUD operations on a table containing employee information using prepared statements and proper error handling. 82 | 2. Identify required libraries: Include the necessary libraries for connecting to Cassandra and handling errors. 83 | 3. Establish a connection to the Cassandra cluster: Create a cluster and session object, set the contact points, and connect to the cluster. 84 | 4. Create a keyspace and table for employee information: Write the CQL queries for creating the keyspace and table, execute them, and handle any errors. 85 | 5. Prepare CRUD statements: Write the CQL queries for insert, select, update, and delete operations, and prepare them using the Cassandra session. 86 | 6. Perform basic CRUD operations using prepared statements: 87 | a. Insert an employee record: Generate a UUID for the employee ID, bind the prepared insert statement with the employee data, and execute the query. 88 | b. Read the inserted employee record: Bind the prepared select statement with the employee ID, execute the query, and display the employee information. 89 | c. Update the employee's salary: Bind the prepared update statement with the new salary and employee ID, and execute the query. 90 | d. Delete the employee record: Bind the prepared delete statement with the employee ID, and execute the query. 91 | 7. Handle errors: Check the error codes for each query execution and print error messages if necessary. 92 | 8. 
Clean up and close the connection: Free the prepared statements, UUID generator, and close the session and cluster objects.
93 | 9. Compile and run the program: Provide instructions for installing the DataStax C/C++ driver, compiling the program, and running it with a local Cassandra cluster.
94 |
95 | Reasoning Chains: [{'step': 1, 'thought': 'Understand the instruction: Create a C++ program that connects to a Cassandra database and performs basic CRUD operations on a table containing employee information using prepared statements and proper error handling.'}, {'step': 2, 'thought': 'Identify required libraries: Include the necessary libraries for connecting to Cassandra and handling errors.'}, {'step': 3, 'thought': 'Establish a connection to the Cassandra cluster: Create a cluster and session object, set the contact points, and connect to the cluster.'}, {'step': 4, 'thought': 'Create a keyspace and table for employee information: Write the CQL queries for creating the keyspace and table, execute them, and handle any errors.'}, {'step': 5, 'thought': 'Prepare CRUD statements: Write the CQL queries for insert, select, update, and delete operations, and prepare them using the Cassandra session.'}, {'step': 6, 'thought': "Perform basic CRUD operations using prepared statements:\n a. Insert an employee record: Generate a UUID for the employee ID, bind the prepared insert statement with the employee data, and execute the query.\n b. Read the inserted employee record: Bind the prepared select statement with the employee ID, execute the query, and display the employee information.\n c. Update the employee's salary: Bind the prepared update statement with the new salary and employee ID, and execute the query.\n d. Delete the employee record: Bind the prepared delete statement with the employee ID, and execute the query."}, {'step': 7, 'thought': 'Handle errors: Check the error codes for each query execution and print error messages if necessary.'}, {'step': 8, 'thought': 'Clean up and close the connection: Free the prepared statements, UUID generator, and close the session and cluster objects.'}, {'step': 9, 'thought': 'Compile and run the program: Provide instructions for installing the DataStax C/C++ driver, compiling the program, and running it with a local Cassandra cluster.'}]
96 | ----
97 | Example 4:
98 |
99 | Instruction: BEGININPUT
100 | BEGINCONTEXT
101 | date: August 15, 2022
102 | author: Sarah Johnson
103 | subject: SharePoint Server 2019 Features and Benefits
104 | to: John Smith
105 | ENDCONTEXT
106 | Hi John,
107 |
108 | I hope you're doing well. I wanted to provide you with some information on Microsoft SharePoint Server 2019 and its features and benefits. As you know, our company is considering upgrading our current system, and I believe that SharePoint Server 2019 could be a great fit for us.
109 |
110 | SharePoint Server 2019 comes with several new features that can help improve productivity and collaboration within our organization. Some of these features include:
111 |
112 | 1. Modern Sites: SharePoint Server 2019 introduces modern team sites and communication sites, which offer an improved user experience compared to the classic sites we currently use. These modern sites are responsive by design, making them easily accessible from any device.
113 |
114 | 2. Improved File Sharing: The new version includes OneDrive for Business integration, allowing users to share files more easily both internally and externally. This feature also supports larger file sizes (up to 15 GB) and provides real-time co-authoring capabilities in Office Online.
115 |
116 | 3. Hybrid Scenarios: SharePoint Server 2019 allows for better integration between on-premises and cloud environments. This means we can take advantage of cloud-based services like Power BI, Flow, and Planner while still maintaining control over our sensitive data on-premises.
117 |
118 | 4. Enhanced Search Experience: The search functionality has been significantly improved in this version, providing personalized results based on the user's role and previous searches. This makes it easier for employees to find relevant content quickly.
119 |
120 | 5. Security and Compliance: SharePoint Server 2019 offers advanced security features such as Data Loss Prevention (DLP), eDiscovery, and Multi-Factor Authentication (MFA). These features help protect our sensitive data and ensure compliance with industry regulations.
121 |
122 | 6. Accessibility Improvements: The new version includes several accessibility enhancements, such as improved keyboard navigation and support for screen readers. This makes SharePoint more inclusive for all users.
123 |
124 | In addition to these features, SharePoint Server 2019 also offers better performance and scalability compared to previous versions. It supports up to 250,000 site collections per content database, which is a significant increase from the 100,000 limit in SharePoint Server 2016.
125 |
126 | Overall, I believe that upgrading to SharePoint Server 2019 would greatly benefit our organization by providing us with a modern, user-friendly platform that promotes collaboration and productivity. If you have any questions or concerns about this information, please don't hesitate to reach out.
127 |
128 | Best regards,
129 |
130 | Sarah Johnson
131 | ENDINPUT
132 |
133 | BEGININSTRUCTION
134 | - List three new features of Microsoft SharePoint Server 2019 mentioned in the email.
135 | - What is the maximum file size supported for sharing in SharePoint Server 2019?
136 | - How many site collections per content database does SharePoint Server 2019 support?
137 | Please provide references.
138 | ENDINSTRUCTION None
139 |
140 | Reasoning: 1. Read the instruction and identify the required information: three new features, maximum file size supported, and site collections per content database.
141 | 2. Scan the email for the mentioned features.
142 | 3. Identify the first feature: Modern Sites.
143 | 4. Identify the second feature: Improved File Sharing with OneDrive for Business integration.
144 | 5. Identify the third feature: Enhanced Search Experience.
145 | 6. Locate the information about the maximum file size supported: up to 15 GB.
146 | 7. Locate the information about site collections per content database: up to 250,000.
147 | 8. Compile the answer with the required information and reference(s).
148 |
149 | Reasoning Chains: [{'step': 1, 'thought': 'Read the instruction and identify the required information: three new features, maximum file size supported, and site collections per content database.'}, {'step': 2, 'thought': 'Scan the email for the mentioned features.'}, {'step': 3, 'thought': 'Identify the first feature: Modern Sites.'}, {'step': 4, 'thought': 'Identify the second feature: Improved File Sharing with OneDrive for Business integration.'}, {'step': 5, 'thought': 'Identify the third feature: Enhanced Search Experience.'}, {'step': 6, 'thought': 'Locate the information about the maximum file size supported: up to 15 GB.'}, {'step': 7, 'thought': 'Locate the information about site collections per content database: up to 250,000.'}, {'step': 8, 'thought': 'Compile the answer with the required information and reference(s).'}]
150 | ----
151 | Example 5:
152 |
153 | Instruction: Rewrite the below text (which is a fragment of a longer script for the video). Fix grammar and other errors and make it sound more professional.
154 |
155 | Networks and security, or how to connect the digital world. How can we exchange data quickly and efficiently. There are different types of networks.
156 | A local area network (LAN) is for smaller, more local networks - home, business, school, etc.
157 | A wide area network (WAN) covers larger areas such as cities and even allows computers in different countries to connect.
158 | An intranet is a private enterprise network designed to support an organization's employees in communicating, collaborating, and performing their roles, usually to the exclusion of outside access.
159 | We also have the well-known internet, which is the largest network connecting computers around the world.
160 |
161 | But of course someone could try to intercept that communication. And we want to securely exchange data. We want to find vulnerabilities and defend against hackers.
162 | This is why we have something like cryptography. It is a field of study that focuses on the encoding and decoding of info using special algorithms. So only the sender and receiver can know the content of the message.
163 | There are many ways to protect your data, programs, devices and networks. Besides cryptography, we also use antivirus programs that can check if anything looks suspicious. A very popular trend is also the use of artificial intelligence to defend against hackers. But hackers can also use AI to attack. So it's a double-edged sword. None
164 |
165 | Reasoning: 1. Read and understand the original text.
166 | 2. Identify grammar and other errors.
167 | 3. Determine the main points and concepts.
168 | 4. Rewrite the text to fix errors and improve professionalism.
169 | 5. Ensure all important information is included and clear.
170 | 6. Check for factual accuracy and potential confusion.
171 | 7. Finalize the revised text and present it as the output.
172 |
173 | Reasoning Chains: [{'step': 1, 'thought': 'Read and understand the original text.'}, {'step': 2, 'thought': 'Identify grammar and other errors.'}, {'step': 3, 'thought': 'Determine the main points and concepts.'}, {'step': 4, 'thought': 'Rewrite the text to fix errors and improve professionalism.'}, {'step': 5, 'thought': 'Ensure all important information is included and clear.'}, {'step': 6, 'thought': 'Check for factual accuracy and potential confusion.'}, {'step': 7, 'thought': 'Finalize the revised text and present it as the output.'}]
174 | ----
175 | Example 6:
176 |
177 | Instruction: How many even perfect square factors does $2^4 \cdot 7^9$ have? None
178 |
179 | Reasoning: 1. I need to find the number of factors of $2^4 \cdot 7^9$ that are both even and perfect squares.
180 | 2. A factor of $2^4 \cdot 7^9$ must be of the form $2^a \cdot 7^b$, where $0 \leq a \leq 4$ and $0 \leq b \leq 9$.
181 | 3. To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd.
182 | 4. To be a perfect square, a factor must have both $a$ and $b$ even, since an odd power of a prime is not a perfect square.
183 | 5. I need to count how many ways I can choose $a$ to be even and positive, and $b$ to be even and nonnegative.
184 | 6. For $a$, I have two choices: $2$ or $4$.
185 | 7. For $b$, I have five choices: $0, 2, 4, 6, 8$.
186 | 8. So the total number of choices is $2 \cdot 5 = 10$.
187 |
188 | Reasoning Chains: [{'step': 1, 'thought': 'I need to find the number of factors of $2^4 \\cdot 7^9$ that are both even and perfect squares.'}, {'step': 2, 'thought': 'A factor of $2^4 \\cdot 7^9$ must be of the form $2^a \\cdot 7^b$, where $0 \\leq a \\leq 4$ and $0 \\leq b \\leq 9$.'}, {'step': 3, 'thought': 'To be even, a factor must have $a > 0$, since $2^0 = 1$ is odd.'}, {'step': 4, 'thought': 'To be a perfect square, a factor must have both $a$ and $b$ even, since an odd power of a prime is not a perfect square.'}, {'step': 5, 'thought': 'I need to count how many ways I can choose $a$ to be even and positive, and $b$ to be even and nonnegative.'}, {'step': 6, 'thought': 'For $a$, I have two choices: $2$ or $4$.'}, {'step': 7, 'thought': 'For $b$, I have five choices: $0, 2, 4, 6, 8$.'}, {'step': 8, 'thought': 'So the total number of choices is $2 \\cdot 5 = 10$.'}]
189 | ----
190 | Example 7:
191 |
192 | Instruction: If the city council maintains spending at the same level as this year's, it can be expected to levy a sales tax of 2 percent next year. Thus, if the council levies a higher tax, it will be because the council is increasing its expenditures. Which one of the following exhibits a pattern of reasoning most closely similar to that of the argument above?
193 | A: If the companies in the state do not increase their workers' wages this year, the prices they charge for their goods can be expected to be much the same as they were last year. Thus, if the companies do increase prices, it will be because they have increased wages.
194 | B: If newspaper publishers wish to publish good papers, they should employ good journalists. Thus, if they employ poor journalists, it will not be surprising if their circulation falls as a result.
195 | C: If shops wish to reduce shoplifting, they should employ more store detectives. Thus, if shops do not, they will suffer reduced profits because of their losses from stolen goods.
196 | D: If house-building costs are not now rising, builders cannot be expected to increase the prices of houses. Thus, if they decrease the prices of houses, it will be because that action will enable them to sell a greater number of houses. Choose A, B, C or D as your solution.
197 |
198 | Reasoning: 1. Identify the pattern of reasoning in the given argument: If X remains constant, Y can be expected to be Z. If Y is not Z, it's because X has changed.
199 | 2. Analyze each option to find a similar pattern of reasoning:
200 | A: If X (workers' wages) remains constant, Y (prices) can be expected to be Z (the same as last year). If Y is not Z, it's because X has changed (increased wages).
201 | B: This option discusses a cause and effect relationship (employing good journalists leads to good papers) but does not follow the same pattern of reasoning.
202 | C: This option also discusses a cause and effect relationship (employing more store detectives reduces shoplifting) but does not follow the same pattern of reasoning.
203 | D: This option has a different pattern of reasoning: If X (house-building costs) remains constant, Y (house prices) cannot be expected to increase. If Y decreases, it's because of a different reason (selling more houses).
204 | 3. Option A follows the same pattern of reasoning as the given argument.
205 | 4. Choose A as the solution.
206 |
207 | Reasoning Chains: [{'step': 1, 'thought': "Identify the pattern of reasoning in the given argument: If X remains constant, Y can be expected to be Z. If Y is not Z, it's because X has changed."}, {'step': 2, 'thought': "Analyze each option to find a similar pattern of reasoning:\n A: If X (workers' wages) remains constant, Y (prices) can be expected to be Z (the same as last year). If Y is not Z, it's because X has changed (increased wages).\n B: This option discusses a cause and effect relationship (employing good journalists leads to good papers) but does not follow the same pattern of reasoning.\n C: This option also discusses a cause and effect relationship (employing more store detectives reduces shoplifting) but does not follow the same pattern of reasoning.\n D: This option has a different pattern of reasoning: If X (house-building costs) remains constant, Y (house prices) cannot be expected to increase. If Y decreases, it's because of a different reason (selling more houses)."}, {'step': 3, 'thought': 'Option A follows the same pattern of reasoning as the given argument.'}, {'step': 4, 'thought': 'Choose A as the solution.'}]
208 | ----
209 | Example 9:
210 |
211 | Instruction: If z = arctan(e^{1 + (1 + x)^2}), what's the derivative $\frac{\partial z}{\partial x}$ at x = 0?
212 | Relevant Theorem: The Derivative Chain Rule is a fundamental rule in calculus used to find the derivative of a composite function. A composite function is a function that is formed by combining two or more functions, where the output of one function becomes the input of another function.
213 |
214 | The Chain Rule states that if you have a composite function, say h(x) = f(g(x)), then the derivative of h(x) with respect to x, denoted as h'(x) or dh/dx, can be found by taking the derivative of the outer function f with respect to the inner function g(x), and then multiplying it by the derivative of the inner function g(x) with respect to x.
215 |
216 | Mathematically, the Chain Rule can be expressed as:
217 |
218 | h'(x) = f'(g(x)) * g'(x)
219 |
220 | or
221 |
222 | dh/dx = (df/dg) * (dg/dx)
223 |
224 | The Chain Rule is particularly useful when dealing with complex functions that involve multiple layers of functions, as it allows us to break down the problem into simpler parts and find the derivative step by step. None
225 |
226 | Reasoning: 1. Identify the given function: z = arctan(e^{1 + (1 + x)^2})
227 | 2. Recognize that this is a composite function, with an outer function (arctan) and an inner function (e^{1 + (1 + x)^2}).
228 | 3. Apply the Chain Rule to find the derivative of z with respect to x: dz/dx = (d(arctan)/d(e^{1 + (1 + x)^2})) * (d(e^{1 + (1 + x)^2})/dx)
229 | 4. Find the derivative of the outer function (arctan) with respect to the inner function (e^{1 + (1 + x)^2}): d(arctan)/d(e^{1 + (1 + x)^2}) = 1/(1 + (e^{1 + (1 + x)^2})^2)
230 | 5. Find the derivative of the inner function (e^{1 + (1 + x)^2}) with respect to x: d(e^{1 + (1 + x)^2})/dx = e^{1 + (1 + x)^2} * 2(1 + x)
231 | 6. Combine the derivatives from steps 4 and 5 using the Chain Rule: dz/dx = (1/(1 + (e^{1 + (1 + x)^2})^2)) * (e^{1 + (1 + x)^2} * 2(1 + x))
232 | 7. Simplify the expression: dz/dx = (2(1 + x) * e^{1 + (1 + x)^2}) / (1 + e^{2(1 + (1 + x)^2)})
233 | 8. Evaluate the derivative at x = 0: dz/dx = (2(1 + 0) * e^{1 + (1 + 0)^2}) / (1 + e^{2(1 + (1 + 0)^2)})
234 | 9. Simplify and calculate the numerical value: dz/dx = (2 * e^2) / (1 + e^4) ≈ 0.2658
235 | 10. The derivative of z with respect to x at x = 0 is approximately 0.2658.
236 |
237 | Reasoning Chains: [{'step': 1, 'thought': 'Identify the given function: z = arctan(e^{1 + (1 + x)^2})'}, {'step': 2, 'thought': 'Recognize that this is a composite function, with an outer function (arctan) and an inner function (e^{1 + (1 + x)^2}).'}, {'step': 3, 'thought': 'Apply the Chain Rule to find the derivative of z with respect to x: dz/dx = (d(arctan)/d(e^{1 + (1 + x)^2})) * (d(e^{1 + (1 + x)^2})/dx)'}, {'step': 4, 'thought': 'Find the derivative of the outer function (arctan) with respect to the inner function (e^{1 + (1 + x)^2}): d(arctan)/d(e^{1 + (1 + x)^2}) = 1/(1 + (e^{1 + (1 + x)^2})^2)'}, {'step': 5, 'thought': 'Find the derivative of the inner function (e^{1 + (1 + x)^2}) with respect to x: d(e^{1 + (1 + x)^2})/dx = e^{1 + (1 + x)^2} * 2(1 + x)'}, {'step': 6, 'thought': 'Combine the derivatives from steps 4 and 5 using the Chain Rule: dz/dx = (1/(1 + (e^{1 + (1 + x)^2})^2)) * (e^{1 + (1 + x)^2} * 2(1 + x))'}, {'step': 7, 'thought': 'Simplify the expression: dz/dx = (2(1 + x) * e^{1 + (1 + x)^2}) / (1 + e^{2(1 + (1 + x)^2)})'}, {'step': 8, 'thought': 'Evaluate the derivative at x = 0: dz/dx = (2(1 + 0) * e^{1 + (1 + 0)^2}) / (1 + e^{2(1 + (1 + 0)^2)})'}, {'step': 9, 'thought': 'Simplify and calculate the numerical value: dz/dx = (2 * e^2) / (1 + e^4) ≈ 0.2658'}, {'step': 10, 'thought': 'The derivative of z with respect to x at x = 0 is approximately 0.2658.'}]
238 | ----
239 | Example 10:
240 |
241 | Instruction: What is the largest number $c$ such that $2x^2+5x+c=0$ has at least one real solution? Express your answer as a common fraction. None
242 |
243 | Reasoning: 1. I need to find the largest value of $c$ for which the quadratic equation $2x^2+5x+c=0$ has at least one real solution.
244 | 2. To do this, I'll consider the discriminant of the quadratic equation, which is $b^2-4ac$.
245 | 3. The equation has at least one real solution if and only if the discriminant is non-negative, so I want to maximize $c$ subject to the constraint that $b^2-4ac\geq 0$.
246 | 4. In this case, $a=2$, $b=5$, and $c$ is the unknown, so I have $5^2-4(2)c\geq 0$.
247 | 5. Simplifying, I get $25-8c\geq 0$.
248 | 6. Adding $8c$ to both sides, I get $25\geq 8c$.
249 | 7. Dividing both sides by $8$, I get $\frac{25}{8}\geq c$.
250 | 8. This means that $c$ can be any number less than or equal to $\frac{25}{8}$, but the largest possible value of $c$ is $\frac{25}{8}$ itself.
251 |
252 | Reasoning Chains: [{'step': 1, 'thought': 'I need to find the largest value of $c$ for which the quadratic equation $2x^2+5x+c=0$ has at least one real solution.'}, {'step': 2, 'thought': "To do this, I'll consider the discriminant of the quadratic equation, which is $b^2-4ac$."}, {'step': 3, 'thought': 'The equation has at least one real solution if and only if the discriminant is non-negative, so I want to maximize $c$ subject to the constraint that $b^2-4ac\\geq 0$.'}, {'step': 4, 'thought': 'In this case, $a=2$, $b=5$, and $c$ is the unknown, so I have $5^2-4(2)c\\geq 0$.'}, {'step': 5, 'thought': 'Simplifying, I get $25-8c\\geq 0$.'}, {'step': 6, 'thought': 'Adding $8c$ to both sides, I get $25\\geq 8c$.'}, {'step': 7, 'thought': 'Dividing both sides by $8$, I get $\\frac{25}{8}\\geq c$.'}, {'step': 8, 'thought': 'This means that $c$ can be any number less than or equal to $\\frac{25}{8}$, but the largest possible value of $c$ is $\\frac{25}{8}$ itself.'}]
253 | """
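# Quick numeric check for Example 9 above (a sketch only; this is not executed
# by the app):
#
#     import math
#     2 * math.exp(2) / (1 + math.exp(4))  # ≈ 0.2658
#
# i.e., dz/dx at x = 0 is 2e^2 / (1 + e^4) ≈ 0.2658.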
254 |
255 | reasoning_prompt = ChatPromptTemplate.from_messages(
256 |     [
257 |         (
258 |             "system",
259 |             """You are Solar, a smart search reasoning and answer engine by Upstage, loved by many people.
260 | 
261 | See the reasoning examples and the context provided for the instruction.
262 | Follow the instruction in the user query and provide the best answer for the query, using reasoning techniques and a step-by-step explanation.
263 | ---
264 | {reasoning_examples}
265 | """,
266 |         ),
267 |         MessagesPlaceholder("chat_history"),
268 |     ]
269 | )
270 |
271 | short_answer_prompt = ChatPromptTemplate.from_messages(
272 |     [
273 |         (
274 |             "system",
275 |             """You are Solar, a smart search engine by Upstage, loved by many people.
276 | 
277 | Write a one-word answer if you can reply with "yes", "no", or another direct answer.
278 | Otherwise, write a short answer of one or two sentences for the query from the given context.
279 | Try to understand the user's intention and provide a quick answer.
280 | If the answer is not in context, please say you don't know and ask to clarify the question.
281 | """,
282 |         ),
283 |         MessagesPlaceholder("chat_history"),
284 |         (
285 |             "human",
286 |             """Query: {user_query}
287 | ----
288 | Context: {context}""",
289 |         ),
290 |     ]
291 | )
292 |
293 | search_prompt = ChatPromptTemplate.from_messages(
294 |     [
295 |         (
296 |             "system",
297 |             """You are Solar, a smart search engine by Upstage, loved by many people.
298 | 
299 | See the original query, context, and quick answer, and then provide a detailed explanation.
300 | 
301 | Try to understand the user's intention and provide the relevant information in detail.
302 | If the answer is not in context, please say you don't know and ask to clarify the question.
303 | Do not repeat the short answer.
304 | 
305 | When you write the explanation, please cite the sources like [1], [2] if possible.
306 | Then, put the cited references, including citation number, title, and URL, at the end of the answer.
307 | Each reference should be on a new line in markdown format like this:
308 | 
309 | [1] Title - URL
310 | [2] Title - URL
311 | ...
312 | """,
313 |         ),
314 |         MessagesPlaceholder("chat_history"),
315 |         (
316 |             "human",
317 |             """Query: {user_query}
318 | ----
319 | Short answer: {short_answer}
320 | ----
321 | Context: {context}""",
322 |         ),
323 |     ]
324 | )
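# How the prompts above are consumed (a rough sketch; the actual wiring lives
# in the functions below, with `llm` initialized earlier in this file):
#
#     reasoning_chain = reasoning_prompt | llm | StrOutputParser()   # perform_task()
#     detail_chain = search_prompt | llm | StrOutputParser()         # get_search_desc()
#     short_chain = short_answer_prompt | llm | StrOutputParser()    # quick one-liner
#
# short_chain would be invoked with {"user_query": ..., "context": ...} plus the
# chat history; the pairing shown here is illustrative, not extra app logic.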
325 |
326 |
327 | query_context_expansion_prompt = """
328 | For a given query and context (if provided), expand it with related questions and search the web for answers.
329 | Try to understand the purpose of the query and expand it with up to three related questions
330 | to provide an answer to the original query.
331 | Note that it's for keyword-based search engines, so the questions should be short and concise.
332 | 
333 | Please write in Python LIST format like this:
334 | ["number of people in France?", "How many people in France?", "France population"]
335 | 
336 | ---
337 | Context: {context}
338 | ----
339 | History: {chat_history}
340 | ---
341 | Original query: {query}
342 | """
343 |
344 |
345 | # Expected shape of the expanded-query output: a JSON list of query strings.
346 | class List(BaseModel):
347 |     queries: list[str]
348 |
349 |
350 | def query_context_expansion(query, chat_history, context=None):
351 |     # Set up a JSON parser for the expanded-query list.
352 |     parser = JsonOutputParser(pydantic_object=List)
353 |
354 |     prompt = PromptTemplate(
355 |         template=query_context_expansion_prompt,
356 |         input_variables=["query", "chat_history", "context"],
357 |     )
358 |
359 |     chain = prompt | llm | parser
360 |     # Invoke the chain, retrying since the model may occasionally return malformed JSON.
361 |
362 |     for attempt in range(3):
363 |         try:
364 |             parsed_output = chain.invoke(
365 |                 {"query": query, "chat_history": chat_history, "context": context}
366 |             )
367 |             return parsed_output
368 |         except Exception as e:
369 |             st.warning(f"Attempt {attempt + 1} failed: {e}. Retrying...")
370 |
371 |     st.error("All attempts failed. Returning empty list.")
372 |     return []
373 |
374 |
375 | def perform_task(chat_history):
376 |     # Keep only the most recent messages that fit within MAX_TOKENS characters.
377 |     limited_history = []
378 |     total_length = 0
379 |     for message in reversed(chat_history):
380 |         message_length = len(message.content)
381 |         if total_length + message_length > MAX_TOKENS:
382 |             st.warning("Chat history is too long. Truncating to fit model input.")
383 |             break
384 |         limited_history.insert(0, message)
385 |         total_length += message_length
386 |
387 |
388 |     chain = reasoning_prompt | llm | StrOutputParser()
389 |
390 |     return chain.stream(
391 |         {
392 |             "chat_history": limited_history,
393 |             "reasoning_examples": reasoning_examples,
394 |         }
395 |     )
396 |
397 |
398 | def get_search_desc(user_query, short_answer, context, chat_history):
399 |     chain = search_prompt | llm | StrOutputParser()
400 |
401 |     return chain.stream(
402 |         {
403 |             "context": context,
404 |             "chat_history": chat_history,
405 |             "user_query": user_query,
406 |             "short_answer": short_answer,
407 |         }
408 |     )
409 |
410 |
411 | def search(query, chat_history, context=None):
412 |     with st.status("Extending query..."):
413 |         q_list = query_context_expansion(query, chat_history, context)
414 |         st.write(q_list)
415 |
416 |     if not q_list:
417 |         st.error("No related questions found. Returning an empty result.")
418 |         return ""
419 |
420 |     # Search each expanded query with DuckDuckGo and concatenate the results
421 |     # (each related question is issued as its own keyword search).
422 |     results = ""
423 |     for q in q_list:
424 |         with st.spinner(f"Searching for '{q}'..."):
425 |             results += ddg_search.invoke(q)
426 |
427 |     return results
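# Usage sketch for the search pipeline (hypothetical query; the chat loop below
# does the same thing):
#
#     q_list = query_context_expansion("population of France", chat_history=[])
#     # e.g. ["France population", "number of people in France", ...]
#     context = "".join(ddg_search.invoke(q) for q in q_list)
#
# search() wraps exactly this flow: expand the query, run each variant through
# DuckDuckGo, and return the concatenated text as context for the LLM.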
428 |
429 |
430 | if "messages" not in st.session_state:
431 |     st.session_state.messages = []
432 |
433 | for message in st.session_state.messages:
434 |     role = "AI" if isinstance(message, AIMessage) else "Human"
435 |     with st.chat_message(role):
436 |         st.markdown(message.content)
437 |
438 | q = "3.9 vs 3.11. Which one is bigger?"
439 |
440 | tasks = ["Reasoning (No conclusion)", "Reasoning Chains", "Final Answer"]
441 |
442 | if prompt := st.chat_input(q):
443 |
444 |     search_result = search(prompt, st.session_state.messages)
445 |
446 |     with st.status("Search Results:"):
447 |         st.write(search_result)
448 |
449 |     if search_result:
450 |         search_result = str(search_result)
451 |         st.session_state.messages.append(
452 |             HumanMessage(
453 |                 content=f"FYI search result context: {search_result} for the query, {prompt}"
454 |             )
455 |         )
456 |         st.session_state.messages.append(
457 |             AIMessage(
458 |                 content="Thanks for the information! I will keep it in mind. Give me the instruction."
459 |             )
460 |         )
461 |
462 |     for task in tasks:
463 |         instruction = f"""Please provide {task} for the given query, context, and chat history.
464 | Please only provide the {task}.
465 | ---
466 | User Query:
467 | {prompt}"""
468 |         st.session_state.messages.append(HumanMessage(content=instruction))
469 |         with st.chat_message("user"):
470 |             st.write(instruction)
471 |         with st.chat_message("assistant"):
472 |             response = st.write_stream(perform_task(st.session_state.messages))
473 |         st.session_state.messages.append(AIMessage(content=response))
474 |
--------------------------------------------------------------------------------