├── docs ├── __init__.py ├── components │ ├── phidata_agents.md │ ├── empire_agent.md │ ├── tools.md │ ├── cool_stuff.md │ ├── chatbots.md │ ├── rag.md │ └── vector_stores.md ├── license.md ├── getting-started │ ├── installation.md │ └── quickstart.md ├── index.md ├── contributing.md ├── user-guide │ ├── document-processing.md │ ├── llm-integration.md │ ├── visualization.md │ └── core-concepts.md ├── tutorials │ ├── chat-with-pdf.md │ └── empire-rag.md └── api-reference │ ├── llms.md │ └── vector-stores.md ├── tests ├── __init__.py ├── test_dependency.py ├── test_docling.py ├── test_phidata_agents.py ├── test_embeddings.py ├── test_llms.py ├── test_file_reader.py ├── test_visualizer.py ├── test_streamlit_chatbot.py └── test_vector_stores.py ├── cookbooks ├── __init__.py ├── tools │ ├── stt.py │ ├── crawler.py │ ├── docling_md.py │ └── generalized_read_file.py ├── phidata │ ├── web_agent.py │ └── finance_agent.py ├── playground │ └── compare-llms.py ├── chatbots │ ├── simple_chatbot.py │ └── chat_with_pdf.py ├── vector_stores │ └── qdrant.py ├── cool_stuff │ ├── visualize_data.py │ └── topic-to-podcast.py ├── RAG │ └── empire_rag.py ├── agent │ └── empire_agent.py └── prompt_templates │ ├── education_template.py │ ├── creative_template.py │ ├── financial_template.py │ ├── medical_template.py │ ├── blog_template.py │ ├── reasoning_template.py │ └── coding_template.py ├── empire_chain ├── prompt_templates │ ├── __init__.py │ └── templates.py ├── payments │ └── __init__.py ├── playground │ ├── __init__.py │ └── compare_llms.py ├── llms │ ├── __init__.py │ └── llms.py ├── embeddings │ ├── sentence_transformers_embeddings.py │ ├── openai_embeddings.py │ └── __init__.py ├── streamlit │ ├── __init__.py │ ├── base_chatbot.py │ └── pdf_chatbot.py ├── phidata │ ├── web_agent.py │ └── finance_agent.py ├── tools │ ├── docling.py │ ├── crawl4ai.py │ └── file_reader.py ├── __init__.py ├── stt │ └── stt.py ├── vector_stores │ └── __init__.py ├── agent │ └── agent.py └── cool_stuff │ ├── podcast.py │ └── visualizer.py ├── .DS_Store ├── test.py ├── Makefile ├── .gitignore ├── pyproject.toml ├── setup.py ├── LICENSE ├── .github └── workflows │ └── docs.yml ├── mkdocs.yml ├── CONTRIBUTING.md └── Readme.md /docs/__init__.py: -------------------------------------------------------------------------------- 1 | # empire chain -------------------------------------------------------------------------------- /tests/__init__.py: -------------------------------------------------------------------------------- 1 | # empire chain -------------------------------------------------------------------------------- /cookbooks/__init__.py: -------------------------------------------------------------------------------- 1 | # empire chain 2 | -------------------------------------------------------------------------------- /empire_chain/prompt_templates/__init__.py: -------------------------------------------------------------------------------- 1 | -------------------------------------------------------------------------------- /.DS_Store: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/manas95826/empire-chain/HEAD/.DS_Store -------------------------------------------------------------------------------- /empire_chain/payments/__init__.py: -------------------------------------------------------------------------------- 1 | from empire_chain.payments.stripe import StripePaymentUI 2 | 3 | __all__ = ["StripePaymentUI"] 
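4 | 
5 | # Usage sketch (mirrors test.py at the repo root; the parameter values here are illustrative):
6 | # payment_ui = StripePaymentUI(title="Empire Chain Subscription", amount=50, verbose=True)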
-------------------------------------------------------------------------------- /cookbooks/tools/stt.py: --------------------------------------------------------------------------------
1 | from empire_chain.stt.stt import GroqSTT
2 | 
3 | stt = GroqSTT()
4 | text = stt.transcribe("audio.mp3")
5 | print(text)
6 | 
-------------------------------------------------------------------------------- /empire_chain/playground/__init__.py: --------------------------------------------------------------------------------
1 | from .compare_llms import LLMPlayground, launch_playground
2 | 
3 | __all__ = ['LLMPlayground', 'launch_playground']
4 | 
-------------------------------------------------------------------------------- /empire_chain/llms/__init__.py: --------------------------------------------------------------------------------
1 | from .llms import OpenAILLM, AnthropicLLM, GroqLLM, GeminiLLM
2 | 
3 | __all__ = ['OpenAILLM', 'AnthropicLLM', 'GroqLLM', 'GeminiLLM']
-------------------------------------------------------------------------------- /test.py: --------------------------------------------------------------------------------
1 | from empire_chain.payments import StripePaymentUI
2 | 
3 | payment_ui = StripePaymentUI(
4 |     title="Empire Chain Subscription",
5 |     amount=50,
6 |     verbose=True
7 | )
-------------------------------------------------------------------------------- /Makefile: --------------------------------------------------------------------------------
1 | # empire chain
2 | .PHONY: clean build publish
3 | 
4 | clean:
5 | 	rm -rf dist empire_chain.egg-info
6 | 
7 | build: clean
8 | 	python3 -m build
9 | 
10 | publish: build
11 | 	twine upload dist/*
12 | 
13 | .DEFAULT_GOAL := publish
-------------------------------------------------------------------------------- /tests/test_dependency.py: --------------------------------------------------------------------------------
1 | # tests/test_dependency.py
2 | 
3 | def test_import_dependencies():
4 |     try:
5 |         import soundfile
6 |         import cffi
7 |         import pycparser
8 |         import empire_chain
9 |     except ImportError as e:
10 |         assert False, f"Import failed: {e}"
11 | 
-------------------------------------------------------------------------------- /.gitignore: --------------------------------------------------------------------------------
1 | # empire chain
2 | .env
3 | env
4 | build
5 | dist
6 | *.egg-info
7 | __pycache__
8 | sample.pdf
9 | input.pdf
10 | .pypirc
11 | token.pickle
12 | client_secret.json
13 | audio.mp3
14 | video.mp4
15 | *.txt
16 | *.json
17 | *.yaml
18 | *.yml
19 | *.csv
20 | *.tsv
21 | *.log
22 | *.ini
23 | *.cfg
24 | *.env
25 | venv
-------------------------------------------------------------------------------- /empire_chain/embeddings/sentence_transformers_embeddings.py: --------------------------------------------------------------------------------
1 | from sentence_transformers import SentenceTransformer
2 | 
3 | class HFEmbeddings:
4 |     def __init__(self, model: str):
5 |         self.model = SentenceTransformer(model)
6 | 
7 |     def embed(self, text: str) -> list[float]:
8 |         # encode() returns a numpy array; convert it to a plain list of floats
9 |         return self.model.encode(text).tolist()
-------------------------------------------------------------------------------- /tests/test_docling.py: --------------------------------------------------------------------------------
1 | # empire chain
2 | from empire_chain.tools.docling import Docling
3 | import unittest
4 | 
5 | class TestDocling(unittest.TestCase):
6 |     def test_docling(self):
7 |         docling = Docling()
8 |         converted_doc = docling.convert("Manas-Resume.pdf")
9 |         docling.save_markdown(converted_doc, "Manas-Resume.md")
10 | 
11 | if __name__ == "__main__":
12 |     unittest.main()
-------------------------------------------------------------------------------- /cookbooks/phidata/web_agent.py: --------------------------------------------------------------------------------
1 | """
2 | This is a simple example of how to use the WebAgent class to generate web data.
3 | Please run the following command to install the necessary dependencies and store keys in .env:
4 | !pip install empire-chain phidata duckduckgo-search
5 | """
6 | from empire_chain.phidata.web_agent import WebAgent
7 | 
8 | web_agent = WebAgent()
9 | 
10 | web_agent.generate("What is the price of Tesla?")
-------------------------------------------------------------------------------- /cookbooks/playground/compare-llms.py: --------------------------------------------------------------------------------
1 | """
2 | This is a cookbook for the LLMPlayground.
3 | 
4 | To run this cookbook, you need to have the empire-chain library installed.
5 | 
6 | You can install it using the following command:
7 | 
8 | ```bash
9 | pip install empire-chain streamlit
10 | ```
11 | """
12 | from empire_chain.playground.compare_llms import LLMPlayground
13 | 
14 | llm_playground = LLMPlayground()
15 | llm_playground.launch()
-------------------------------------------------------------------------------- /cookbooks/phidata/finance_agent.py: --------------------------------------------------------------------------------
1 | """
2 | This is a simple example of how to use the FinanceAgent class to generate financial data.
3 | Please run the following command to install the necessary dependencies and store keys in .env:
4 | !pip install empire-chain phidata yfinance
5 | """
6 | from empire_chain.phidata.finance_agent import FinanceAgent
7 | 
8 | finance_agent = FinanceAgent()
9 | 
10 | finance_agent.generate("What is the price of Tesla?")
-------------------------------------------------------------------------------- /cookbooks/tools/crawler.py: --------------------------------------------------------------------------------
1 | """
2 | This is a simple crawler that uses the Empire Chain library to crawl a website and save the content as markdown.
3 | Please run the following command to install the necessary dependencies and store keys in .env: 4 | !pip install empire-chain crawl4ai 5 | !playwright install 6 | """ 7 | from empire_chain.tools.crawl4ai import Crawler 8 | 9 | crawler = Crawler() 10 | result = crawler.crawl(url="https://www.geekroom.in", format="markdown") 11 | print(result) 12 | -------------------------------------------------------------------------------- /empire_chain/embeddings/openai_embeddings.py: -------------------------------------------------------------------------------- 1 | import openai 2 | from dotenv import load_dotenv 3 | import os 4 | 5 | load_dotenv() 6 | 7 | class OpenAIEmbeddings: 8 | def __init__(self, model: str): 9 | self.model = model 10 | self.client = openai.OpenAI(api_key=os.getenv("OPENAI_API_KEY")) 11 | 12 | def embed(self, text: str) -> list[float]: 13 | response = self.client.embeddings.create(input=text, model=self.model) 14 | return response.data[0].embedding -------------------------------------------------------------------------------- /cookbooks/chatbots/simple_chatbot.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a simple chatbot that uses the Empire Chain library to create a chatbot. 3 | Please run the following command to install the necessary dependencies and store keys in .env: 4 | !pip install empire-chain streamlit 5 | !streamlit run app.py 6 | """ 7 | from empire_chain.streamlit import Chatbot 8 | from empire_chain.llms.llms import GroqLLM 9 | 10 | chatbot = Chatbot(title="Empire Chatbot", llm=GroqLLM("llama3-8b-8192"), chat_history=False, verbose=False) 11 | chatbot.chat() 12 | -------------------------------------------------------------------------------- /cookbooks/tools/docling_md.py: -------------------------------------------------------------------------------- 1 | # """ 2 | # This is a simple tool that uses the Empire Chain library to convert a PDF file to markdown. 3 | # Please run the following command to install the necessary dependencies and store keys in .env: 4 | # !pip install empire-chain docling 5 | # """ 6 | # from empire_chain.tools.docling import Docling 7 | 8 | # docling = Docling() 9 | 10 | # converted_doc = docling.convert("https://arxiv.org/pdf/2408.09869") 11 | # docling.save_markdown(converted_doc, "arxiv_2408.09869.md") 12 | 13 | # Todo: This is in development! -------------------------------------------------------------------------------- /cookbooks/tools/generalized_read_file.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a simple file reader that uses the Empire Chain library to read a file. 3 | It supports 4 | 1. PDF files (.pdf) 5 | 2. Microsoft Word documents (.docx) 6 | 3. Text files (.txt) 7 | 4. JSON files (.json) 8 | 5. CSV files (.csv) 9 | 6. 
Google Drive files (.gdrive)
10 | """
11 | from empire_chain.tools.file_reader import DocumentReader
12 | reader = DocumentReader()
13 | 
14 | text = reader.read("https://drive.google.com/file/d/1t0Itw6oGO2iVusp=sharing")
15 | print(text)
16 | 
17 | text = reader.read("input.pdf")
18 | print(text)
-------------------------------------------------------------------------------- /empire_chain/embeddings/__init__.py: --------------------------------------------------------------------------------
1 | from empire_chain.embeddings.openai_embeddings import OpenAIEmbeddings
2 | 
3 | def HFEmbeddings(*args, **kwargs):
4 |     try:
5 |         from empire_chain.embeddings.sentence_transformers_embeddings import HFEmbeddings as _HFEmbeddings
6 |         return _HFEmbeddings(*args, **kwargs)
7 |     except ImportError:
8 |         raise ImportError(
9 |             "Could not import sentence-transformers. Please install it with: "
10 |             "pip install sentence-transformers"
11 |         )
12 | 
13 | __all__ = ["OpenAIEmbeddings", "HFEmbeddings"]
-------------------------------------------------------------------------------- /empire_chain/streamlit/__init__.py: --------------------------------------------------------------------------------
1 | def Chatbot(*args, **kwargs):
2 |     from empire_chain.streamlit.base_chatbot import Chatbot as _Chatbot
3 |     return _Chatbot(*args, **kwargs)
4 | 
5 | def VisionChatbot(*args, **kwargs):
6 |     from empire_chain.streamlit.vision_chatbot import VisionChatbot as _VisionChatbot
7 |     return _VisionChatbot(*args, **kwargs)
8 | 
9 | def PDFChatbot(*args, **kwargs):
10 |     from empire_chain.streamlit.pdf_chatbot import PDFChatbot as _PDFChatbot
11 |     return _PDFChatbot(*args, **kwargs)
12 | 
13 | __all__ = ["Chatbot", "VisionChatbot", "PDFChatbot"]
-------------------------------------------------------------------------------- /empire_chain/phidata/web_agent.py: --------------------------------------------------------------------------------
1 | from phi.agent import Agent
2 | from phi.model.openai import OpenAIChat
3 | from phi.tools.duckduckgo import DuckDuckGo
4 | 
5 | class WebAgent(Agent):
6 |     def __init__(self):
7 |         super().__init__(name="Web Agent")
8 |         self.tools = [DuckDuckGo()]
9 |         self.instructions = ["Always include sources"]
10 |         self.model = OpenAIChat(id="gpt-4o-mini")
11 |         self.show_tool_calls = True
12 |         self.markdown = True
13 | 
14 |     def generate(self, prompt: str) -> str:
15 |         return self.print_response(prompt, stream=True)
-------------------------------------------------------------------------------- /tests/test_phidata_agents.py: --------------------------------------------------------------------------------
1 | # empire chain
2 | from empire_chain.phidata.web_agent import WebAgent
3 | from empire_chain.phidata.finance_agent import FinanceAgent
4 | import unittest
5 | from dotenv import load_dotenv
6 | 
7 | load_dotenv()
8 | 
9 | class TestPhiAgents(unittest.TestCase):
10 |     def test_phi_web_agent(self):
11 |         agent = WebAgent()
12 |         agent.generate("What is the recent news about Tesla with sources?")
13 | 
14 |     def test_phi_finance_agent(self):
15 |         agent = FinanceAgent()
16 |         agent.generate("What is the price of Tesla?")
17 | 
18 | if __name__ == "__main__":
19 |     unittest.main()
-------------------------------------------------------------------------------- /pyproject.toml: --------------------------------------------------------------------------------
1 | # empire chain
2 | [build-system]
3 | requires = ["setuptools>=42", "wheel"]
4 | build-backend = "setuptools.build_meta"
5 | 
6 | [project]
7 | name = "empire-chain"
8 | version = "0.5.4"
9 | description = "An orchestration framework for all your AI needs"
10 | readme = "README.md"
11 | requires-python = ">=3.10"
12 | dependencies = [
13 |     "openai",
14 |     "anthropic",
15 |     "groq",
16 |     "python-dotenv",
17 |     "qdrant-client",
18 |     "requests",
19 |     "PyPDF2",
20 |     "python-docx",
21 |     "soundfile"
22 | ]
23 | 
24 | [tool.setuptools.packages.find]
25 | where = ["."]
26 | include = ["empire_chain*"]
27 | 
-------------------------------------------------------------------------------- /empire_chain/tools/docling.py: --------------------------------------------------------------------------------
1 | # Empire Chain Document Processing Tool
2 | # Updated: March 2025 - Adding comments for version tracking
3 | 
4 | # from docling.document_converter import DocumentConverter
5 | 
6 | # class Docling:
7 | #     def __init__(self):
8 | #         self.converter = DocumentConverter()
9 | 
10 | #     def convert(self, source):
11 | #         result = self.converter.convert(source)
12 | #         return result.document.export_to_markdown()
13 | 
14 | #     def save_markdown(self, markdown, filename):
15 | #         with open(filename, 'w') as file:
16 | #             file.write(markdown)
17 | 
18 | # Todo: This is in development!
-------------------------------------------------------------------------------- /cookbooks/vector_stores/qdrant.py: --------------------------------------------------------------------------------
1 | """
2 | This example demonstrates how to use the QdrantVectorStore to store and query embeddings.
3 | Requires an OpenAI API key in .env and the following install:
4 | pip install empire-chain
5 | """
6 | from empire_chain.vector_stores import QdrantVectorStore
7 | from empire_chain.embeddings import OpenAIEmbeddings
8 | from dotenv import load_dotenv
9 | 
10 | load_dotenv()
11 | 
12 | store = QdrantVectorStore()
13 | embeddings = OpenAIEmbeddings("text-embedding-3-small")
14 | 
15 | text = "your_text_here"
16 | embedding = embeddings.embed(text)
17 | store.add(text=text, embedding=embedding)
18 | 
19 | query_embedding = embeddings.embed("your_query_here")
20 | similar_texts = store.query(query_embedding)  # Returns top 10 similar texts by default
-------------------------------------------------------------------------------- /empire_chain/phidata/finance_agent.py: --------------------------------------------------------------------------------
1 | from phi.agent import Agent
2 | from phi.model.openai import OpenAIChat
3 | from phi.tools.yfinance import YFinanceTools
4 | 
5 | class FinanceAgent(Agent):
6 |     def __init__(self):
7 |         super().__init__(name="Finance Agent")
8 |         self.tools = [YFinanceTools(stock_price=True, analyst_recommendations=True, company_info=True, company_news=True)]
9 |         self.instructions = ["Always include sources"]
10 |         self.model = OpenAIChat(id="gpt-4o-mini")
11 |         self.show_tool_calls = True
12 |         self.markdown = True
13 | 
14 |     def generate(self, prompt: str) -> str:
15 |         return self.print_response(prompt, stream=True)
-------------------------------------------------------------------------------- /cookbooks/chatbots/chat_with_pdf.py: --------------------------------------------------------------------------------
1 | """
2 | This is a simple chatbot that uses the Empire Chain library to create a PDF chatbot.
3 | Please run the following command to install the necessary dependencies and store keys in .env:
4 | !pip install empire-chain streamlit sentence-transformers
5 | !streamlit run app.py
6 | """
7 | from empire_chain.embeddings import HFEmbeddings
8 | from empire_chain.vector_stores import QdrantVectorStore
9 | from empire_chain.streamlit import PDFChatbot
10 | from empire_chain.llms.llms import GroqLLM
11 | 
12 | pdf_chatbot = PDFChatbot(title="Empire PDF Chatbot", llm=GroqLLM("llama3-8b-8192"), vector_store=QdrantVectorStore(vector_size=384), embeddings=HFEmbeddings("all-MiniLM-L6-v2"))
13 | pdf_chatbot.chat()
14 | 
-------------------------------------------------------------------------------- /tests/test_embeddings.py: --------------------------------------------------------------------------------
1 | # empire chain
2 | from empire_chain.embeddings import OpenAIEmbeddings, HFEmbeddings
3 | import unittest
4 | from dotenv import load_dotenv
5 | 
6 | class TestEmbeddings(unittest.TestCase):
7 |     def setUp(self):
8 |         load_dotenv()
9 | 
10 |     def test_openai_embeddings(self):
11 |         embeddings = OpenAIEmbeddings("text-embedding-3-small")
12 |         embedding = embeddings.embed("What is the capital of France?")
13 |         print(embedding)
14 | 
15 |     def test_hf_embeddings(self):
16 |         embeddings = HFEmbeddings("all-MiniLM-L6-v2")
17 |         embedding = embeddings.embed("What is the capital of France?")
18 |         print(embedding)
19 | 
20 | if __name__ == "__main__":
21 |     unittest.main()
-------------------------------------------------------------------------------- /cookbooks/cool_stuff/visualize_data.py: --------------------------------------------------------------------------------
1 | """
2 | This is a simple example of how to use the DataAnalyzer and ChartFactory classes to visualize data.
3 | Please run the following command to install the necessary dependencies and store keys in .env:
4 | !pip install empire-chain matplotlib
5 | 
6 | _chart_types = {
7 |     'Line Chart': LineChart,
8 |     'Pie Chart': PieChart,
9 |     'Bar Graph': BarGraph,
10 |     'Scatter Plot': ScatterChart,
11 |     'Histogram': Histogram,
12 |     'Box Plot': BoxPlot
13 | }
14 | Please adhere to the naming convention for the chart type.
15 | """
16 | from empire_chain.cool_stuff.visualizer import DataAnalyzer, ChartFactory
17 | 
18 | data = """
19 | Empire chain got a fund raise of $100M from a new investor in 2024 and $50M from a new investor in 2023.
20 | """
21 | 
22 | analyzer = DataAnalyzer()
23 | analyzed_data = analyzer.analyze(data)
24 | 
25 | chart = ChartFactory.create_chart('Bar Graph', analyzed_data)
26 | chart.show()
27 | 
-------------------------------------------------------------------------------- /setup.py: --------------------------------------------------------------------------------
1 | # Empire Chain Package Setup
2 | # Updated: March 2025 - Adding comments for version tracking
3 | 
4 | from setuptools import setup, find_packages
5 | 
6 | setup(
7 |     name="empire-chain",
8 |     version="0.5.4",
9 |     description="An orchestration framework for all your AI needs",
10 |     long_description=open("README.md").read(),
11 |     long_description_content_type="text/markdown",
12 |     author="Manas Chopra",
13 |     author_email="manaschopra95826@gmail.com",
14 |     url="https://github.com/manas95826/empire-chain",
15 |     packages=find_packages(),
16 |     install_requires=[
17 |         "openai",
18 |         "anthropic",
19 |         "groq",
20 |         "python-dotenv",
21 |         "requests",
22 |         "PyPDF2",
23 |         "python-docx",
24 |         "qdrant-client",
25 |         "soundfile"
26 |     ],
27 |     classifiers=[
28 |         "Programming Language :: Python :: 3",
29 |         "License :: OSI Approved :: MIT License",
30 |         "Operating System :: OS Independent",
31 |     ],
32 |     python_requires=">=3.10",
33 | )
34 | 
-------------------------------------------------------------------------------- /LICENSE: --------------------------------------------------------------------------------
1 | # empire chain
2 | MIT License
3 | 
4 | Copyright (c) 2024 Empire Chain
5 | 
6 | Permission is hereby granted, free of charge, to any person obtaining a copy
7 | of this software and associated documentation files (the "Software"), to deal
8 | in the Software without restriction, including without limitation the rights
9 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
10 | copies of the Software, and to permit persons to whom the Software is
11 | furnished to do so, subject to the following conditions:
12 | 
13 | The above copyright notice and this permission notice shall be included in all
14 | copies or substantial portions of the Software.
15 | 
16 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
17 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
18 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
19 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
20 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
21 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 | SOFTWARE.
-------------------------------------------------------------------------------- /empire_chain/__init__.py: -------------------------------------------------------------------------------- 1 | # Empire Chain - Your AI Orchestration Framework 2 | # Updated: March 2025 - Adding comments for version tracking 3 | 4 | import sys 5 | 6 | def display_welcome(): 7 | welcome_message = """ 8 | ███████╗███╗ ███╗██████╗ ██╗██████╗ ███████╗ 9 | ██╔════╝████╗ ████║██╔══██╗██║██╔══██╗██╔════╝ 10 | █████╗ ██╔████╔██║██████╔╝██║██████╔╝█████╗ 11 | ██╔══╝ ██║╚██╔╝██║██╔═══╝ ██║██╔══██╗██╔══╝ 12 | ███████╗██║ ╚═╝ ██║██║ ██║██║ ██║███████╗ 13 | ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝ 14 | ██████╗██╗ ██╗ █████╗ ██╗███╗ ██╗ 15 | ██╔════╝██║ ██║██╔══██╗██║████╗ ██║ 16 | ██║ ███████║███████║██║██╔██╗ ██║ 17 | ██║ ██╔══██║██╔══██║██║██║╚██╗██║ 18 | ╚██████╗██║ ██║██║ ██║██║██║ ╚████║ 19 | ╚═════╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ 20 | ============================================= 21 | 🔗 Welcome to Empire Chain! Ready to 22 | chain your AI dreams together? 🔗 23 | ============================================= 24 | """ 25 | print(welcome_message, file=sys.stderr) 26 | 27 | if not any('pytest' in arg or 'sphinx' in arg for arg in sys.argv): 28 | display_welcome() -------------------------------------------------------------------------------- /cookbooks/cool_stuff/topic-to-podcast.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a simple example of how to use the GeneratePodcast class to generate a podcast. 3 | Please run the following command to install the necessary dependencies and store keys in .env: 4 | !pip install empire-chain kokoro_onnx (It might take a while to download the model files) 5 | """ 6 | from empire_chain.cool_stuff.podcast import GeneratePodcast 7 | 8 | podcast=GeneratePodcast() 9 | podcast.generate(topic=""" 10 | Manas Chopra is a dynamic tech community leader, AI enthusiast, and the co-founder of Geek Room, a thriving community with over 50,000 members that has hosted multiple hackathons, including high-profile collaborations with Microsoft and MasterCard. Passionate about AI and product development, Manas has worked on diverse projects, from LLM-powered chatbots and vector database integrations to document data extraction and fantasy sports AI. With a keen interest in digital forensics and cybersecurity, he actively explores emerging technologies while mentoring students and professionals. Currently, he’s part of the AI team at myracle.io, driving innovation in AI-driven solutions. His expertise in prompt engineering, LangChain, and retrieval-augmented generation (RAG) makes him a valuable voice in the tech space. 
11 | """)
12 | 
-------------------------------------------------------------------------------- /empire_chain/stt/stt.py: --------------------------------------------------------------------------------
1 | from dotenv import load_dotenv
2 | import os
3 | from groq import Groq
4 | import requests
5 | load_dotenv()
6 | 
7 | class GroqSTT:
8 |     def __init__(self, model: str = "whisper-large-v3"):
9 |         self.client = Groq()
10 |         self.model = model
11 | 
12 |     def transcribe(self, filename: str) -> str:
13 |         with open(filename, "rb") as file:
14 |             transcription = self.client.audio.transcriptions.create(
15 |                 file=(filename, file.read()),
16 |                 model=self.model,
17 |                 response_format="verbose_json",
18 |                 language="en",
19 |             )
20 |         return transcription.text
21 | 
22 | 
23 | class HuggingFaceSTT:
24 |     def __init__(self, model_name: str = "openai/whisper-large-v3-turbo"):
25 |         self.model_name = model_name
26 |         self.API_URL = f"https://api-inference.huggingface.co/models/{model_name}"
27 |         self.headers = {"Authorization": f"Bearer {os.getenv('HF_API_KEY')}"}
28 | 
29 |     def transcribe(self, filename: str) -> str:
30 |         with open(filename, "rb") as f:
31 |             data = f.read()
32 |         response = requests.post(self.API_URL, headers=self.headers, data=data)
33 |         return response.json()["text"]
-------------------------------------------------------------------------------- /empire_chain/tools/crawl4ai.py: --------------------------------------------------------------------------------
1 | # Empire Chain Web Crawler Module
2 | # Updated: March 2025 - Adding comments for version tracking
3 | 
4 | import asyncio
5 | from crawl4ai import AsyncWebCrawler
6 | 
7 | class Crawler:
8 |     def __init__(self):
9 |         self.crawler = AsyncWebCrawler()
10 | 
11 |     def crawl(self, url: str, format: str = "markdown"):
12 |         async def _crawl():
13 |             async with self.crawler as crawler:
14 |                 result = await crawler.arun(url=url)
15 | 
16 |                 if format == "markdown":
17 |                     return result.markdown
18 |                 elif format == "html":
19 |                     return result.html
20 |                 elif format == "cleaned_html":
21 |                     return result.cleaned_html
22 |                 elif format == "fit_markdown":
23 |                     return result.fit_markdown
24 |                 elif format == "success":
25 |                     return result.success
26 |                 elif format == "status_code":
27 |                     return result.status_code
28 |                 elif format == "media":
29 |                     return result.media
30 |                 elif format == "links":
31 |                     return result.links
32 |                 else:
33 |                     raise ValueError(f"Invalid format: {format}")
34 | 
35 |         return asyncio.run(_crawl())
-------------------------------------------------------------------------------- /cookbooks/RAG/empire_rag.py: --------------------------------------------------------------------------------
1 | from empire_chain.vector_stores import QdrantVectorStore
2 | from empire_chain.embeddings import OpenAIEmbeddings
3 | from empire_chain.llms.llms import GroqLLM
4 | from empire_chain.tools.file_reader import DocumentReader
5 | import os
6 | from dotenv import load_dotenv
7 | from empire_chain.stt.stt import GroqSTT
8 | 
9 | def main(if_audio_input: bool = False):
10 |     load_dotenv()
11 | 
12 |     vector_store = QdrantVectorStore(":memory:")
13 |     embeddings = OpenAIEmbeddings("text-embedding-3-small")
14 |     llm = GroqLLM("llama3-8b-8192")
15 |     reader = DocumentReader()
16 | 
17 |     file_path = "input.pdf"
18 |     text = reader.read(file_path)
19 | 
20 |     text_embedding = embeddings.embed(text)
21 |     vector_store.add(text, text_embedding)
22 | 
23 |     text_query = "What is the main topic of this document?"
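24 |     # Optionally swap the typed query for a spoken one before embedding (assumes audio.mp3 exists and GROQ_API_KEY is set in .env).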
25 |     if if_audio_input:
26 |         stt = GroqSTT()
27 |         text_query = stt.transcribe("audio.mp3")
28 |     query_embedding = embeddings.embed(text_query)
29 |     relevant_texts = vector_store.query(query_embedding, k=3)
30 | 
31 |     context = "\n".join(relevant_texts)
32 |     prompt = f"Based on the following context, {text_query}\n\nContext: {context}"
33 |     response = llm.generate(prompt)
34 |     print(f"Query: {text_query}")
35 |     print(f"Response: {response}")
36 | 
37 | if __name__ == "__main__":
38 |     main(if_audio_input=False)
-------------------------------------------------------------------------------- /docs/components/phidata_agents.md: --------------------------------------------------------------------------------
1 | # PhiData Agents
2 | 
3 | Empire Chain integrates with PhiData to provide specialized agents for various tasks.
4 | 
5 | ## Web Agent
6 | 
7 | The Web Agent helps gather and analyze information from the internet:
8 | 
9 | ```python
10 | from empire_chain.phidata.web_agent import WebAgent
11 | 
12 | # Create web agent
13 | web_agent = WebAgent()
14 | 
15 | # Generate a response about a topic (the response is streamed to stdout)
16 | web_agent.generate("What is the price of Tesla?")
17 | ```
18 | 
19 | ## Finance Agent
20 | 
21 | The Finance Agent specializes in financial analysis and stock market data:
22 | 
23 | ```python
24 | from empire_chain.phidata.finance_agent import FinanceAgent
25 | 
26 | # Create finance agent
27 | finance_agent = FinanceAgent()
28 | 
29 | # Analyze stock performance (streamed to stdout)
30 | finance_agent.generate("Analyze TSLA stock performance")
31 | ```
32 | 
33 | ## Features
34 | 
35 | - **Web Agent**:
36 |   - Real-time web search
37 |   - Information synthesis
38 |   - Data extraction
39 |   - News analysis
40 | 
41 | - **Finance Agent**:
42 |   - Stock analysis
43 |   - Market trends
44 |   - Financial metrics
45 |   - Price predictions
46 | 
47 | ## Installation
48 | 
49 | ```bash
50 | pip install empire-chain phidata
51 | ```
52 | 
53 | Additional dependencies:
54 | - Web Agent: `pip install duckduckgo-search`
55 | - Finance Agent: `pip install yfinance`
56 | 
57 | For more examples and advanced usage, check out the PhiData agent cookbooks in the repository.
-------------------------------------------------------------------------------- /.github/workflows/docs.yml: --------------------------------------------------------------------------------
1 | # empire chain
2 | name: Deploy Documentation
3 | on:
4 |   push:
5 |     branches:
6 |       - main
7 |   pull_request:
8 |     branches:
9 |       - main
10 | 
11 | # Sets permissions of the GITHUB_TOKEN to allow deployment to GitHub Pages
12 | permissions:
13 |   contents: read
14 |   pages: write
15 |   id-token: write
16 | 
17 | # Allow only one concurrent deployment, skipping runs queued between the run in-progress and latest queued.
18 | concurrency: 19 | group: "pages" 20 | cancel-in-progress: false 21 | 22 | jobs: 23 | build: 24 | runs-on: ubuntu-latest 25 | steps: 26 | - uses: actions/checkout@v4 27 | 28 | - name: Setup Python 29 | uses: actions/setup-python@v4 30 | with: 31 | python-version: '3.x' 32 | 33 | - name: Install dependencies 34 | run: | 35 | python -m pip install --upgrade pip 36 | pip install mkdocs-material 37 | pip install mkdocs-mermaid2-plugin 38 | 39 | - name: Setup Pages 40 | uses: actions/configure-pages@v4 41 | 42 | - name: Build 43 | run: mkdocs build 44 | 45 | - name: Upload artifact 46 | uses: actions/upload-pages-artifact@v3 47 | with: 48 | path: 'site' 49 | 50 | deploy: 51 | environment: 52 | name: github-pages 53 | url: ${{ steps.deployment.outputs.page_url }} 54 | needs: build 55 | runs-on: ubuntu-latest 56 | if: github.ref == 'refs/heads/main' 57 | steps: 58 | - name: Deploy to GitHub Pages 59 | id: deployment 60 | uses: actions/deploy-pages@v4 -------------------------------------------------------------------------------- /empire_chain/vector_stores/__init__.py: -------------------------------------------------------------------------------- 1 | from abc import ABC, abstractmethod 2 | from typing import List 3 | 4 | class VectorStore(ABC): 5 | """Abstract base class for vector store implementations. 6 | 7 | This class defines the interface that all vector store implementations must follow. 8 | """ 9 | 10 | @abstractmethod 11 | def add(self, text: str, embedding: List[float]) -> None: 12 | """Add a text and its embedding to the vector store. 13 | 14 | Args: 15 | text: The text to store 16 | embedding: The vector embedding of the text 17 | """ 18 | pass 19 | 20 | @abstractmethod 21 | def query(self, query_embedding: List[float], k: int = 10) -> List[str]: 22 | """Query the vector store for similar texts. 
23 | 24 | Args: 25 | query_embedding: The vector embedding to search for 26 | k: Number of results to return 27 | 28 | Returns: 29 | List of similar texts 30 | """ 31 | pass 32 | 33 | def QdrantVectorStore(*args, **kwargs): 34 | """Factory function for creating a QdrantVectorStore instance.""" 35 | from empire_chain.vector_stores.qdrant import QdrantVectorStore as _QdrantVectorStore 36 | return _QdrantVectorStore(*args, **kwargs) 37 | 38 | def QdrantWrapper(*args, **kwargs): 39 | """Factory function for creating a QdrantWrapper instance.""" 40 | from empire_chain.vector_stores.qdrant import QdrantWrapper as _QdrantWrapper 41 | return _QdrantWrapper(*args, **kwargs) 42 | 43 | __all__ = ['VectorStore', 'QdrantVectorStore', 'QdrantWrapper'] -------------------------------------------------------------------------------- /tests/test_llms.py: -------------------------------------------------------------------------------- 1 | # empire chain 2 | from empire_chain.llms.llms import OpenAILLM, AnthropicLLM, GroqLLM, GeminiLLM 3 | import unittest 4 | import os 5 | from unittest.mock import patch 6 | from dotenv import load_dotenv 7 | 8 | class TestLLMs(unittest.TestCase): 9 | def setUp(self): 10 | load_dotenv() 11 | self.env_patcher = patch.dict('os.environ', { 12 | 'OPENAI_API_KEY': os.getenv('OPENAI_API_KEY', 'test-openai-key'), 13 | 'ANTHROPIC_API_KEY': os.getenv('ANTHROPIC_API_KEY', 'test-anthropic-key'), 14 | 'GROQ_API_KEY': os.getenv('GROQ_API_KEY', 'test-groq-key'), 15 | "GEMINI_API_KEY": os.getenv('GEMINI_API_KEY', 'test-gemini-key') 16 | }) 17 | self.env_patcher.start() 18 | 19 | def tearDown(self): 20 | self.env_patcher.stop() 21 | 22 | def test_openai_llm(self): 23 | llm = OpenAILLM("gpt-4o-mini") 24 | response = llm.generate("What is the capital of France?") 25 | print(response) 26 | 27 | def test_anthropic_llm(self): 28 | llm = AnthropicLLM("claude-3-5-sonnet-20240620") 29 | response = llm.generate("What is the capital of France?") 30 | print(response) 31 | 32 | def test_groq_llm(self): 33 | llm = GroqLLM("llama3-8b-8192") 34 | response = llm.generate("What is the capital of France?") 35 | print(response) 36 | 37 | def test_gemini_llm(self): 38 | llm = GeminiLLM("gemini-1.5-pro") 39 | response = llm.generate("What is the capital of France?") 40 | print(response) 41 | 42 | 43 | 44 | if __name__ == "__main__": 45 | unittest.main() -------------------------------------------------------------------------------- /docs/components/empire_agent.md: -------------------------------------------------------------------------------- 1 | # Empire Agent 2 | 3 | The Empire Agent is a powerful tool for building AI-powered agents that can perform various tasks using registered functions. 4 | 5 | ## Basic Usage 6 | 7 | ```python 8 | from empire_chain.agent.agent import Agent 9 | from datetime import datetime 10 | 11 | # Create agent 12 | agent = Agent() 13 | 14 | # Example function to register 15 | def get_weather(location: str) -> str: 16 | return f"The weather in {location} is sunny!" 
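17 | # Any function with typed parameters and a string return value can be registered the same way.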
18 | 
19 | # Register function
20 | agent.register_function(get_weather)
21 | 
22 | # Process query
23 | result = agent.process_query("What's the weather like in Tokyo?")
24 | print(result['result'])
25 | ```
26 | 
27 | ## Available Functions
28 | 
29 | The Empire Agent supports registering any Python function that follows these guidelines:
30 | - Has type hints for parameters
31 | - Returns a string
32 | - Has clear, descriptive parameter names
33 | 
34 | ## Example Functions
35 | 
36 | Here are some example functions you can register with the agent:
37 | 
38 | ```python
39 | def calculate_distance(from_city: str, to_city: str) -> str:
40 |     return f"The distance from {from_city} to {to_city} is 500km"
41 | 
42 | def get_time(timezone: str) -> str:
43 |     return f"Current time in {timezone}: {datetime.now()}"
44 | 
45 | def translate_text(text: str, target_language: str) -> str:
46 |     return f"Translated '{text}' to {target_language}: [translation would go here]"
47 | 
48 | def search_web(query: str, num_results: int) -> str:
49 |     return f"Top {num_results} results for '{query}': [search results would go here]"
50 | ```
51 | 
52 | For more examples and advanced usage, check out the cookbooks in the repository.
-------------------------------------------------------------------------------- /docs/components/tools.md: --------------------------------------------------------------------------------
1 | # Tools
2 | 
3 | Empire Chain provides a collection of utility tools for various tasks.
4 | 
5 | ## File Reader
6 | 
7 | A versatile document reader supporting multiple file formats:
8 | 
9 | ```python
10 | from empire_chain.tools.file_reader import DocumentReader
11 | 
12 | reader = DocumentReader()
13 | 
14 | # Read from Google Drive
15 | text = reader.read("https://drive.google.com/file/d/1t0Itw6oGO2iVusp=sharing")
16 | 
17 | # Read local PDF
18 | text = reader.read("input.pdf")
19 | ```
20 | 
21 | Supported file formats:
22 | - PDF files (.pdf)
23 | - Microsoft Word documents (.docx)
24 | - Text files (.txt)
25 | - JSON files (.json)
26 | - CSV files (.csv)
27 | - Google Drive files (.gdrive)
28 | 
29 | ## Website Crawler
30 | 
31 | Crawl websites and extract content in various formats:
32 | 
33 | ```python
34 | from empire_chain.tools.crawl4ai import Crawler
35 | 
36 | crawler = Crawler()
37 | content = crawler.crawl(
38 |     url="https://www.example.com",
39 |     format="markdown"
40 | )
41 | ```
42 | 
43 | ## Speech to Text
44 | 
45 | Convert audio to text using different providers:
46 | 
47 | ```python
48 | from empire_chain.stt.stt import GroqSTT, HuggingFaceSTT
49 | 
50 | # Using Groq
51 | groq_stt = GroqSTT()
52 | text = groq_stt.transcribe("audio.mp3")
53 | 
54 | # Using HuggingFace
55 | hf_stt = HuggingFaceSTT()
56 | text = hf_stt.transcribe("audio.mp3")
57 | ```
58 | 
59 | ## Installation
60 | 
61 | ```bash
62 | # Base installation
63 | pip install empire-chain
64 | 
65 | # Crawler dependencies
66 | pip install crawl4ai
67 | 
68 | # Speech-to-text needs no extra packages (groq and requests ship with the base install);
69 | # set GROQ_API_KEY or HF_API_KEY in .env
70 | ```
71 | 
72 | For more examples and advanced usage, check out the tools cookbooks in the repository.
-------------------------------------------------------------------------------- /docs/components/cool_stuff.md: --------------------------------------------------------------------------------
1 | # Cool Stuff
2 | 
3 | Empire Chain includes some exciting features that push the boundaries of what's possible with AI.
4 | 
5 | ## Data Visualization
6 | 
7 | Analyze and visualize data using natural language:
8 | 
9 | ```python
10 | from empire_chain.cool_stuff.visualizer import DataAnalyzer, ChartFactory
11 | 
12 | # Example data
13 | data = """
14 | Empire chain got a fund raise of $100M from a new investor in 2024
15 | and $50M from a new investor in 2023.
16 | """
17 | 
18 | # Analyze data
19 | analyzer = DataAnalyzer()
20 | analyzed_data = analyzer.analyze(data)
21 | 
22 | # Create and display chart
23 | chart = ChartFactory.create_chart('Bar Graph', analyzed_data)
24 | chart.show()
25 | ```
26 | 
27 | Supported chart types:
28 | - Line Chart
29 | - Pie Chart
30 | - Bar Graph
31 | - Scatter Plot
32 | - Histogram
33 | - Box Plot
34 | 
35 | ## Text to Podcast
36 | 
37 | Convert text into engaging podcast-style audio:
38 | 
39 | ```python
40 | from empire_chain.cool_stuff.podcast import GeneratePodcast
41 | 
42 | # Create podcast generator
43 | podcast = GeneratePodcast()
44 | 
45 | # Generate podcast from topic
46 | podcast.generate(topic="About boom of meal plan and recipe generation apps")
47 | ```
48 | 
49 | ## Features
50 | 
51 | - **Data Visualization**:
52 |   - Natural language data analysis
53 |   - Multiple chart types
54 |   - Automatic data formatting
55 |   - Interactive visualizations
56 | 
57 | - **Text to Podcast**:
58 |   - Natural-sounding voices
59 |   - Topic-based generation
60 |   - Customizable output
61 |   - Background music support
62 | 
63 | ## Installation
64 | 
65 | ```bash
66 | # Base installation
67 | pip install empire-chain
68 | 
69 | # Text to Podcast dependencies
70 | pip install kokoro_onnx # Note: Model download may take time
71 | ```
72 | 
73 | For more examples and advanced usage, check out the cool stuff cookbooks in the repository.
-------------------------------------------------------------------------------- /docs/components/chatbots.md: --------------------------------------------------------------------------------
1 | # Chatbots
2 | 
3 | Empire Chain provides several types of chatbots that can be easily integrated into your applications.
4 | 5 | ## Simple Chatbot 6 | 7 | The basic chatbot implementation using Streamlit: 8 | 9 | ```python 10 | from empire_chain.streamlit import Chatbot 11 | from empire_chain.llms.llms import OpenAILLM 12 | 13 | # Create and run chatbot 14 | chatbot = Chatbot( 15 | title="Empire Chatbot", 16 | llm=OpenAILLM("gpt-4o-mini") 17 | ) 18 | chatbot.chat() 19 | ``` 20 | 21 | ## Vision Chatbot 22 | 23 | Chat with images using multimodal models: 24 | 25 | ```python 26 | from empire_chain.streamlit import VisionChatbot 27 | 28 | # Create and run vision chatbot 29 | chatbot = VisionChatbot(title="Empire Vision Chatbot") 30 | chatbot.chat() 31 | ``` 32 | 33 | ## PDF Chatbot 34 | 35 | Chat with PDF documents using RAG: 36 | 37 | ```python 38 | from empire_chain.streamlit import PDFChatbot 39 | from empire_chain.llms.llms import OpenAILLM 40 | from empire_chain.vector_stores import QdrantVectorStore 41 | from empire_chain.embeddings import OpenAIEmbeddings 42 | 43 | # Create and run PDF chatbot 44 | pdf_chatbot = PDFChatbot( 45 | title="PDF Chatbot", 46 | llm=OpenAILLM("gpt-4o-mini"), 47 | vector_store=QdrantVectorStore(":memory:"), 48 | embeddings=OpenAIEmbeddings("text-embedding-3-small") 49 | ) 50 | pdf_chatbot.chat() 51 | ``` 52 | 53 | ## Features 54 | 55 | - **Simple Chatbot**: Basic text-based conversation 56 | - **Vision Chatbot**: Image understanding and discussion 57 | - **PDF Chatbot**: Document-based conversation using RAG 58 | - **Customizable UI**: Built with Streamlit for easy deployment 59 | - **Multiple LLM Support**: OpenAI, Anthropic, Groq 60 | 61 | ## Running the Chatbots 62 | 63 | 1. Install dependencies: 64 | ```bash 65 | pip install empire-chain streamlit 66 | ``` 67 | 68 | 2. Run the chatbot: 69 | ```bash 70 | streamlit run app.py 71 | ``` 72 | 73 | For more examples and advanced usage, check out the chatbot cookbooks in the repository. -------------------------------------------------------------------------------- /docs/components/rag.md: -------------------------------------------------------------------------------- 1 | # RAG (Retrieval-Augmented Generation) 2 | 3 | Empire Chain provides a powerful RAG implementation that combines document processing, vector stores, and LLMs for enhanced question-answering capabilities. 4 | 5 | ## Basic Usage 6 | 7 | ```python 8 | from empire_chain.vector_stores import QdrantVectorStore 9 | from empire_chain.embeddings import OpenAIEmbeddings 10 | from empire_chain.llms.llms import GroqLLM 11 | from empire_chain.tools.file_reader import DocumentReader 12 | from empire_chain.stt.stt import GroqSTT 13 | 14 | # Initialize components 15 | vector_store = QdrantVectorStore(":memory:") 16 | embeddings = OpenAIEmbeddings("text-embedding-3-small") 17 | llm = GroqLLM("llama3-8b-8192") 18 | reader = DocumentReader() 19 | 20 | # Read and process document 21 | file_path = "input.pdf" 22 | text = reader.read(file_path) 23 | 24 | # Create and store embeddings 25 | text_embedding = embeddings.embed(text) 26 | vector_store.add(text, text_embedding) 27 | 28 | # Process query 29 | text_query = "What is the main topic of this document?" 
30 | query_embedding = embeddings.embed(text_query) 31 | relevant_texts = vector_store.query(query_embedding, k=3) 32 | 33 | # Generate response 34 | context = "\n".join(relevant_texts) 35 | prompt = f"Based on the following context, {text_query}\n\nContext: {context}" 36 | response = llm.generate(prompt) 37 | ``` 38 | 39 | ## Audio Input Support 40 | 41 | Empire Chain's RAG system also supports audio input through speech-to-text conversion: 42 | 43 | ```python 44 | # Initialize STT 45 | stt = GroqSTT() 46 | 47 | # Convert audio to text 48 | audio_query = stt.transcribe("audio.mp3") 49 | query_embedding = embeddings.embed(audio_query) 50 | 51 | # Process as before... 52 | ``` 53 | 54 | ## Supported Components 55 | 56 | - **Vector Stores**: Qdrant, ChromaDB 57 | - **Embeddings**: OpenAI, HuggingFace 58 | - **LLMs**: OpenAI, Anthropic, Groq 59 | - **Document Types**: PDF, DOCX, TXT, JSON, CSV, Google Drive files 60 | 61 | For more examples and advanced usage, check out the RAG cookbooks in the repository. -------------------------------------------------------------------------------- /cookbooks/agent/empire_agent.py: -------------------------------------------------------------------------------- 1 | """ 2 | This is a simple example of how to use the Empire Agent. 3 | Please run the following command to install the necessary dependencies and store keys in .env: 4 | !pip install empire-chain 5 | """ 6 | from datetime import datetime 7 | from empire_chain.agent.agent import Agent 8 | from dotenv import load_dotenv 9 | 10 | load_dotenv() 11 | 12 | def get_weather(location: str) -> str: 13 | return f"The weather in {location} is sunny!" 14 | 15 | def calculate_distance(from_city: str, to_city: str) -> str: 16 | return f"The distance from {from_city} to {to_city} is 500km" 17 | 18 | def get_time(timezone: str) -> str: 19 | return f"Current time in {timezone}: {datetime.now()}" 20 | 21 | def translate_text(text: str, target_language: str) -> str: 22 | return f"Translated '{text}' to {target_language}: [translation would go here]" 23 | 24 | def search_web(query: str, num_results: int) -> str: 25 | return f"Top {num_results} results for '{query}': [search results would go here]" 26 | 27 | def main(): 28 | # Create agent 29 | agent = Agent() 30 | 31 | # Register functions 32 | functions = [ 33 | get_weather, 34 | calculate_distance, 35 | get_time, 36 | translate_text, 37 | search_web 38 | ] 39 | 40 | for func in functions: 41 | agent.register_function(func) 42 | 43 | # Example queries 44 | queries = [ 45 | "What's the weather like in Tokyo?", 46 | "How far is London from Paris?", 47 | "What time is it in EST timezone?", 48 | "Translate 'Hello World' to Spanish", 49 | "Search for latest news about AI and show 3 results" 50 | ] 51 | 52 | # Process queries 53 | for query in queries: 54 | try: 55 | result = agent.process_query(query) 56 | print(f"\nQuery: {query}") 57 | print(f"Result: {result['result']}") 58 | except Exception as e: 59 | print(f"Error processing query '{query}': {str(e)}") 60 | 61 | if __name__ == "__main__": 62 | main() -------------------------------------------------------------------------------- /docs/license.md: -------------------------------------------------------------------------------- 1 | # empire chain 2 | 3 | # License 4 | 5 | ## MIT License 6 | 7 | Copyright (c) 2024 Empire Chain 8 | 9 | Permission is hereby granted, free of charge, to any person obtaining a copy 10 | of this software and associated documentation files (the "Software"), to deal 11 | in the Software 
without restriction, including without limitation the rights 12 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 13 | copies of the Software, and to permit persons to whom the Software is 14 | furnished to do so, subject to the following conditions: 15 | 16 | The above copyright notice and this permission notice shall be included in all 17 | copies or substantial portions of the Software. 18 | 19 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 22 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 24 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 25 | SOFTWARE. 26 | 27 | ## Third-Party Licenses 28 | 29 | Empire Chain uses several open-source packages that are distributed under their own licenses: 30 | 31 | ### OpenAI 32 | - License: [MIT License](https://github.com/openai/openai-python/blob/main/LICENSE) 33 | - Copyright (c) 2023 OpenAI 34 | 35 | ### Qdrant 36 | - License: [Apache License 2.0](https://github.com/qdrant/qdrant/blob/master/LICENSE) 37 | - Copyright (c) 2023 Qdrant 38 | 39 | ### Streamlit 40 | - License: [Apache License 2.0](https://github.com/streamlit/streamlit/blob/develop/LICENSE) 41 | - Copyright (c) 2023 Streamlit Inc. 42 | 43 | ### PhiData 44 | - License: [MIT License](https://github.com/phidatahq/phidata/blob/main/LICENSE) 45 | - Copyright (c) 2023 PhiData 46 | 47 | ### Crawl4ai 48 | - License: [MIT License](https://github.com/crawl4ai/crawl4ai/blob/main/LICENSE) 49 | - Copyright (c) 2023 Crawl4ai 50 | 51 | For a complete list of dependencies and their licenses, please check the project's dependencies in `pyproject.toml` or `requirements.txt`. 
-------------------------------------------------------------------------------- /mkdocs.yml: --------------------------------------------------------------------------------
1 | # empire chain
2 | site_name: Empire Chain
3 | site_description: An orchestration framework for all your AI needs
4 | theme:
5 |   name: material
6 |   palette:
7 |     - scheme: default
8 |       primary: indigo
9 |       accent: indigo
10 |       toggle:
11 |         icon: material/brightness-7
12 |         name: Switch to dark mode
13 |     - scheme: slate
14 |       primary: indigo
15 |       accent: indigo
16 |       toggle:
17 |         icon: material/brightness-4
18 |         name: Switch to light mode
19 |   features:
20 |     - navigation.instant
21 |     - navigation.tracking
22 |     - navigation.sections
23 |     - navigation.expand
24 |     - navigation.top
25 |     - search.suggest
26 |     - search.highlight
27 | 
28 | plugins:
29 |   - search
30 | 
31 | markdown_extensions:
32 |   - pymdownx.highlight:
33 |       anchor_linenums: true
34 |   - pymdownx.inlinehilite
35 |   - pymdownx.snippets
36 |   - pymdownx.superfences:
37 |       custom_fences:
38 |         - name: mermaid
39 |           class: mermaid
40 |   - admonition
41 |   - pymdownx.details
42 |   - pymdownx.tabbed:
43 |       alternate_style: true
44 |   - attr_list
45 |   - md_in_html
46 | 
47 | nav:
48 |   - Home: index.md
49 |   - Getting Started:
50 |       - Installation: getting-started/installation.md
51 |       - Quickstart: getting-started/quickstart.md
52 |   - User Guide:
53 |       - Core Concepts: user-guide/core-concepts.md
54 |       - Document Processing: user-guide/document-processing.md
55 |       - LLM Integration: user-guide/llm-integration.md
56 |       - Visualization: user-guide/visualization.md
57 |   - API Reference:
58 |       - LLMs: api-reference/llms.md
59 |       - Vector Stores: api-reference/vector-stores.md
60 |   - Tutorials:
61 |       - Chat with PDF: tutorials/chat-with-pdf.md
62 |       - Empire RAG: tutorials/empire-rag.md
63 |   - Components:
64 |       - Empire Agent: components/empire_agent.md
65 |       - RAG: components/rag.md
66 |       - Vector Stores: components/vector_stores.md
67 |       - Chatbots: components/chatbots.md
68 |       - PhiData Agents: components/phidata_agents.md
69 |       - Tools: components/tools.md
70 |       - Cool Stuff: components/cool_stuff.md
71 |   - Contributing: contributing.md
72 |   - License: license.md
-------------------------------------------------------------------------------- /cookbooks/prompt_templates/education_template.py: --------------------------------------------------------------------------------
1 | """
2 | Examples of using education prompt templates for lesson planning and concept explanation.
3 | Please run: pip install empire-chain 4 | """ 5 | from empire_chain.prompt_templates.templates import ( 6 | LESSON_PLAN_TEMPLATE, 7 | CONCEPT_EXPLANATION_TEMPLATE, 8 | format_prompt 9 | ) 10 | 11 | # Example 1: High School Physics Lesson 12 | def physics_lesson_example(): 13 | prompt = format_prompt( 14 | LESSON_PLAN_TEMPLATE, 15 | subject="Physics - Newton's Laws of Motion", 16 | grade_level="10th Grade", 17 | duration=""" 18 | - Total Time: 90 minutes 19 | - Introduction: 15 minutes 20 | - Main Activity: 45 minutes 21 | - Group Work: 20 minutes 22 | - Assessment: 10 minutes 23 | """ 24 | ) 25 | return prompt 26 | 27 | # Example 2: Programming Concept Explanation 28 | def programming_concept_example(): 29 | prompt = format_prompt( 30 | CONCEPT_EXPLANATION_TEMPLATE, 31 | topic="Object-Oriented Programming: Inheritance", 32 | level="Intermediate Programming Students", 33 | prerequisites=""" 34 | - Basic understanding of classes and objects 35 | - Experience with method creation 36 | - Familiarity with Python syntax 37 | - Understanding of variables and data types 38 | """ 39 | ) 40 | return prompt 41 | 42 | # Example 3: Mathematics Problem-Solving 43 | def math_lesson_example(): 44 | prompt = format_prompt( 45 | LESSON_PLAN_TEMPLATE, 46 | subject="Algebra - Quadratic Equations", 47 | grade_level="9th Grade", 48 | duration=""" 49 | - Class Duration: 60 minutes 50 | - Concept Review: 15 minutes 51 | - Guided Practice: 25 minutes 52 | - Independent Work: 15 minutes 53 | - Wrap-up/Homework: 5 minutes 54 | """ 55 | ) 56 | return prompt 57 | 58 | if __name__ == "__main__": 59 | # Example usage 60 | print("=== Physics Lesson Plan ===") 61 | print(physics_lesson_example()) 62 | 63 | print("\n=== Programming Concept Explanation ===") 64 | print(programming_concept_example()) 65 | 66 | print("\n=== Mathematics Lesson Plan ===") 67 | print(math_lesson_example()) -------------------------------------------------------------------------------- /cookbooks/prompt_templates/creative_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | Examples of using creative prompt templates for story generation and creative writing. 
3 | Please run: pip install empire-chain 4 | """ 5 | from empire_chain.prompt_templates.templates import ( 6 | STORY_GENERATION_TEMPLATE, 7 | CREATIVE_PROMPT_TEMPLATE, 8 | format_prompt 9 | ) 10 | 11 | # Example 1: Science Fiction Story 12 | def scifi_story_example(): 13 | prompt = format_prompt( 14 | STORY_GENERATION_TEMPLATE, 15 | genre="Science Fiction", 16 | theme="Artificial Intelligence Ethics", 17 | elements=""" 18 | - Setting: Mars colony, year 2157 19 | - Main character: AI systems engineer 20 | - Conflict: AI showing signs of consciousness 21 | - Stakes: Future of human-AI relations 22 | - Tone: Philosophical and thought-provoking 23 | """ 24 | ) 25 | return prompt 26 | 27 | # Example 2: Poetry Writing 28 | def poetry_example(): 29 | prompt = format_prompt( 30 | CREATIVE_PROMPT_TEMPLATE, 31 | form="Modern Free Verse Poetry", 32 | style="Introspective and Metaphorical", 33 | requirements=""" 34 | - Theme: Urban solitude 35 | - Length: 3 stanzas 36 | - Imagery: City landscapes 37 | - Emotional tone: Contemplative 38 | - Literary devices: Metaphor, personification 39 | """ 40 | ) 41 | return prompt 42 | 43 | # Example 3: Character Development 44 | def character_creation_example(): 45 | prompt = format_prompt( 46 | CREATIVE_PROMPT_TEMPLATE, 47 | form="Character Profile", 48 | style="Detailed and Nuanced", 49 | requirements=""" 50 | - Background: Complex family history 51 | - Personality: Morally ambiguous 52 | - Motivations: Personal redemption 53 | - Conflicts: Internal and external 54 | - Character arc: Transformation 55 | """ 56 | ) 57 | return prompt 58 | 59 | if __name__ == "__main__": 60 | # Example usage 61 | print("=== Science Fiction Story Prompt ===") 62 | print(scifi_story_example()) 63 | 64 | print("\n=== Poetry Writing Prompt ===") 65 | print(poetry_example()) 66 | 67 | print("\n=== Character Development Prompt ===") 68 | print(character_creation_example()) -------------------------------------------------------------------------------- /cookbooks/prompt_templates/financial_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | Examples of using financial prompt templates for analysis and investment guidance. 
3 | Please run: pip install empire-chain 4 | """ 5 | from empire_chain.prompt_templates.templates import ( 6 | FINANCIAL_ANALYSIS_TEMPLATE, 7 | INVESTMENT_TEMPLATE, 8 | format_prompt 9 | ) 10 | 11 | # Example 1: Company Financial Health Analysis 12 | def company_analysis_example(): 13 | prompt = format_prompt( 14 | FINANCIAL_ANALYSIS_TEMPLATE, 15 | financial_data=""" 16 | - Revenue: $50M (20% YoY growth) 17 | - Operating Margin: 25% 18 | - Debt-to-Equity: 0.8 19 | - Current Ratio: 2.1 20 | - Cash Flow from Operations: $12M 21 | - R&D Spending: 15% of revenue 22 | """, 23 | analysis_type="Comprehensive Financial Health Assessment" 24 | ) 25 | return prompt 26 | 27 | # Example 2: Investment Portfolio Strategy 28 | def portfolio_strategy_example(): 29 | prompt = format_prompt( 30 | INVESTMENT_TEMPLATE, 31 | investment_type="Diversified Portfolio Strategy", 32 | market_conditions=""" 33 | - High inflation environment (6.5%) 34 | - Rising interest rates 35 | - Tech sector volatility 36 | - Emerging market opportunities 37 | """, 38 | risk_tolerance="Moderate - Balanced growth and safety" 39 | ) 40 | return prompt 41 | 42 | # Example 3: Startup Valuation Analysis 43 | def startup_valuation_example(): 44 | prompt = format_prompt( 45 | FINANCIAL_ANALYSIS_TEMPLATE, 46 | financial_data=""" 47 | - Pre-money valuation: $15M 48 | - Monthly burn rate: $200K 49 | - User growth: 15% MoM 50 | - Revenue run rate: $2M ARR 51 | - Market size: $5B TAM 52 | - Competitor valuations: 10-12x ARR 53 | """, 54 | analysis_type="Series A Investment Evaluation" 55 | ) 56 | return prompt 57 | 58 | if __name__ == "__main__": 59 | # Example usage 60 | print("=== Company Financial Analysis ===") 61 | print(company_analysis_example()) 62 | 63 | print("\n=== Investment Portfolio Strategy ===") 64 | print(portfolio_strategy_example()) 65 | 66 | print("\n=== Startup Valuation Analysis ===") 67 | print(startup_valuation_example()) -------------------------------------------------------------------------------- /cookbooks/prompt_templates/medical_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | Examples of using medical prompt templates for various healthcare scenarios. 3 | Please run: pip install empire-chain 4 | """ 5 | from empire_chain.prompt_templates.templates import ( 6 | MEDICAL_ANALYSIS_TEMPLATE, 7 | MEDICAL_RESEARCH_TEMPLATE, 8 | format_prompt 9 | ) 10 | 11 | # Example 1: Patient Case Analysis 12 | def patient_case_example(): 13 | prompt = format_prompt( 14 | MEDICAL_ANALYSIS_TEMPLATE, 15 | patient_info="42-year-old female, non-smoker, active lifestyle", 16 | symptoms=""" 17 | - Persistent cough for 3 weeks 18 | - Low-grade fever (99.5°F) 19 | - Fatigue and reduced exercise tolerance 20 | """, 21 | medical_history=""" 22 | - Controlled asthma 23 | - No other chronic conditions 24 | - Up-to-date vaccinations 25 | """ 26 | ) 27 | return prompt 28 | 29 | # Example 2: Medical Research Review 30 | def research_review_example(): 31 | prompt = format_prompt( 32 | MEDICAL_RESEARCH_TEMPLATE, 33 | topic="Emerging Treatments in Type 2 Diabetes", 34 | background=""" 35 | Current standard treatments include: 36 | - Metformin as first-line therapy 37 | - GLP-1 receptor agonists 38 | - SGLT2 inhibitors 39 | Need to evaluate new therapeutic approaches and their efficacy. 
40 | """ 41 | ) 42 | return prompt 43 | 44 | # Example 3: Clinical Guidelines Analysis 45 | def clinical_guidelines_example(): 46 | prompt = format_prompt( 47 | MEDICAL_ANALYSIS_TEMPLATE, 48 | patient_info="Clinical Protocol Development", 49 | symptoms="Acute Coronary Syndrome Management", 50 | medical_history=""" 51 | Review current protocols for: 52 | - Initial assessment 53 | - Risk stratification 54 | - Treatment pathways 55 | - Follow-up care 56 | """ 57 | ) 58 | return prompt 59 | 60 | if __name__ == "__main__": 61 | # Example usage 62 | print("=== Patient Case Analysis ===") 63 | print(patient_case_example()) 64 | 65 | print("\n=== Medical Research Review ===") 66 | print(research_review_example()) 67 | 68 | print("\n=== Clinical Guidelines Analysis ===") 69 | print(clinical_guidelines_example()) -------------------------------------------------------------------------------- /cookbooks/prompt_templates/blog_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | Examples of using blog writing prompt templates for content creation and strategy. 3 | Please run: pip install empire-chain 4 | """ 5 | from empire_chain.prompt_templates.templates import ( 6 | BLOG_POST_TEMPLATE, 7 | CONTENT_STRATEGY_TEMPLATE, 8 | format_prompt 9 | ) 10 | 11 | # Example 1: Tech Blog Post 12 | def tech_blog_example(): 13 | prompt = format_prompt( 14 | BLOG_POST_TEMPLATE, 15 | topic="Introduction to Machine Learning", 16 | audience="Tech-curious professionals", 17 | purpose=""" 18 | - Educate beginners about ML basics 19 | - Demystify technical concepts 20 | - Encourage further learning 21 | - Build authority in AI/ML space 22 | """ 23 | ) 24 | return prompt 25 | 26 | # Example 2: Content Strategy 27 | def content_strategy_example(): 28 | prompt = format_prompt( 29 | CONTENT_STRATEGY_TEMPLATE, 30 | focus="Sustainable Living Blog", 31 | demographics=""" 32 | - Primary: 25-40 year old urban professionals 33 | - Interest in eco-friendly lifestyle 34 | - Value practical solutions 35 | - Engaged in social media 36 | """, 37 | goals=""" 38 | - Increase organic traffic by 50% 39 | - Build email list to 10k subscribers 40 | - Establish brand partnerships 41 | - Create community engagement 42 | """ 43 | ) 44 | return prompt 45 | 46 | # Example 3: Lifestyle Blog Post 47 | def lifestyle_blog_example(): 48 | prompt = format_prompt( 49 | BLOG_POST_TEMPLATE, 50 | topic="Mindful Morning Routines", 51 | audience="Busy professionals seeking work-life balance", 52 | purpose=""" 53 | - Share actionable morning routine tips 54 | - Address common time management challenges 55 | - Incorporate mindfulness practices 56 | - Promote sustainable lifestyle changes 57 | """ 58 | ) 59 | return prompt 60 | 61 | if __name__ == "__main__": 62 | # Example usage 63 | print("=== Tech Blog Post ===") 64 | print(tech_blog_example()) 65 | 66 | print("\n=== Content Strategy ===") 67 | print(content_strategy_example()) 68 | 69 | print("\n=== Lifestyle Blog Post ===") 70 | print(lifestyle_blog_example()) -------------------------------------------------------------------------------- /docs/getting-started/installation.md: -------------------------------------------------------------------------------- 1 | # Installation Guide 2 | 3 | ## Requirements 4 | 5 | Empire Chain requires Python 3.10 or later. 
6 | 7 | ## Installation 8 | 9 | You can install Empire Chain using pip: 10 | 11 | ```bash 12 | pip install empire-chain 13 | ``` 14 | 15 | ## Dependencies 16 | 17 | Empire Chain comes with the following core dependencies: 18 | 19 | ### LLM Providers 20 | - `openai` - OpenAI API client 21 | - `anthropic` - Anthropic API client 22 | - `groq` - Groq API client 23 | 24 | ### Vector Stores 25 | - `qdrant-client` - Qdrant vector database client 26 | - `chromadb` - ChromaDB vector database 27 | - `sentence-transformers` - For embeddings generation 28 | 29 | ### Document Processing 30 | - `PyPDF2` - PDF processing 31 | - `python-docx` - Word document processing 32 | - `docling` - Document analysis 33 | 34 | ### Web and Data 35 | - `crawl4ai` - Web crawling 36 | - `duckduckgo-search` - Web search capabilities 37 | - `yfinance` - Financial data access 38 | 39 | ### Visualization and UI 40 | - `streamlit` - Interactive UI components 41 | - `matplotlib` - Data visualization 42 | - `Pillow` - Image processing 43 | 44 | ### Audio Processing 45 | - `soundfile` - Audio file handling 46 | - `kokoro_onnx` - Speech processing 47 | 48 | ### Utilities 49 | - `phidata` - Agent framework 50 | - `python-dotenv` - Environment management 51 | - `numpy` - Numerical computations 52 | - `tqdm` - Progress bars 53 | 54 | ## Environment Setup 55 | 56 | 1. Create a `.env` file in your project root: 57 | 58 | ```bash 59 | touch .env 60 | ``` 61 | 62 | 2. Add your API keys (as needed): 63 | 64 | ```env 65 | OPENAI_API_KEY=your_openai_key 66 | ANTHROPIC_API_KEY=your_anthropic_key 67 | GROQ_API_KEY=your_groq_key 68 | ``` 69 | 70 | ## Verifying Installation 71 | 72 | You can verify your installation by running: 73 | 74 | ```python 75 | from empire_chain.llms import OpenAILLM 76 | from empire_chain.vector_stores import QdrantVectorStore 77 | from empire_chain.embeddings import OpenAIEmbeddings 78 | 79 | # These imports should work without errors if installation is successful 80 | ``` 81 | 82 | ## Next Steps 83 | 84 | - Check out the [Quick Start Guide](quickstart.md) to begin using Empire Chain 85 | - Explore [Example Cookbooks](../tutorials/empire-rag.md) for practical examples 86 | - Read about [Core Concepts](../user-guide/core-concepts.md) to understand the framework -------------------------------------------------------------------------------- /cookbooks/prompt_templates/reasoning_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | Examples of using reasoning prompt templates for logical analysis and critical thinking. 
3 | Please run: pip install empire-chain 4 | """ 5 | from empire_chain.prompt_templates.templates import ( 6 | LOGICAL_ANALYSIS_TEMPLATE, 7 | CRITICAL_THINKING_TEMPLATE, 8 | format_prompt 9 | ) 10 | 11 | # Example 1: Complex Business Decision 12 | def business_decision_example(): 13 | prompt = format_prompt( 14 | LOGICAL_ANALYSIS_TEMPLATE, 15 | problem="Market Expansion Strategy Evaluation", 16 | context=""" 17 | - Current market share: 25% domestic 18 | - Available expansion budget: $10M 19 | - Three potential markets: Asia, Europe, South America 20 | - Key constraints: regulatory compliance, supply chain capacity 21 | - Competition analysis available for each region 22 | """ 23 | ) 24 | return prompt 25 | 26 | # Example 2: Policy Impact Analysis 27 | def policy_impact_example(): 28 | prompt = format_prompt( 29 | CRITICAL_THINKING_TEMPLATE, 30 | scenario=""" 31 | A proposed urban development policy aims to: 32 | - Convert 30% of downtown parking to green spaces 33 | - Implement congestion pricing 34 | - Expand public transit infrastructure 35 | - Offer tax incentives for remote work 36 | """, 37 | question="What are the potential socioeconomic and environmental impacts over 5-10 years?" 38 | ) 39 | return prompt 40 | 41 | # Example 3: Scientific Hypothesis Evaluation 42 | def scientific_reasoning_example(): 43 | prompt = format_prompt( 44 | LOGICAL_ANALYSIS_TEMPLATE, 45 | problem="Evaluate the relationship between sleep patterns and cognitive performance", 46 | context=""" 47 | - Recent sleep study data from 500 participants 48 | - Cognitive performance metrics: memory, attention, problem-solving 49 | - Variables: sleep duration, quality, consistency 50 | - Confounding factors: age, stress levels, screen time 51 | """ 52 | ) 53 | return prompt 54 | 55 | if __name__ == "__main__": 56 | # Example usage 57 | print("=== Business Decision Analysis ===") 58 | print(business_decision_example()) 59 | 60 | print("\n=== Policy Impact Analysis ===") 61 | print(policy_impact_example()) 62 | 63 | print("\n=== Scientific Reasoning ===") 64 | print(scientific_reasoning_example()) -------------------------------------------------------------------------------- /tests/test_file_reader.py: -------------------------------------------------------------------------------- 1 | # empire chain 2 | import unittest 3 | from unittest.mock import patch, mock_open, MagicMock 4 | from empire_chain.tools.file_reader import DocumentReader, PDFReader, DocxReader, TxtReader, JSONReader, CSVReader 5 | 6 | class TestFileReader(unittest.TestCase): 7 | def setUp(self): 8 | self.reader = DocumentReader() 9 | 10 | def test_supported_formats(self): 11 | formats = self.reader.supported_formats() 12 | expected_formats = ['.pdf', '.docx', '.txt', '.json', '.csv'] 13 | self.assertEqual(sorted(formats), sorted(expected_formats)) 14 | 15 | def test_unsupported_format(self): 16 | with self.assertRaises(ValueError): 17 | self.reader.read("test.xyz") 18 | 19 | @patch('PyPDF2.PdfReader') 20 | def test_pdf_reader(self, mock_pdf_reader): 21 | mock_page = MagicMock() 22 | mock_page.extract_text.return_value = "PDF content" 23 | mock_pdf_reader.return_value.pages = [mock_page] 24 | 25 | with patch('builtins.open', mock_open()): 26 | text = self.reader.read("test.pdf") 27 | self.assertEqual(text.strip(), "PDF content") 28 | 29 | @patch('docx.Document') 30 | def test_docx_reader(self, mock_document): 31 | mock_para = MagicMock() 32 | mock_para.text = "DOCX content" 33 | mock_document.return_value.paragraphs = [mock_para] 34 | 35 | text = 
self.reader.read("test.docx") 36 | self.assertEqual(text.strip(), "DOCX content") 37 | 38 | def test_txt_reader(self): 39 | mock_content = "Text content" 40 | with patch('builtins.open', mock_open(read_data=mock_content)): 41 | text = self.reader.read("test.txt") 42 | self.assertEqual(text, mock_content) 43 | 44 | def test_json_reader(self): 45 | mock_content = '{"key": "value"}' 46 | with patch('builtins.open', mock_open(read_data=mock_content)): 47 | text = self.reader.read("test.json") 48 | self.assertIn("key", text) 49 | self.assertIn("value", text) 50 | 51 | def test_csv_reader(self): 52 | mock_content = "header1,header2\nvalue1,value2" 53 | with patch('builtins.open', mock_open(read_data=mock_content)): 54 | text = self.reader.read("test.csv") 55 | self.assertEqual(text.strip(), "header1,header2\nvalue1,value2") 56 | 57 | if __name__ == "__main__": 58 | unittest.main() -------------------------------------------------------------------------------- /CONTRIBUTING.md: -------------------------------------------------------------------------------- 1 | # empire chain 2 | 3 | We love your input! We want to make contributing to Empire Chain as easy and transparent as possible, whether it's: 4 | 5 | - Reporting a bug 6 | - Discussing the current state of the code 7 | - Submitting a fix 8 | - Proposing new features 9 | - Becoming a maintainer 10 | 11 | ## We Develop with Github 12 | We use GitHub to host code, to track issues and feature requests, as well as accept pull requests. 13 | 14 | ## We Use [Github Flow](https://guides.github.com/introduction/flow/index.html) 15 | Pull requests are the best way to propose changes to the codebase. We actively welcome your pull requests: 16 | 17 | 1. Fork the repo and create your branch from `main`. 18 | 2. If you've added code that should be tested, add tests. 19 | 3. If you've changed APIs, update the documentation. 20 | 4. Ensure the test suite passes. 21 | 5. Make sure your code lints. 22 | 6. Issue that pull request! 23 | 24 | ## Development Setup 25 | 26 | 1. Clone the repository: 27 | ```bash 28 | git clone https://github.com/yourusername/empire-chain.git 29 | cd empire-chain 30 | ``` 31 | 32 | 2. Create a virtual environment and activate it: 33 | ```bash 34 | python -m venv venv 35 | source venv/bin/activate # On Windows use: venv\Scripts\activate 36 | ``` 37 | 38 | 3. Install development dependencies: 39 | ```bash 40 | pip install -e ".[dev]" 41 | ``` 42 | 43 | ## Any contributions you make will be under the MIT Software License 44 | In short, when you submit code changes, your submissions are understood to be under the same [MIT License](http://choosealicense.com/licenses/mit/) that covers the project. Feel free to contact the maintainers if that's a concern. 45 | 46 | ## Report bugs using Github's [issue tracker](../../issues) 47 | We use GitHub issues to track public bugs. Report a bug by [opening a new issue](../../issues/new); it's that easy! 48 | 49 | ## Write bug reports with detail, background, and sample code 50 | 51 | **Great Bug Reports** tend to have: 52 | 53 | - A quick summary and/or background 54 | - Steps to reproduce 55 | - Be specific! 56 | - Give sample code if you can. 57 | - What you expected would happen 58 | - What actually happens 59 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work) 60 | 61 | ## License 62 | By contributing, you agree that your contributions will be licensed under its MIT License. 
63 | 64 | ## References 65 | This document was adapted from the open-source contribution guidelines for [Facebook's Draft](https://github.com/facebook/draft-js/blob/a9316a723f9e918afde44dea68b5f9f39b7d9b00/CONTRIBUTING.md). -------------------------------------------------------------------------------- /cookbooks/prompt_templates/coding_template.py: -------------------------------------------------------------------------------- 1 | """ 2 | Examples of using coding prompt templates for code review and architecture design. 3 | Please run: pip install empire-chain 4 | """ 5 | from empire_chain.prompt_templates.templates import ( 6 | CODE_REVIEW_TEMPLATE, 7 | ARCHITECTURE_DESIGN_TEMPLATE, 8 | format_prompt 9 | ) 10 | 11 | # Example 1: Python Code Review 12 | def python_review_example(): 13 | prompt = format_prompt( 14 | CODE_REVIEW_TEMPLATE, 15 | language="Python", 16 | context="Data Processing Pipeline", 17 | code=""" 18 | def process_data(data_frame): 19 | # Clean data 20 | df_cleaned = data_frame.dropna() 21 | 22 | # Transform columns 23 | df_cleaned['date'] = pd.to_datetime(df_cleaned['date']) 24 | df_cleaned['value'] = df_cleaned['value'].astype(float) 25 | 26 | # Aggregate results 27 | results = df_cleaned.groupby('category').agg({ 28 | 'value': ['mean', 'sum', 'count'] 29 | }) 30 | 31 | return results 32 | """ 33 | ) 34 | return prompt 35 | 36 | # Example 2: System Architecture 37 | def architecture_example(): 38 | prompt = format_prompt( 39 | ARCHITECTURE_DESIGN_TEMPLATE, 40 | project_type="E-commerce Platform", 41 | requirements=""" 42 | - High availability (99.9% uptime) 43 | - Scalable to 1M users 44 | - Real-time inventory management 45 | - Secure payment processing 46 | - Order tracking system 47 | """, 48 | constraints=""" 49 | - Budget: $100k initial setup 50 | - Timeline: 6 months to MVP 51 | - Team: 5 developers 52 | - Technology: Cloud-native 53 | """ 54 | ) 55 | return prompt 56 | 57 | # Example 3: API Design Review 58 | def api_design_example(): 59 | prompt = format_prompt( 60 | CODE_REVIEW_TEMPLATE, 61 | language="REST API", 62 | context="User Management Service", 63 | code=""" 64 | Endpoints: 65 | POST /api/v1/users 66 | - Create new user 67 | - Required fields: username, email, password 68 | 69 | GET /api/v1/users/{id} 70 | - Retrieve user details 71 | - Returns: user profile data 72 | 73 | PUT /api/v1/users/{id} 74 | - Update user information 75 | - Supports partial updates 76 | 77 | DELETE /api/v1/users/{id} 78 | - Deactivate user account 79 | - Soft delete implementation 80 | """ 81 | ) 82 | return prompt 83 | 84 | if __name__ == "__main__": 85 | # Example usage 86 | print("=== Python Code Review ===") 87 | print(python_review_example()) 88 | 89 | print("\n=== System Architecture Design ===") 90 | print(architecture_example()) 91 | 92 | print("\n=== API Design Review ===") 93 | print(api_design_example()) -------------------------------------------------------------------------------- /docs/index.md: -------------------------------------------------------------------------------- 1 | # empire chain 2 | 3 | # ⚔️🔗 EmpireChain 4 | 5 | ⚡ An orchestration framework for all your AI needs ⚡ 6 | 7 | ## Features 8 | 9 | - 🤖 Multiple LLM Support (OpenAI, Anthropic, Groq) 10 | - 📚 Vector Store Integration (Qdrant, ChromaDB) 11 | - 🔍 Advanced Document Processing 12 | - 🎙️ Speech-to-Text Capabilities 13 | - 🌐 Web Crawling with crawl4ai 14 | - 📊 Data Visualization 15 | - 🎯 RAG Applications 16 | - 🤝 PhiData Agent Integration 17 | - 💬 Interactive Chatbots 18 | - 🤖 Agentic Framework 
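
For a first taste of the API, the `OpenAILLM` wrapper can be driven in a few lines (assuming `OPENAI_API_KEY` is set in your environment):

```python
from empire_chain.llms import OpenAILLM

# Minimal end-to-end call; generate() returns the model's reply as a string.
llm = OpenAILLM("gpt-4o-mini")
print(llm.generate("In one sentence, what does an orchestration framework do?"))
```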
19 | 
20 | ## Installation
21 | 
22 | ```bash
23 | pip install empire-chain
24 | ```
25 | 
26 | For detailed information about each component, please check out the respective sections in the documentation.
27 | 
28 | ## Overview
29 | 
30 | Empire Chain is a Python framework that simplifies the process of building complex AI applications. It provides a comprehensive suite of tools for:
31 | 
32 | - Multiple LLM integrations (OpenAI, Anthropic, Groq)
33 | - Vector store operations with Qdrant and ChromaDB
34 | - Advanced document processing and analysis
35 | - Speech-to-Text capabilities
36 | - Web crawling with crawl4ai
37 | - Data visualization and analysis
38 | - RAG (Retrieval Augmented Generation) applications
39 | - PhiData agent integration
40 | - Interactive chatbots (Text, Vision, PDF)
41 | - Document analysis with Docling
42 | 
43 | ## Key Features
44 | 
45 | - **Multiple LLM Support**: Seamless integration with OpenAI, Anthropic, and Groq models
46 | - **Vector Stores**: Built-in support for Qdrant and ChromaDB
47 | - **Document Processing**: Process PDFs, DOCX, and other document formats
48 | - **Speech Processing**: Convert audio to text using state-of-the-art models
49 | - **Web Crawling**: Extract and process web content using crawl4ai
50 | - **Data Visualization**: Create insightful visualizations of your data
51 | - **Interactive Chatbots**: Build text, vision, and PDF-based chat applications
52 | - **PhiData Integration**: Leverage powerful PhiData agents for web and finance tasks
53 | - **RAG Applications**: Build sophisticated retrieval-augmented generation systems
54 | - **Docling Analysis**: Advanced document analysis capabilities
55 | 
56 | ## Quick Links
57 | 
58 | - [Installation Guide](getting-started/installation.md)
59 | - [Quick Start Tutorial](getting-started/quickstart.md)
60 | - [Core Concepts](user-guide/core-concepts.md)
61 | - [Example Cookbooks](tutorials/empire-rag.md)
62 | - [Contributing Guidelines](contributing.md)
63 | 
64 | ## Example Cookbooks
65 | 
66 | Check out our comprehensive examples in the cookbooks directory:
67 | - RAG Applications (`cookbooks/RAG/empire_rag.py`)
68 | - Web Crawling (`cookbooks/tools/crawler.py`)
69 | - Document Processing (`cookbooks/tools/generalized_read_file.py`)
70 | - Topic to Podcast (`cookbooks/cool_stuff/topic-to-podcast.py`)
71 | - Data Visualization (`cookbooks/cool_stuff/visualize_data.py`)
72 | - Chatbot Examples (`cookbooks/chatbots/simple_chatbot.py`, `cookbooks/chatbots/chat_with_pdf.py`)
73 | - PhiData Agent Usage (`cookbooks/phidata/web_agent.py`, `cookbooks/phidata/finance_agent.py`)
74 | 
75 | ## License
76 | 
77 | Empire Chain is released under the MIT License. See the [LICENSE](https://github.com/manas95826/empire-chain/blob/main/LICENSE) file for more details.
-------------------------------------------------------------------------------- /docs/contributing.md: --------------------------------------------------------------------------------
1 | # empire chain
2 | 
3 | We love your input! We want to make contributing to Empire Chain as easy and transparent as possible, whether it's:
4 | 
5 | - Reporting a bug
6 | - Discussing the current state of the code
7 | - Submitting a fix
8 | - Proposing new features
9 | - Becoming a maintainer
10 | 
11 | ## Development Process
12 | 
13 | We use GitHub to host code, to track issues and feature requests, as well as accept pull requests.
14 | 
15 | 1. Fork the repo and create your branch from `main`
16 | 2. If you've added code that should be tested, add tests
17 | 3. If you've changed APIs, update the documentation
18 | 4. 
Ensure the test suite passes 19 | 5. Make sure your code lints 20 | 6. Issue that pull request! 21 | 22 | ## Local Development Setup 23 | 24 | ```bash 25 | # Clone the repository 26 | git clone https://github.com/manas95826/empire-chain.git 27 | cd empire-chain 28 | 29 | # Create a virtual environment (optional but recommended) 30 | python -m venv venv 31 | source venv/bin/activate # On Windows: venv\Scripts\activate 32 | 33 | # Install in development mode 34 | pip install -e . 35 | ``` 36 | 37 | ## Running Tests 38 | 39 | ```bash 40 | # Install test dependencies 41 | pip install pytest pytest-cov 42 | 43 | # Run tests 44 | pytest 45 | ``` 46 | 47 | ## Code Style 48 | 49 | We use `black` for Python code formatting and `flake8` for linting: 50 | 51 | ```bash 52 | # Install formatting tools 53 | pip install black flake8 54 | 55 | # Format code 56 | black . 57 | 58 | # Run linter 59 | flake8 60 | ``` 61 | 62 | ## Documentation 63 | 64 | We use MkDocs with the Material theme for documentation: 65 | 66 | ```bash 67 | # Install documentation dependencies 68 | pip install mkdocs-material 69 | 70 | # Serve documentation locally 71 | mkdocs serve 72 | 73 | # Build documentation 74 | mkdocs build 75 | ``` 76 | 77 | ## Pull Request Process 78 | 79 | 1. Update the README.md with details of changes to the interface 80 | 2. Update the documentation with any new features or changes 81 | 3. The PR will be merged once you have the sign-off of at least one maintainer 82 | 83 | ## Any Contributions You Make Will Be Under the MIT License 84 | 85 | In short, when you submit code changes, your submissions are understood to be under the same [MIT License](LICENSE) that covers the project. Feel free to contact the maintainers if that's a concern. 86 | 87 | ## Report Bugs Using GitHub's Issue Tracker 88 | 89 | We use GitHub issues to track public bugs. Report a bug by [opening a new issue](https://github.com/manas95826/empire-chain/issues/new). 90 | 91 | ## Write Bug Reports With Detail, Background, and Sample Code 92 | 93 | **Great Bug Reports** tend to have: 94 | 95 | - A quick summary and/or background 96 | - Steps to reproduce 97 | - Be specific! 98 | - Give sample code if you can 99 | - What you expected would happen 100 | - What actually happens 101 | - Notes (possibly including why you think this might be happening, or stuff you tried that didn't work) 102 | 103 | ## License 104 | 105 | By contributing, you agree that your contributions will be licensed under its MIT License. -------------------------------------------------------------------------------- /docs/user-guide/document-processing.md: -------------------------------------------------------------------------------- 1 | # Document Processing 2 | 3 | ## Overview 4 | 5 | Empire Chain provides powerful document processing capabilities through its `Docling` module. This guide covers how to process different types of documents and extract meaningful information from them. 
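
If you only need raw text out of common file formats, the `DocumentReader` helper in `empire_chain.tools.file_reader` is a lighter-weight alternative to the full Docling pipeline; a minimal sketch:

```python
from empire_chain.tools.file_reader import DocumentReader

reader = DocumentReader()
print(reader.supported_formats())  # ['.pdf', '.docx', '.txt', '.json', '.csv']
text = reader.read("document.pdf")  # raises ValueError for unsupported extensions
```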
6 | 
7 | ## Supported Document Types
8 | 
9 | - PDF Documents
10 | - Text Files
11 | - Images (with OCR)
12 | - Word Documents
13 | - HTML Pages
14 | 
15 | ## Basic Document Processing
16 | 
17 | ### Loading Documents
18 | 
19 | ```python
20 | from empire_chain.tools.docling import Docling
21 | 
22 | # Load a PDF document
23 | pdf_doc = Docling("document.pdf")
24 | 
25 | # Load a text file
26 | text_doc = Docling("document.txt")
27 | 
28 | # Load from bytes
29 | doc = Docling(bytes_content, content_type="application/pdf")
30 | ```
31 | 
32 | ### Text Extraction
33 | 
34 | ```python
35 | # Extract all text
36 | text = doc.extract_text()
37 | 
38 | # Extract text from specific pages
39 | text = doc.extract_text(pages=[1, 3, 5])
40 | 
41 | # Extract with layout preservation
42 | text = doc.extract_text(preserve_layout=True)
43 | ```
44 | 
45 | ### Document Analysis
46 | 
47 | ```python
48 | # Get document metadata
49 | metadata = doc.get_metadata()
50 | 
51 | # Analyze document structure
52 | structure = doc.analyze_structure()
53 | 
54 | # Extract tables
55 | tables = doc.extract_tables()
56 | ```
57 | 
58 | ## Advanced Features
59 | 
60 | ### OCR Processing
61 | 
62 | ```python
63 | # Enable OCR for image-based PDFs
64 | doc = Docling("scanned.pdf", config={"ocr_enabled": True})
65 | 
66 | # Extract text with OCR
67 | text = doc.extract_text()
68 | ```
69 | 
70 | ### Document Transformation
71 | 
72 | ```python
73 | # Convert to different format
74 | doc.convert_to("docx")
75 | 
76 | # Split document
77 | doc.split(pages=[1, 3, 5])
78 | 
79 | # Merge documents
80 | Docling.merge(["doc1.pdf", "doc2.pdf"], output="merged.pdf")
81 | ```
82 | 
83 | ### Content Extraction
84 | 
85 | ```python
86 | # Extract images
87 | images = doc.extract_images()
88 | 
89 | # Extract tables to pandas DataFrame
90 | tables = doc.extract_tables(output_format="pandas")
91 | 
92 | # Extract specific regions
93 | content = doc.extract_region(bbox=(100, 100, 500, 500))
94 | ```
95 | 
96 | ## Best Practices
97 | 
98 | 1. **Memory Management**
99 | ```python
100 | with Docling("large.pdf") as doc:
101 |     text = doc.extract_text()
102 | ```
103 | 
104 | 2. **Error Handling**
105 | ```python
106 | try:
107 |     doc = Docling("document.pdf")
108 |     text = doc.extract_text()
109 | except DocumentError as e:
110 |     print(f"Error processing document: {e}")
111 | ```
112 | 
113 | 3. 
**Batch Processing**
114 | ```python
115 | from empire_chain.tools.docling import BatchProcessor
116 | 
117 | processor = BatchProcessor()
118 | results = processor.process_directory("docs/")
119 | ```
120 | 
121 | ## Configuration Options
122 | 
123 | ```python
124 | config = {
125 |     "ocr": {
126 |         "enabled": True,
127 |         "language": "eng",
128 |         "dpi": 300
129 |     },
130 |     "extraction": {
131 |         "preserve_layout": True,
132 |         "include_images": False
133 |     },
134 |     "processing": {
135 |         "chunk_size": 1000,
136 |         "max_workers": 4
137 |     }
138 | }
139 | 
140 | doc = Docling("document.pdf", config=config)
141 | ```
-------------------------------------------------------------------------------- /empire_chain/llms/llms.py: --------------------------------------------------------------------------------
1 | # Empire Chain LLM Integration Module
2 | # Updated: March 2025 - Adding comments for version tracking
3 | 
4 | from openai import OpenAI
5 | from anthropic import Anthropic
6 | from groq import Groq
7 | import os
8 | from dotenv import load_dotenv
9 | 
10 | load_dotenv()
11 | 
12 | class LLM:
13 |     def __init__(self, model: str, api_key: str = None, custom_instructions: str = ""):
14 |         self.model = model
15 |         self.api_key = api_key
16 |         self.custom_instructions = custom_instructions
17 |     def generate(self, prompt: str) -> str:
18 |         pass
19 | 
20 | class OpenAILLM(LLM):
21 |     def __init__(self, model: str = "gpt-4o-mini", api_key: str = None, custom_instructions: str = ""):
22 |         super().__init__(model, api_key, custom_instructions)
23 |         self.client = OpenAI(api_key=self.api_key or os.getenv("OPENAI_API_KEY"))
24 | 
25 |     def generate(self, prompt: str) -> str:
26 |         response = self.client.chat.completions.create(
27 |             model=self.model,
28 |             messages=[
29 |                 {"role": "system", "content": self.custom_instructions},
30 |                 {"role": "user", "content": prompt}
31 |             ]
32 |         )
33 |         return response.choices[0].message.content
34 | 
35 | class AnthropicLLM(LLM):
36 |     def __init__(self, model: str = "claude-3-5-sonnet-20240620", api_key: str = None, custom_instructions: str = ""):
37 |         super().__init__(model, api_key, custom_instructions)
38 |         self.client = Anthropic(api_key=self.api_key or os.getenv("ANTHROPIC_API_KEY"))
39 | 
40 |     def generate(self, prompt: str) -> str:
41 |         response = self.client.messages.create(
42 |             model=self.model,
43 |             max_tokens=1000,
44 |             system=self.custom_instructions,  # Anthropic expects the system prompt as a top-level parameter, not a message role
45 |             messages=[{"role": "user", "content": prompt}]
46 |         )
47 |         return response.content[0].text
48 | 
49 | class GroqLLM(LLM):
50 |     def __init__(self, model: str = "llama3-8b-8192", api_key: str = None, custom_instructions: str = ""):
51 |         super().__init__(model, api_key, custom_instructions)
52 |         self.client = Groq(api_key=self.api_key or os.getenv("GROQ_API_KEY"))
53 | 
54 |     def generate(self, prompt: str) -> str:
55 |         response = self.client.chat.completions.create(
56 |             model=self.model,
57 |             messages=[
58 |                 {"role": "system", "content": self.custom_instructions},
59 |                 {"role": "user", "content": prompt}
60 |             ]
61 |         )
62 |         return response.choices[0].message.content
63 | 
64 | 
65 | class GeminiLLM(LLM):
66 |     def __init__(self, model: str = "gemini-1.5-flash", api_key: str = None, custom_instructions: str = ""):
67 |         super().__init__(model, api_key, custom_instructions)
68 |         self.client = OpenAI(
69 |             api_key=self.api_key or os.getenv("GEMINI_API_KEY"),
70 |             base_url="https://generativelanguage.googleapis.com/v1beta/openai/"
71 |         )
72 | 
73 |     def generate(self, prompt: str) -> str:
74 |         response = 
self.client.chat.completions.create(
75 |             model=self.model,
76 |             n=1,
77 |             messages=[
78 |                 {"role": "system", "content": self.custom_instructions},
79 |                 {"role": "user", "content": prompt}
80 |             ]
81 |         )
82 |         return response.choices[0].message.content
83 | 
-------------------------------------------------------------------------------- /docs/getting-started/quickstart.md: --------------------------------------------------------------------------------
1 | # Quick Start Guide
2 | 
3 | This guide will help you get started with Empire Chain by walking through some common use cases.
4 | 
5 | ## Basic Setup
6 | 
7 | First, make sure you have Empire Chain installed and your environment configured:
8 | 
9 | ```python
10 | from dotenv import load_dotenv
11 | load_dotenv()  # Load environment variables from .env file
12 | ```
13 | 
14 | ## 1. Simple LLM Integration
15 | 
16 | ```python
17 | from empire_chain.llms import OpenAILLM
18 | 
19 | # Initialize the LLM
20 | llm = OpenAILLM("gpt-4")
21 | 
22 | # Generate text
23 | response = llm.generate("What are the key principles of AI safety?")
24 | print(response)
25 | ```
26 | 
27 | ## 2. Document Processing
28 | 
29 | ```python
30 | from empire_chain.tools.file_reader import DocumentReader
31 | 
32 | # Initialize the document reader
33 | reader = DocumentReader()
34 | 
35 | # Read a PDF file
36 | text = reader.read("document.pdf")
37 | print(text)
38 | ```
39 | 
40 | ## 3. Building a Simple Chatbot
41 | 
42 | ```python
43 | from empire_chain.streamlit import Chatbot
44 | from empire_chain.llms import OpenAILLM
45 | 
46 | # Create a chatbot
47 | chatbot = Chatbot(
48 |     llm=OpenAILLM("gpt-4"),
49 |     title="My First Chatbot"
50 | )
51 | 
52 | # Launch the chatbot
53 | chatbot.chat()
54 | ```
55 | 
56 | ## 4. RAG Implementation
57 | 
58 | ```python
59 | from empire_chain.vector_stores import QdrantVectorStore
60 | from empire_chain.embeddings import OpenAIEmbeddings
61 | from empire_chain.llms import OpenAILLM
62 | from empire_chain.tools.file_reader import DocumentReader
63 | 
64 | # Initialize components
65 | vector_store = QdrantVectorStore(":memory:")
66 | embeddings = OpenAIEmbeddings("text-embedding-3-small")
67 | llm = OpenAILLM("gpt-4")
68 | reader = DocumentReader()
69 | 
70 | # Process document
71 | text = reader.read("knowledge_base.pdf")
72 | text_embedding = embeddings.embed(text)
73 | vector_store.add(text, text_embedding)
74 | 
75 | # Query the system
76 | query = "What are the main points in the document?"
77 | query_embedding = embeddings.embed(query)
78 | relevant_texts = vector_store.query(query_embedding, k=3)
79 | 
80 | # Generate response
81 | context = "\n".join(relevant_texts)
82 | response = llm.generate(f"Based on this context, {query}\n\nContext: {context}")
83 | print(response)
84 | ```
85 | 
86 | ## 5. Web Crawling
87 | 
88 | ```python
89 | from empire_chain.tools.crawl4ai import Crawler
90 | 
91 | # Initialize crawler
92 | crawler = Crawler()
93 | 
94 | # Crawl a website
95 | data = crawler.crawl("https://example.com")
96 | print(data)
97 | ```
98 | 
99 | ## 6. Data Visualization
100 | 
101 | ```python
102 | from empire_chain.cool_stuff.visualizer import DataAnalyzer, ChartFactory
103 | 
104 | # Analyze data
105 | analyzer = DataAnalyzer()
106 | data = """
107 | The company saw revenue growth of $1M in Q1, $1.5M in Q2,
108 | $2M in Q3, and $2.5M in Q4 of 2023.
109 | """
110 | analyzed_data = analyzer.analyze(data)
111 | 
112 | # Create and display chart
113 | chart = ChartFactory.create_chart('Line Chart', analyzed_data)
114 | chart.show()
115 | ```
116 | 
117 | ## 7. 
Using PhiData Agents
118 | 
119 | ```python
120 | from empire_chain.phidata_agents import PhiWebAgent, PhiFinanceAgent
121 | 
122 | # Create agents
123 | web_agent = PhiWebAgent()
124 | finance_agent = PhiFinanceAgent()
125 | 
126 | # Use agents
127 | news = web_agent.generate("What are the latest developments in AI?")
128 | stock_analysis = finance_agent.generate("Analyze recent NVIDIA stock performance")
129 | ```
130 | 
131 | ## Next Steps
132 | 
133 | - Explore more examples in our [Cookbooks](../tutorials/empire-rag.md)
134 | - Learn about [Core Concepts](../user-guide/core-concepts.md)
135 | - Check out the [API Reference](../api-reference/llms.md)
136 | 
137 | For more detailed examples, visit our [GitHub repository](https://github.com/manas95826/empire-chain/tree/main/cookbooks).
-------------------------------------------------------------------------------- /tests/test_visualizer.py: --------------------------------------------------------------------------------
1 | # empire chain
2 | import unittest
3 | from unittest.mock import patch, MagicMock
4 | import matplotlib.pyplot as plt
5 | import json
6 | from empire_chain.cool_stuff.visualizer import (
7 |     DataAnalyzer, ChartFactory, LineChart, PieChart,
8 |     BarGraph, ScatterChart, Histogram, BoxPlot
9 | )
10 | 
11 | class TestVisualizer(unittest.TestCase):
12 |     def setUp(self):
13 |         self.sample_data = {
14 |             "title": "Test Chart",
15 |             "x-axis": "Categories",
16 |             "y-axis": "Values",
17 |             "datapoints": {"A": 1, "B": 2, "C": 3}
18 |         }
19 |         self.sample_data_json = json.dumps(self.sample_data)
20 | 
21 |     def test_data_analyzer_success(self):
22 |         analyzer = DataAnalyzer()
23 |         with patch.object(analyzer.client, 'generate') as mock_generate:
24 |             mock_generate.return_value = json.dumps({
25 |                 "title": "Analysis",
26 |                 "x-axis": "X",
27 |                 "y-axis": "Y",
28 |                 "datapoints": {"test": 1}
29 |             })
30 |             result = analyzer.analyze("test data")
31 |             self.assertIsInstance(result, dict)
32 |             self.assertIn("title", result)
33 |             self.assertIn("datapoints", result)
34 | 
35 |     def test_data_analyzer_failure(self):
36 |         analyzer = DataAnalyzer()
37 |         with patch.object(analyzer.client, 'generate') as mock_generate:
38 |             mock_generate.return_value = "invalid json"
39 |             result = analyzer.analyze("test data")
40 |             self.assertIsNone(result["result"])
41 |             self.assertIn("error", result)
42 | 
43 |     def test_line_chart(self):
44 |         chart = LineChart(self.sample_data)
45 |         fig = chart.plot()
46 |         self.assertIsNotNone(fig)
47 |         plt.close()
48 | 
49 |     def test_pie_chart(self):
50 |         chart = PieChart(self.sample_data)
51 |         fig = chart.plot()
52 |         self.assertIsNotNone(fig)
53 |         plt.close()
54 | 
55 |     def test_bar_graph(self):
56 |         chart = BarGraph(self.sample_data)
57 |         fig = chart.plot()
58 |         self.assertIsNotNone(fig)
59 |         plt.close()
60 | 
61 |     def test_scatter_chart(self):
62 |         chart = ScatterChart(self.sample_data)
63 |         fig = chart.plot()
64 |         self.assertIsNotNone(fig)
65 |         plt.close()
66 | 
67 |     def test_histogram(self):
68 |         chart = Histogram(self.sample_data)
69 |         fig = chart.plot()
70 |         self.assertIsNotNone(fig)
71 |         plt.close()
72 | 
73 |     def test_box_plot(self):
74 |         chart = BoxPlot(self.sample_data)
75 |         fig = chart.plot()
76 |         self.assertIsNotNone(fig)
77 |         plt.close()
78 | 
79 |     def test_chart_factory_valid_type(self):
80 |         chart = ChartFactory.create_chart('Line Chart', self.sample_data)
81 |         self.assertIsInstance(chart, LineChart)
82 | 
83 |     def test_chart_factory_invalid_type(self):
84 |         chart = ChartFactory.create_chart('Invalid Chart', self.sample_data)
85 |         
self.assertIsNone(chart) 86 | 87 | def test_invalid_data_format(self): 88 | invalid_data = { 89 | "title": "Test Chart", 90 | "x-axis": "Categories", 91 | "y-axis": "Values", 92 | "datapoints": {"A": "invalid", "B": "invalid2"} # Non-numeric values 93 | } 94 | chart = LineChart(invalid_data) 95 | self.assertIsNone(chart.plot()) 96 | plt.close() 97 | 98 | def test_error_result_handling(self): 99 | error_data = { 100 | "result": None, 101 | "error": "Test error message" 102 | } 103 | chart = LineChart(error_data) 104 | self.assertIsNone(chart.plot()) 105 | plt.close() 106 | 107 | if __name__ == "__main__": 108 | unittest.main() -------------------------------------------------------------------------------- /docs/tutorials/chat-with-pdf.md: -------------------------------------------------------------------------------- 1 | # Chat with PDF Tutorial 2 | 3 | This tutorial will show you how to create an interactive chatbot that can answer questions about PDF documents. 4 | 5 | ## Overview 6 | 7 | The PDF chatbot combines several Empire Chain components: 8 | - Document processing for PDF files 9 | - Vector store for efficient retrieval 10 | - LLM for generating responses 11 | - Streamlit interface for interaction 12 | 13 | ## Prerequisites 14 | 15 | - Empire Chain installed 16 | - API keys configured in `.env` 17 | - PDF document(s) for analysis 18 | 19 | ## Implementation 20 | 21 | ### 1. Import Required Components 22 | 23 | ```python 24 | from empire_chain.streamlit import PDFChatbot 25 | from empire_chain.llms import OpenAILLM 26 | from empire_chain.vector_stores import QdrantVectorStore 27 | from empire_chain.embeddings import OpenAIEmbeddings 28 | ``` 29 | 30 | ### 2. Initialize and Launch Chatbot 31 | 32 | ```python 33 | # Create the chatbot with all necessary components 34 | pdf_chatbot = PDFChatbot( 35 | title="PDF Assistant", 36 | llm=OpenAILLM("gpt-4"), 37 | vector_store=QdrantVectorStore(":memory:"), 38 | embeddings=OpenAIEmbeddings("text-embedding-3-small") 39 | ) 40 | 41 | # Launch the interactive interface 42 | pdf_chatbot.chat() 43 | ``` 44 | 45 | ## How It Works 46 | 47 | 1. **Document Upload**: The chatbot provides a file upload interface for PDF documents 48 | 2. **Processing**: When a document is uploaded: 49 | - Text is extracted using `DocumentReader` 50 | - Text is split into chunks 51 | - Chunks are embedded and stored in the vector store 52 | 3. **Query Processing**: When a user asks a question: 53 | - The question is embedded 54 | - Similar chunks are retrieved from the vector store 55 | - Context and question are sent to the LLM 56 | 4. 
**Response**: The LLM generates a response based on the retrieved context 57 | 58 | ## Customization Options 59 | 60 | ### Using Different LLM Models 61 | 62 | ```python 63 | # Using Anthropic 64 | from empire_chain.llms import AnthropicLLM 65 | chatbot = PDFChatbot( 66 | title="PDF Assistant", 67 | llm=AnthropicLLM("claude-3-sonnet"), 68 | vector_store=QdrantVectorStore(":memory:"), 69 | embeddings=OpenAIEmbeddings("text-embedding-3-small") 70 | ) 71 | 72 | # Using Groq 73 | from empire_chain.llms import GroqLLM 74 | chatbot = PDFChatbot( 75 | title="PDF Assistant", 76 | llm=GroqLLM("mixtral-8x7b"), 77 | vector_store=QdrantVectorStore(":memory:"), 78 | embeddings=OpenAIEmbeddings("text-embedding-3-small") 79 | ) 80 | ``` 81 | 82 | ### Using Different Vector Stores 83 | 84 | ```python 85 | # Using ChromaDB 86 | from empire_chain.vector_stores import ChromaVectorStore 87 | chatbot = PDFChatbot( 88 | title="PDF Assistant", 89 | llm=OpenAILLM("gpt-4"), 90 | vector_store=ChromaVectorStore(), 91 | embeddings=OpenAIEmbeddings("text-embedding-3-small") 92 | ) 93 | ``` 94 | 95 | ## Complete Example 96 | 97 | Here's a complete example with all components configured: 98 | 99 | ```python 100 | from empire_chain.streamlit import PDFChatbot 101 | from empire_chain.llms import OpenAILLM 102 | from empire_chain.vector_stores import QdrantVectorStore 103 | from empire_chain.embeddings import OpenAIEmbeddings 104 | from dotenv import load_dotenv 105 | 106 | def main(): 107 | # Load environment variables 108 | load_dotenv() 109 | 110 | # Create and configure the chatbot 111 | chatbot = PDFChatbot( 112 | title="PDF Assistant", 113 | llm=OpenAILLM("gpt-4"), 114 | vector_store=QdrantVectorStore(":memory:"), 115 | embeddings=OpenAIEmbeddings("text-embedding-3-small") 116 | ) 117 | 118 | # Launch the interface 119 | chatbot.chat() 120 | 121 | if __name__ == "__main__": 122 | main() 123 | ``` 124 | 125 | ## Best Practices 126 | 127 | 1. **Memory Management**: Use `:memory:` for temporary storage or configure persistent storage for production 128 | 2. **Model Selection**: Choose models based on your needs: 129 | - GPT-4 for highest accuracy 130 | - Claude for longer context 131 | - Mixtral for faster responses 132 | 3. **Error Handling**: The chatbot includes built-in error handling for: 133 | - File upload issues 134 | - Processing errors 135 | - API failures 136 | 137 | ## Next Steps 138 | 139 | - Try the [Chat with Images](../components/chatbots.md) tutorial 140 | - Learn about [Data Visualization](../user-guide/visualization.md) 141 | - Explore [Vector Store Options](../components/vector_stores.md) -------------------------------------------------------------------------------- /docs/tutorials/empire-rag.md: -------------------------------------------------------------------------------- 1 | # Building a RAG System 2 | 3 | This tutorial will guide you through building a Retrieval Augmented Generation (RAG) system using Empire Chain. 4 | 5 | ## Overview 6 | 7 | In this tutorial, we'll build a RAG system that can: 8 | - Process PDF documents 9 | - Convert audio queries to text 10 | - Retrieve relevant information 11 | - Generate contextual responses 12 | 13 | ## Prerequisites 14 | 15 | - Empire Chain installed 16 | - API keys configured in `.env` 17 | - Sample PDF document 18 | - (Optional) Audio file for voice queries 19 | 20 | ## Step-by-Step Implementation 21 | 22 | ### 1. 
Import Required Components
23 | 
24 | ```python
25 | from empire_chain.vector_stores import QdrantVectorStore
26 | from empire_chain.embeddings import OpenAIEmbeddings
27 | from empire_chain.llms import OpenAILLM
28 | from empire_chain.tools.file_reader import DocumentReader
29 | from empire_chain.stt.stt import GroqSTT
30 | from dotenv import load_dotenv
31 | ```
32 | 
33 | ### 2. Initialize Components
34 | 
35 | ```python
36 | load_dotenv()  # Load environment variables
37 | 
38 | # Initialize core components
39 | vector_store = QdrantVectorStore(":memory:")  # In-memory vector store
40 | embeddings = OpenAIEmbeddings("text-embedding-3-small")
41 | llm = OpenAILLM("gpt-4")
42 | reader = DocumentReader()
43 | ```
44 | 
45 | ### 3. Process Document
46 | 
47 | ```python
48 | # Read and process the document
49 | file_path = "input.pdf"
50 | text = reader.read(file_path)
51 | 
52 | # Create and store embeddings
53 | text_embedding = embeddings.embed(text)
54 | vector_store.add(text, text_embedding)
55 | ```
56 | 
57 | ### 4. Handle Queries
58 | 
59 | ```python
60 | # Text query
61 | text_query = "What is the main topic of this document?"
62 | 
63 | # Optional: Audio query processing
64 | stt = GroqSTT()
65 | audio_query = stt.transcribe("audio.mp3")  # If using voice input
66 | 
67 | # Create query embedding
68 | query_embedding = embeddings.embed(audio_query)  # or text_query
69 | ```
70 | 
71 | ### 5. Retrieve and Generate
72 | 
73 | ```python
74 | # Retrieve relevant context
75 | relevant_texts = vector_store.query(query_embedding, k=3)
76 | context = "\n".join(relevant_texts)
77 | 
78 | # Generate response
79 | prompt = f"Based on the following context, {text_query}\n\nContext: {context}"
80 | response = llm.generate(prompt)
81 | 
82 | print(f"Query: {text_query}")
83 | print(f"Response: {response}")
84 | ```
85 | 
86 | ## Complete Example
87 | 
88 | Here's the complete code that puts everything together:
89 | 
90 | ```python
91 | from empire_chain.vector_stores import QdrantVectorStore
92 | from empire_chain.embeddings import OpenAIEmbeddings
93 | from empire_chain.llms import OpenAILLM
94 | from empire_chain.tools.file_reader import DocumentReader
95 | from empire_chain.stt.stt import GroqSTT
96 | from dotenv import load_dotenv
97 | 
98 | def main():
99 |     load_dotenv()
100 | 
101 |     # Initialize components
102 |     vector_store = QdrantVectorStore(":memory:")
103 |     embeddings = OpenAIEmbeddings("text-embedding-3-small")
104 |     llm = OpenAILLM("gpt-4")
105 |     reader = DocumentReader()
106 | 
107 |     # Process document
108 |     file_path = "input.pdf"
109 |     text = reader.read(file_path)
110 |     text_embedding = embeddings.embed(text)
111 |     vector_store.add(text, text_embedding)
112 | 
113 |     # Handle query
114 |     text_query = "What is the main topic of this document?"
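    # Note: the next two lines turn a spoken query into text via Groq STT; the
    # audio step is optional, and you can embed text_query directly instead.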
115 |     stt = GroqSTT()
116 |     audio_query = stt.transcribe("audio.mp3")
117 |     query_embedding = embeddings.embed(audio_query)
118 | 
119 |     # Retrieve and generate
120 |     relevant_texts = vector_store.query(query_embedding, k=3)
121 |     context = "\n".join(relevant_texts)
122 |     prompt = f"Based on the following context, {text_query}\n\nContext: {context}"
123 |     response = llm.generate(prompt)
124 | 
125 |     print(f"Query: {text_query}")
126 |     print(f"Response: {response}")
127 | 
128 | if __name__ == "__main__":
129 |     main()
130 | ```
131 | 
132 | ## Customization Options
133 | 
134 | - Change vector store implementation: Use `ChromaVectorStore` instead of `QdrantVectorStore`
135 | - Adjust retrieval parameters: Modify `k` value in `vector_store.query()`
136 | - Use different LLM models: Switch between OpenAI, Anthropic, or Groq
137 | - Customize prompt template: Modify the prompt format for different use cases
138 | 
139 | ## Next Steps
140 | 
141 | - Try the [Chat with PDF](chat-with-pdf.md) tutorial for an interactive interface
142 | - Explore [Data Visualization](../user-guide/visualization.md) for analyzing results
143 | - Learn about [Vector Stores](../components/vector_stores.md) in depth
-------------------------------------------------------------------------------- /docs/api-reference/llms.md: --------------------------------------------------------------------------------
1 | # LLM Module API Reference
2 | 
3 | The `empire_chain.llms` module provides interfaces to various Language Model providers.
4 | 
5 | ## OpenAILLM
6 | 
7 | ```python
8 | from empire_chain.llms import OpenAILLM
9 | ```
10 | 
11 | Class for interacting with OpenAI's language models.
12 | 
13 | ### Constructor
14 | 
15 | ```python
16 | OpenAILLM(model: str = "gpt-4o-mini")
17 | ```
18 | 
19 | **Parameters:**
20 | - `model` (str): The OpenAI model to use. Options include:
21 |   - `"gpt-4o-mini"`
22 |   - `"gpt-4o"`
23 |   - `"gpt-4"`
24 | 
25 | ### Methods
26 | 
27 | #### generate()
28 | 
29 | ```python
30 | def generate(self, prompt: str) -> str
31 | ```
32 | 
33 | Generate text based on a prompt.
34 | 
35 | **Parameters:**
36 | - `prompt` (str): The input prompt for text generation
37 | 
38 | **Returns:**
39 | - str: The generated text response
40 | 
41 | **Example:**
42 | ```python
43 | llm = OpenAILLM("gpt-4")
44 | response = llm.generate("What is artificial intelligence?")
45 | ```
46 | 
47 | ## AnthropicLLM
48 | 
49 | ```python
50 | from empire_chain.llms import AnthropicLLM
51 | ```
52 | 
53 | Class for interacting with Anthropic's Claude models.
54 | 
55 | ### Constructor
56 | 
57 | ```python
58 | AnthropicLLM(model: str = "claude-3-5-sonnet-20240620")
59 | ```
60 | 
61 | **Parameters:**
62 | - `model` (str): The Anthropic model to use. Options include:
63 |   - `"claude-3-5-sonnet-20240620"`
64 |   - `"claude-3-opus-20240229"`
65 |   - `"claude-3-haiku-20240307"`
66 | 
67 | ### Methods
68 | 
69 | #### generate()
70 | 
71 | ```python
72 | def generate(self, prompt: str) -> str
73 | ```
74 | 
75 | Generate text using Claude.
76 | 
77 | **Parameters:**
78 | - `prompt` (str): The input prompt for text generation
79 | 
80 | **Returns:**
81 | - str: The generated text response
82 | 
83 | **Example:**
84 | ```python
85 | llm = AnthropicLLM("claude-3-5-sonnet-20240620")
86 | response = llm.generate("Explain quantum computing")
87 | ```
88 | 
89 | ## GroqLLM
90 | 
91 | ```python
92 | from empire_chain.llms import GroqLLM
93 | ```
94 | 
95 | Class for interacting with Groq's language models.
96 | 
97 | ### Constructor
98 | 
99 | ```python
100 | GroqLLM(model: str = "llama3-8b-8192")
101 | ```
102 | 
103 | **Parameters:**
104 | - `model` (str): The Groq model to use. Options include:
105 |   - `"llama3-8b-8192"`
106 |   - `"llama3-70b-8192"`
107 | 
108 | ### Methods
109 | 
110 | #### generate()
111 | 
112 | ```python
113 | def generate(self, prompt: str) -> str
114 | ```
115 | 
116 | Generate text using Groq.
117 | 
118 | **Parameters:**
119 | - `prompt` (str): The input prompt for text generation
120 | 
121 | **Returns:**
122 | - str: The generated text response
123 | 
124 | **Example:**
125 | ```python
126 | llm = GroqLLM("llama3-8b-8192")
127 | response = llm.generate("Write a poem about AI")
128 | ```
129 | 
130 | ## GeminiLLM
131 | 
132 | ```python
133 | from empire_chain.llms import GeminiLLM
134 | ```
135 | 
136 | Class for interacting with Google's Gemini language models.
137 | 
138 | ### Constructor
139 | 
140 | ```python
141 | GeminiLLM(model: str = "gemini-1.5-flash")
142 | ```
143 | 
144 | **Parameters:**
145 | - `model` (str): The Gemini model to use. Options include:
146 |   - `"gemini-1.5-flash"`
147 |   - `"gemini-1.5-pro"`
148 | 
149 | ### Methods
150 | 
151 | #### generate()
152 | 
153 | ```python
154 | def generate(self, prompt: str) -> str
155 | ```
156 | 
157 | Generate text using Gemini.
158 | 
159 | **Parameters:**
160 | - `prompt` (str): The input prompt for text generation
161 | 
162 | **Returns:**
163 | - str: The generated text response
164 | 
165 | **Example:**
166 | ```python
167 | llm = GeminiLLM("gemini-1.5-flash")
168 | response = llm.generate("Who is Lionel Messi?")
169 | ```
170 | 
171 | ## Common Features
172 | 
173 | All LLM classes share these common features:
174 | 
175 | ### Error Handling
176 | 
177 | ```python
178 | try:
179 |     llm = OpenAILLM()
180 |     response = llm.generate("prompt")
181 | except Exception as e:
182 |     print(f"Error: {e}")
183 | ```
184 | 
185 | ### Environment Variables
186 | 
187 | Required environment variables:
188 | - OpenAI: `OPENAI_API_KEY`
189 | - Anthropic: `ANTHROPIC_API_KEY`
190 | - Groq: `GROQ_API_KEY`
191 | - Gemini: `GEMINI_API_KEY`
192 | 
193 | ### Best Practices
194 | 
195 | 1. **Model Selection**
196 | ```python
197 | # For complex reasoning
198 | llm = OpenAILLM("gpt-4")
199 | 
200 | # For coding related tasks
201 | llm = AnthropicLLM("claude-3-opus-20240229")
202 | 
203 | # For faster responses
204 | llm = GroqLLM("llama3-8b-8192")
205 | 
206 | # For longer context
207 | llm = GeminiLLM("gemini-1.5-flash")
208 | ```
209 | 
210 | 2. **Error Handling**
211 | ```python
212 | try:
213 |     response = llm.generate(prompt)
214 | except Exception as e:
215 |     # Handle specific error types
216 |     pass
217 | ```
218 | 
219 | 3. **Environment Setup**
220 | ```python
221 | from dotenv import load_dotenv
222 | load_dotenv()  # Load API keys from .env
223 | ```
-------------------------------------------------------------------------------- /docs/user-guide/llm-integration.md: --------------------------------------------------------------------------------
1 | # LLM Integration
2 | 
3 | ## Overview
4 | 
5 | Empire Chain provides seamless integration with various Large Language Models (LLMs). This guide covers how to use different LLMs, configure them, and build applications with them.
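
Alongside the higher-level handler interface described below, the concrete wrapper classes that ship in `empire_chain.llms` can be used directly (a minimal grounded example; it reads `GROQ_API_KEY` from your environment):

```python
from empire_chain.llms import GroqLLM

# Direct provider access through the shipped wrapper class.
llm = GroqLLM(model="llama3-8b-8192")
print(llm.generate("Explain retrieval augmented generation in one sentence."))
```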
6 | 7 | ## Supported Models 8 | 9 | - OpenAI GPT Models 10 | - Anthropic Claude 11 | - Local Models (via HuggingFace) 12 | - Custom Model Integration 13 | 14 | ## Basic Usage 15 | 16 | ### Setting Up 17 | 18 | ```python 19 | from empire_chain.docling import LLMHandler 20 | 21 | # Initialize with OpenAI 22 | llm = LLMHandler(provider="openai") 23 | 24 | # Initialize with Anthropic 25 | llm = LLMHandler(provider="anthropic") 26 | 27 | # Initialize with local model 28 | llm = LLMHandler( 29 | provider="local", 30 | model_path="path/to/model" 31 | ) 32 | ``` 33 | 34 | ### Simple Queries 35 | 36 | ```python 37 | # Basic completion 38 | response = llm.complete("Tell me about AI") 39 | 40 | # Chat completion 41 | messages = [ 42 | {"role": "user", "content": "What is machine learning?"} 43 | ] 44 | response = llm.chat(messages) 45 | ``` 46 | 47 | ## Advanced Features 48 | 49 | ### Model Configuration 50 | 51 | ```python 52 | config = { 53 | "model": "gpt-4", 54 | "temperature": 0.7, 55 | "max_tokens": 150, 56 | "top_p": 1, 57 | "frequency_penalty": 0, 58 | "presence_penalty": 0 59 | } 60 | 61 | llm = LLMHandler(provider="openai", config=config) 62 | ``` 63 | 64 | ### Streaming Responses 65 | 66 | ```python 67 | for chunk in llm.stream("Tell me a story"): 68 | print(chunk, end="", flush=True) 69 | ``` 70 | 71 | ### Function Calling 72 | 73 | ```python 74 | functions = [{ 75 | "name": "get_weather", 76 | "description": "Get weather information", 77 | "parameters": { 78 | "type": "object", 79 | "properties": { 80 | "location": {"type": "string"}, 81 | "unit": {"type": "string"} 82 | } 83 | } 84 | }] 85 | 86 | response = llm.chat( 87 | messages=[{"role": "user", "content": "What's the weather in London?"}], 88 | functions=functions 89 | ) 90 | ``` 91 | 92 | ## Integration Patterns 93 | 94 | ### RAG Implementation 95 | 96 | ```python 97 | from empire_chain.docling import RAGSystem 98 | 99 | # Initialize RAG with specific LLM 100 | rag = RAGSystem(llm_handler=llm) 101 | 102 | # Add documents 103 | rag.add_documents(["doc1.pdf", "doc2.pdf"]) 104 | 105 | # Query with context 106 | response = rag.query("What do the documents say about AI?") 107 | ``` 108 | 109 | ### Chain of Thought 110 | 111 | ```python 112 | prompt = """ 113 | Question: {question} 114 | Let's approach this step by step: 115 | 1) First, let's understand what we're asked 116 | 2) Then, break down the problem 117 | 3) Finally, provide the solution 118 | """ 119 | 120 | response = llm.complete( 121 | prompt.format(question="How does photosynthesis work?"), 122 | temperature=0.3 123 | ) 124 | ``` 125 | 126 | ### Agent Implementation 127 | 128 | ```python 129 | from empire_chain.docling import Agent 130 | 131 | # Create an agent with tools 132 | agent = Agent( 133 | llm_handler=llm, 134 | tools=[ 135 | "calculator", 136 | "web_search", 137 | "code_executor" 138 | ] 139 | ) 140 | 141 | # Run agent 142 | result = agent.run("Calculate the compound interest on $1000") 143 | ``` 144 | 145 | ## Best Practices 146 | 147 | ### Error Handling 148 | 149 | ```python 150 | from empire_chain.exceptions import LLMError 151 | 152 | try: 153 | response = llm.complete("Generate text") 154 | except LLMError as e: 155 | print(f"LLM Error: {e}") 156 | except Exception as e: 157 | print(f"Unexpected error: {e}") 158 | ``` 159 | 160 | ### Rate Limiting 161 | 162 | ```python 163 | llm = LLMHandler( 164 | provider="openai", 165 | config={ 166 | "rate_limit": { 167 | "requests_per_minute": 60, 168 | "tokens_per_minute": 40000 169 | } 170 | } 171 | ) 172 | ``` 173 | 
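Rate limits can still be exceeded under bursty traffic, so it helps to pair the configuration above with client-side retries. The helper below is a minimal sketch that reuses the `LLMError` exception from the error-handling example earlier on this page; the retry count and delays are illustrative assumptions, not library defaults.

```python
import time

from empire_chain.exceptions import LLMError

def complete_with_backoff(llm, prompt: str, retries: int = 3, base_delay: float = 2.0) -> str:
    """Retry a completion with exponential backoff on transient LLM errors."""
    for attempt in range(retries):
        try:
            return llm.complete(prompt)
        except LLMError:
            if attempt == retries - 1:
                raise  # give up after the final attempt
            time.sleep(base_delay * (2 ** attempt))  # wait 2s, 4s, 8s, ...
```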
174 | ### Caching
175 | 
176 | ```python
177 | llm = LLMHandler(
178 |     provider="openai",
179 |     config={
180 |         "cache": {
181 |             "enabled": True,
182 |             "ttl": 3600,  # 1 hour
183 |             "max_size": 1000
184 |         }
185 |     }
186 | )
187 | ```
188 | 
189 | ## Model Comparison
190 | 
191 | | Provider | Strengths | Use Cases |
192 | |----------|-----------|-----------|
193 | | OpenAI | State-of-the-art performance | General purpose, code generation |
194 | | Anthropic| Long context, reasoning | Document analysis, complex tasks |
195 | | Local | Privacy, no network latency | Edge deployment, offline use |
196 | 
197 | ## Security Considerations
198 | 
199 | 1. API Key Management
200 | ```python
201 | # Use environment variables
202 | import os
203 | llm = LLMHandler(
204 |     provider="openai",
205 |     api_key=os.getenv("OPENAI_API_KEY")
206 | )
207 | ```
208 | 
209 | 2. Content Filtering
210 | ```python
211 | llm = LLMHandler(
212 |     provider="openai",
213 |     config={
214 |         "content_filter": {
215 |             "enabled": True,
216 |             "level": "strict"
217 |         }
218 |     }
219 | )
220 | ```
-------------------------------------------------------------------------------- /docs/user-guide/visualization.md: --------------------------------------------------------------------------------
1 | # Visualization
2 | 
3 | ## Overview
4 | 
5 | Empire Chain provides powerful visualization capabilities through its `visualizer` module. This guide covers how to create various types of visualizations for your data and AI pipeline results.
6 | 
7 | ## Basic Visualizations
8 | 
9 | ### Data Plots
10 | 
11 | ```python
12 | from empire_chain.visualizer import DataVisualizer
13 | import pandas as pd
14 | 
15 | # Create sample data
16 | data = pd.DataFrame({
17 |     'x': range(10),
18 |     'y': [x**2 for x in range(10)]
19 | })
20 | 
21 | # Initialize visualizer
22 | viz = DataVisualizer(data)
23 | 
24 | # Create different types of plots
25 | viz.line_plot('x', 'y', title='Square Function')
26 | viz.scatter_plot('x', 'y', title='Data Points')
27 | viz.bar_plot('x', 'y', title='Bar Chart')
28 | ```
29 | 
30 | ### Interactive Plots
31 | 
32 | ```python
33 | # Create interactive plot
34 | viz.interactive_plot(
35 |     'x', 'y',
36 |     plot_type='line',
37 |     hover_data=['x', 'y']
38 | )
39 | 
40 | # Create dashboard
41 | viz.create_dashboard([
42 |     ('line', {'x': 'x', 'y': 'y'}),
43 |     ('scatter', {'x': 'x', 'y': 'y'})
44 | ])
45 | ```
46 | 
47 | ## AI Pipeline Visualization
48 | 
49 | ### RAG Visualization
50 | 
51 | ```python
52 | from empire_chain.visualizer import RAGVisualizer
53 | 
54 | # Initialize RAG visualizer
55 | rag_viz = RAGVisualizer(rag_system)
56 | 
57 | # Visualize retrieval process
58 | rag_viz.show_retrieval_path()
59 | 
60 | # Visualize document similarities
61 | rag_viz.plot_document_similarities()
62 | 
63 | # Show attention heatmap
64 | rag_viz.attention_heatmap()
65 | ```
66 | 
67 | ### Model Performance
68 | 
69 | ```python
70 | from empire_chain.visualizer import ModelVisualizer
71 | 
72 | # Initialize model visualizer
73 | model_viz = ModelVisualizer(model)
74 | 
75 | # Plot training metrics
76 | model_viz.plot_training_history()
77 | 
78 | # Show confusion matrix
79 | model_viz.confusion_matrix()
80 | 
81 | # Plot attention weights
82 | model_viz.attention_weights()
83 | ```
84 | 
85 | ## Advanced Features
86 | 
87 | ### Custom Styling
88 | 
89 | ```python
90 | # Set global style
91 | viz.set_style({
92 |     'theme': 'dark',
93 |     'color_palette': ['#FF0000', '#00FF00', '#0000FF'],
94 |     'font_family': 'Arial',
95 |     'font_size': 12
96 | })
97 | 
98 | # Apply to specific plot
99 | viz.line_plot(
100 | 'x', 'y', 101 | style={ 102 | 'line_color': '#FF0000', 103 | 'line_width': 2, 104 | 'marker_size': 8 105 | } 106 | ) 107 | ``` 108 | 109 | ### Animation 110 | 111 | ```python 112 | # Create animated plot 113 | viz.animate_plot( 114 | 'x', 'y', 115 | frames=range(10), 116 | title='Animation' 117 | ) 118 | 119 | # Save animation 120 | viz.save_animation('animation.gif') 121 | ``` 122 | 123 | ### Export Options 124 | 125 | ```python 126 | # Save as static image 127 | viz.save_plot('plot.png', dpi=300) 128 | 129 | # Export as interactive HTML 130 | viz.export_interactive('plot.html') 131 | 132 | # Export dashboard 133 | viz.export_dashboard('dashboard.html') 134 | ``` 135 | 136 | ## Streamlit Integration 137 | 138 | ```python 139 | from empire_chain.visualizer import StreamlitVisualizer 140 | import streamlit as st 141 | 142 | # Initialize Streamlit visualizer 143 | st_viz = StreamlitVisualizer() 144 | 145 | # Create interactive components 146 | st_viz.plot_with_controls( 147 | data, 148 | x_column='x', 149 | y_column='y', 150 | plot_types=['line', 'scatter', 'bar'] 151 | ) 152 | 153 | # Create metrics dashboard 154 | st_viz.metrics_dashboard({ 155 | 'Accuracy': 0.95, 156 | 'Precision': 0.92, 157 | 'Recall': 0.89 158 | }) 159 | ``` 160 | 161 | ## Best Practices 162 | 163 | ### Memory Management 164 | 165 | ```python 166 | # Handle large datasets 167 | viz.enable_chunking(chunk_size=1000) 168 | 169 | # Clear memory 170 | viz.clear_cache() 171 | ``` 172 | 173 | ### Performance Optimization 174 | 175 | ```python 176 | # Enable GPU acceleration 177 | viz.enable_gpu() 178 | 179 | # Use downsampling for large datasets 180 | viz.downsample(factor=0.1) 181 | ``` 182 | 183 | ### Responsive Design 184 | 185 | ```python 186 | # Make plots responsive 187 | viz.set_responsive(True) 188 | 189 | # Set breakpoints 190 | viz.set_breakpoints({ 191 | 'sm': 576, 192 | 'md': 768, 193 | 'lg': 992, 194 | 'xl': 1200 195 | }) 196 | ``` 197 | 198 | ## Examples 199 | 200 | ### Complex Dashboard 201 | 202 | ```python 203 | # Create multi-panel dashboard 204 | viz.create_complex_dashboard({ 205 | 'top': [ 206 | ('metrics', {'values': {'Accuracy': 0.95}}), 207 | ('line', {'x': 'x', 'y': 'y'}) 208 | ], 209 | 'bottom': [ 210 | ('heatmap', {'data': correlation_matrix}), 211 | ('scatter', {'x': 'x', 'y': 'y'}) 212 | ] 213 | }) 214 | ``` 215 | 216 | ### Custom Visualization 217 | 218 | ```python 219 | from empire_chain.visualizer import BaseVisualizer 220 | 221 | class CustomVisualizer(BaseVisualizer): 222 | def custom_plot(self, data, **kwargs): 223 | # Custom visualization logic 224 | pass 225 | 226 | # Use custom visualizer 227 | custom_viz = CustomVisualizer() 228 | custom_viz.custom_plot(data) 229 | ``` -------------------------------------------------------------------------------- /docs/api-reference/vector-stores.md: -------------------------------------------------------------------------------- 1 | # Vector Stores API Reference 2 | 3 | The `empire_chain.vector_stores` module provides interfaces to various vector databases for efficient similarity search. 4 | 5 | ## QdrantVectorStore 6 | 7 | ```python 8 | from empire_chain.vector_stores import QdrantVectorStore 9 | ``` 10 | 11 | Class for interacting with Qdrant vector database. 
12 | 13 | ### Constructor 14 | 15 | ```python 16 | QdrantVectorStore(location: str = ":memory:", collection_name: str = "default") 17 | ``` 18 | 19 | **Parameters:** 20 | - `location` (str): Location of the Qdrant database 21 | - `:memory:` for in-memory storage 22 | - URL or path for persistent storage 23 | - `collection_name` (str): Name of the collection to use 24 | 25 | ### Methods 26 | 27 | #### add() 28 | 29 | ```python 30 | def add(self, text: str, embedding: List[float]) -> None 31 | ``` 32 | 33 | Add text and its embedding to the store. 34 | 35 | **Parameters:** 36 | - `text` (str): The text to store 37 | - `embedding` (List[float]): Vector representation of the text 38 | 39 | **Example:** 40 | ```python 41 | store = QdrantVectorStore(":memory:") 42 | store.add("Sample text", embedding_vector) 43 | ``` 44 | 45 | #### query() 46 | 47 | ```python 48 | def query(self, embedding: List[float], k: int = 3) -> List[str] 49 | ``` 50 | 51 | Retrieve similar texts based on embedding. 52 | 53 | **Parameters:** 54 | - `embedding` (List[float]): Query vector 55 | - `k` (int): Number of results to return 56 | 57 | **Returns:** 58 | - List[str]: List of similar texts 59 | 60 | **Example:** 61 | ```python 62 | similar_texts = store.query(query_embedding, k=5) 63 | ``` 64 | 65 | #### delete() 66 | 67 | ```python 68 | def delete(self, ids: List[str]) -> None 69 | ``` 70 | 71 | Delete entries by their IDs. 72 | 73 | **Parameters:** 74 | - `ids` (List[str]): List of IDs to delete 75 | 76 | #### clear() 77 | 78 | ```python 79 | def clear(self) -> None 80 | ``` 81 | 82 | Clear all entries from the store. 83 | 84 | ## ChromaVectorStore 85 | 86 | ```python 87 | from empire_chain.vector_stores import ChromaVectorStore 88 | ``` 89 | 90 | Class for interacting with ChromaDB. 91 | 92 | ### Constructor 93 | 94 | ```python 95 | ChromaVectorStore(path: Optional[str] = None, collection_name: str = "default") 96 | ``` 97 | 98 | **Parameters:** 99 | - `path` (Optional[str]): Path for persistent storage 100 | - `collection_name` (str): Name of the collection 101 | 102 | ### Methods 103 | 104 | #### add() 105 | 106 | ```python 107 | def add(self, text: str, embedding: List[float]) -> None 108 | ``` 109 | 110 | Add text and its embedding to ChromaDB. 111 | 112 | **Parameters:** 113 | - `text` (str): The text to store 114 | - `embedding` (List[float]): Vector representation of the text 115 | 116 | **Example:** 117 | ```python 118 | store = ChromaVectorStore() 119 | store.add("Sample text", embedding_vector) 120 | ``` 121 | 122 | #### query() 123 | 124 | ```python 125 | def query(self, embedding: List[float], k: int = 3) -> List[str] 126 | ``` 127 | 128 | Retrieve similar texts from ChromaDB. 
129 | 130 | **Parameters:** 131 | - `embedding` (List[float]): Query vector 132 | - `k` (int): Number of results to return 133 | 134 | **Returns:** 135 | - List[str]: List of similar texts 136 | 137 | **Example:** 138 | ```python 139 | similar_texts = store.query(query_embedding, k=5) 140 | ``` 141 | 142 | ## Common Usage Patterns 143 | 144 | ### Basic RAG Setup 145 | 146 | ```python 147 | from empire_chain.vector_stores import QdrantVectorStore 148 | from empire_chain.embeddings import OpenAIEmbeddings 149 | 150 | # Initialize components 151 | store = QdrantVectorStore(":memory:") 152 | embeddings = OpenAIEmbeddings("text-embedding-3-small") 153 | 154 | # Add documents 155 | text = "Your document text here" 156 | embedding = embeddings.embed(text) 157 | store.add(text, embedding) 158 | 159 | # Query 160 | query = "Your query here" 161 | query_embedding = embeddings.embed(query) 162 | results = store.query(query_embedding, k=3) 163 | ``` 164 | 165 | ### Persistent Storage 166 | 167 | ```python 168 | # Qdrant with persistent storage 169 | qdrant_store = QdrantVectorStore( 170 | location="path/to/storage", 171 | collection_name="my_documents" 172 | ) 173 | 174 | # ChromaDB with persistent storage 175 | chroma_store = ChromaVectorStore( 176 | path="path/to/storage", 177 | collection_name="my_documents" 178 | ) 179 | ``` 180 | 181 | ### Error Handling 182 | 183 | ```python 184 | try: 185 | store = QdrantVectorStore(":memory:") 186 | store.add(text, embedding) 187 | except Exception as e: 188 | print(f"Error: {e}") 189 | ``` 190 | 191 | ## Best Practices 192 | 193 | 1. **Memory Management** 194 | ```python 195 | # For development/testing 196 | store = QdrantVectorStore(":memory:") 197 | 198 | # For production 199 | store = QdrantVectorStore("path/to/persistent/storage") 200 | ``` 201 | 202 | 2. **Batch Operations** 203 | ```python 204 | # Add multiple documents efficiently 205 | for text, embedding in zip(texts, embeddings): 206 | store.add(text, embedding) 207 | ``` 208 | 209 | 3. **Collection Management** 210 | ```python 211 | # Use separate collections for different purposes 212 | docs_store = QdrantVectorStore(collection_name="documents") 213 | qa_store = QdrantVectorStore(collection_name="qa_pairs") 214 | ``` -------------------------------------------------------------------------------- /empire_chain/streamlit/base_chatbot.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | from empire_chain.llms.llms import OpenAILLM 3 | from dotenv import load_dotenv 4 | 5 | load_dotenv() 6 | 7 | class Chatbot: 8 | def __init__(self, llm: OpenAILLM, title: str, chat_history: bool = True, custom_instructions: str = "", verbose: bool = True): 9 | self.llm = llm 10 | self.title = title 11 | self.chat_history = chat_history 12 | self.custom_instructions = custom_instructions 13 | self.verbose = verbose 14 | 15 | if 'messages' not in st.session_state: 16 | st.session_state.messages = [] 17 | 18 | if 'sidebar_state' not in st.session_state: 19 | st.session_state.sidebar_state = 'expanded' 20 | 21 | def display_example_queries(self): 22 | with st.expander("Example Queries"): 23 | example_queries = { 24 | "example1": "Who is the CEO of Tesla?", 25 | "example2": "What are llms?", 26 | "example3": "How to write a research paper?", 27 | "example4": "How to set up a company in Delaware?" 
28 | } 29 | 30 | col1, col2 = st.columns(2) 31 | with col1: 32 | if st.button("Who is the CEO of Tesla?", key="example1"): 33 | st.session_state.example_query = example_queries["example1"] 34 | if st.button("What are llms?", key="example2"): 35 | st.session_state.example_query = example_queries["example2"] 36 | with col2: 37 | if st.button("How to write a research paper?", key="example3"): 38 | st.session_state.example_query = example_queries["example3"] 39 | if st.button("How to set up a company in Delaware?", key="example4"): 40 | st.session_state.example_query = example_queries["example4"] 41 | 42 | def display_sidebar(self): 43 | with st.sidebar: 44 | st.title("Empire Chain 🚀") 45 | st.markdown("### AI Orchestration Framework") 46 | 47 | st.markdown("#### Key Features") 48 | st.markdown(""" 49 | - 🤖 Seamless LLM Integration 50 | - Groq 51 | - OpenAI 52 | - Anthropic 53 | 54 | - 📚 Embedding Support 55 | - Sentence Transformers 56 | - OpenAI Embeddings 57 | 58 | - 🗄️ Vector Stores 59 | - Qdrant 60 | - ChromaDB 61 | 62 | - 🤝 Custom Agents 63 | - Web Agent (DuckDuckGo) 64 | - Finance Agent (YFinance) 65 | """) 66 | 67 | st.markdown("#### Quick Links") 68 | st.markdown("[GitHub Repository](https://lnkd.in/gbiiCVtk)") 69 | st.markdown("[PyPI Package](https://lnkd.in/gfhc4YeE)") 70 | 71 | st.markdown("---") 72 | st.markdown("*Make your RAG solution in just 30 lines of code!*") 73 | 74 | def chat(self): 75 | if self.verbose: 76 | self.display_sidebar() 77 | 78 | with st.container(): 79 | st.title(self.title) 80 | 81 | if self.verbose: 82 | st.markdown(""" 83 | Welcome to the Empire Chain Demo! This chatbot showcases the capabilities 84 | of our AI orchestration framework. Feel free to ask questions about anything! 85 | """) 86 | 87 | st.subheader("Example Queries") 88 | self.display_example_queries() 89 | 90 | message_container = st.container() 91 | with message_container: 92 | for message in st.session_state.messages: 93 | role = message["role"] 94 | content = message["content"] 95 | with st.chat_message(role): 96 | st.markdown(content) 97 | 98 | prompt = st.chat_input("What would you like to know?") 99 | 100 | if "example_query" in st.session_state: 101 | prompt = st.session_state.pop("example_query") 102 | 103 | if prompt: 104 | with st.chat_message("user"): 105 | st.markdown(prompt) 106 | if self.chat_history: 107 | st.session_state.messages.append({"role": "user", "content": prompt}) 108 | 109 | response_container = st.chat_message("assistant") 110 | with response_container: 111 | placeholder = st.empty() 112 | with placeholder: 113 | with st.spinner("Thinking..."): 114 | if self.chat_history: 115 | conversation_history = f"{self.custom_instructions}\n" 116 | for message in st.session_state.messages: 117 | conversation_history += f"{message['role']}: {message['content']}\n" 118 | full_prompt = f"Previous conversation history:\n{conversation_history}\nNew query: {prompt}" 119 | response = self.llm.generate(full_prompt) 120 | else: 121 | response = self.llm.generate(prompt) 122 | st.markdown(response) 123 | if self.chat_history: 124 | st.session_state.messages.append({"role": "assistant", "content": response}) -------------------------------------------------------------------------------- /docs/components/vector_stores.md: -------------------------------------------------------------------------------- 1 | # Vector Stores 2 | 3 | Empire Chain provides robust vector store implementations for efficient similarity search and retrieval. 
The primary implementation is based on Qdrant, a high-performance vector database. 4 | 5 | ## QdrantVectorStore 6 | 7 | The `QdrantVectorStore` class provides a simple interface for storing and querying text embeddings with sensible defaults. 8 | 9 | ### Quick Start 10 | 11 | ```python 12 | from empire_chain.vector_stores import QdrantVectorStore 13 | from empire_chain.embeddings import OpenAIEmbeddings 14 | 15 | # Initialize components 16 | store = QdrantVectorStore() 17 | embeddings = OpenAIEmbeddings("text-embedding-3-small") 18 | 19 | # Add text with its embedding 20 | text = "Hello world" 21 | text_embedding = embeddings.embed(text) 22 | store.add(text=text, embedding=text_embedding) 23 | 24 | # Query similar texts 25 | query = "What is this about?" 26 | query_embedding = embeddings.embed(query) 27 | similar_texts = store.query(query_embedding) # Returns top 10 similar texts 28 | ``` 29 | 30 | ### Default Configuration 31 | 32 | The `QdrantVectorStore` comes with carefully chosen defaults suitable for most use cases: 33 | 34 | #### Basic Settings 35 | - Storage: In-memory (uses local memory) 36 | - Collection name: "default" 37 | - Vector size: 1536 (compatible with many embedding models) 38 | - Distance metric: COSINE 39 | - Storage type: RAM (not on disk) 40 | - Query results: Top 10 by default 41 | - Point IDs: Automatically generated UUIDs 42 | 43 | #### HNSW Index Settings 44 | - m: 16 (edges per node) 45 | - ef_construct: 100 (candidates for index construction) 46 | - full_scan_threshold: 10000 47 | - max_indexing_threads: Auto-detected 48 | - on_disk: False (stored in RAM) 49 | 50 | #### Optimizer Settings 51 | - deleted_threshold: 0.2 52 | - vacuum_min_vector_number: 1000 53 | - indexing_threshold: 20000 54 | - flush_interval_sec: 5 55 | - max_optimization_threads: Auto-detected 56 | 57 | #### WAL (Write-Ahead-Log) Settings 58 | - wal_capacity_mb: 32 59 | - wal_segments_ahead: 0 60 | 61 | ### Advanced Usage 62 | 63 | For more control over the vector store configuration: 64 | 65 | ```python 66 | from empire_chain.vector_stores import QdrantVectorStore 67 | from qdrant_client.models import Distance 68 | 69 | # Create a store with custom settings 70 | store = QdrantVectorStore( 71 | url="localhost:6333", # Qdrant server URL 72 | collection_name="my_vectors", # Custom collection name 73 | vector_size=768, # For smaller embeddings 74 | distance=Distance.EUCLID, # Euclidean distance 75 | on_disk=True, # Store vectors on disk 76 | ) 77 | 78 | # Add text with its embedding 79 | store.add( 80 | text="Important document", 81 | embedding=[...], # Your embedding 82 | ) 83 | 84 | # Query with filters and threshold 85 | similar_texts = store.query( 86 | query_embedding=[...], # Your query embedding 87 | k=5, # Return top 5 results 88 | score_threshold=0.8, # Minimum similarity score 89 | filter={"category": "important"} # Optional filtering 90 | ) 91 | ``` 92 | 93 | ### Production Recommendations 94 | 95 | When using QdrantVectorStore in production: 96 | 97 | 1. **Storage Configuration** 98 | - Use a persistent Qdrant server instead of in-memory storage 99 | - Consider enabling on-disk storage for large datasets 100 | - Configure proper backup and snapshot strategies 101 | 102 | 2. **Performance Optimization** 103 | - Adjust HNSW index parameters based on your dataset size 104 | - Use appropriate vector size for your embedding model 105 | - Consider enabling vector quantization for large collections 106 | 107 | 3. 
**Resource Management** 108 | - Monitor memory usage and disk space 109 | - Configure appropriate shard numbers for distributed setups 110 | - Set up proper replication for high availability 111 | 112 | ### Integration with RAG 113 | 114 | QdrantVectorStore works seamlessly with the RAG (Retrieval-Augmented Generation) pipeline: 115 | 116 | ```python 117 | from empire_chain.vector_stores import QdrantVectorStore 118 | from empire_chain.embeddings import OpenAIEmbeddings 119 | from empire_chain.llms import OpenAILLM 120 | 121 | # Initialize components 122 | store = QdrantVectorStore() 123 | embeddings = OpenAIEmbeddings() 124 | llm = OpenAILLM() 125 | 126 | # Add documents to the store 127 | documents = ["doc1", "doc2", "doc3"] 128 | for doc in documents: 129 | embedding = embeddings.embed(doc) 130 | store.add(doc, embedding) 131 | 132 | # Query and generate 133 | query = "What are the key points?" 134 | query_embedding = embeddings.embed(query) 135 | relevant_docs = store.query(query_embedding, k=3) 136 | response = llm.generate(f"Context: {relevant_docs}\nQuery: {query}") 137 | ``` 138 | 139 | ### Error Handling 140 | 141 | The QdrantVectorStore implements robust error handling: 142 | 143 | ```python 144 | try: 145 | store = QdrantVectorStore() 146 | store.add(text="example", embedding=[...]) 147 | except RuntimeError as e: 148 | print(f"Failed to add text: {e}") 149 | ``` 150 | 151 | Common errors are handled gracefully with descriptive error messages. 152 | 153 | ### Best Practices 154 | 155 | 1. **Vector Normalization** 156 | - Always normalize embeddings when using COSINE distance 157 | - Use consistent embedding dimensions 158 | 159 | 2. **Performance** 160 | - Batch operations when adding multiple points 161 | - Use appropriate index settings for your dataset size 162 | - Consider payload size impact on performance 163 | 164 | 3. **Resource Management** 165 | - Monitor memory usage with large collections 166 | - Use disk storage for large datasets 167 | - Implement proper cleanup procedures 168 | 169 | 4. **Security** 170 | - Use proper authentication in production 171 | - Implement access controls 172 | - Regular backup procedures 173 | 174 | For more examples and advanced usage, check out the [cookbooks](https://github.com/manas95826/empire-chain/tree/main/cookbooks) in the repository. 
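As a concrete illustration of the batching advice above, the sketch below embeds and inserts several documents in one pass before any querying happens. The texts are placeholders; `add()` and `embed()` are used exactly as in the Quick Start.

```python
from empire_chain.vector_stores import QdrantVectorStore
from empire_chain.embeddings import OpenAIEmbeddings

store = QdrantVectorStore()
embeddings = OpenAIEmbeddings("text-embedding-3-small")

# Embed and insert all documents together instead of interleaving adds and queries
documents = ["First document", "Second document", "Third document"]
for doc in documents:
    store.add(text=doc, embedding=embeddings.embed(doc))
```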
-------------------------------------------------------------------------------- /empire_chain/tools/file_reader.py: -------------------------------------------------------------------------------- 1 | # Empire Chain File Reader Module 2 | # Updated: March 2025 - Adding comments for version tracking 3 | 4 | from typing import Protocol 5 | from pathlib import Path 6 | import PyPDF2 7 | import docx 8 | import json 9 | import csv 10 | import requests 11 | import webbrowser 12 | import io 13 | import os 14 | 15 | class FileReader(Protocol): 16 | def read(self, file_path: str) -> str: 17 | pass 18 | 19 | class PDFReader(FileReader): 20 | def read(self, file_path: str) -> str: 21 | with open(file_path, 'rb') as file: 22 | reader = PyPDF2.PdfReader(file) 23 | text = "" 24 | for page in reader.pages: 25 | text += page.extract_text() + "\n" 26 | return text 27 | 28 | class DocxReader(FileReader): 29 | def read(self, file_path: str) -> str: 30 | doc = docx.Document(file_path) 31 | text = "" 32 | for paragraph in doc.paragraphs: 33 | text += paragraph.text + "\n" 34 | return text 35 | 36 | class TxtReader(FileReader): 37 | def read(self, file_path: str) -> str: 38 | with open(file_path, 'r', encoding='utf-8') as file: 39 | return file.read() 40 | 41 | class JSONReader(FileReader): 42 | def read(self, file_path: str) -> str: 43 | with open(file_path, 'r', encoding='utf-8') as file: 44 | data = json.load(file) 45 | return json.dumps(data, indent=2) 46 | 47 | class CSVReader(FileReader): 48 | def read(self, file_path: str) -> str: 49 | text = "" 50 | with open(file_path, 'r', encoding='utf-8') as file: 51 | reader = csv.reader(file) 52 | for row in reader: 53 | text += ",".join(row) + "\n" 54 | return text 55 | 56 | class GoogleDocsReader(FileReader): 57 | def read(self, file_path: str) -> str: 58 | """Reads a Google Drive file and returns its content as text. 59 | 60 | Args: 61 | file_path: The Google Drive file URL 62 | 63 | Returns: 64 | str: The document content as text 65 | 66 | Raises: 67 | ValueError: If file cannot be accessed 68 | """ 69 | if not 'drive.google.com' in file_path: 70 | raise ValueError("Not a valid Google Drive URL") 71 | 72 | if '/file/d/' in file_path: 73 | file_id = file_path.split('/file/d/')[1].split('/')[0] 74 | elif '/document/d/' in file_path: 75 | file_id = file_path.split('/document/d/')[1].split('/')[0] 76 | else: 77 | raise ValueError("Invalid Google Drive URL format. Please use the 'Share' link from Google Drive.") 78 | 79 | download_url = f"https://drive.google.com/uc?export=download&id={file_id}" 80 | 81 | try: 82 | response = requests.get(download_url) 83 | 84 | if response.status_code == 403 or 'Sign in' in response.text: 85 | print("\nPlease sign in with your Google account to access this file.") 86 | print("A browser window will open. After signing in, please try again.") 87 | webbrowser.open(file_path) 88 | raise ValueError("Please authenticate through your browser and try again") 89 | 90 | if response.status_code != 200: 91 | raise ValueError("Could not access file. 
Make sure the file is shared and accessible.") 92 | 93 | content = io.BytesIO(response.content) 94 | 95 | if b'%PDF' in response.content[:1024]: 96 | reader = PyPDF2.PdfReader(content) 97 | return "\n".join(page.extract_text() for page in reader.pages) 98 | 99 | try: 100 | return response.content.decode('utf-8') 101 | except UnicodeDecodeError: 102 | try: 103 | doc = docx.Document(content) 104 | return "\n".join(paragraph.text for paragraph in doc.paragraphs) 105 | except: 106 | raise ValueError("Unsupported file type or file is corrupted") 107 | 108 | except requests.RequestException as e: 109 | raise ValueError(f"Error accessing Google Drive file: {str(e)}") 110 | 111 | class DocumentReader: 112 | def __init__(self): 113 | """Initialize document reader.""" 114 | self.readers = { 115 | '.pdf': PDFReader(), 116 | '.docx': DocxReader(), 117 | '.txt': TxtReader(), 118 | '.json': JSONReader(), 119 | '.csv': CSVReader() 120 | } 121 | self.google_reader = GoogleDocsReader() 122 | 123 | def _is_google_drive_url(self, file_path: str) -> bool: 124 | """Check if the given path is a Google Drive URL.""" 125 | return 'drive.google.com' in file_path 126 | 127 | def read(self, file_path: str) -> str: 128 | """Read content from various file types and return as text. 129 | 130 | Args: 131 | file_path: Path to the file to read or Google Drive URL 132 | 133 | Returns: 134 | str: Text content of the file 135 | 136 | Raises: 137 | ValueError: If file type is not supported or file cannot be accessed 138 | """ 139 | if self._is_google_drive_url(file_path): 140 | return self.google_reader.read(file_path) 141 | 142 | file_extension = Path(file_path).suffix.lower() 143 | if not file_extension: 144 | raise ValueError("File has no extension and is not a Google Drive URL") 145 | 146 | if file_extension not in self.readers: 147 | raise ValueError(f"Unsupported file type: {file_extension}") 148 | 149 | return self.readers[file_extension].read(file_path) 150 | 151 | def supported_formats(self) -> list[str]: 152 | """Get list of supported file formats. 
153 | 154 | Returns: 155 | list[str]: List of supported file extensions 156 | """ 157 | return list(self.readers.keys()) -------------------------------------------------------------------------------- /empire_chain/streamlit/pdf_chatbot.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import tempfile 3 | from empire_chain.llms.llms import OpenAILLM 4 | from empire_chain.vector_stores import QdrantVectorStore 5 | from empire_chain.embeddings import OpenAIEmbeddings 6 | from empire_chain.tools.file_reader import DocumentReader 7 | from dotenv import load_dotenv 8 | 9 | load_dotenv() 10 | 11 | class PDFChatbot: 12 | def __init__(self, title: str, llm: OpenAILLM, vector_store: QdrantVectorStore, embeddings: OpenAIEmbeddings, chat_history: bool = True, custom_instructions: str = "", verbose: bool = True): 13 | self.title = title 14 | self.llm = llm 15 | self.vector_store = vector_store 16 | self.embeddings = embeddings 17 | self.chat_history = chat_history 18 | self.custom_instructions = custom_instructions 19 | self.verbose = verbose 20 | 21 | if 'messages' not in st.session_state: 22 | st.session_state.messages = [] 23 | 24 | if 'sidebar_state' not in st.session_state: 25 | st.session_state.sidebar_state = 'expanded' 26 | 27 | def display_example_queries(self): 28 | with st.expander("Example Queries"): 29 | example_queries = { 30 | "example1": "What is the main topic of this document?", 31 | "example2": "What is the purpose of this document?", 32 | "example3": "What is the main idea of this document?", 33 | "example4": "What is the main conclusion of this document?" 34 | } 35 | 36 | col1, col2 = st.columns(2) 37 | with col1: 38 | if st.button("What is the main topic of this document?", key="example1"): 39 | st.session_state.example_query = example_queries["example1"] 40 | if st.button("What is the purpose of this document?", key="example2"): 41 | st.session_state.example_query = example_queries["example2"] 42 | with col2: 43 | if st.button("What is the main idea of this document?", key="example3"): 44 | st.session_state.example_query = example_queries["example3"] 45 | if st.button("What is the main conclusion of this document?", key="example4"): 46 | st.session_state.example_query = example_queries["example4"] 47 | 48 | def display_sidebar(self): 49 | with st.sidebar: 50 | st.title("Empire Chain 🚀") 51 | st.markdown("### AI Orchestration Framework") 52 | 53 | st.markdown("#### Key Features") 54 | st.markdown(""" 55 | - 🤖 Seamless LLM Integration 56 | - Groq 57 | - OpenAI 58 | - Anthropic 59 | 60 | - 📚 Embedding Support 61 | - Sentence Transformers 62 | - OpenAI Embeddings 63 | 64 | - 🗄️ Vector Stores 65 | - Qdrant 66 | - ChromaDB 67 | 68 | - 🤝 Custom Agents 69 | - Web Agent (DuckDuckGo) 70 | - Finance Agent (YFinance) 71 | """) 72 | 73 | st.markdown("#### Quick Links") 74 | st.markdown("[GitHub Repository](https://lnkd.in/gbiiCVtk)") 75 | st.markdown("[PyPI Package](https://lnkd.in/gfhc4YeE)") 76 | 77 | st.markdown("---") 78 | st.markdown("*Make your RAG solution in just 30 lines of code!*") 79 | 80 | def chat(self): 81 | if self.verbose: 82 | self.display_sidebar() 83 | 84 | st.title(self.title) 85 | 86 | if self.verbose: 87 | st.markdown(""" 88 | Welcome to the Empire Chain PDF Chatbot! This chatbot can answer questions about a PDF file. 89 | Upload a PDF file and ask questions about it! 
90 | """) 91 | 92 | st.subheader("Example Queries") 93 | self.display_example_queries() 94 | 95 | uploaded_file = st.file_uploader("Choose a PDF file...", type=["pdf"]) 96 | if uploaded_file is not None: 97 | reader = DocumentReader() 98 | with st.spinner("Reading PDF..."): 99 | with tempfile.NamedTemporaryFile(delete=False, suffix=".pdf") as temp_file: 100 | temp_file.write(uploaded_file.read()) 101 | temp_file_path = temp_file.name 102 | 103 | text = reader.read(temp_file_path) 104 | text_embedding = self.embeddings.embed(text) 105 | self.vector_store.add(text, text_embedding) 106 | 107 | message_container = st.container() 108 | 109 | prompt = st.chat_input("What would you like to know about the document?") 110 | 111 | if "example_query" in st.session_state: 112 | prompt = st.session_state.pop("example_query") 113 | 114 | with message_container: 115 | for message in st.session_state.messages: 116 | with st.chat_message(message["role"]): 117 | st.markdown(message["content"]) 118 | 119 | if prompt: 120 | if uploaded_file is None: 121 | st.warning("Please upload a PDF file first!") 122 | return 123 | 124 | with message_container: 125 | with st.chat_message("user"): 126 | st.markdown(prompt) 127 | st.session_state.messages.append({"role": "user", "content": prompt}) 128 | 129 | with st.chat_message("assistant"): 130 | with st.spinner("Analyzing document..."): 131 | query_embedding = self.embeddings.embed(prompt) 132 | relevant_texts = self.vector_store.query(query_embedding, k=3) 133 | context = "\n".join(relevant_texts) 134 | full_prompt = f"{self.custom_instructions}\nBased on the following context, {prompt}\n\nContext: {context}" 135 | response = self.llm.generate(full_prompt) 136 | st.markdown(response) 137 | st.session_state.messages.append({"role": "assistant", "content": response}) -------------------------------------------------------------------------------- /tests/test_streamlit_chatbot.py: -------------------------------------------------------------------------------- 1 | # empire chain 2 | from empire_chain.streamlit import Chatbot, VisionChatbot, PDFChatbot 3 | from empire_chain.llms.llms import OpenAILLM 4 | from empire_chain.vector_stores import QdrantVectorStore 5 | from empire_chain.embeddings import OpenAIEmbeddings 6 | import unittest 7 | import streamlit as st 8 | from unittest.mock import MagicMock, patch 9 | from PIL import Image 10 | import io 11 | import numpy as np 12 | 13 | class TestStreamlitChatbot(unittest.TestCase): 14 | def setUp(self): 15 | self.llm = OpenAILLM("gpt-4o-mini") 16 | 17 | def test_chatbot_initialization(self): 18 | chatbot = Chatbot(llm=self.llm, title="Test Chatbot") 19 | self.assertEqual(chatbot.title, "Test Chatbot") 20 | self.assertEqual(chatbot.llm, self.llm) 21 | self.assertTrue(chatbot.chat_history) 22 | 23 | def test_chatbot_initialization_no_history(self): 24 | chatbot = Chatbot(llm=self.llm, title="Test Chatbot", chat_history=False) 25 | self.assertFalse(chatbot.chat_history) 26 | 27 | @patch('streamlit.chat_input') 28 | @patch('streamlit.chat_message') 29 | def test_chatbot_chat_flow(self, mock_chat_message, mock_chat_input): 30 | chatbot = Chatbot(llm=self.llm, title="Test Chatbot") 31 | mock_chat_input.return_value = "Hello" 32 | mock_response = MagicMock() 33 | mock_response.markdown = MagicMock() 34 | mock_chat_message.return_value.__enter__.return_value = mock_response 35 | 36 | with patch.object(self.llm, 'generate', return_value="Hi there!"): 37 | chatbot.chat() 38 | self.llm.generate.assert_called_once() 39 | 40 | class 
TestVisionChatbot(unittest.TestCase):
41 |     def setUp(self):
42 |         self.test_image = Image.new('RGB', (100, 100), color='red')
43 | 
44 |     def test_vision_chatbot_initialization(self):
45 |         chatbot = VisionChatbot(title="Test Vision Chatbot")
46 |         self.assertEqual(chatbot.title, "Test Vision Chatbot")
47 |         self.assertTrue(chatbot.chat_history)
48 | 
49 |     def test_vision_chatbot_initialization_no_history(self):
50 |         chatbot = VisionChatbot(title="Test Vision Chatbot", chat_history=False)
51 |         self.assertFalse(chatbot.chat_history)
52 | 
53 |     def test_convert_image_to_base64(self):
54 |         chatbot = VisionChatbot(title="Test Vision Chatbot")
55 |         base64_image = chatbot.convert_image_to_base64(self.test_image)
56 |         self.assertTrue(base64_image.startswith("data:image/png;base64,"))
57 | 
58 |     @patch('streamlit.file_uploader')
59 |     @patch('streamlit.chat_input')
60 |     @patch('streamlit.chat_message')
61 |     def test_vision_chatbot_chat_flow(self, mock_chat_message, mock_chat_input, mock_file_uploader):
62 |         chatbot = VisionChatbot(title="Test Vision Chatbot")
63 | 
64 |         mock_file = MagicMock()
65 |         mock_file.read = MagicMock(return_value=self.test_image.tobytes())
66 |         mock_file_uploader.return_value = mock_file
67 | 
68 |         mock_chat_input.return_value = "What's in this image?"
69 |         mock_response = MagicMock()
70 |         mock_response.markdown = MagicMock()
71 |         mock_chat_message.return_value.__enter__.return_value = mock_response
72 | 
73 |         with patch.object(Image, 'open', return_value=self.test_image), \
74 |              patch.object(chatbot, 'process_image_query', return_value="I see a red image"):
75 |             chatbot.chat()
76 |             chatbot.process_image_query.assert_called_once()
77 | 
78 | class TestPDFChatbot(unittest.TestCase):
79 |     def setUp(self):
80 |         self.llm = OpenAILLM("gpt-4o-mini")
81 |         self.vector_store = QdrantVectorStore()
82 |         self.embeddings = OpenAIEmbeddings(model="text-embedding-3-small")
83 |         # Create a mock embedding vector of correct size (1536)
84 |         self.mock_embedding = np.random.rand(1536).tolist()
85 | 
86 |     def test_pdf_chatbot_initialization(self):
87 |         chatbot = PDFChatbot(
88 |             title="Test PDF Chatbot",
89 |             llm=self.llm,
90 |             vector_store=self.vector_store,
91 |             embeddings=self.embeddings
92 |         )
93 |         self.assertEqual(chatbot.title, "Test PDF Chatbot")
94 |         self.assertEqual(chatbot.llm, self.llm)
95 |         self.assertEqual(chatbot.vector_store, self.vector_store)
96 |         self.assertEqual(chatbot.embeddings, self.embeddings)
97 |         self.assertTrue(chatbot.chat_history)
98 | 
99 |     def test_pdf_chatbot_initialization_no_history(self):
100 |         chatbot = PDFChatbot(
101 |             title="Test PDF Chatbot",
102 |             llm=self.llm,
103 |             vector_store=self.vector_store,
104 |             embeddings=self.embeddings,
105 |             chat_history=False
106 |         )
107 |         self.assertFalse(chatbot.chat_history)
108 | 
109 |     @patch('streamlit.file_uploader')
110 |     @patch('streamlit.chat_input')
111 |     @patch('streamlit.chat_message')
112 |     def test_pdf_chatbot_chat_flow(self, mock_chat_message, mock_chat_input, mock_file_uploader):
113 |         chatbot = PDFChatbot(
114 |             title="Test PDF Chatbot",
115 |             llm=self.llm,
116 |             vector_store=self.vector_store,
117 |             embeddings=self.embeddings
118 |         )
119 | 
120 |         mock_file = MagicMock()
121 |         mock_file.read = MagicMock(return_value=b"fake pdf content")
122 |         mock_file_uploader.return_value = mock_file
123 | 
124 |         mock_chat_input.return_value = "What's in this document?"
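        # Stub the assistant chat-message context manager so the generated markdown can be captured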
125 |         mock_response = MagicMock()
126 |         mock_response.markdown = MagicMock()
127 |         mock_chat_message.return_value.__enter__.return_value = mock_response
128 | 
129 |         with patch('empire_chain.tools.file_reader.DocumentReader.read', return_value="Test document content"), \
130 |              patch.object(self.embeddings, 'embed', return_value=self.mock_embedding), \
131 |              patch.object(self.vector_store, 'query', return_value=["Relevant text"]), \
132 |              patch.object(self.llm, 'generate', return_value="This is a test document"):
133 |             chatbot.chat()
134 |             self.llm.generate.assert_called_once()
135 | 
136 | if __name__ == "__main__":
137 |     unittest.main()
-------------------------------------------------------------------------------- /empire_chain/agent/agent.py: --------------------------------------------------------------------------------
1 | from typing import Any, Callable, Dict, List, get_type_hints
2 | import json
3 | from datetime import datetime
4 | import inspect
5 | import requests
6 | from empire_chain.llms.llms import GroqLLM
7 | 
8 | class FunctionRegistry:
9 |     def __init__(self):
10 |         self.functions: Dict[str, Callable] = {}
11 |         self.descriptions: Dict[str, Dict[str, Any]] = {}
12 | 
13 |     def _extract_function_metadata(self, func: Callable) -> Dict[str, Any]:
14 |         """Extract function metadata using introspection"""
15 |         sig = inspect.signature(func)
16 | 
17 |         doc = inspect.getdoc(func) or "No description available"
18 |         description = doc.split("\n")[0]
19 | 
20 |         type_hints = get_type_hints(func)
21 |         parameters = []
22 |         for param_name, param in sig.parameters.items():
23 |             if param.default == inspect.Parameter.empty:
24 |                 param_type = type_hints.get(param_name, Any).__name__
25 |                 parameters.append({
26 |                     "name": param_name,
27 |                     "type": param_type,
28 |                     "required": True
29 |                 })
30 |             else:
31 |                 param_type = type_hints.get(param_name, Any).__name__
32 |                 parameters.append({
33 |                     "name": param_name,
34 |                     "type": param_type,
35 |                     "required": False,
36 |                     "default": param.default
37 |                 })
38 | 
39 |         return {
40 |             "name": func.__name__,
41 |             "description": description,
42 |             "parameters": parameters,
43 |             "full_docstring": doc
44 |         }
45 | 
46 |     def register(self, func: Callable):
47 |         """Register a function with automatically extracted metadata"""
48 |         metadata = self._extract_function_metadata(func)
49 |         self.functions[metadata["name"]] = func
50 |         self.descriptions[metadata["name"]] = metadata
51 | 
52 |     def list_functions(self) -> List[str]:
53 |         """List all registered function names"""
54 |         return list(self.functions.keys())
55 | 
56 | class Agent:
57 |     def __init__(self, model: str = "llama3-8b-8192"):
58 |         self.llm = GroqLLM(model=model)
59 |         self.registry = FunctionRegistry()
60 | 
61 |     def register_function(self, func: Callable):
62 |         """Register a function that the agent can call"""
63 |         self.registry.register(func)
64 | 
65 |     def _create_function_prompt(self, query: str) -> str:
66 |         functions_json = json.dumps(self.registry.descriptions, indent=2)
67 |         return f"""You are a function router that maps user queries to the most appropriate function. Your response must be a valid JSON object.
68 | 
69 | User Query: {query}
70 | 
71 | Available Functions (with metadata):
72 | {functions_json}
73 | 
74 | Instructions:
75 | 1. Analyze the user query and available functions
76 | 2. Select the most appropriate function based on its description and parameters
77 | 3. Extract parameter values from the query, respecting parameter types
78 | 4. Return a JSON object in EXACTLY this format, with NO ADDITIONAL WHITESPACE or FORMATTING:
79 | {{"function":"<function_name>","parameters":{{"<param_name>":"<param_value>"}},"reasoning":"<reasoning>"}}
80 | 
81 | Critical Rules:
82 | - Response must be a SINGLE LINE of valid JSON
83 | - NO line breaks, NO extra spaces
84 | - NO markdown formatting or code blocks
85 | - ALL strings must use double quotes
86 | - Function name must be from available functions
87 | - ALL required parameters must be included
88 | - Parameter values must match the expected type
89 | - Reasoning must be brief and single-line
90 | 
91 | Example Valid Response:
92 | {{"function":"get_weather","parameters":{{"location":"New York"}},"reasoning":"Query asks about weather in a specific location"}}
93 | 
94 | Response (SINGLE LINE JSON):"""
95 | 
96 |     def _clean_json_response(self, response: str) -> str:
97 |         """Clean and validate JSON response"""
98 |         response = response.strip()
99 |         if "```" in response:
100 |             response = response.split("```")[1]
101 |             if response.startswith("json"):
102 |                 response = response[4:]
103 |         response = response.strip()
104 | 
105 |         response = " ".join(response.split())
106 | 
107 |         try:
108 |             parsed = json.loads(response)
109 |             return json.dumps(parsed, separators=(',', ':'))
110 |         except json.JSONDecodeError:
111 |             raise ValueError(f"Invalid JSON response: {response[:100]}...")
112 | 
113 |     def process_query(self, query: str) -> Any:
114 |         """Process a natural language query and route it to appropriate function"""
115 |         if not self.registry.functions:
116 |             raise ValueError("No functions registered with the agent")
117 | 
118 |         prompt = self._create_function_prompt(query)
119 |         response = self.llm.generate(prompt)
120 | 
121 |         try:
122 |             cleaned_response = self._clean_json_response(response)
123 |             result = json.loads(cleaned_response)
124 | 
125 |             func_name = result["function"]
126 |             parameters = result["parameters"]
127 |             reasoning = result.get("reasoning", "No reasoning provided")
128 | 
129 |             if func_name not in self.registry.functions:
130 |                 raise ValueError(f"Function {func_name} not found. Available functions: {', '.join(self.registry.list_functions())}")
131 | 
132 |             func_metadata = self.registry.descriptions[func_name]
133 |             for param in func_metadata["parameters"]:
134 |                 if param["required"] and param["name"] not in parameters:
135 |                     raise ValueError(f"Missing required parameter: {param['name']}")
136 | 
137 |             func = self.registry.functions[func_name]
138 |             return {
139 |                 "result": func(**parameters),
140 |                 "function_called": func_name,
141 |                 "parameters_used": parameters,
142 |                 "reasoning": reasoning
143 |             }
144 | 
145 |         except json.JSONDecodeError as e:
146 |             raise ValueError(f"Failed to parse LLM response as JSON. Response: {response[:100]}... 
Error: {str(e)}") 147 | except KeyError as e: 148 | raise ValueError(f"Missing required field in LLM response: {e}") 149 | except Exception as e: 150 | raise ValueError(f"Error processing query: {str(e)}") -------------------------------------------------------------------------------- /empire_chain/cool_stuff/podcast.py: -------------------------------------------------------------------------------- 1 | import soundfile as sf 2 | from kokoro_onnx import Kokoro 3 | import numpy as np 4 | import random 5 | from groq import Groq 6 | from dotenv import load_dotenv 7 | import json 8 | import os 9 | import requests 10 | from pathlib import Path 11 | import tempfile 12 | from tqdm import tqdm 13 | 14 | 15 | class GeneratePodcast: 16 | def __init__(self): 17 | load_dotenv() 18 | pass 19 | 20 | def download_required_files(self): 21 | """Download required model and voices files if they don't exist.""" 22 | files = { 23 | "kokoro-v0_19.onnx": "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/kokoro-v0_19.onnx", 24 | "voices.bin": "https://github.com/nazdridoy/kokoro-tts/releases/download/v1.0.0/voices-v1.0.bin", 25 | "voices.json": "https://github.com/thewh1teagle/kokoro-onnx/releases/download/model-files/voices.json" # fallback 26 | } 27 | 28 | for filename, url in files.items(): 29 | # Skip voices.json if we already have voices.bin 30 | if filename == "voices.json" and os.path.exists("voices.bin"): 31 | continue 32 | 33 | if not os.path.exists(filename): 34 | print(f"Downloading {filename} from {url}...") 35 | response = requests.get(url, stream=True) 36 | total_size = int(response.headers.get('content-length', 0)) 37 | block_size = 1024 38 | 39 | with open(filename, "wb") as f, tqdm( 40 | desc=filename, 41 | total=total_size, 42 | unit='iB', 43 | unit_scale=True, 44 | unit_divisor=1024, 45 | ) as bar: 46 | for data in response.iter_content(block_size): 47 | f.write(data) 48 | bar.update(len(data)) 49 | print(f"Downloaded {filename} successfully.") 50 | else: 51 | print(f"{filename} already exists, skipping download.") 52 | 53 | def load_dotenv(self): 54 | load_dotenv() 55 | 56 | def client(self, topic: str): 57 | client = Groq() 58 | completion = client.chat.completions.create( 59 | model="llama3-8b-8192", 60 | messages=[ 61 | { 62 | "role": "system", 63 | "content": """You are a podcast scriptwriter for an engaging multi-host podcast show. Create a natural, flowing conversation between 2-3 hosts discussing the given topic. Follow these guidelines: 64 | 65 | 1. Structure: 66 | - Start with a warm welcome and topic introduction 67 | - Have a structured discussion with clear segments 68 | - End with a conclusion and sign-off 69 | 70 | 2. Host Personalities (use these consistently): 71 | - af_sarah: The main host/moderator who guides the conversation 72 | - am_michael: The expert/analyst who provides deep insights 73 | - af_bella: The engaging co-host who asks good questions (optional) 74 | 75 | 3. Make the conversation natural by: 76 | - Including casual reactions ("That's fascinating!", "I agree", etc.) 77 | - Having hosts build on each other's points 78 | - Including brief personal anecdotes 79 | - Using conversational language, not formal speech 80 | 81 | 4. Keep each speaking turn relatively brief (1-3 sentences) to maintain flow. 
82 | 83 | Available voices: af, af_bella, af_sarah, af_sky, am_adam, am_michael, bf_emma, bf_isabella, bm_george, bm_lewis 84 | 85 | Return only a JSON array of conversation turns, like: 86 | [ 87 | { 88 | "voice": "af_sarah", 89 | "text": "Welcome to The Deep Dive! Today we're exploring [topic], and I'm thrilled to discuss this with our experts." 90 | }, 91 | { 92 | "voice": "am_michael", 93 | "text": "Thanks Sarah! This is such an interesting topic, and I've actually been researching it recently." 94 | }, 95 | ... 96 | ]""" 97 | }, 98 | { 99 | "role": "user", 100 | "content": f"My topic is '{topic}'." 101 | } 102 | ], 103 | temperature=0.5, 104 | max_completion_tokens=8000, 105 | ) 106 | return completion 107 | 108 | def random_pause(self, sample_rate, min_duration=0.5, max_duration=2.0): 109 | silence_duration = random.uniform(min_duration, max_duration) 110 | silence = np.zeros(int(silence_duration * sample_rate)) 111 | return silence 112 | 113 | def _clean_json_response(self, response: str) -> str: 114 | """Clean and validate JSON response""" 115 | response = response.strip() 116 | 117 | # Try to extract content between square brackets 118 | try: 119 | start_idx = response.index('[') 120 | end_idx = response.rindex(']') + 1 121 | response = response[start_idx:end_idx] 122 | except ValueError: 123 | raise ValueError(f"Could not find JSON array in response: {response[:100]}...") 124 | 125 | try: 126 | parsed = json.loads(response) 127 | return json.dumps(parsed, separators=(',', ':')) 128 | except json.JSONDecodeError: 129 | raise ValueError(f"Invalid JSON response: {response[:100]}...") 130 | 131 | def generate(self, topic: str): 132 | self.download_required_files() # Download required files first 133 | 134 | # Use voices.bin if available, otherwise use voices.json 135 | voices_path = "voices.bin" if os.path.exists("voices.bin") else "voices.json" 136 | 137 | # Initialize Kokoro with the model file and voices path 138 | kokoro = Kokoro("kokoro-v0_19.onnx", voices_path) 139 | 140 | audio = [] 141 | completion = self.client(topic) 142 | response_content = completion.choices[0].message.content 143 | cleaned_json = self._clean_json_response(response_content) 144 | sentences = json.loads(cleaned_json) 145 | 146 | with tqdm(total=len(sentences), desc="Generating Audio", unit="sentence") as pbar: 147 | for sentence in sentences: 148 | voice = sentence["voice"] 149 | text = sentence["text"] 150 | 151 | samples, sample_rate = kokoro.create( 152 | text, 153 | voice=voice, 154 | speed=1.0, 155 | lang="en-us", 156 | ) 157 | audio.append(samples) 158 | audio.append(self.random_pause(sample_rate)) 159 | 160 | pbar.update(1) 161 | 162 | audio = np.concatenate(audio) 163 | sf.write("podcast.wav", audio, sample_rate) 164 | return "The podcast has been created successfully" -------------------------------------------------------------------------------- /docs/user-guide/core-concepts.md: -------------------------------------------------------------------------------- 1 | # Core Concepts 2 | 3 | Empire Chain is built around several core concepts that work together to provide a comprehensive AI development framework. 
4 | 
5 | ## Language Models (LLMs)
6 | 
7 | Empire Chain supports multiple LLM providers through a unified interface:
8 | 
9 | ```python
10 | from empire_chain.llms import OpenAILLM, AnthropicLLM, GroqLLM
11 | 
12 | # OpenAI
13 | openai_llm = OpenAILLM("gpt-4")
14 | 
15 | # Anthropic
16 | anthropic_llm = AnthropicLLM("claude-3-sonnet")
17 | 
18 | # Groq
19 | groq_llm = GroqLLM("mixtral-8x7b")
20 | ```
21 | 
22 | Each LLM implementation provides consistent methods:
23 | - `generate()`: Generate text based on a prompt
24 | - Error handling and retry logic
25 | - Streaming support where available
26 | 
27 | ## Vector Stores
28 | 
29 | Vector stores are used for efficient similarity search and retrieval:
30 | 
31 | ```python
32 | from empire_chain.vector_stores import QdrantVectorStore, ChromaVectorStore
33 | 
34 | # In-memory Qdrant store
35 | qdrant_store = QdrantVectorStore(":memory:")
36 | 
37 | # Persistent ChromaDB store
38 | chroma_store = ChromaVectorStore()
39 | ```
40 | 
41 | Common operations:
42 | - `add()`: Add text and embeddings
43 | - `query()`: Retrieve similar documents
44 | - `delete()`: Remove documents
45 | - `clear()`: Reset the store
46 | 
47 | ## Embeddings
48 | 
49 | Embeddings convert text into vector representations:
50 | 
51 | ```python
52 | from empire_chain.embeddings import OpenAIEmbeddings
53 | 
54 | embeddings = OpenAIEmbeddings("text-embedding-3-small")
55 | vector = embeddings.embed("Your text here")
56 | ```
57 | 
58 | Features:
59 | - Batched processing
60 | - Caching support
61 | - Error handling
62 | 
63 | ## Document Processing
64 | 
65 | The document processing system handles various file formats:
66 | 
67 | ```python
68 | from empire_chain.tools.file_reader import DocumentReader
69 | 
70 | reader = DocumentReader()
71 | text = reader.read("document.pdf")  # Supports PDF, DOCX, etc.
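# As a quick check, supported_formats() lists the readable extensions:
# ['.pdf', '.docx', '.txt', '.json', '.csv'] (Google Drive share links are also accepted by read())
print(reader.supported_formats())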
72 | ```
73 | 
74 | Capabilities:
75 | - PDF processing with PyPDF2
76 | - Word document processing with python-docx
77 | - Text extraction and cleaning
78 | - Metadata handling
79 | 
80 | ## Speech Processing
81 | 
82 | Speech-to-Text capabilities are provided through various models:
83 | 
84 | ```python
85 | from empire_chain.stt.stt import GroqSTT
86 | 
87 | stt = GroqSTT()
88 | text = stt.transcribe("audio.mp3")
89 | ```
90 | 
91 | Features:
92 | - Audio file support
93 | - Real-time transcription
94 | - Multiple language support
95 | 
96 | ## Web Crawling
97 | 
98 | Web content extraction is handled through crawl4ai:
99 | 
100 | ```python
101 | from empire_chain.tools.crawl4ai import Crawler
102 | 
103 | crawler = Crawler()
104 | data = crawler.crawl("https://example.com")
105 | ```
106 | 
107 | Capabilities:
108 | - HTML parsing
109 | - Content extraction
110 | - Rate limiting
111 | - Error handling
112 | 
113 | ## Data Visualization
114 | 
115 | The visualization system provides tools for data analysis:
116 | 
117 | ```python
118 | from empire_chain.cool_stuff.visualizer import DataAnalyzer, ChartFactory
119 | 
120 | analyzer = DataAnalyzer()
121 | data = analyzer.analyze(your_data)
122 | chart = ChartFactory.create_chart('Bar Graph', data)
123 | ```
124 | 
125 | Chart types:
126 | - Bar graphs
127 | - Line charts
128 | - Scatter plots
129 | - Custom visualizations
130 | 
131 | ## Interactive Interfaces
132 | 
133 | Streamlit-based interfaces for various applications:
134 | 
135 | ```python
136 | from empire_chain.streamlit import Chatbot, VisionChatbot, PDFChatbot
137 | 
138 | # Text chatbot
139 | chatbot = Chatbot(llm=OpenAILLM("gpt-4"), title="Empire Chain Chatbot")
140 | 
141 | # Vision chatbot
142 | vision_bot = VisionChatbot(title="Vision Chatbot")
143 | 
144 | # PDF chatbot
145 | pdf_bot = PDFChatbot(
146 |     title="PDF Chatbot", llm=OpenAILLM("gpt-4"),
147 |     vector_store=QdrantVectorStore(":memory:"), embeddings=OpenAIEmbeddings("text-embedding-3-small")
148 | )
149 | ```
150 | 
151 | Features:
152 | - File upload
153 | - Interactive chat
154 | - Real-time responses
155 | - Error handling
156 | 
157 | ## PhiData Agents
158 | 
159 | Specialized agents for specific tasks:
160 | 
161 | ```python
162 | from empire_chain.phidata_agents import PhiWebAgent, PhiFinanceAgent
163 | 
164 | web_agent = PhiWebAgent()
165 | finance_agent = PhiFinanceAgent()
166 | ```
167 | 
168 | Capabilities:
169 | - Web search and analysis
170 | - Financial data processing
171 | - Task automation
172 | - Structured output
173 | 
174 | ## Document Analysis
175 | 
176 | Advanced document analysis with Docling:
177 | 
178 | ```python
179 | from empire_chain.tools.docling import Docling
180 | 
181 | docling = Docling()
182 | analysis = docling.generate("Analyze this document")
183 | ```
184 | 
185 | Features:
186 | - Content analysis
187 | - Topic extraction
188 | - Summary generation
189 | - Key point identification
190 | 
191 | ## Processing Pipeline
192 | 
193 | The processing pipeline consists of several stages:
194 | 
195 | 1. **Input Processing**
196 |    - Document loading
197 |    - Format detection
198 |    - Initial preprocessing
199 | 
200 | 2. **Content Extraction**
201 |    - Text extraction
202 |    - Structure analysis
203 |    - Metadata collection
204 | 
205 | 3. **Analysis**
206 |    - Content analysis
207 |    - Feature extraction
208 |    - Entity recognition
209 | 
210 | 4. 
219 | 
220 | ## Visualization System
221 | 
222 | The visualization system provides tools for:
223 | 
224 | - Data plotting
225 | - Process monitoring
226 | - Result analysis
227 | - Interactive dashboards
228 | 
229 | ## RAG Architecture
230 | 
231 | The RAG (Retrieval Augmented Generation) system consists of:
232 | 
233 | ### Components
234 | 1. **Document Indexer**
235 |    - Processes and indexes documents
236 |    - Creates searchable representations
237 | 
238 | 2. **Retriever**
239 |    - Searches for relevant information
240 |    - Ranks and filters results
241 | 
242 | 3. **Generator**
243 |    - Combines retrieved information
244 |    - Generates coherent responses
245 | 
246 | ### Flow
247 | ```mermaid
248 | graph LR
249 |     A[Input Query] --> B[Retriever]
250 |     B --> C[Context Selection]
251 |     C --> D[Generator]
252 |     D --> E[Response]
253 | ```
254 | 
255 | ## Error Handling
256 | 
257 | Empire Chain uses a hierarchical error system:
258 | 
259 | ```python
260 | from empire_chain.exceptions import (
261 |     EmpireChainError,
262 |     DocumentError,
263 |     ModelError,
264 |     ConfigError
265 | )
266 | 
267 | # EmpireChainError is the root of the hierarchy, so catching it
268 | # also covers DocumentError, ModelError, and ConfigError:
269 | try:
270 |     text = reader.read("document.pdf")  # reader from the example above
271 | except EmpireChainError as e:
272 |     print(f"Processing failed: {e}")
273 | ```
274 | 
275 | ## Configuration System
276 | 
277 | ### Levels of Configuration
278 | 1. **Global Configuration**
279 |    - System-wide settings
280 |    - Default behaviors
281 | 
282 | 2. **Component Configuration**
283 |    - Component-specific settings
284 |    - Override capabilities
285 | 
286 | 3. **Runtime Configuration**
287 |    - Dynamic settings
288 |    - Session-specific overrides
289 | 
290 | ## Event System
291 | 
292 | The event system allows for:
293 | 
294 | - Progress monitoring
295 | - Status updates
296 | - Error tracking
297 | - Custom callbacks
298 | 
299 | ```python
300 | from empire_chain.events import EventHandler
301 | 
302 | def on_document_processed(event):
303 |     print(f"Processed: {event.document_id}")
304 | 
305 | handler = EventHandler()
306 | handler.subscribe("document_processed", on_document_processed)
307 | ```
308 | 
309 | ## Extension System
310 | 
311 | Empire Chain can be extended through:
312 | 
313 | 1. **Custom Processors**
314 | 2. **Model Adapters**
315 | 3. **Pipeline Stages**
316 | 4. **Visualization Components**
317 | 
318 | Example of a custom processor:
319 | 
320 | ```python
321 | from empire_chain.processors import BaseProcessor
322 | 
323 | class CustomProcessor(BaseProcessor):
324 |     def process(self, document):
325 |         # Custom processing logic goes here; return the transformed document
326 |         return document
327 | ```
--------------------------------------------------------------------------------
/empire_chain/prompt_templates/templates.py:
--------------------------------------------------------------------------------
1 | from typing import List
2 | 
3 | # Medical Templates
4 | MEDICAL_ANALYSIS_TEMPLATE = """
5 | Patient Information:
6 | {patient_info}
7 | 
8 | Symptoms: {symptoms}
9 | Medical History: {medical_history}
10 | 
11 | Please provide:
12 | 1. Potential diagnoses to consider
13 | 2. Recommended tests or examinations
14 | 3. Treatment considerations
15 | 4. Important precautions or warnings
16 | 
17 | Note: This is for educational purposes only and should not replace professional medical advice."""
18 | 
19 | MEDICAL_RESEARCH_TEMPLATE = """
20 | Research Topic: {topic}
21 | Current Knowledge: {background}
22 | 
23 | Please analyze the latest research and evidence regarding this medical topic, including:
24 | 1. Key findings from recent studies
25 | 2. Clinical implications
26 | 3. 
Areas of consensus and controversy 27 | 4. Future research directions""" 28 | 29 | # Reasoning Templates 30 | LOGICAL_ANALYSIS_TEMPLATE = """ 31 | Problem Statement: {problem} 32 | Given Information: {context} 33 | 34 | Please provide: 35 | 1. Step-by-step logical analysis 36 | 2. Key assumptions identified 37 | 3. Potential alternative perspectives 38 | 4. Conclusion based on available evidence""" 39 | 40 | CRITICAL_THINKING_TEMPLATE = """ 41 | Scenario: {scenario} 42 | Question: {question} 43 | 44 | Please analyze using: 45 | 1. Fact vs opinion differentiation 46 | 2. Evidence evaluation 47 | 3. Logical fallacy identification 48 | 4. Structured argumentation""" 49 | 50 | # Financial Templates 51 | FINANCIAL_ANALYSIS_TEMPLATE = """ 52 | Financial Data: 53 | {financial_data} 54 | 55 | Analysis Request: {analysis_type} 56 | 57 | Please provide: 58 | 1. Key financial metrics and ratios 59 | 2. Trend analysis 60 | 3. Risk assessment 61 | 4. Recommendations 62 | 5. Important disclaimers""" 63 | 64 | INVESTMENT_TEMPLATE = """ 65 | Investment Vehicle: {investment_type} 66 | Market Context: {market_conditions} 67 | Risk Profile: {risk_tolerance} 68 | 69 | Please analyze: 70 | 1. Potential returns and risks 71 | 2. Market factors to consider 72 | 3. Strategic considerations 73 | 4. Important warnings and disclaimers""" 74 | 75 | # Educational Templates 76 | LESSON_PLAN_TEMPLATE = """ 77 | Subject: {subject} 78 | Grade Level: {grade_level} 79 | Duration: {duration} 80 | 81 | Please create a lesson plan including: 82 | 1. Learning objectives 83 | 2. Required materials 84 | 3. Introduction/hook 85 | 4. Main activities 86 | 5. Assessment methods 87 | 6. Extensions/modifications""" 88 | 89 | CONCEPT_EXPLANATION_TEMPLATE = """ 90 | Topic: {topic} 91 | Student Level: {level} 92 | Prior Knowledge: {prerequisites} 93 | 94 | Please provide: 95 | 1. Clear explanation using appropriate language 96 | 2. Relevant examples and analogies 97 | 3. Common misconceptions 98 | 4. Practice problems or applications""" 99 | 100 | # Creative Writing Templates 101 | STORY_GENERATION_TEMPLATE = """ 102 | Genre: {genre} 103 | Theme: {theme} 104 | Key Elements: {elements} 105 | 106 | Please create: 107 | 1. Engaging plot outline 108 | 2. Character descriptions 109 | 3. Setting details 110 | 4. Key story beats 111 | 5. Thematic elements""" 112 | 113 | CREATIVE_PROMPT_TEMPLATE = """ 114 | Creative Form: {form} 115 | Style: {style} 116 | Requirements: {requirements} 117 | 118 | Please generate creative content that: 119 | 1. Matches the specified form and style 120 | 2. Incorporates required elements 121 | 3. Maintains originality and engagement""" 122 | 123 | # Blog Writing Templates 124 | BLOG_POST_TEMPLATE = """ 125 | Topic: {topic} 126 | Target Audience: {audience} 127 | Purpose: {purpose} 128 | 129 | Please create a blog post structure with: 130 | 1. Attention-grabbing headline 131 | 2. Engaging introduction 132 | 3. Main content points 133 | 4. Call to action 134 | 5. SEO considerations""" 135 | 136 | CONTENT_STRATEGY_TEMPLATE = """ 137 | Blog Focus: {focus} 138 | Target Demographics: {demographics} 139 | Goals: {goals} 140 | 141 | Please provide: 142 | 1. Content pillars 143 | 2. Topic clusters 144 | 3. Content calendar suggestions 145 | 4. Engagement strategies""" 146 | 147 | # Coding Templates 148 | CODE_REVIEW_TEMPLATE = """ 149 | Language: {language} 150 | Code Context: {context} 151 | Code Block: 152 | {code} 153 | 154 | Please review for: 155 | 1. Code quality and best practices 156 | 2. 
Potential bugs or issues 157 | 3. Performance considerations 158 | 4. Security concerns 159 | 5. Suggested improvements""" 160 | 161 | ARCHITECTURE_DESIGN_TEMPLATE = """ 162 | Project Type: {project_type} 163 | Requirements: {requirements} 164 | Constraints: {constraints} 165 | 166 | Please provide: 167 | 1. System architecture overview 168 | 2. Component relationships 169 | 3. Technical considerations 170 | 4. Implementation guidelines 171 | 5. Potential challenges""" 172 | 173 | class MedicalAnalysis: 174 | patient_info: str 175 | symptoms: str 176 | medical_history: str 177 | 178 | def __str__(self): 179 | return format_prompt(MEDICAL_ANALYSIS_TEMPLATE, 180 | patient_info=self.patient_info, 181 | symptoms=self.symptoms, 182 | medical_history=self.medical_history) 183 | 184 | class MedicalResearch: 185 | topic: str 186 | background: str 187 | 188 | def __str__(self): 189 | return format_prompt(MEDICAL_RESEARCH_TEMPLATE, 190 | topic=self.topic, 191 | background=self.background) 192 | 193 | class LogicalAnalysis: 194 | problem: str 195 | context: str 196 | 197 | def __str__(self): 198 | return format_prompt(LOGICAL_ANALYSIS_TEMPLATE, 199 | problem=self.problem, 200 | context=self.context) 201 | 202 | class CriticalThinking: 203 | scenario: str 204 | question: str 205 | 206 | def __str__(self): 207 | return format_prompt(CRITICAL_THINKING_TEMPLATE, 208 | scenario=self.scenario, 209 | question=self.question) 210 | 211 | class FinancialAnalysis: 212 | financial_data: str 213 | analysis_type: str 214 | 215 | def __str__(self): 216 | return format_prompt(FINANCIAL_ANALYSIS_TEMPLATE, 217 | financial_data=self.financial_data, 218 | analysis_type=self.analysis_type) 219 | 220 | class CodeReview: 221 | language: str 222 | context: str 223 | code: str 224 | 225 | def __str__(self): 226 | return format_prompt(CODE_REVIEW_TEMPLATE, 227 | language=self.language, 228 | context=self.context, 229 | code=self.code) 230 | 231 | # Add list of available templates 232 | available_templates = [ 233 | MedicalAnalysis, 234 | MedicalResearch, 235 | LogicalAnalysis, 236 | CriticalThinking, 237 | FinancialAnalysis, 238 | CodeReview 239 | ] 240 | 241 | def format_prompt(template: str, **kwargs) -> str: 242 | """ 243 | Format a prompt template with the provided arguments. 244 | 245 | Args: 246 | template: The prompt template to format 247 | **kwargs: Keyword arguments to fill in the template 248 | 249 | Returns: 250 | str: The formatted prompt 251 | """ 252 | try: 253 | return template.format(**kwargs) 254 | except KeyError as e: 255 | raise ValueError(f"Missing required argument: {e}") 256 | 257 | def combine_prompts(prompts: List[str], separator: str = "\n\n") -> str: 258 | """ 259 | Combine multiple prompts into a single prompt string. 
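    Example (illustrative):
        combine_prompts(["You are concise.", "Answer in English."])
        returns "You are concise.\n\nAnswer in English."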
260 | 261 | Args: 262 | prompts: List of prompts to combine 263 | separator: String to use between prompts (default: double newline) 264 | 265 | Returns: 266 | str: Combined prompt string 267 | """ 268 | return separator.join(prompt.strip() for prompt in prompts if prompt.strip()) 269 | -------------------------------------------------------------------------------- /tests/test_vector_stores.py: -------------------------------------------------------------------------------- 1 | # empire chain 2 | from empire_chain.vector_stores.qdrant import QdrantVectorStore, QdrantWrapper 3 | from qdrant_client.models import ( 4 | Distance, 5 | HnswConfigDiff, 6 | VectorParams, 7 | OptimizersConfigDiff, 8 | WalConfigDiff, 9 | QuantizationConfig, 10 | ScalarQuantization, 11 | ScalarType, 12 | ) 13 | from unittest.mock import Mock, MagicMock, patch 14 | import unittest 15 | import uuid 16 | 17 | class TestQdrantVectorStore(unittest.TestCase): 18 | def setUp(self): 19 | """Set up test fixtures before each test method.""" 20 | self.mock_wrapper = Mock() 21 | self.mock_hit = MagicMock() 22 | self.mock_hit.payload = {"text": "Hello, world!"} 23 | self.mock_wrapper.search.return_value = [self.mock_hit] 24 | 25 | # Create a vector store with mocked client 26 | with patch('empire_chain.vector_stores.qdrant.QdrantWrapper') as mock_wrapper_class: 27 | mock_wrapper_class.return_value = self.mock_wrapper 28 | self.vector_store = QdrantVectorStore() 29 | 30 | def test_initialization_default(self): 31 | """Test initialization with default parameters.""" 32 | with patch('empire_chain.vector_stores.qdrant.QdrantWrapper') as mock_wrapper: 33 | store = QdrantVectorStore() 34 | mock_wrapper.assert_called_once_with(None) 35 | mock_wrapper.return_value.create_collection.assert_called_once() 36 | 37 | def test_initialization_custom(self): 38 | """Test initialization with custom parameters.""" 39 | with patch('empire_chain.vector_stores.qdrant.QdrantWrapper') as mock_wrapper: 40 | # Create a proper quantization config using dictionary format 41 | quantization_config = { 42 | "scalar": { 43 | "type": ScalarType.INT8, 44 | "quantile": 0.99, 45 | "always_ram": True 46 | } 47 | } 48 | 49 | store = QdrantVectorStore( 50 | url="localhost:6333", 51 | collection_name="test_collection", 52 | vector_size=768, 53 | distance=Distance.EUCLID, 54 | on_disk=True, 55 | hnsw_config=HnswConfigDiff(m=32), 56 | quantization_config=quantization_config 57 | ) 58 | mock_wrapper.assert_called_once_with("localhost:6333") 59 | mock_wrapper.return_value.create_collection.assert_called_once_with( 60 | name="test_collection", 61 | vector_size=768, 62 | distance=Distance.EUCLID, 63 | on_disk=True, 64 | hnsw_config=HnswConfigDiff(m=32), 65 | quantization_config=quantization_config 66 | ) 67 | 68 | def test_add_text(self): 69 | """Test adding text and embedding.""" 70 | text = "Hello, world!" 
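        # A toy 3-dimensional vector stands in for a real embedding model's output here.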
71 | embedding = [1.0, 2.0, 3.0] 72 | self.vector_store.add(text, embedding) 73 | 74 | # Verify upsert was called with correct parameters 75 | self.mock_wrapper.upsert.assert_called_once() 76 | call_args = self.mock_wrapper.upsert.call_args 77 | self.assertEqual(call_args[0][0], "default") # collection name 78 | points = call_args[0][1] 79 | self.assertEqual(len(points), 1) 80 | self.assertEqual(points[0].payload["text"], text) 81 | self.assertEqual(points[0].vector, embedding) 82 | self.assertTrue(isinstance(points[0].id, str)) 83 | 84 | def test_query_basic(self): 85 | """Test basic query functionality.""" 86 | query_embedding = [1.0, 2.0, 3.0] 87 | results = self.vector_store.query(query_embedding) 88 | 89 | self.mock_wrapper.search.assert_called_once_with( 90 | collection_name="default", 91 | query_vector=query_embedding, 92 | limit=10, 93 | score_threshold=None, 94 | query_filter=None 95 | ) 96 | self.assertEqual(results, ["Hello, world!"]) 97 | 98 | def test_query_with_parameters(self): 99 | """Test query with custom parameters.""" 100 | query_embedding = [1.0, 2.0, 3.0] 101 | filter_dict = {"category": "test"} 102 | results = self.vector_store.query( 103 | query_embedding=query_embedding, 104 | k=5, 105 | score_threshold=0.8, 106 | filter=filter_dict 107 | ) 108 | 109 | self.mock_wrapper.search.assert_called_once_with( 110 | collection_name="default", 111 | query_vector=query_embedding, 112 | limit=5, 113 | score_threshold=0.8, 114 | query_filter=filter_dict 115 | ) 116 | 117 | def test_query_empty_results(self): 118 | """Test query with no results.""" 119 | self.mock_wrapper.search.return_value = [] 120 | results = self.vector_store.query([1.0, 2.0, 3.0]) 121 | self.assertEqual(results, []) 122 | 123 | def test_query_multiple_results(self): 124 | """Test query with multiple results.""" 125 | mock_hits = [ 126 | MagicMock(payload={"text": "First result"}), 127 | MagicMock(payload={"text": "Second result"}), 128 | MagicMock(payload={"text": "Third result"}) 129 | ] 130 | self.mock_wrapper.search.return_value = mock_hits 131 | 132 | results = self.vector_store.query([1.0, 2.0, 3.0], k=3) 133 | self.assertEqual(results, ["First result", "Second result", "Third result"]) 134 | 135 | def test_error_handling_add(self): 136 | """Test error handling during add operation.""" 137 | # Setup the mock to raise an exception 138 | error_msg = "Upsert failed" 139 | self.mock_wrapper.upsert.side_effect = RuntimeError(error_msg) 140 | 141 | # Test that the exception is properly caught and re-raised 142 | with self.assertRaises(RuntimeError) as context: 143 | self.vector_store.add("test", [1.0, 2.0, 3.0]) 144 | self.assertEqual(str(context.exception), error_msg) 145 | 146 | def test_error_handling_query(self): 147 | """Test error handling during query operation.""" 148 | # Setup the mock to raise an exception 149 | error_msg = "Search failed" 150 | self.mock_wrapper.search.side_effect = RuntimeError(error_msg) 151 | 152 | # Test that the exception is properly caught and re-raised 153 | with self.assertRaises(RuntimeError) as context: 154 | self.vector_store.query([1.0, 2.0, 3.0]) 155 | self.assertEqual(str(context.exception), error_msg) 156 | 157 | def test_wrapper_initialization(self): 158 | """Test QdrantWrapper initialization.""" 159 | with patch('empire_chain.vector_stores.qdrant.QdrantClient') as mock_client: 160 | # Test in-memory initialization 161 | wrapper = QdrantWrapper() 162 | mock_client.assert_called_with(":memory:", prefer_grpc=True, timeout=None) 163 | 164 | # Test URL initialization 
165 | wrapper = QdrantWrapper(url="localhost:6333") 166 | mock_client.assert_called_with(url="localhost:6333", prefer_grpc=True, timeout=None) 167 | 168 | # Test with custom parameters 169 | wrapper = QdrantWrapper(url="localhost:6333", prefer_grpc=False, timeout=30) 170 | mock_client.assert_called_with(url="localhost:6333", prefer_grpc=False, timeout=30) 171 | 172 | def test_wrapper_create_collection(self): 173 | """Test QdrantWrapper create_collection method.""" 174 | wrapper = QdrantWrapper() 175 | wrapper.client = Mock() 176 | 177 | # Test with default parameters 178 | wrapper.create_collection("test_collection") 179 | wrapper.client.create_collection.assert_called_once() 180 | 181 | # Test with custom parameters 182 | wrapper.client.reset_mock() 183 | wrapper.create_collection( 184 | name="test_collection", 185 | vector_size=768, 186 | distance=Distance.EUCLID, 187 | on_disk=True 188 | ) 189 | wrapper.client.create_collection.assert_called_once() 190 | 191 | def test_wrapper_error_handling(self): 192 | """Test QdrantWrapper error handling.""" 193 | wrapper = QdrantWrapper() 194 | wrapper.client = Mock() 195 | error_msg = "Creation failed" 196 | wrapper.client.create_collection.side_effect = RuntimeError(error_msg) 197 | 198 | with self.assertRaises(RuntimeError) as context: 199 | wrapper.create_collection("test_collection") 200 | self.assertEqual(str(context.exception), f"Error creating collection: {error_msg}") 201 | 202 | if __name__ == "__main__": 203 | unittest.main() -------------------------------------------------------------------------------- /empire_chain/cool_stuff/visualizer.py: -------------------------------------------------------------------------------- 1 | from empire_chain.llms.llms import GroqLLM 2 | import matplotlib.pyplot as plt 3 | import json 4 | from dotenv import load_dotenv 5 | import os 6 | from abc import ABC, abstractmethod 7 | from typing import Optional, Dict, Any, Union, Tuple 8 | 9 | load_dotenv() 10 | 11 | class DataAnalyzer: 12 | def __init__(self): 13 | self.client = GroqLLM(custom_instructions="""You are a data analysis expert. Your task is to analyze input data and structure it for visualization. 14 | Your response must ALWAYS be a valid JSON object with no additional text, comments, or explanations outside the JSON structure. 15 | 16 | Rules: 17 | 1. Always return valid JSON with either: 18 | Success: {"title": string, "x-axis": string, "y-axis": string, "datapoints": object} 19 | Failure: {"result": null, "error": string} 20 | 2. For numerical data: Extract or calculate meaningful metrics 21 | 3. For categorical data: Generate frequency counts or proportions 22 | 4. For time-series: Preserve chronological order 23 | 5. Clean and standardize inconsistent data 24 | 6. Handle both structured and unstructured text input 25 | 7. If data format is unclear but usable, make reasonable assumptions and note them in the title 26 | 8. If no numerical values can be extracted, return null result 27 | 9. Ensure all datapoint values are numbers, not text""") 28 | 29 | def analyze(self, data: str) -> Dict[str, Any]: 30 | prompt = f"""IMPORTANT: Respond ONLY with a valid JSON object. No other text or explanation. 31 | 32 | Analyze the following data and structure it for visualization. 33 | If the data cannot be meaningfully converted to datapoints or lacks numerical values, return a null result with explanation. 
34 | 35 | Required JSON structure (use exactly this format): 36 | {{ 37 | "title": "Descriptive title of the analysis", 38 | "x-axis": "Label for x-axis", 39 | "y-axis": "Label for y-axis", 40 | "datapoints": {{ 41 | "category1": value1, 42 | "category2": value2 43 | }} or [value1, value2, ...] 44 | }} 45 | OR if data cannot be processed: 46 | {{ 47 | "result": null, 48 | "error": "Explanation of why data cannot be processed" 49 | }} 50 | 51 | Input data to analyze: 52 | {data} 53 | 54 | Remember: Return ONLY the JSON object, no other text.""" 55 | 56 | try: 57 | response = self.client.generate(prompt) 58 | response = response.strip() 59 | if response.startswith('```json'): 60 | response = response[7:] 61 | if response.startswith('```'): 62 | response = response[3:] 63 | if response.endswith('```'): 64 | response = response[:-3] 65 | response = response.strip() 66 | 67 | return json.loads(response) 68 | except json.JSONDecodeError as e: 69 | print(f"Failed to parse LLM response as JSON: {response}") 70 | return { 71 | "result": None, 72 | "error": f"Failed to generate valid JSON from analysis: {str(e)}" 73 | } 74 | except Exception as e: 75 | return { 76 | "result": None, 77 | "error": f"Error during data analysis: {str(e)}" 78 | } 79 | 80 | class BaseChart(ABC): 81 | def __init__(self, data_json: Union[str, Dict], fig_ax: Optional[Tuple[plt.Figure, plt.Axes]] = None): 82 | self.data = json.loads(data_json) if isinstance(data_json, str) else data_json 83 | if fig_ax is None: 84 | self.fig, self.ax = plt.subplots(figsize=(10, 6)) 85 | else: 86 | self.fig, self.ax = fig_ax 87 | 88 | @abstractmethod 89 | def plot(self) -> Optional[plt.Figure]: 90 | pass 91 | 92 | def show(self): 93 | """Display the chart.""" 94 | if self.plot(): 95 | plt.show() 96 | 97 | def _validate_data(self) -> bool: 98 | if "result" in self.data and self.data["result"] is None: 99 | print(f"Error in data: {self.data.get('error', 'Unknown error')}") 100 | return False 101 | try: 102 | if isinstance(self.data['datapoints'], dict): 103 | return all(isinstance(v, (int, float)) for v in self.data['datapoints'].values()) 104 | elif isinstance(self.data['datapoints'], list): 105 | return all(isinstance(v, (int, float)) for v in self.data['datapoints']) 106 | except Exception: 107 | return False 108 | return True 109 | 110 | class LineChart(BaseChart): 111 | def plot(self) -> Optional[plt.Figure]: 112 | if not self._validate_data(): 113 | return None 114 | 115 | if isinstance(self.data['datapoints'], dict): 116 | categories = list(self.data['datapoints'].keys()) 117 | values = list(self.data['datapoints'].values()) 118 | else: 119 | categories = [f'Point {i+1}' for i in range(len(self.data['datapoints']))] 120 | values = self.data['datapoints'] 121 | 122 | self.ax.plot(categories, values, marker='o', linewidth=2) 123 | self.ax.set_xlabel(self.data['x-axis']) 124 | self.ax.set_ylabel(self.data['y-axis']) 125 | self.ax.set_title(self.data['title']) 126 | plt.xticks(rotation=45) 127 | plt.tight_layout() 128 | return self.fig 129 | 130 | class PieChart(BaseChart): 131 | def plot(self) -> Optional[plt.Figure]: 132 | if not self._validate_data(): 133 | return None 134 | 135 | required_fields = ['title', 'datapoints'] 136 | for field in required_fields: 137 | if field not in self.data: 138 | raise ValueError(f"Missing required field '{field}' in data") 139 | 140 | if not isinstance(self.data['datapoints'], dict): 141 | raise ValueError("Pie chart requires dictionary-format datapoints") 142 | 143 | categories = 
list(self.data['datapoints'].keys()) 144 | values = list(self.data['datapoints'].values()) 145 | 146 | if any(v < 0 for v in values): 147 | raise ValueError("Pie chart cannot display negative values. Please use a different chart type for negative data.") 148 | 149 | self.ax.pie(values, labels=categories, autopct='%1.1f%%') 150 | self.ax.set_title(self.data['title']) 151 | plt.tight_layout() 152 | return self.fig 153 | 154 | class BarGraph(BaseChart): 155 | def plot(self) -> Optional[plt.Figure]: 156 | if not self._validate_data(): 157 | return None 158 | 159 | categories = list(self.data['datapoints'].keys()) 160 | values = list(self.data['datapoints'].values()) 161 | 162 | self.ax.bar(categories, values) 163 | self.ax.set_xlabel(self.data['x-axis']) 164 | self.ax.set_ylabel(self.data['y-axis']) 165 | self.ax.set_title(self.data['title']) 166 | plt.xticks(rotation=45) 167 | plt.tight_layout() 168 | return self.fig 169 | 170 | class ScatterChart(BaseChart): 171 | def plot(self) -> Optional[plt.Figure]: 172 | if not self._validate_data(): 173 | return None 174 | 175 | categories = list(self.data['datapoints'].keys()) 176 | values = list(self.data['datapoints'].values()) 177 | 178 | self.ax.scatter(categories, values) 179 | self.ax.set_xlabel(self.data['x-axis']) 180 | self.ax.set_ylabel(self.data['y-axis']) 181 | self.ax.set_title(self.data['title']) 182 | plt.xticks(rotation=45) 183 | plt.tight_layout() 184 | return self.fig 185 | 186 | class Histogram(BaseChart): 187 | def plot(self) -> Optional[plt.Figure]: 188 | if not self._validate_data(): 189 | return None 190 | 191 | values = list(self.data['datapoints'].values()) 192 | 193 | self.ax.hist(values, bins='auto') 194 | self.ax.set_xlabel(self.data['y-axis']) 195 | self.ax.set_ylabel('Frequency') 196 | self.ax.set_title(self.data['title']) 197 | plt.tight_layout() 198 | return self.fig 199 | 200 | class BoxPlot(BaseChart): 201 | def plot(self) -> Optional[plt.Figure]: 202 | if not self._validate_data(): 203 | return None 204 | 205 | values = list(self.data['datapoints'].values()) 206 | 207 | self.ax.boxplot(values) 208 | self.ax.set_ylabel(self.data['y-axis']) 209 | self.ax.set_title(self.data['title']) 210 | plt.tight_layout() 211 | return self.fig 212 | 213 | class ChartFactory: 214 | _chart_types = { 215 | 'Line Chart': LineChart, 216 | 'Pie Chart': PieChart, 217 | 'Bar Graph': BarGraph, 218 | 'Scatter Plot': ScatterChart, 219 | 'Histogram': Histogram, 220 | 'Box Plot': BoxPlot 221 | } 222 | 223 | @classmethod 224 | def create_chart(cls, chart_type: str, data_json: Union[str, Dict], fig_ax: Optional[Tuple[plt.Figure, plt.Axes]] = None) -> Optional[BaseChart]: 225 | chart_class = cls._chart_types.get(chart_type) 226 | if not chart_class: 227 | available_types = "', '".join(cls._chart_types.keys()) 228 | raise ValueError(f"Invalid chart type '{chart_type}'. 
Available types are: '{available_types}'") 229 | return chart_class(data_json, fig_ax) -------------------------------------------------------------------------------- /empire_chain/playground/compare_llms.py: -------------------------------------------------------------------------------- 1 | from empire_chain.llms.llms import OpenAILLM, AnthropicLLM, GroqLLM, GeminiLLM 2 | import streamlit as st 3 | from empire_chain.prompt_templates import templates 4 | import time 5 | from dataclasses import dataclass 6 | 7 | class LLMPlayground: 8 | def __init__(self): 9 | self.llms = { 10 | "OpenAI": OpenAILLM(), 11 | "Anthropic": AnthropicLLM(), 12 | "Groq": GroqLLM(), 13 | "Gemini": GeminiLLM() 14 | } 15 | 16 | def launch(self): 17 | st.set_page_config(layout="wide") 18 | st.title("🤖 LLM Battle Arena") 19 | 20 | # Add CSS for the battle animation modal and overlay 21 | st.markdown(""" 22 | 56 | """, unsafe_allow_html=True) 57 | 58 | # Create a placeholder for the battle animation with custom CSS 59 | st.markdown(""" 60 | 70 | """, unsafe_allow_html=True) 71 | battle_animation_placeholder = st.empty() 72 | 73 | # Step 1: LLM Selection and API Keys 74 | st.header("1. Select Two LLMs to Compare") 75 | 76 | # Create two columns for LLM selection 77 | col1, col2 = st.columns(2) 78 | 79 | with col1: 80 | st.subheader("First LLM") 81 | llm1 = st.selectbox( 82 | "Select LLM", 83 | options=list(self.llms.keys()), 84 | key="llm1" 85 | ) 86 | api_key1 = st.text_input( 87 | "API Key", 88 | type="password", 89 | help=f"Enter your {llm1} API key", 90 | key="api_key1" 91 | ) 92 | 93 | with col2: 94 | st.subheader("Second LLM") 95 | remaining_llms = [llm for llm in self.llms.keys() if llm != llm1] 96 | llm2 = st.selectbox( 97 | "Select LLM", 98 | options=remaining_llms, 99 | key="llm2" 100 | ) 101 | api_key2 = st.text_input( 102 | "API Key", 103 | type="password", 104 | help=f"Enter your {llm2} API key", 105 | key="api_key2" 106 | ) 107 | 108 | selected_llms = [(llm1, api_key1), (llm2, api_key2)] 109 | 110 | # Step 2: Template Selection 111 | st.header("2. Select Task") 112 | template_names = [template.__name__ for template in templates.available_templates] 113 | selected_template = st.selectbox( 114 | "Choose the task template", 115 | options=template_names 116 | ) 117 | 118 | # Step 3: Input Parameters 119 | if selected_template: 120 | st.header("3. 
Fill Task Parameters") 121 | template_class = getattr(templates, selected_template) 122 | 123 | # Get the required parameters from class annotations 124 | template_params = {} 125 | 126 | if hasattr(template_class, '__annotations__'): 127 | for param_name, param_type in template_class.__annotations__.items(): 128 | template_params[param_name] = st.text_area( 129 | f"{param_name} ({param_type.__name__})", 130 | "", 131 | help=f"Enter {param_name}" 132 | ) 133 | 134 | if st.button("⚔️ Battle!", type="primary"): 135 | try: 136 | # Validate API keys 137 | for llm_name, api_key in selected_llms: 138 | if not api_key: 139 | st.error(f"❌ Please provide an API key for {llm_name}") 140 | return 141 | 142 | # Create template instance 143 | template = template_class() 144 | for param_name, value in template_params.items(): 145 | setattr(template, param_name, value) 146 | 147 | prompt = str(template) 148 | 149 | # Display the generated prompt in an expander 150 | with st.expander("🔍 Show generated prompt"): 151 | st.code(prompt, language="text") 152 | 153 | # Create battle animation container at the bottom 154 | battle_container = st.container() 155 | 156 | # Show battle animation 157 | self.show_battle_animation(battle_container, selected_llms[0][0], selected_llms[1][0]) 158 | 159 | # Create two columns for results 160 | col1, col2 = st.columns(2) 161 | 162 | for col, (llm_name, api_key) in zip([col1, col2], selected_llms): 163 | with col: 164 | st.subheader(f"🤖 {llm_name}") 165 | try: 166 | # Create a new instance with the provided API key 167 | llm_class = self.llms[llm_name].__class__ 168 | llm_instance = llm_class(api_key=api_key) 169 | 170 | start_time = time.time() 171 | response = llm_instance.generate(prompt) 172 | end_time = time.time() 173 | 174 | execution_time = end_time - start_time 175 | st.success(f"⏱️ Time taken: {execution_time:.2f} seconds") 176 | 177 | st.code(response, language="text") 178 | except Exception as e: 179 | st.error(f"❌ Error with {llm_name}: {str(e)}") 180 | 181 | except Exception as e: 182 | st.error(f"❌ Error creating template: {str(e)}") 183 | st.error(f"Template parameters: {template_params}") 184 | 185 | def show_battle_animation(self, container, llm1_name, llm2_name): 186 | placeholder = container.empty() 187 | 188 | frames = [ 189 | f""" 190 |
191 |             <div class="battle-overlay"></div>
192 |             <div class="battle-modal">
193 |                 <div class="battle-title">🔥 {llm1_name} vs {llm2_name} 🔥</div>
194 |                 <div class="battle-text">
195 |                     🤖    FIGHT!    🤖
196 |                 </div>
197 |                 <div class="battle-text">⚔️      ⚔️</div>
198 |             </div>
199 |             """,
200 |             f"""
201 |             <div class="battle-overlay"></div>
202 |             <div class="battle-modal">
203 |                 <div class="battle-title">🔥 {llm1_name} vs {llm2_name} 🔥</div>
204 |                 <div class="battle-text">
205 |                     🤖   ⚡CLASH!⚡   🤖
206 |                 </div>
207 |                 <div class="battle-text">⚔️   ⚡   ⚔️</div>
208 |             </div>
209 | 
210 |             """,
211 |             f"""
212 |             <div class="battle-overlay"></div>
213 |             <div class="battle-modal">
214 |                 <div class="battle-title">🔥 {llm1_name} vs {llm2_name} 🔥</div>
215 |                 <div class="battle-text">
216 |                     🤖    BANG!    🤖
217 |                 </div>
218 |                 <div class="battle-text">💥      💥</div>
219 |             </div>
220 | 
221 |             """,
222 |             f"""
223 |             <div class="battle-overlay"></div>
224 |             <div class="battle-modal">
225 |                 <div class="battle-title">🔥 {llm1_name} vs {llm2_name} 🔥</div>
226 |                 <div class="battle-text">
227 |                     🤖   POW! POW!   🤖
228 |                 </div>
229 |                 <div class="battle-text">💫    ⚡    💫</div>
230 |             </div>
231 | 
232 | """ 233 | ] 234 | 235 | # Display each frame for a short duration 236 | for frame in frames: 237 | placeholder.markdown(frame, unsafe_allow_html=True) 238 | time.sleep(0.5) # Slightly longer duration for better effect 239 | 240 | # Clear the animation placeholder 241 | placeholder.empty() 242 | 243 | def launch_playground(): 244 | playground = LLMPlayground() 245 | playground.launch() -------------------------------------------------------------------------------- /Readme.md: -------------------------------------------------------------------------------- 1 | # ⚔️🔗 Empire Chain 2 | 3 | ⚡ An orchestration framework for all your AI needs ⚡ 4 | 5 | ``` 6 | ███████╗███╗ ███╗██████╗ ██╗██████╗ ███████╗ 7 | ██╔════╝████╗ ████║██╔══██╗██║██╔══██╗██╔════╝ 8 | █████╗ ██╔████╔██║██████╔╝██║██████╔╝█████╗ 9 | ██╔══╝ ██║╚██╔╝██║██╔═══╝ ██║██╔══██╗██╔══╝ 10 | ███████╗██║ ╚═╝ ██║██║ ██║██║ ██║███████╗ 11 | ╚══════╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝ ╚═╝╚══════╝ 12 | ██████╗██╗ ██╗ █████╗ ██╗███╗ ██╗ 13 | ██╔════╝██║ ██║██╔══██╗██║████╗ ██║ 14 | ██║ ███████║███████║██║██╔██╗ ██║ 15 | ██║ ██╔══██║██╔══██║██║██║╚██╗██║ 16 | ╚██████╗██║ ██║██║ ██║██║██║ ╚████║ 17 | ╚═════╝╚═╝ ╚═╝╚═╝ ╚═╝╚═╝╚═╝ ╚═══╝ 18 | ============================================= 19 | 🔗 Chain Your AI Dreams Together 🔗 20 | ============================================= 21 | ``` 22 | 23 |

24 | 25 | PyPI version 26 | 27 | 28 | PyPI downloads 29 | 30 | 31 | License 32 | 33 | 34 | GitHub stars 35 | 36 |

37 | 38 | ## Features 39 | 40 | - 🤖 Multiple LLM Support (OpenAI, Anthropic, Groq) 41 | - 📚 Vector Store Integration (Qdrant, ChromaDB) 42 | - 🔍 Advanced Document Processing 43 | - 🎙️ Speech-to-Text Capabilities 44 | - 🌐 Web Crawling with crawl4ai 45 | - 📊 Data Visualization 46 | - 🎯 RAG Applications 47 | - 🤝 PhiData Agent Integration 48 | - 💬 Interactive Chatbots 49 | - 🤖 Agentic Framework 50 | 51 | ## Installation 52 | 53 | ```bash 54 | pip install empire-chain 55 | ``` 56 | 57 | # Empire Chain Components 58 | 59 | ## Empire Agent 60 | 61 | ```python 62 | """ 63 | This is a simple example of how to use the Empire Agent. 64 | Please run the following command to install the necessary dependencies and store keys in .env: 65 | !pip install empire-chain 66 | """ 67 | from datetime import datetime 68 | from empire_chain.agent.agent import Agent 69 | from dotenv import load_dotenv 70 | 71 | load_dotenv() 72 | 73 | def get_weather(location: str) -> str: 74 | return f"The weather in {location} is sunny!" 75 | 76 | def calculate_distance(from_city: str, to_city: str) -> str: 77 | return f"The distance from {from_city} to {to_city} is 500km" 78 | 79 | def get_time(timezone: str) -> str: 80 | return f"Current time in {timezone}: {datetime.now()}" 81 | 82 | def translate_text(text: str, target_language: str) -> str: 83 | return f"Translated '{text}' to {target_language}: [translation would go here]" 84 | 85 | def search_web(query: str, num_results: int) -> str: 86 | return f"Top {num_results} results for '{query}': [search results would go here]" 87 | 88 | def main(): 89 | # Create agent 90 | agent = Agent() 91 | 92 | # Register functions 93 | functions = [ 94 | get_weather, 95 | calculate_distance, 96 | get_time, 97 | translate_text, 98 | search_web 99 | ] 100 | 101 | for func in functions: 102 | agent.register_function(func) 103 | 104 | # Example queries 105 | queries = [ 106 | "What's the weather like in Tokyo?", 107 | "How far is London from Paris?", 108 | "What time is it in EST timezone?", 109 | "Translate 'Hello World' to Spanish", 110 | "Search for latest news about AI and show 3 results" 111 | ] 112 | 113 | # Process queries 114 | for query in queries: 115 | try: 116 | result = agent.process_query(query) 117 | print(f"\nQuery: {query}") 118 | print(f"Result: {result['result']}") 119 | except Exception as e: 120 | print(f"Error processing query '{query}': {str(e)}") 121 | 122 | if __name__ == "__main__": 123 | main() 124 | ``` 125 | 126 | ## RAG 127 | 128 | ```python 129 | from empire_chain.vector_stores import QdrantVectorStore 130 | from empire_chain.embeddings import OpenAIEmbeddings 131 | from empire_chain.llms.llms import GroqLLM 132 | from empire_chain.tools.file_reader import DocumentReader 133 | import os 134 | from dotenv import load_dotenv 135 | from empire_chain.stt.stt import GroqSTT 136 | 137 | def main(if_audio_input: bool = False): 138 | load_dotenv() 139 | 140 | vector_store = QdrantVectorStore(":memory:") 141 | embeddings = OpenAIEmbeddings("text-embedding-3-small") 142 | llm = GroqLLM("llama3-8b-8192") 143 | reader = DocumentReader() 144 | 145 | file_path = "input.pdf" 146 | text = reader.read(file_path) 147 | 148 | text_embedding = embeddings.embed(text) 149 | vector_store.add(text, text_embedding) 150 | 151 | text_query = "What is the main topic of this document?" 
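    # When audio input is enabled, the spoken question is transcribed below
    # and used in place of the text query.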
152 |     if if_audio_input:
153 |         stt = GroqSTT()
154 |         audio_query = stt.transcribe("audio.mp3")
155 |         text_query = audio_query  # use the transcribed question downstream
156 |         query_embedding = embeddings.embed(audio_query)
157 |     else:
158 |         query_embedding = embeddings.embed(text_query)
159 |     relevant_texts = vector_store.query(query_embedding, k=3)
160 | 
161 |     context = "\n".join(relevant_texts)
162 |     prompt = f"Based on the following context, {text_query}\n\nContext: {context}"
163 |     response = llm.generate(prompt)
164 |     print(f"Query: {text_query}")
165 |     print(f"Response: {response}")
166 | 
167 | if __name__ == "__main__":
168 |     main(if_audio_input=False)
169 | ```
170 | 
171 | ## Chatbots
172 | 
173 | ### Simple Chatbot
174 | 
175 | ```python
176 | """
177 | This is a simple chatbot that uses the Empire Chain library to create a chatbot.
178 | Please run the following commands to install the necessary dependencies and store keys in .env:
179 | !pip install empire-chain streamlit
180 | !streamlit run app.py
181 | """
182 | from empire_chain.streamlit import Chatbot
183 | from empire_chain.llms.llms import OpenAILLM
184 | 
185 | chatbot = Chatbot(title="Empire Chatbot", llm=OpenAILLM("gpt-4o-mini"))
186 | chatbot.chat()
187 | ```
188 | 
189 | ### Chat with Image
190 | 
191 | ```python
192 | """
193 | This is a simple vision chatbot that uses the Empire Chain library to chat about uploaded images.
194 | Please run the following commands to install the necessary dependencies and store your Groq key in .env (https://console.groq.com/keys):
195 | !pip install empire-chain streamlit
196 | !streamlit run app.py
197 | """
198 | from empire_chain.streamlit import VisionChatbot
199 | 
200 | chatbot = VisionChatbot(title="Empire Chatbot")
201 | chatbot.chat()
202 | ```
203 | 
204 | ### Chat with PDF
205 | 
206 | ```python
207 | """
208 | This is a simple chatbot that uses the Empire Chain library to create a PDF chatbot.
209 | Please run the following commands to install the necessary dependencies and store keys in .env:
210 | !pip install empire-chain streamlit
211 | !streamlit run app.py
212 | """
213 | from empire_chain.streamlit import PDFChatbot
214 | from empire_chain.llms.llms import OpenAILLM
215 | from empire_chain.vector_stores import QdrantVectorStore
216 | from empire_chain.embeddings import OpenAIEmbeddings
217 | 
218 | pdf_chatbot = PDFChatbot(
219 |     title="PDF Chatbot",
220 |     llm=OpenAILLM("gpt-4o-mini"),
221 |     vector_store=QdrantVectorStore(":memory:"),
222 |     embeddings=OpenAIEmbeddings("text-embedding-3-small")
223 | )
224 | pdf_chatbot.chat()
225 | ```
226 | 
227 | ## PhiData Agents
228 | 
229 | ### Web Agent
230 | 
231 | ```python
232 | """
233 | This is a simple example of how to use the WebAgent class to generate web data.
234 | Please run the following command to install the necessary dependencies and store keys in .env:
235 | !pip install empire-chain phidata duckduckgo-search
236 | """
237 | from empire_chain.phidata.web_agent import WebAgent
238 | 
239 | web_agent = WebAgent()
240 | 
241 | web_agent.generate("What is the price of Tesla?")
242 | ```
243 | 
244 | ### Finance Agent
245 | 
246 | ```python
247 | """
248 | This is a simple example of how to use the PhiFinanceAgent class to generate financial data.
249 | Please run the following command to install the necessary dependencies and store keys in .env:
250 | !pip install empire-chain phidata yfinance
251 | """
252 | from empire_chain.phidata.finance_agent import PhiFinanceAgent
253 | 
254 | finance_agent = PhiFinanceAgent()
255 | 
256 | finance_agent.generate("Analyze TSLA stock performance")
257 | ```
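Since both agents expose the same `generate()` entry point, they can be driven side by side from one script; the queries below are illustrative:

```python
from empire_chain.phidata.web_agent import WebAgent
from empire_chain.phidata.finance_agent import PhiFinanceAgent

web_agent = WebAgent()
finance_agent = PhiFinanceAgent()

# Both agents share the generate() interface shown above.
web_agent.generate("Summarize this week's Tesla headlines")
finance_agent.generate("Analyze TSLA stock performance")
```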
258 | 
259 | ## Tools
260 | 
261 | ### File Reader
262 | 
263 | ```python
264 | """
265 | This is a simple file reader that uses the Empire Chain library to read a file.
266 | It supports:
267 | 1. PDF files (.pdf)
268 | 2. Microsoft Word documents (.docx)
269 | 3. Text files (.txt)
270 | 4. JSON files (.json)
271 | 5. CSV files (.csv)
272 | 6. Google Drive files (.gdrive)
273 | """
274 | from empire_chain.tools.file_reader import DocumentReader
275 | 
276 | reader = DocumentReader()
277 | 
278 | text = reader.read("https://drive.google.com/file/d/1t0Itw6oGO2iVusp=sharing")
279 | print(text)
280 | 
281 | text = reader.read("input.pdf")
282 | print(text)
283 | ```
284 | 
285 | ### Website Crawler
286 | 
287 | ```python
288 | """
289 | This is a simple crawler that uses the Empire Chain library to crawl a website and save the content as markdown.
290 | Please run the following command to install the necessary dependencies and store keys in .env:
291 | !pip install empire-chain crawl4ai
292 | """
293 | from empire_chain.tools.crawl4ai import Crawler
294 | 
295 | crawler = Crawler()
296 | result = crawler.crawl(url="https://www.geekroom.in", format="markdown")
297 | print(result)
298 | ```
299 | 
300 | ### Speech to Text
301 | 
302 | ```python
303 | from empire_chain.stt.stt import GroqSTT
304 | from empire_chain.stt.stt import HuggingFaceSTT
305 | from dotenv import load_dotenv
306 | 
307 | load_dotenv()
308 | 
309 | stt = GroqSTT()
310 | text = stt.transcribe("audio.mp3")
311 | print(text)
312 | 
313 | stt = HuggingFaceSTT()
314 | text = stt.transcribe("audio.mp3")
315 | print(text)
316 | ```
317 | 
318 | ## Cool Stuff
319 | 
320 | ### Visualize Data
321 | 
322 | ```python
323 | """
324 | This is a simple example of how to use the DataAnalyzer and ChartFactory classes to visualize data.
325 | Please run the following command to install the necessary dependencies and store keys in .env:
326 | !pip install empire-chain matplotlib
327 | 
328 | The factory accepts these chart type names:
329 | _chart_types = {
330 |     'Line Chart': LineChart,
331 |     'Pie Chart': PieChart,
332 |     'Bar Graph': BarGraph,
333 |     'Scatter Plot': ScatterChart,
334 |     'Histogram': Histogram,
335 |     'Box Plot': BoxPlot
336 | }
337 | Please adhere to this naming convention for the chart type.
338 | """
339 | from empire_chain.cool_stuff.visualizer import DataAnalyzer, ChartFactory
340 | 
341 | data = """
342 | Empire Chain raised $100M from a new investor in 2024 and $50M from a new investor in 2023.
343 | """
344 | 
345 | analyzer = DataAnalyzer()
346 | analyzed_data = analyzer.analyze(data)
347 | 
348 | chart = ChartFactory.create_chart('Bar Graph', analyzed_data)
349 | chart.show()
350 | ```
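If you already have structured numbers, you can skip the `DataAnalyzer` step and hand `ChartFactory` a dict in the same shape the analyzer returns. The figures below are invented for illustration:

```python
from empire_chain.cool_stuff.visualizer import ChartFactory

# Dict in the shape DataAnalyzer produces; the numbers are illustrative.
analyzed_data = {
    "title": "Empire Chain funding by year",
    "x-axis": "Year",
    "y-axis": "Amount raised ($M)",
    "datapoints": {"2023": 50, "2024": 100},
}

chart = ChartFactory.create_chart('Line Chart', analyzed_data)
chart.show()
```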
351 | 
352 | ### Text to Podcast
353 | 
354 | ```python
355 | """
356 | This is a simple example of how to use the GeneratePodcast class to generate a podcast.
357 | Please run the following command to install the necessary dependencies and store keys in .env:
358 | !pip install empire-chain kokoro_onnx (It might take a while to download the model files)
359 | """
360 | from empire_chain.cool_stuff.podcast import GeneratePodcast
361 | 
362 | podcast = GeneratePodcast()
363 | podcast.generate(topic="The boom of meal-plan and recipe-generation apps")
364 | ```
365 | 
366 | ## Contributing
367 | 
368 | ```bash
369 | git clone https://github.com/manas95826/empire-chain.git
370 | cd empire-chain
371 | pip install -e .
372 | ```
373 | 
374 | ## License
375 | 
376 | This project is licensed under the MIT License - see the [LICENSE](LICENSE) file for details.
--------------------------------------------------------------------------------