├── .gitignore
├── PydanticAI
│   ├── .env.example
│   ├── .gitignore
│   ├── README.md
│   ├── data
│   │   ├── invoice_query.json
│   │   ├── product_query.json
│   │   └── shipping_query.json
│   ├── requirements.txt
│   └── src
│       ├── introduction.py
│       └── utils
│           └── markdown.py
├── README.md
├── arize_ai-phoenix
│   ├── .env.example
│   ├── README.md
│   ├── app.py
│   ├── chainlit.md
│   ├── requirements.txt
│   └── storage_mini
│       ├── default__vector_store.json
│       ├── docstore.json
│       ├── graph_store.json
│       ├── image__vector_store.json
│       └── index_store.json
├── chroma+langchain
│   ├── .env.example
│   ├── README.md
│   ├── app.py
│   └── requirements.txt
├── comet-ml-opik
│   └── README.md
├── google
│   ├── gemini.ipynb
│   └── requirements.txt
├── groq
│   ├── .env.example
│   ├── README.md
│   ├── app.py
│   └── groq-test.ipynb
├── knowledge-graph-rag
│   ├── .env.example
│   ├── README.md
│   ├── data
│   │   └── paul_graham
│   │       └── paul_graham_essay.txt
│   ├── example_hotels.html
│   ├── knowledge_graph.ipynb
│   ├── requirements.txt
│   └── storage
│       ├── default__vector_store.json
│       ├── docstore.json
│       ├── graph_store.json
│       ├── image__vector_store.json
│       └── index_store.json
├── langfuse
│   ├── .env.example
│   ├── README.md
│   ├── app.py
│   ├── chainlit.md
│   ├── requirements.txt
│   └── storage_mini
│       ├── default__vector_store.json
│       ├── docstore.json
│       ├── graph_store.json
│       ├── image__vector_store.json
│       └── index_store.json
├── literalai
│   ├── .env.example
│   ├── RAG LlamaIndex.ipynb
│   ├── README.md
│   └── requirements.txt
├── llama_agents
│   ├── .env.example
│   ├── README.md
│   ├── agentic_rag_toolservice.ipynb
│   ├── data
│   │   ├── GOOG-10-Q-Q1-2023.pdf
│   │   └── goog-10-q-q1-2024.pdf
│   ├── requirements.txt
│   └── storage
│       ├── q1-23
│       │   ├── default__vector_store.json
│       │   ├── docstore.json
│       │   ├── graph_store.json
│       │   ├── image__vector_store.json
│       │   └── index_store.json
│       └── q1-24
│           ├── default__vector_store.json
│           ├── docstore.json
│           ├── graph_store.json
│           ├── image__vector_store.json
│           └── index_store.json
├── llamaindex
│   ├── .env.example
│   ├── README.md
│   ├── app.py
│   ├── rag.ipynb
│   └── requirements.txt
├── mlflow
│   └── README.md
├── neo4j
│   ├── .env.example
│   ├── README.md
│   ├── data
│   │   └── Graph_Retrieval-Augmented_Generation_A_Survey.pdf
│   ├── requirements.txt
│   ├── storage_hybrid_gemini
│   │   ├── docstore.json
│   │   ├── graph_store.json
│   │   ├── image__vector_store.json
│   │   └── index_store.json
│   └── vector_graph_rag.ipynb
├── pinecone+langchain
│   ├── .env.example
│   ├── README.md
│   ├── requirements.txt
│   ├── rerank+llm.ipynb
│   └── upsert.ipynb
├── property-store-graph-rag
│   ├── .env.example
│   ├── README.md
│   ├── data
│   │   └── Graph_Retrieval-Augmented_Generation_A_Survey.pdf
│   ├── pg-storage
│   │   ├── default__vector_store.json
│   │   ├── docstore.json
│   │   ├── image__vector_store.json
│   │   └── index_store.json
│   ├── property-graph-rag.ipynb
│   └── requirements.txt
├── qdrant+langchain
│   ├── README.md
│   └── portfolio_manager
│       ├── .env.example
│       ├── app.py
│       ├── data_insertion.ipynb
│       ├── earning_report_analysis.py
│       ├── gradio_app.py
│       ├── portfolio_constants.py
│       ├── requirements.txt
│       ├── stock_price_evaluator.py
│       └── streamlit_app.py
└── rag_eval
    ├── .env.example
    ├── README.md
    ├── data
    │   └── Graph_Retrieval-Augmented_Generation_A_Survey.pdf
    ├── eval_rag.ipynb
    └── requirements.txt

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | *$py.class
5 | qdrant+langchain/portfolio_manager/flagged/
6 | 
7 | # C extensions
8 | *.so
9 | 
10 | # Distribution / packaging
11 | .Python
12 | build/
13 | develop-eggs/
14 | dist/
15 | downloads/
16 | eggs/
17 | .eggs/
18 | lib/
19 | lib64/
20 | parts/
21 | sdist/
22 | var/
23 | wheels/
24 | share/python-wheels/
25 | *.egg-info/
26 | .installed.cfg
27 | *.egg
28 | MANIFEST
29 | 
30 | # PyInstaller
31 | # Usually these files are written by a python script from a template
32 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
33 | *.manifest
34 | *.spec
35 | 
36 | # Installer logs
37 | pip-log.txt
38 | pip-delete-this-directory.txt
39 | 
40 | # Unit test / coverage reports
41 | htmlcov/
42 | .tox/
43 | .nox/
44 | .coverage
45 | .coverage.*
46 | .cache
47 | nosetests.xml
48 | coverage.xml
49 | *.cover
50 | *.py,cover
51 | .hypothesis/
52 | .pytest_cache/
53 | cover/
54 | 
55 | # Translations
56 | *.mo
57 | *.pot
58 | 
59 | # Django stuff:
60 | *.log
61 | local_settings.py
62 | db.sqlite3
63 | db.sqlite3-journal
64 | 
65 | # Flask stuff:
66 | instance/
67 | .webassets-cache
68 | 
69 | # Scrapy stuff:
70 | .scrapy
71 | 
72 | # Sphinx documentation
73 | docs/_build/
74 | 
75 | # PyBuilder
76 | .pybuilder/
77 | target/
78 | 
79 | # Jupyter Notebook
80 | .ipynb_checkpoints
81 | 
82 | # IPython
83 | profile_default/
84 | ipython_config.py
85 | 
86 | # pyenv
87 | # For a library or package, you might want to ignore these files since the code is
88 | # intended to run in multiple environments; otherwise, check them in:
89 | # .python-version
90 | 
91 | # pipenv
92 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
93 | # However, in case of collaboration, if having platform-specific dependencies or dependencies
94 | # having no cross-platform support, pipenv may install dependencies that don't work, or not
95 | # install all needed dependencies.
96 | #Pipfile.lock
97 | 
98 | # poetry
99 | # Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control.
100 | # This is especially recommended for binary packages to ensure reproducibility, and is more
101 | # commonly ignored for libraries.
102 | # https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control
103 | #poetry.lock
104 | 
105 | # pdm
106 | # Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control.
107 | #pdm.lock
108 | # pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it
109 | # in version control.
110 | # https://pdm.fming.dev/#use-with-ide
111 | .pdm.toml
112 | 
113 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm
114 | __pypackages__/
115 | 
116 | # Celery stuff
117 | celerybeat-schedule
118 | celerybeat.pid
119 | 
120 | # SageMath parsed files
121 | *.sage.py
122 | 
123 | # Environments
124 | .env
125 | .venv
126 | env/
127 | venv/
128 | ENV/
129 | env.bak/
130 | venv.bak/
131 | 
132 | # Spyder project settings
133 | .spyderproject
134 | .spyproject
135 | 
136 | # Rope project settings
137 | .ropeproject
138 | 
139 | # mkdocs documentation
140 | /site
141 | 
142 | # mypy
143 | .mypy_cache/
144 | .dmypy.json
145 | dmypy.json
146 | 
147 | # Pyre type checker
148 | .pyre/
149 | 
150 | # pytype static type analyzer
151 | .pytype/
152 | 
153 | # Cython debug symbols
154 | cython_debug/
155 | 
156 | # PyCharm
157 | # JetBrains specific template is maintained in a separate JetBrains.gitignore that can
158 | # be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore
159 | # and can be added to the global gitignore or merged into this file. For a more nuclear
160 | # option (not recommended) you can uncomment the following to ignore the entire idea folder.
161 | #.idea/
162 | 
163 | # chainlit
164 | .chainlit
165 | .files
166 | chainlit.md
167 | 
168 | # llama-index
169 | llamaindex/storage

--------------------------------------------------------------------------------
/PydanticAI/.env.example:
--------------------------------------------------------------------------------
1 | # Environment variables go here; they can be read by the `python-dotenv` package:
2 | #
3 | # ----------------------------------------------------------------
4 | # from dotenv import load_dotenv
5 | #
6 | # load_dotenv()
7 | # API_KEY = os.getenv("API_KEY")
8 | # ----------------------------------------------------------------
9 | #
10 | # DO NOT ADD THIS FILE TO VERSION CONTROL!
11 | 
12 | 
13 | OPENAI_API_KEY=your-api-key

--------------------------------------------------------------------------------
/PydanticAI/.gitignore:
--------------------------------------------------------------------------------
1 | # Byte-compiled / optimized / DLL files
2 | __pycache__/
3 | *.py[cod]
4 | 
5 | # C extensions
6 | *.so
7 | 
8 | # Distribution / packaging
9 | .Python
10 | env/
11 | build/
12 | develop-eggs/
13 | dist/
14 | downloads/
15 | eggs/
16 | .eggs/
17 | lib/
18 | lib64/
19 | parts/
20 | sdist/
21 | var/
22 | *.egg-info/
23 | .installed.cfg
24 | *.egg
25 | 
26 | # PyInstaller
27 | # Usually these files are written by a python script from a template
28 | # before PyInstaller builds the exe, so as to inject date/other infos into it.
29 | *.manifest
30 | *.spec
31 | 
32 | # Installer logs
33 | pip-log.txt
34 | pip-delete-this-directory.txt
35 | 
36 | # Unit test / coverage reports
37 | htmlcov/
38 | .tox/
39 | .coverage
40 | .coverage.*
41 | .cache
42 | nosetests.xml
43 | coverage.xml
44 | *.cover
45 | 
46 | # Translations
47 | *.mo
48 | *.pot
49 | 
50 | # Django stuff:
51 | *.log
52 | 
53 | # Sphinx documentation
54 | docs/_build/
55 | 
56 | # PyBuilder
57 | target/
58 | 
59 | # DotEnv configuration
60 | .env
61 | 
62 | # Database
63 | *.db
64 | *.rdb
65 | 
66 | # Pycharm
67 | .idea
68 | 
69 | # VS Code
70 | .vscode/
71 | *.code-workspace
72 | 
73 | # Spyder
74 | .spyproject/
75 | 
76 | # Jupyter NB Checkpoints
77 | .ipynb_checkpoints/
78 | 
79 | # Mac OS-specific storage files
80 | .DS_Store
81 | 
82 | # vim
83 | *.swp
84 | *.swo
85 | 
86 | # Mypy cache
87 | .mypy_cache/
88 | 
89 | # Exclude virtual environment
90 | .venv/
91 | 
92 | # Exclude trained models
93 | /models/
94 | 
95 | # exclude data from source control by default
96 | # /data/

--------------------------------------------------------------------------------
/PydanticAI/README.md:
--------------------------------------------------------------------------------
1 | To run this project, install the dependencies:
2 | 
3 | ```
4 | pip install -r requirements.txt
5 | ```
6 | 
7 | Copy ```.env.example``` to ```.env``` and fill in your credentials.
8 | 
9 | Then head over to ```src``` and execute ```introduction.py```.
10 | 
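A minimal setup sketch for the README steps above (assuming a Unix-like shell; `introduction.py` only needs `OPENAI_API_KEY`, per `.env.example`):

```
cp .env.example .env                 # then edit .env and set OPENAI_API_KEY
pip install -r requirements.txt
cd src && python introduction.py
```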
--------------------------------------------------------------------------------
/PydanticAI/data/invoice_query.json:
--------------------------------------------------------------------------------
1 | {
2 |     "ticket_id": "INV-001",
3 |     "customer_name": "John Doe",
4 |     "email": "john.doe@example.com",
5 |     "query_type": "invoice",
6 |     "description": "I need a copy of the invoice for my order #ORD-2024-123. I can't find it in my email.",
7 |     "order_id": "ORD-2024-123"
8 | }

--------------------------------------------------------------------------------
/PydanticAI/data/product_query.json:
--------------------------------------------------------------------------------
1 | {
2 |     "ticket_id": "PRD-001",
3 |     "customer_name": "Alice Johnson",
4 |     "email": "alice.j@example.com",
5 |     "query_type": "product",
6 |     "description": "I'm interested in the Pro Model X-1000. Does it come with a warranty? What are the technical specifications?",
7 |     "order_id": null
8 | }

--------------------------------------------------------------------------------
/PydanticAI/data/shipping_query.json:
--------------------------------------------------------------------------------
1 | {
2 |     "ticket_id": "SHP-001",
3 |     "customer_name": "Jane Smith",
4 |     "email": "jane.smith@example.com",
5 |     "query_type": "shipping",
6 |     "description": "What is your shipping policy for international orders? I'm based in Canada and want to know the estimated delivery time.",
7 |     "order_id": null
8 | }

--------------------------------------------------------------------------------
/PydanticAI/requirements.txt:
--------------------------------------------------------------------------------
1 | pydantic==2.11.0
2 | pydantic-ai==0.0.46
3 | openai==1.69.0
4 | python-dotenv==1.1.0
5 | pytest==8.3.5
6 | nest_asyncio==1.6.0

--------------------------------------------------------------------------------
/PydanticAI/src/introduction.py:
--------------------------------------------------------------------------------
1 | """
2 | Introduction to PydanticAI.
3 | 
4 | This module demonstrates how PydanticAI makes it easier to build
5 | production-grade LLM-powered systems with type safety and structured responses.
6 | """
7 | 
8 | from typing import Dict, List, Optional
9 | import nest_asyncio
10 | from pydantic import BaseModel, Field
11 | from pydantic_ai import Agent, ModelRetry, RunContext, Tool
12 | from pydantic_ai.models.openai import OpenAIModel
13 | from utils.markdown import to_markdown
14 | from dotenv import load_dotenv
15 | load_dotenv()
16 | nest_asyncio.apply()
17 | 
18 | model = OpenAIModel("gpt-4o")
19 | 
20 | # --------------------------------------------------------------
21 | # 1. Simple Agent - Hello World Example
22 | # --------------------------------------------------------------
23 | """
24 | This example demonstrates the basic usage of PydanticAI agents.
25 | Key concepts:
26 | - Creating a basic agent with a system prompt
27 | - Running synchronous queries
28 | - Accessing response data, message history, and costs
29 | """
30 | 
31 | agent1 = Agent(
32 |     model=model,
33 |     system_prompt="You are a helpful customer support agent. Be concise and friendly.",
34 | )
35 | 
36 | # Example usage of basic agent
37 | response = agent1.run_sync("How can I track my order #12345?")
38 | print(response.data)
39 | print(response.all_messages())
40 | # print(response.cost())
41 | 
42 | 
43 | response2 = agent1.run_sync(
44 |     user_prompt="What was my previous question?",
45 |     message_history=response.new_messages(),
46 | )
47 | print(response2.data)
48 | 
49 | # --------------------------------------------------------------
50 | # 2. Agent with Structured Response
51 | # --------------------------------------------------------------
52 | """
53 | This example shows how to get structured, type-safe responses from the agent.
54 | Key concepts:
55 | - Using Pydantic models to define response structure
56 | - Type validation and safety
57 | - Field descriptions for better model understanding
58 | """
59 | 
60 | 
61 | class ResponseModel(BaseModel):
62 |     """Structured response with metadata."""
63 | 
64 |     response: str
65 |     needs_escalation: bool
66 |     follow_up_required: bool
67 |     sentiment: str = Field(description="Customer sentiment analysis")
68 | 
69 | 
70 | agent2 = Agent(
71 |     model=model,
72 |     result_type=ResponseModel,
73 |     system_prompt=(
74 |         "You are an intelligent customer support agent. "
75 |         "Analyze queries carefully and provide structured responses."
76 |     ),
77 | )
78 | 
79 | response = agent2.run_sync("How can I track my order #12345?")
80 | print(response.data.model_dump_json(indent=2))
81 | 
82 | 
83 | # --------------------------------------------------------------
84 | # 3. Agent with Structured Response & Dependencies
85 | # --------------------------------------------------------------
86 | """
87 | This example demonstrates how to use dependencies and context in agents.
88 | Key concepts:
89 | - Defining complex data models with Pydantic
90 | - Injecting runtime dependencies
91 | - Using dynamic system prompts
92 | """
93 | 
94 | 
95 | # Define order schema
96 | class Order(BaseModel):
97 |     """Structure for order details."""
98 | 
99 |     order_id: str
100 |     status: str
101 |     items: List[str]
102 | 
103 | 
104 | # Define customer schema
105 | class CustomerDetails(BaseModel):
106 |     """Structure for incoming customer queries."""
107 | 
108 |     customer_id: str
109 |     name: str
110 |     email: str
111 |     orders: Optional[List[Order]] = None
112 | 
113 | 
114 | # Agent with structured output and dependencies
115 | agent5 = Agent(
116 |     model=model,
117 |     result_type=ResponseModel,
118 |     deps_type=CustomerDetails,
119 |     retries=3,
120 |     system_prompt=(
121 |         "You are an intelligent customer support agent. "
122 |         "Analyze queries carefully and provide structured responses. "
123 |         "Always greet the customer and provide a helpful response."
124 |     ),  # These are known when writing the code
125 | )
126 | 
127 | 
128 | # Add dynamic system prompt based on dependencies
129 | @agent5.system_prompt
130 | async def add_customer_name(ctx: RunContext[CustomerDetails]) -> str:
131 |     return f"Customer details: {to_markdown(ctx.deps)}"  # These depend in some way on context that isn't known until runtime
132 | 
133 | 
134 | customer = CustomerDetails(
135 |     customer_id="1",
136 |     name="John Doe",
137 |     email="john.doe@example.com",
138 |     orders=[
139 |         Order(order_id="12345", status="shipped", items=["Blue Jeans", "T-Shirt"]),
140 |     ],
141 | )
142 | 
143 | response = agent5.run_sync(user_prompt="What did I order?", deps=customer)
144 | 
145 | response.all_messages()
146 | print(response.data.model_dump_json(indent=2))
147 | 
148 | print(
149 |     "Customer Details:\n"
150 |     f"Name: {customer.name}\n"
151 |     f"Email: {customer.email}\n\n"
152 |     "Response Details:\n"
153 |     f"{response.data.response}\n\n"
154 |     "Status:\n"
155 |     f"Follow-up Required: {response.data.follow_up_required}\n"
156 |     f"Needs Escalation: {response.data.needs_escalation}"
157 | )
158 | 
159 | 
160 | # --------------------------------------------------------------
161 | # 4. Agent with Tools
162 | # --------------------------------------------------------------
163 | 
164 | """
165 | This example shows how to enhance agents with custom tools.
166 | Key concepts:
167 | - Creating and registering tools
168 | - Accessing context in tools
169 | """
170 | 
171 | shipping_info_db: Dict[str, str] = {
172 |     "12345": "Shipped on 2024-12-01",
173 |     "67890": "Out for delivery",
174 | }
175 | 
176 | 
177 | def get_shipping_info(ctx: RunContext[CustomerDetails]) -> str:
178 |     """Get the customer's shipping information."""
179 |     return shipping_info_db[ctx.deps.orders[0].order_id]
180 | 
181 | 
182 | # Agent with structured output and dependencies
183 | agent5 = Agent(
184 |     model=model,
185 |     result_type=ResponseModel,
186 |     deps_type=CustomerDetails,
187 |     retries=3,
188 |     system_prompt=(
189 |         "You are an intelligent customer support agent. "
190 |         "Analyze queries carefully and provide structured responses. "
191 |         "Use tools to look up relevant information. "
192 |         "Always greet the customer and provide a helpful response."
193 |     ),  # These are known when writing the code
194 |     tools=[Tool(get_shipping_info, takes_ctx=True)],  # Add tool via kwarg
195 | )
196 | 
197 | 
198 | @agent5.system_prompt
199 | async def add_customer_name(ctx: RunContext[CustomerDetails]) -> str:
200 |     return f"Customer details: {to_markdown(ctx.deps)}"
201 | 
202 | 
203 | response = agent5.run_sync(
204 |     user_prompt="What's the status of my last order?", deps=customer
205 | )
206 | 
207 | response.all_messages()
208 | print(response.data.model_dump_json(indent=2))
209 | 
210 | print(
211 |     "Customer Details:\n"
212 |     f"Name: {customer.name}\n"
213 |     f"Email: {customer.email}\n\n"
214 |     "Response Details:\n"
215 |     f"{response.data.response}\n\n"
216 |     "Status:\n"
217 |     f"Follow-up Required: {response.data.follow_up_required}\n"
218 |     f"Needs Escalation: {response.data.needs_escalation}"
219 | )
220 | 
221 | 
222 | # --------------------------------------------------------------
223 | # 5. Agent with Reflection and Self-Correction
224 | # --------------------------------------------------------------
225 | 
226 | """
227 | This example demonstrates advanced agent capabilities with self-correction.
228 | Key concepts:
229 | - Implementing self-reflection
230 | - Handling errors gracefully with retries
231 | - Using ModelRetry for automatic retries
232 | - Decorator-based tool registration
233 | """
234 | 
235 | # Simulated database of shipping information
236 | shipping_info_db: Dict[str, str] = {
237 |     "#12345": "Shipped on 2024-12-01",
238 |     "#67890": "Out for delivery",
239 | }
240 | 
241 | customer = CustomerDetails(
242 |     customer_id="1",
243 |     name="John Doe",
244 |     email="john.doe@example.com",
245 | )
246 | 
247 | # Agent with reflection and self-correction
248 | agent5 = Agent(
249 |     model=model,
250 |     result_type=ResponseModel,
251 |     deps_type=CustomerDetails,
252 |     retries=3,
253 |     system_prompt=(
254 |         "You are an intelligent customer support agent. "
255 |         "Analyze queries carefully and provide structured responses. "
256 |         "Use tools to look up relevant information. "
257 |         "Always greet the customer and provide a helpful response."
258 |     ),
259 | )
260 | 
261 | 
262 | @agent5.tool_plain()  # Add plain tool via decorator
263 | 
264 | def get_shipping_status(order_id: str) -> str:
265 |     """Get the shipping status for a given order ID."""
266 |     shipping_status = shipping_info_db.get(order_id)
267 |     if shipping_status is None:
268 |         raise ModelRetry(
269 |             f"No shipping information found for order ID {order_id}. "
270 |             "Make sure the order ID starts with a #: e.g., #624743 "
271 |             "Self-correct this if needed and try again."
272 |         )
273 |     return shipping_info_db[order_id]
274 | 
275 | 
276 | # Example usage
277 | response = agent5.run_sync(
278 |     user_prompt="What's the status of my last order 12345?", deps=customer
279 | )
280 | 
281 | response.all_messages()
282 | print(response.data.model_dump_json(indent=2))
283 | 
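Since ```pytest``` is pinned in ```requirements.txt``` but no tests ship with the project, a natural next step is unit-testing these agents without real API calls. A hypothetical sketch using PydanticAI's ```TestModel``` (the file name and assertion are illustrative, not part of the repo):

```python
# test_introduction.py -- hypothetical test module, not part of the repo
from pydantic_ai import Agent
from pydantic_ai.models.test import TestModel


def test_agent_replies_without_real_llm():
    # TestModel stubs out the LLM, so no API key or network access is needed.
    agent = Agent(TestModel(), system_prompt="You are a helpful support agent.")
    result = agent.run_sync("How can I track my order #12345?")
    assert result.data  # TestModel returns a deterministic stand-in response
```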
--------------------------------------------------------------------------------
/PydanticAI/src/utils/markdown.py:
--------------------------------------------------------------------------------
1 | from pydantic import BaseModel
2 | 
3 | 
4 | def to_markdown(data, indent=0):
5 |     markdown = ""
6 |     if isinstance(data, BaseModel):
7 |         data = data.model_dump()
8 |     if isinstance(data, dict):
9 |         for key, value in data.items():
10 |             markdown += f"{'#' * (indent + 2)} {key.upper()}\n"
11 |             if isinstance(value, (dict, list, BaseModel)):
12 |                 markdown += to_markdown(value, indent + 1)
13 |             else:
14 |                 markdown += f"{value}\n\n"
15 |     elif isinstance(data, list):
16 |         for item in data:
17 |             if isinstance(item, (dict, list, BaseModel)):
18 |                 markdown += to_markdown(item, indent)
19 |             else:
20 |                 markdown += f"- {item}\n"
21 |         markdown += "\n"
22 |     else:
23 |         markdown += f"{data}\n\n"
24 |     return markdown
25 | 
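A quick usage sketch for this helper (the example model is hypothetical; the exact output depends on what you pass in):

```python
# Run from within src/ so that `utils` is importable.
from pydantic import BaseModel

from utils.markdown import to_markdown


class User(BaseModel):
    name: str
    tags: list[str]


print(to_markdown(User(name="Ada", tags=["admin", "beta"])))
# ## NAME
# Ada
#
# ## TAGS
# - admin
# - beta
```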
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # GenerativeAI
2 | Experimentation
3 | 
4 | 
5 | 
6 | 
7 | 
8 | - LLMs
9 | - Frameworks - Langchain / Llamaindex
10 | - Vector DBs
11 | - RAG systems
12 | - Evaluations, Monitoring, Observability
13 | - AI Agents
14 | - Graph RAG
15 | - Hybrid RAG
16 | - Performance Optimization
17 | - Production
18 | - Use Cases
19 | - And more...
20 | 

--------------------------------------------------------------------------------
/arize_ai-phoenix/.env.example:
--------------------------------------------------------------------------------
1 | GEMINI_API_KEY=XYZ
2 | GROQ_API_KEY=XYZ
3 | COHERE_API_KEY=XYZ
4 | LANGFUSE_PUBLIC_KEY=XYZ
5 | LANGFUSE_SECRET_KEY=XYZ
6 | LANGFUSE_HOST=https://cloud.langfuse.com

--------------------------------------------------------------------------------
/arize_ai-phoenix/README.md:
--------------------------------------------------------------------------------
1 | Arize Phoenix - AI Observability and Evaluation
2 | 
3 | Phoenix is an open-source observability library designed for experimentation, evaluation, and troubleshooting. It allows AI Engineers and Data Scientists to quickly visualize their data, evaluate performance, track down issues, and export data for improvement.
4 | 
5 | [Official Docs](https://docs.arize.com/phoenix)
6 | 
7 | To run the application:
8 | ```
9 | pip install -r requirements.txt
10 | chainlit run app.py
11 | ```
12 | 
13 | For the dashboard, head to ```localhost:6006```
14 | 
15 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/67add0bd-fd56-478b-a982-2eb48861c7e5)
16 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/db2c8631-5b2f-4b3e-8c15-503aa4280f92)

--------------------------------------------------------------------------------
/arize_ai-phoenix/app.py:
--------------------------------------------------------------------------------
1 | from llama_index.core import StorageContext, ServiceContext, load_index_from_storage
2 | from llama_index.core.callbacks.base import CallbackManager
3 | from llama_index.embeddings.huggingface import HuggingFaceEmbedding
4 | from llama_index.llms.groq import Groq
5 | import os
6 | from dotenv import load_dotenv
7 | load_dotenv()
8 | import chainlit as cl
9 | 
10 | # Phoenix can display in real time the traces automatically
11 | # collected from your LlamaIndex application.
12 | import phoenix as px
13 | # Look for a URL in the output to open the App in a browser.
14 | px.launch_app()
15 | # The App is initially empty, but as you proceed with the steps below,
16 | # traces will appear automatically as your LlamaIndex application runs.
17 | 
18 | from llama_index.core import set_global_handler
19 | 
20 | set_global_handler("arize_phoenix")
21 | 
22 | # Run all of your LlamaIndex applications as usual and traces
23 | # will be collected and displayed in Phoenix.
24 | 
25 | GROQ_API_KEY = os.getenv("GROQ_API_KEY")
26 | 
27 | @cl.on_chat_start
28 | async def factory():
29 |     storage_context = StorageContext.from_defaults(persist_dir="./storage_mini")
30 | 
31 |     embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2")
32 | 
33 |     llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY)
34 | 
35 |     service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm,
36 |         callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]),
37 |     )
38 | 
39 |     index = load_index_from_storage(storage_context, service_context=service_context)
40 | 
41 |     chat_engine = index.as_chat_engine(service_context=service_context)
42 | 
43 |     cl.user_session.set("chat_engine", chat_engine)
44 | 
45 | @cl.on_message
46 | async def main(message: cl.Message):
47 |     chat_engine = cl.user_session.get("chat_engine")
48 |     response = await cl.make_async(chat_engine.chat)(message.content)
49 | 
50 |     response_message = cl.Message(content="")
51 | 
52 |     for token in response.response:
53 |         await response_message.stream_token(token=token)
54 | 
55 |     await response_message.send()
56 | 

--------------------------------------------------------------------------------
/arize_ai-phoenix/chainlit.md:
--------------------------------------------------------------------------------
1 | # Welcome to Chainlit! 🚀🤖
2 | 
3 | Hi there, Developer! 👋 We're excited to have you on board. Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs.
4 | 
5 | ## Useful Links 🔗
6 | 
7 | - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚
8 | - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬
9 | 
10 | We can't wait to see what you create with Chainlit! Happy coding! 💻😊
11 | 
12 | ## Welcome screen
13 | 
14 | To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty.
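Beyond the dashboard, Phoenix exposes the collected traces programmatically; a small sketch under the assumption that the Phoenix client API below matches the installed version (verify against the Phoenix docs):

```python
import phoenix as px

# With px.launch_app() running and the Chainlit app above handling traffic,
# pull the recorded spans into a pandas DataFrame for offline analysis.
client = px.Client()
spans = client.get_spans_dataframe()
print(spans.head())
```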
15 | -------------------------------------------------------------------------------- /arize_ai-phoenix/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.10.18 2 | llama-index-llms-groq==0.1.3 3 | chainlit==1.0.401 4 | groq==0.4.2 5 | python-dotenv==1.0.1 6 | llama-index-callbacks-arize-phoenix>0.1.3 7 | -------------------------------------------------------------------------------- /arize_ai-phoenix/storage_mini/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /arize_ai-phoenix/storage_mini/image__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /arize_ai-phoenix/storage_mini/index_store.json: -------------------------------------------------------------------------------- 1 | {"index_store/data": {"f42a6af7-0775-453f-9b3b-3a2ae2f23eab": {"__type__": "vector_store", "__data__": "{\"index_id\": \"f42a6af7-0775-453f-9b3b-3a2ae2f23eab\", \"summary\": null, \"nodes_dict\": {\"f719fbb6-e0ed-40ac-839a-c0d6a6505b4c\": \"f719fbb6-e0ed-40ac-839a-c0d6a6505b4c\", \"9c3dc423-ed76-48d2-b81c-0590b366d4ea\": \"9c3dc423-ed76-48d2-b81c-0590b366d4ea\", \"e8dc3a2b-45e9-4a4d-968f-df887afb4193\": \"e8dc3a2b-45e9-4a4d-968f-df887afb4193\", \"36830b4c-c762-45dc-bb77-2693fc244f1d\": \"36830b4c-c762-45dc-bb77-2693fc244f1d\", \"15b83726-755a-4f0c-b035-fe7b7be94d29\": \"15b83726-755a-4f0c-b035-fe7b7be94d29\", \"9c3c3031-2dfe-4b0c-ae9a-ec6fa4178520\": \"9c3c3031-2dfe-4b0c-ae9a-ec6fa4178520\", \"1e00f16c-0f83-4d24-9122-0e3c5dd43381\": \"1e00f16c-0f83-4d24-9122-0e3c5dd43381\", \"18302cce-5e0a-4808-8d63-8eab892c9136\": \"18302cce-5e0a-4808-8d63-8eab892c9136\", \"eb32dd9c-263c-4b5a-a3f1-2090aad558ce\": \"eb32dd9c-263c-4b5a-a3f1-2090aad558ce\", \"95fe93ab-e60b-471d-b80a-4dfa7d35d665\": \"95fe93ab-e60b-471d-b80a-4dfa7d35d665\", \"b3546102-2e25-4f07-a23b-a78da158ae34\": \"b3546102-2e25-4f07-a23b-a78da158ae34\", \"647ce2bb-cd94-42c8-b4c5-0f81509da4fb\": \"647ce2bb-cd94-42c8-b4c5-0f81509da4fb\", \"e8cff6e2-bd24-4282-b4e1-a22b95a53fd1\": \"e8cff6e2-bd24-4282-b4e1-a22b95a53fd1\", \"cd3b0c9a-43e9-482c-af84-493f6b5b98bb\": \"cd3b0c9a-43e9-482c-af84-493f6b5b98bb\", \"fd5d33a1-5db8-4f5f-991d-b54f13243cfe\": \"fd5d33a1-5db8-4f5f-991d-b54f13243cfe\", \"91bac189-20cf-4e89-bcd1-f9112e6b23f3\": \"91bac189-20cf-4e89-bcd1-f9112e6b23f3\", \"5b749bb1-b495-4f9b-9633-c5766aaff86f\": \"5b749bb1-b495-4f9b-9633-c5766aaff86f\", \"139a71be-fecd-4dfb-ad7a-96c5c73bbd13\": \"139a71be-fecd-4dfb-ad7a-96c5c73bbd13\", \"2812354c-1a5b-4d5a-9c70-26c5778bea37\": \"2812354c-1a5b-4d5a-9c70-26c5778bea37\", \"ac0adedf-2f5c-4dbb-8c37-cc3157afed4c\": \"ac0adedf-2f5c-4dbb-8c37-cc3157afed4c\", \"af88595e-fbc6-4b03-8bd1-f4f8e0eaacc3\": \"af88595e-fbc6-4b03-8bd1-f4f8e0eaacc3\", \"9467763a-ab87-450f-a2f6-2a3f29f4b3d0\": \"9467763a-ab87-450f-a2f6-2a3f29f4b3d0\", \"b8778ed1-69a6-486a-a6f9-6598ab89d0f2\": \"b8778ed1-69a6-486a-a6f9-6598ab89d0f2\", \"8a0938fe-5c32-428e-a07a-a327cee6ba9d\": \"8a0938fe-5c32-428e-a07a-a327cee6ba9d\", \"c7d5aa44-17ed-49cf-87df-ac9d21424f00\": \"c7d5aa44-17ed-49cf-87df-ac9d21424f00\", \"48dbadb9-f968-456f-abd6-c6dbfcc05af2\": \"48dbadb9-f968-456f-abd6-c6dbfcc05af2\", \"2a5d0268-38a3-4ae6-bd4a-a0b7d618a8b7\": 
\"2a5d0268-38a3-4ae6-bd4a-a0b7d618a8b7\", \"b256ea12-a69a-4aa7-9d82-8ad303f3def5\": \"b256ea12-a69a-4aa7-9d82-8ad303f3def5\", \"cc95ab32-410b-4556-a1f9-736e34e6224a\": \"cc95ab32-410b-4556-a1f9-736e34e6224a\", \"52bcb311-a69f-4c04-8cb7-b228d5967004\": \"52bcb311-a69f-4c04-8cb7-b228d5967004\", \"df0fdc8f-c6aa-4f17-a07a-4ea94ee15f5f\": \"df0fdc8f-c6aa-4f17-a07a-4ea94ee15f5f\", \"d10de204-884e-4e17-bbb1-878d487ae62b\": \"d10de204-884e-4e17-bbb1-878d487ae62b\", \"ff107060-ec0c-4609-b181-e3a3b57910b8\": \"ff107060-ec0c-4609-b181-e3a3b57910b8\", \"41a3320c-fc40-4e81-bef2-88c34b84b83a\": \"41a3320c-fc40-4e81-bef2-88c34b84b83a\", \"100de5f1-2721-44fb-b0fe-ecd640a54aca\": \"100de5f1-2721-44fb-b0fe-ecd640a54aca\", \"69fed107-290b-43a9-981c-043aab1d04ec\": \"69fed107-290b-43a9-981c-043aab1d04ec\", \"58d09af6-0af0-4e8f-bd66-b55d7b90082f\": \"58d09af6-0af0-4e8f-bd66-b55d7b90082f\", \"9e42dc7d-25c1-43b7-bf7e-1fdee1019e1a\": \"9e42dc7d-25c1-43b7-bf7e-1fdee1019e1a\", \"81d298c7-edda-410b-bb03-8802a18c5b86\": \"81d298c7-edda-410b-bb03-8802a18c5b86\", \"6e826612-dc68-4a98-9558-f8361108f178\": \"6e826612-dc68-4a98-9558-f8361108f178\", \"7ffbc337-6667-4c61-95a3-5b797123e5d2\": \"7ffbc337-6667-4c61-95a3-5b797123e5d2\", \"8b06e867-10ed-4cd8-ba0c-a94b88d73bfb\": \"8b06e867-10ed-4cd8-ba0c-a94b88d73bfb\", \"5461bd27-e097-49dc-b58e-530fe60a89e0\": \"5461bd27-e097-49dc-b58e-530fe60a89e0\", \"a6d784e3-cf5d-4b6a-ba9c-ec9c3a6cad30\": \"a6d784e3-cf5d-4b6a-ba9c-ec9c3a6cad30\", \"5b0a4e27-8f6b-40c6-9ad9-b7f09d653460\": \"5b0a4e27-8f6b-40c6-9ad9-b7f09d653460\", \"bff67aa8-cc12-4bc4-980e-bb45daed6560\": \"bff67aa8-cc12-4bc4-980e-bb45daed6560\", \"000d2343-ddea-45c1-a806-73a83c9f92f7\": \"000d2343-ddea-45c1-a806-73a83c9f92f7\", \"04acb165-b69a-4940-ba82-9affe6fcbcb7\": \"04acb165-b69a-4940-ba82-9affe6fcbcb7\", \"0b47d871-4134-404b-9b70-2ff8f888a887\": \"0b47d871-4134-404b-9b70-2ff8f888a887\", \"189d8c40-7d88-429f-89b1-d02354f18d8b\": \"189d8c40-7d88-429f-89b1-d02354f18d8b\", \"94a5d3bd-7f93-404e-a21d-3b34af89c622\": \"94a5d3bd-7f93-404e-a21d-3b34af89c622\", \"2ee8190e-003e-47e2-b3f9-11aa768559ce\": \"2ee8190e-003e-47e2-b3f9-11aa768559ce\", \"48a819d4-e040-4801-a003-6bd8077b43ad\": \"48a819d4-e040-4801-a003-6bd8077b43ad\", \"d561a2d0-8d62-43d4-aa05-3b9c9401d490\": \"d561a2d0-8d62-43d4-aa05-3b9c9401d490\", \"8ba8d825-e588-4909-865d-6e53f79d9fab\": \"8ba8d825-e588-4909-865d-6e53f79d9fab\", \"0af23475-0461-48f6-9845-9634d2fd30bf\": \"0af23475-0461-48f6-9845-9634d2fd30bf\", \"ca52491b-16c3-4c72-9699-37b1472443eb\": \"ca52491b-16c3-4c72-9699-37b1472443eb\", \"9a09946c-02f3-4f3d-b1f3-64f9e59d142c\": \"9a09946c-02f3-4f3d-b1f3-64f9e59d142c\", \"a702db54-ac43-452a-afc9-d2225d0e26ce\": \"a702db54-ac43-452a-afc9-d2225d0e26ce\", \"15f333b0-1ea8-4fc4-8554-09cd1587e42a\": \"15f333b0-1ea8-4fc4-8554-09cd1587e42a\", \"e6b8a52f-d0ac-42f0-8848-a499c359b5f5\": \"e6b8a52f-d0ac-42f0-8848-a499c359b5f5\", \"00dfc6ed-879f-4dfc-b3b9-2426d514a8d2\": \"00dfc6ed-879f-4dfc-b3b9-2426d514a8d2\", \"f7ee6304-f742-4583-93df-7d650bd51ec2\": \"f7ee6304-f742-4583-93df-7d650bd51ec2\", \"b21e8060-a593-444b-8b26-d9b49188ca63\": \"b21e8060-a593-444b-8b26-d9b49188ca63\", \"d538a328-8f77-4921-9b28-de7abe7405a2\": \"d538a328-8f77-4921-9b28-de7abe7405a2\", \"f362deff-e7a2-428f-9e0d-01d769ae16e1\": \"f362deff-e7a2-428f-9e0d-01d769ae16e1\", \"bc3b6df4-3140-4824-829f-cd3e614a5a11\": \"bc3b6df4-3140-4824-829f-cd3e614a5a11\", \"b93f1306-641d-4d02-b329-7ce6056646f4\": \"b93f1306-641d-4d02-b329-7ce6056646f4\", \"9a551eb1-05a3-4def-9ceb-5f137869efab\": 
\"9a551eb1-05a3-4def-9ceb-5f137869efab\", \"76cd02b4-85bc-4d6e-8e61-1446722e8649\": \"76cd02b4-85bc-4d6e-8e61-1446722e8649\", \"11a0886f-1d48-4f81-97ac-76d850ffc7b0\": \"11a0886f-1d48-4f81-97ac-76d850ffc7b0\", \"0e769f56-20ce-4b8e-8b1a-0f267b9fc806\": \"0e769f56-20ce-4b8e-8b1a-0f267b9fc806\", \"f90f8e75-8e99-42e9-b0c4-965389a7f205\": \"f90f8e75-8e99-42e9-b0c4-965389a7f205\", \"7450a420-5b0a-4fc8-9db8-1a900295f084\": \"7450a420-5b0a-4fc8-9db8-1a900295f084\", \"51b44fdd-9994-489d-9a44-50f2ff305d29\": \"51b44fdd-9994-489d-9a44-50f2ff305d29\", \"f8cf5a7b-6e73-47d3-8e68-497201fd7496\": \"f8cf5a7b-6e73-47d3-8e68-497201fd7496\", \"1dadcc93-645d-4e92-8587-73595a882f06\": \"1dadcc93-645d-4e92-8587-73595a882f06\", \"8e80dfd3-5774-4a52-ae0e-14c068b91047\": \"8e80dfd3-5774-4a52-ae0e-14c068b91047\", \"75851282-14cc-4073-962a-7a29cad95540\": \"75851282-14cc-4073-962a-7a29cad95540\", \"85b2d6d5-6af5-4971-b228-94c2d73fc2b7\": \"85b2d6d5-6af5-4971-b228-94c2d73fc2b7\", \"386d6c1a-4414-45b9-9102-58ecb7671cf5\": \"386d6c1a-4414-45b9-9102-58ecb7671cf5\", \"50256f45-02a3-4bd1-87fc-b1e841fb7750\": \"50256f45-02a3-4bd1-87fc-b1e841fb7750\", \"b42fffd1-bfd6-4e9b-9f9f-6a8b92ed3e92\": \"b42fffd1-bfd6-4e9b-9f9f-6a8b92ed3e92\", \"aba0ee12-426e-40fd-93a9-4d1a0996d9ab\": \"aba0ee12-426e-40fd-93a9-4d1a0996d9ab\", \"4ec8b241-2e4e-41e0-b30e-28b1b61d7778\": \"4ec8b241-2e4e-41e0-b30e-28b1b61d7778\", \"bcba2bc2-e4d1-44b3-8c8d-1489c2e93b90\": \"bcba2bc2-e4d1-44b3-8c8d-1489c2e93b90\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}} -------------------------------------------------------------------------------- /chroma+langchain/.env.example: -------------------------------------------------------------------------------- 1 | COHERE_API_KEY=ABC 2 | 3 | -------------------------------------------------------------------------------- /chroma+langchain/README.md: -------------------------------------------------------------------------------- 1 | RAG PDF QnA with PDF previewer mode 2 | 3 | Steps to generate Cohere API here 4 | - Head onto https://dashboard.cohere.com/api-keys 5 | - Signup for free 6 | - Create API key and place it in .env same as .env.example 7 | 8 | To run the code 9 | 10 | ``` 11 | pip install -r requirements.txt 12 | python app.py 13 | ``` 14 | -------------------------------------------------------------------------------- /chroma+langchain/app.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | import fitz 3 | from PIL import Image 4 | from langchain.document_loaders import PyPDFLoader 5 | from langchain.text_splitter import RecursiveCharacterTextSplitter 6 | from langchain.vectorstores import Chroma 7 | from langchain.llms import Cohere 8 | from langchain.embeddings import CohereEmbeddings 9 | from langchain.chains import ConversationalRetrievalChain 10 | from dotenv import load_dotenv 11 | import os 12 | load_dotenv() 13 | COHERE_API_KEY = os.getenv('COHERE_API_KEY') 14 | 15 | # Global variables 16 | count = 0 17 | n = 0 18 | chat_history = [] 19 | chain = '' 20 | 21 | def pdf_reader(pdf_doc): 22 | 23 | loader = PyPDFLoader(pdf_doc.name) 24 | documents = loader.load() 25 | text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200) 26 | texts = text_splitter.split_documents(documents) 27 | embeddings = CohereEmbeddings(cohere_api_key=COHERE_API_KEY) 28 | db = Chroma.from_documents(texts, embeddings) 29 | retriever = db.as_retriever() 30 | chain = ConversationalRetrievalChain.from_llm(Cohere(), retriever=retriever, 
return_source_documents=True)
31 |     return chain
32 | 
33 | # Gradio application setup
34 | def create_demo():
35 |     with gr.Blocks(title=" Resume PDF Chatbot",
36 |                    theme="Soft"  # Change the theme here
37 |                    ) as demo:
38 | 
39 |         # Create a Gradio block
40 |         with gr.Column():
41 |             with gr.Row():
42 |                 chatbot = gr.Chatbot(value=[], elem_id='chatbot', height=680)
43 |                 show_img = gr.Image(label='PDF Preview', tool='select', height=680)
44 | 
45 |             with gr.Row():
46 |                 with gr.Column(scale=0.60):
47 |                     text_input = gr.Textbox(
48 |                         show_label=False,
49 |                         placeholder="Ask your pdf?",
50 |                         container=False)
51 | 
52 |                 with gr.Column(scale=0.20):
53 |                     submit_btn = gr.Button('Send')
54 | 
55 |                 with gr.Column(scale=0.20):
56 |                     upload_btn = gr.UploadButton("📁 Upload PDF", file_types=[".pdf"])
57 | 
58 |     return demo, chatbot, show_img, text_input, submit_btn, upload_btn
59 | 
60 | # Function to add text to the chat history
61 | def add_text(history, text):
62 |     """
63 |     Adds the user's input text to the chat history.
64 | 
65 |     Args:
66 |         history (list): List of tuples representing the chat history.
67 |         text (str): The user's input text.
68 | 
69 |     Returns:
70 |         list: Updated chat history with the new user input.
71 |     """
72 |     if not text:
73 |         raise gr.Error('Enter text')
74 |     history.append((text, ''))
75 |     return history
76 | 
77 | # Function to generate a response based on the chat history and query
78 | def generate_response(history, query, btn):
79 |     """
80 |     Generates a response based on the chat history and user's query.
81 | 
82 |     Args:
83 |         history (list): List of tuples representing the chat history.
84 |         query (str): The user's query.
85 |         btn (FileStorage): The uploaded PDF file.
86 | 
87 |     Returns:
88 |         tuple: Updated chat history with the generated response and the next page number.
89 |     """
90 |     global count, n, chat_history, chain
91 | 
92 |     if not btn:
93 |         raise gr.Error(message='Upload a PDF')
94 |     if count == 0:
95 |         chain = pdf_reader(btn)
96 |         count += 1
97 | 
98 |     result = chain({"question": query, 'chat_history': chat_history}, return_only_outputs=True)
99 |     chat_history.append((query, result["answer"]))
100 |     n = list(result['source_documents'][0])[1][1]['page']
101 | 
102 |     for char in result['answer']:
103 |         history[-1][-1] += char
104 |     return history, " "
105 | 
106 | 
107 | def render_file(file):
108 |     """
109 |     Renders a specific page of a PDF file as an image.
110 | 
111 |     Args:
112 |         file (FileStorage): The PDF file.
113 | 
114 |     Returns:
115 |         PIL.Image.Image: The rendered page as an image.
116 |     """
117 |     global n
118 |     doc = fitz.open(file.name)
119 |     page = doc[n]
120 |     # Render the page as a PNG image with a resolution of 300 DPI
121 |     pix = page.get_pixmap(matrix=fitz.Matrix(300 / 72, 300 / 72))
122 |     image = Image.frombytes('RGB', [pix.width, pix.height], pix.samples)
123 |     return image
124 | 
125 | if __name__ == '__main__':
126 |     demo, chatbot, show_img, txt, submit_btn, btn = create_demo()
127 |     # Set up event handlers
128 |     with demo:
129 |         # Event handler for uploading a PDF
130 |         btn.upload(render_file, inputs=[btn], outputs=[show_img])
131 | 
132 |         # Event handler for submitting text and generating response
133 |         submit_btn.click(add_text, inputs=[chatbot, txt], outputs=[chatbot], queue=False).\
134 |             success(generate_response, inputs=[chatbot, txt, btn], outputs=[chatbot,txt]).\
135 |             success(render_file, inputs=[btn], outputs=[show_img])
136 |     demo.launch()
137 | 

--------------------------------------------------------------------------------
/chroma+langchain/requirements.txt:
--------------------------------------------------------------------------------
1 | langchain==0.0.281
2 | pinecone-client==2.2.2
3 | cohere==4.22
4 | pypdf==3.15.5
5 | python-dotenv==1.0.0
6 | chromadb==0.4.9
7 | python-docx==0.8.11
8 | transformers==4.33.1
9 | gradio==3.44.4
10 | Pillow==10.0.0
11 | PyMuPDF==1.22.5
12 | 

--------------------------------------------------------------------------------
/comet-ml-opik/README.md:
--------------------------------------------------------------------------------
1 | https://github.com/comet-ml/opik
2 | 
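The Opik README is only a pointer to the repository; below is a minimal, hypothetical tracing sketch (assumes ```pip install opik``` and that the ```@track``` decorator behaves as documented in the linked repo — verify before relying on it):

```python
from opik import track


@track  # logs inputs, outputs and latency of each call as a trace
def answer(question: str) -> str:
    # Stub standing in for a real LLM call.
    return f"echo: {question}"


print(answer("What is RAG?"))
```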
--------------------------------------------------------------------------------
/google/requirements.txt:
--------------------------------------------------------------------------------
1 | google-generativeai==0.3.2
2 | Pillow==10.1.0
3 | 

--------------------------------------------------------------------------------
/groq/.env.example:
--------------------------------------------------------------------------------
1 | GROQ_API_KEY=ABC

--------------------------------------------------------------------------------
/groq/README.md:
--------------------------------------------------------------------------------
1 | Groq LPU speed test with Chainlit
2 | 
3 | Generate GROQ_API_KEY from https://console.groq.com/keys
4 | 
5 | To run the application:
6 | 
7 | ```
8 | chainlit run app.py -w
9 | ```
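The README promises an LPU speed test, but no timing code ships with it; a rough wall-clock sketch using the same Groq client as ```app.py``` (the words-per-second figure is only a crude proxy for token throughput):

```python
import os
import time

from dotenv import load_dotenv
from groq import Groq

load_dotenv()
client = Groq(api_key=os.getenv("GROQ_API_KEY"))

start = time.perf_counter()
completion = client.chat.completions.create(
    messages=[{"role": "user", "content": "Explain LPUs in one paragraph."}],
    model="mixtral-8x7b-32768",
)
elapsed = time.perf_counter() - start

text = completion.choices[0].message.content
# Rough throughput estimate; tokens != words, so treat this as a lower bound.
print(f"{elapsed:.2f}s elapsed, ~{len(text.split()) / elapsed:.0f} words/s")
```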
--------------------------------------------------------------------------------
/groq/app.py:
--------------------------------------------------------------------------------
1 | import chainlit as cl
2 | import os
3 | from groq import Groq
4 | from dotenv import load_dotenv
5 | load_dotenv()
6 | 
7 | groq_api_key = os.getenv("GROQ_API_KEY")
8 | 
9 | client = Groq(
10 |     api_key=groq_api_key,
11 | )
12 | 
13 | @cl.on_message
14 | async def main(message: cl.Message):
15 |     # Your custom logic goes here...
16 |     chat_completion = client.chat.completions.create(
17 |         messages=[
18 |             {
19 |                 "role": "user",
20 |                 "content": message.content,
21 |             }
22 |         ],
23 |         model="mixtral-8x7b-32768",
24 |     )
25 | 
26 |     # Send a response back to the user
27 |     await cl.Message(
28 |         content=f"{chat_completion.choices[0].message.content}",
29 |     ).send()
30 | 
31 | 

--------------------------------------------------------------------------------
/groq/groq-test.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": null,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "%pip install groq chainlit python-dotenv"
10 |    ]
11 |   },
12 |   {
13 |    "cell_type": "code",
14 |    "execution_count": 1,
15 |    "metadata": {},
16 |    "outputs": [
17 |     {
18 |      "name": "stdout",
19 |      "output_type": "stream",
20 |      "text": [
21 |       "Low latency Large Language Models (LLMs) are important in certain applications due to their ability to process and respond to inputs quickly. Latency refers to the time delay between a user's request and the system's response. In some real-time or time-sensitive applications, low latency is crucial for providing a good user experience and ensuring that the system can respond to changing conditions in a timely manner.\n",
22 |       "\n",
23 |       "For example, in conversational agents or chatbots, low latency is important for maintaining the illusion of a real-time conversation. If there is a significant delay between the user's input and the agent's response, it can disrupt the flow of the conversation and make it feel less natural. Similarly, in applications such as online gaming or financial trading, low latency is critical for enabling users to make decisions and take actions quickly based on real-time data.\n",
24 |       "\n",
25 |       "Moreover, low latency LLMs can help reduce the computational cost of running large language models. By reducing the amount of time that the model spends processing each input, it is possible to serve more requests within a given amount of time, or to use fewer resources to serve the same number of requests. This can make large language models more practical and cost-effective to deploy in real-world applications.\n",
26 |       "\n",
27 |       "Overall, low latency LLMs are important for enabling real-time or time-sensitive applications to provide a good user experience, make decisions quickly, and reduce computational cost.\n"
28 |      ]
29 |     }
30 |    ],
31 |    "source": [
32 |     "# testing groq with mistral\n",
33 |     "\n",
34 |     "import os\n",
35 |     "from groq import Groq\n",
36 |     "from dotenv import load_dotenv\n",
37 |     "load_dotenv()\n",
38 |     "\n",
39 |     "groq_api_key = os.getenv(\"GROQ_API_KEY\")\n",
40 |     "\n",
41 |     "client = Groq(\n",
42 |     "    api_key=groq_api_key,\n",
43 |     ")\n",
44 |     "\n",
45 |     "chat_completion = client.chat.completions.create(\n",
46 |     "    messages=[\n",
47 |     "        {\n",
48 |     "            \"role\": \"user\",\n",
49 |     "            \"content\": \"Explain the importance of low latency LLMs\",\n",
50 |     "        }\n",
51 |     "    ],\n",
52 |     "    model=\"mixtral-8x7b-32768\",\n",
53 |     ")\n",
54 |     "\n",
55 |     "print(chat_completion.choices[0].message.content)"
56 |    ]
57 |   },
58 |   {
59 |    "cell_type": "code",
60 |    "execution_count": 3,
61 |    "metadata": {},
62 |    "outputs": [
63 |     {
64 |      "name": "stdout",
65 |      "output_type": "stream",
66 |      "text": [
67 |       "Prompt injections are a type of attack where an attacker injects malicious code into a program's input prompt, allowing them to execute arbitrary commands on the user's system. Here are some ways to prevent prompt injections:\n",
68 |       "\n",
69 |       "1. Use a secure prompt mechanism: Use a secure prompt mechanism, such as a GUI-based prompt or a secure command-line prompt, to prevent attackers from injecting malicious code into the prompt.\n",
70 |       "2. Validate user input: Validate user input to ensure that it does not contain malicious code. Use techniques such as input filtering, input validation, and output encoding to prevent malicious input from being executed.\n",
71 |       "3. Use a whitelist: Use a whitelist of allowed commands or inputs to prevent malicious commands from being executed.\n",
72 |       "4. Use a sandbox: Run the program in a sandbox environment, which restricts the program's access to system resources and prevents it from executing malicious commands.\n",
73 |       "5. Use a secure coding practice: Use secure coding practices, such as using parameterized queries instead of concatenating user input into SQL queries, to prevent SQL injection attacks.\n",
74 |       "6. Use a web application firewall: Use a web application firewall (WAF) to protect web applications from prompt injection attacks.\n",
75 |       "7. Keep software up to date: Keep software up to date with the latest security patches and updates to prevent known vulnerabilities from being exploited.\n",
76 |       "8. Use a secure communication protocol: Use a secure communication protocol, such as HTTPS, to protect against eavesdropping and man-in-the-middle attacks.\n",
77 |       "9. Use a secure authentication mechanism: Use a secure authentication mechanism, such as two-factor authentication, to prevent unauthorized access to the system.\n",
78 |       "10. Monitor for suspicious activity: Monitor for suspicious activity, such as unexpected user input or attempts to execute unauthorized commands, and respond appropriately to prevent a security breach.\n",
79 |       "11. Use a security information and event management (SIEM) system: Use a SIEM system to monitor for security-related events and respond to security incidents in real-time.\n",
80 |       "12. Use a vulnerability scanner: Use a vulnerability scanner to identify and remediate vulnerabilities in the system that could be exploited by attackers.\n",
81 |       "13. Use a penetration testing tool: Use a penetration testing tool to simulate attacks on the system and identify vulnerabilities that could be exploited by attackers.\n",
82 |       "14. Use a security orchestration, automation, and response (SOAR) tool: Use a SOAR tool to automate security incident response and remediation.\n",
83 |       "15. Educate users: Educate users on the dangers of prompt injections and how to prevent them.\n",
84 |       "\n",
85 |       "It's important to note that prompt injections can be prevented by implementing a combination of these measures, rather than relying on a single solution.\n"
86 |      ]
87 |     }
88 |    ],
89 |    "source": [
90 |     "# testing groq with llama2\n",
91 |     "import os\n",
92 |     "from groq import Groq\n",
93 |     "from dotenv import load_dotenv\n",
94 |     "load_dotenv()\n",
95 |     "\n",
96 |     "groq_api_key = os.getenv(\"GROQ_API_KEY\")\n",
97 |     "\n",
98 |     "client = Groq(\n",
99 |     "    api_key=groq_api_key,\n",
100 |     ")\n",
101 |     "\n",
102 |     "chat_completion = client.chat.completions.create(\n",
103 |     "    messages=[\n",
104 |     "        {\n",
105 |     "            \"role\": \"user\",\n",
106 |     "            \"content\": \"Suggest me ways to prevent prompt injections\",\n",
107 |     "        }\n",
108 |     "    ],\n",
109 |     "    model=\"llama2-70b-4096\",\n",
110 |     ")\n",
111 |     "\n",
112 |     "print(chat_completion.choices[0].message.content)\n"
113 |    ]
114 |   },
115 |   {
116 |    "cell_type": "code",
117 |    "execution_count": 4,
118 |    "metadata": {},
119 |    "outputs": [
120 |     {
121 |      "name": "stdout",
122 |      "output_type": "stream",
123 |      "text": [
124 |       "Albert Einstein was a German-born physicist who revolutionized the field of physics with his theories on relativity. Born in 1879, Einstein was a sickly child who struggled with learning disabilities. Despite his challenges, he went on to become one of the greatest minds in history.\n",
125 |       "\n",
126 |       "Einstein's most significant contributions to physics include:\n",
127 |       "\n",
128 |       "* **Theory of Relativity:** In 1915, Einstein presented his theory of general relativity, which describes gravity as a curvature of spacetime. This theory revolutionized our understanding of the universe and has been widely accepted as one of the most accurate theories in physics.\n",
129 |       "* **Theory of Special Relativity:** In 1905, Einstein published his theory of special relativity, which describes the relationship between space and time. This theory showed that space and time are not absolute and that they can be warped by the presence of mass and energy.\n",
130 |       "* **Photoelectric Effect:** Einstein's explanation of the photoelectric effect in 1905 provided evidence for the quantum nature of light. This discovery was a major breakthrough in physics and helped to pave the way for the development of quantum theory.\n",
131 |       "\n",
132 |       "Einstein's theories and contributions to physics have had a profound impact on our understanding of the universe. He is considered one of the greatest physicists of all time and his work continues to inspire scientists and thinkers around the world.\n"
133 |      ]
134 |     }
135 |    ],
136 |    "source": [
137 |     "# testing groq with gemma\n",
138 |     "\n",
139 |     "import os\n",
140 |     "from groq import Groq\n",
141 |     "from dotenv import load_dotenv\n",
142 |     "load_dotenv()\n",
143 |     "\n",
144 |     "groq_api_key = os.getenv(\"GROQ_API_KEY\")\n",
145 |     "\n",
146 |     "client = Groq(\n",
147 |     "    api_key=groq_api_key,\n",
148 |     ")\n",
149 |     "\n",
150 |     "chat_completion = client.chat.completions.create(\n",
151 |     "    messages=[\n",
152 |     "        {\n",
153 |     "            \"role\": \"user\",\n",
154 |     "            \"content\": \"Who is Albert Einstein?\",\n",
155 |     "        }\n",
156 |     "    ],\n",
157 |     "    model=\"gemma-7b-it\",\n",
158 |     ")\n",
159 |     "\n",
160 |     "print(chat_completion.choices[0].message.content)"
161 |    ]
162 |   }
163 |  ],
164 |  "metadata": {
165 |   "kernelspec": {
166 |    "display_name": ".venv",
167 |    "language": "python",
168 |    "name": "python3"
169 |   },
170 |   "language_info": {
171 |    "codemirror_mode": {
172 |     "name": "ipython",
173 |     "version": 3
174 |    },
175 |    "file_extension": ".py",
176 |    "mimetype": "text/x-python",
177 |    "name": "python",
178 |    "nbconvert_exporter": "python",
179 |    "pygments_lexer": "ipython3",
180 |    "version": "3.12.1"
181 |   }
182 |  },
183 |  "nbformat": 4,
184 |  "nbformat_minor": 2
185 | }
186 | 

--------------------------------------------------------------------------------
/knowledge-graph-rag/.env.example:
--------------------------------------------------------------------------------
1 | OPENAI_API_KEY=sk...
2 | 

--------------------------------------------------------------------------------
/knowledge-graph-rag/README.md:
--------------------------------------------------------------------------------
1 | 
2 | 
3 | To run the application:
4 | ```
5 | pip install -r requirements.txt
6 | ```
7 | 
8 | To visualize the graph, open ```example_hotels.html```
9 | 
10 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/877be22d-affe-4bf4-853e-55cafcb21c97)
11 | 
12 | 
13 | References:
14 | [Llamaindex Knowledge Graph Index](https://docs.llamaindex.ai/en/stable/examples/index_structs/knowledge_graph/KnowledgeGraphDemo/)
15 | 

--------------------------------------------------------------------------------
/knowledge-graph-rag/knowledge_graph.ipynb:
--------------------------------------------------------------------------------
1 | {
2 |  "cells": [
3 |   {
4 |    "cell_type": "code",
5 |    "execution_count": 21,
6 |    "metadata": {},
7 |    "outputs": [],
8 |    "source": [
9 |     "from llama_index.core import SimpleDirectoryReader, KnowledgeGraphIndex\n",
10 |     "from llama_index.core.graph_stores import SimpleGraphStore\n",
11 |     "from dotenv import load_dotenv\n",
12 |     "load_dotenv()\n",
13 |     "from llama_index.llms.openai import OpenAI\n",
14 |     "from llama_index.core import Settings\n",
15 |     "from IPython.display import Markdown, display\n",
16 |     "from llama_index.core import StorageContext, load_index_from_storage"
17 |    ]
18 |   },
19 |   {
20 |    "cell_type": "code",
21 |    "execution_count": 7,
22 |    "metadata": {},
23 |    "outputs": [],
24 |    "source": [
25 |     "documents = SimpleDirectoryReader(\n",
26 |     "    input_files=[\"./data/hotels_50_entries_countries.csv\"], encoding=\"latin-1\"\n",
27 |     ").load_data()"
28 |    ]
29 |   },
30 |   {
31 |    "cell_type": "code",
32 |    "execution_count": 8,
33 |    "metadata": {},
34 |    "outputs": [],
35 |    "source": [
36 |     "# define LLM\n",
37 |     "\n",
38 |     "llm = OpenAI(temperature=0, model=\"gpt-3.5-turbo-0125\")\n",
"Settings.llm = llm\n", 40 | "Settings.chunk_size = 512" 41 | ] 42 | }, 43 | { 44 | "cell_type": "code", 45 | "execution_count": 9, 46 | "metadata": {}, 47 | "outputs": [], 48 | "source": [ 49 | "graph_store = SimpleGraphStore()\n", 50 | "storage_context = StorageContext.from_defaults(graph_store=graph_store)\n", 51 | "\n", 52 | "# NOTE: can take a while!\n", 53 | "index = KnowledgeGraphIndex.from_documents(\n", 54 | " documents,\n", 55 | " max_triplets_per_chunk=2,\n", 56 | " storage_context=storage_context,\n", 57 | ")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 22, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "# index.storage_context.persist(persist_dir=\"./storage\")\n", 67 | "index = load_index_from_storage(storage_context)" 68 | ] 69 | }, 70 | { 71 | "cell_type": "code", 72 | "execution_count": 23, 73 | "metadata": {}, 74 | "outputs": [], 75 | "source": [ 76 | "query_engine = index.as_query_engine(\n", 77 | " include_text=False, response_mode=\"tree_summarize\"\n", 78 | ")\n", 79 | "response = query_engine.query(\n", 80 | " \"What are some hotels in Sarande, Albania\",\n", 81 | ")" 82 | ] 83 | }, 84 | { 85 | "cell_type": "code", 86 | "execution_count": 24, 87 | "metadata": {}, 88 | "outputs": [ 89 | { 90 | "data": { 91 | "text/markdown": [ 92 | "There are several hotels in Sarande, Albania, such as Hotel Butrinti, Hotel Bahamas, Hotel Kaonia, and Hotel Saranda International." 93 | ], 94 | "text/plain": [ 95 | "" 96 | ] 97 | }, 98 | "metadata": {}, 99 | "output_type": "display_data" 100 | } 101 | ], 102 | "source": [ 103 | "display(Markdown(f\"{response}\"))" 104 | ] 105 | }, 106 | { 107 | "cell_type": "code", 108 | "execution_count": 25, 109 | "metadata": {}, 110 | "outputs": [], 111 | "source": [ 112 | "query_engine = index.as_query_engine(\n", 113 | " include_text=True, response_mode=\"tree_summarize\"\n", 114 | ")\n", 115 | "response = query_engine.query(\n", 116 | " \"Facilities available in Las Tholas Hotel, Uyuni, Bolivia\",\n", 117 | ")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 26, 123 | "metadata": {}, 124 | "outputs": [ 125 | { 126 | "data": { 127 | "text/markdown": [ 128 | "The facilities available at Las Tholas Hotel in Uyuni, Bolivia include a 24-hour front desk, laundry facilities, free WiFi throughout the property, a tour desk, and a shared kitchen. Additionally, the hotel offers a terrace, designated smoking areas, and lockers for guests." 
129 | ], 130 | "text/plain": [ 131 | "" 132 | ] 133 | }, 134 | "metadata": {}, 135 | "output_type": "display_data" 136 | } 137 | ], 138 | "source": [ 139 | "display(Markdown(f\"{response}\"))" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 16, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# NOTE: can take a while!\n", 149 | "# new_index = KnowledgeGraphIndex.from_documents(\n", 150 | "# documents,\n", 151 | "# max_triplets_per_chunk=2,\n", 152 | "# include_embeddings=True,\n", 153 | "# )" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 158 | "execution_count": 27, 159 | "metadata": {}, 160 | "outputs": [ 161 | { 162 | "data": { 163 | "text/html": [ 164 | "\n", 165 | " \n", 173 | " " 174 | ], 175 | "text/plain": [ 176 | "" 177 | ] 178 | }, 179 | "execution_count": 27, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "from pyvis.network import Network\n", 186 | "\n", 187 | "# Get the networkx graph\n", 188 | "g = index.get_networkx_graph()\n", 189 | "\n", 190 | "# Create the Pyvis network\n", 191 | "net = Network(notebook=True, cdn_resources=\"in_line\", directed=True)\n", 192 | "\n", 193 | "# Load the networkx graph into the Pyvis network\n", 194 | "net.from_nx(g)\n", 195 | "\n", 196 | "# Generate HTML content\n", 197 | "html_content = net.generate_html()\n", 198 | "\n", 199 | "# Write the HTML content to a file with UTF-8 encoding\n", 200 | "with open(\"example_hotels.html\", \"w\", encoding=\"utf-8\") as f:\n", 201 | " f.write(html_content)\n", 202 | "\n", 203 | "# Display the generated HTML file in the notebook\n", 204 | "from IPython.display import IFrame\n", 205 | "IFrame(\"example_hotels.html\", width=900, height=600)\n" 206 | ] 207 | } 208 | ], 209 | "metadata": { 210 | "kernelspec": { 211 | "display_name": "venv", 212 | "language": "python", 213 | "name": "python3" 214 | }, 215 | "language_info": { 216 | "codemirror_mode": { 217 | "name": "ipython", 218 | "version": 3 219 | }, 220 | "file_extension": ".py", 221 | "mimetype": "text/x-python", 222 | "name": "python", 223 | "nbconvert_exporter": "python", 224 | "pygments_lexer": "ipython3", 225 | "version": "3.10.4" 226 | } 227 | }, 228 | "nbformat": 4, 229 | "nbformat_minor": 2 230 | } 231 | -------------------------------------------------------------------------------- /knowledge-graph-rag/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.10.44 2 | llama-index-vector-stores-neo4jvector==0.1.4 3 | python-dotenv==1.0.1 4 | pyvis==0.3.2 5 | -------------------------------------------------------------------------------- /knowledge-graph-rag/storage/default__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /knowledge-graph-rag/storage/image__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /langfuse/.env.example: -------------------------------------------------------------------------------- 1 | GROQ_API_KEY=XYZ 2 | LANGFUSE_PUBLIC_KEY=XYZ 3 | LANGFUSE_SECRET_KEY=XYZ 4 | LANGFUSE_HOST=https://cloud.langfuse.com 5 | 
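These four variables are everything the Langfuse integration below needs. A quick way to verify they are picked up correctly is the SDK's auth check (a minimal sketch, assuming the `langfuse` v2 SDK pinned in requirements.txt; `auth_check()` returns `True` when the keys and host are valid):

```python
import os
from dotenv import load_dotenv
from langfuse import Langfuse

load_dotenv()  # reads LANGFUSE_PUBLIC_KEY, LANGFUSE_SECRET_KEY, LANGFUSE_HOST from .env

langfuse = Langfuse()  # the client picks the credentials up from the environment
print(langfuse.auth_check())  # True if the credentials and host are correct
```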
-------------------------------------------------------------------------------- /langfuse/README.md: -------------------------------------------------------------------------------- 1 | Open Source LLM Engineering Platform 2 | Traces, evals, prompt management and metrics to debug and improve LLM applications. 3 | 4 | [Official Page](https://langfuse.com/) 5 | 6 | To run the application: 7 | ``` 8 | pip install -r requirements.txt 9 | chainlit run app.py 10 | ``` 11 | 12 | For the dashboard, head to ```cloud.langfuse.com``` 13 | 14 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/64e0c750-0fc0-42f1-8156-df8f71ae50f1) 15 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/40872361-7a96-45e3-85d1-0682984ae4e7) 16 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/e0bec746-e6d4-4ce3-81eb-2f6978d277bb) 17 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/38b9d4d7-7aa3-4991-bdf9-d05fc28038d7) 18 | 19 | Traces 20 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/383655dc-ddf6-4151-9b71-3db5cbe706ef) 21 | 22 | Session 23 | ![image](https://github.com/jayita13/GenerativeAI/assets/64038928/19ab8cf1-6983-4235-a57a-cfa650791c3c) 24 | -------------------------------------------------------------------------------- /langfuse/app.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import StorageContext, ServiceContext, load_index_from_storage 2 | from llama_index.core.callbacks.base import CallbackManager 3 | from llama_index.embeddings.huggingface import HuggingFaceEmbedding 4 | from llama_index.llms.groq import Groq 5 | import os 6 | from dotenv import load_dotenv 7 | load_dotenv() 8 | import chainlit as cl 9 | from llama_index.core import set_global_handler 10 | 11 | set_global_handler("langfuse") 12 | 13 | # import global_handler only after set_global_handler has run; before that the name is still None 14 | from llama_index.core import global_handler 15 | 16 | global_handler.set_trace_params( 17 | session_id="first-session-01" 18 | ) 19 | 20 | GROQ_API_KEY = os.getenv("GROQ_API_KEY") 21 | 22 | @cl.on_chat_start 23 | async def factory(): 24 | storage_context = StorageContext.from_defaults(persist_dir="./storage_mini") 25 | 26 | embed_model = HuggingFaceEmbedding(model_name="sentence-transformers/all-MiniLM-L6-v2") 27 | 28 | llm = Groq(model="llama3-70b-8192", api_key=GROQ_API_KEY) 29 | 30 | service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm, 31 | callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]), 32 | ) 33 | 34 | index = load_index_from_storage(storage_context, service_context=service_context) 35 | 36 | chat_engine = index.as_chat_engine(service_context=service_context) 37 | 38 | cl.user_session.set("chat_engine", chat_engine) 39 | 40 | @cl.on_message 41 | async def main(message: cl.Message): 42 | chat_engine = cl.user_session.get("chat_engine") 43 | response = await cl.make_async(chat_engine.chat)(message.content) 44 | 45 | response_message = cl.Message(content="") 46 | 47 | for token in response.response: 48 | await response_message.stream_token(token=token) 49 | 50 | await response_message.send() -------------------------------------------------------------------------------- /langfuse/chainlit.md: -------------------------------------------------------------------------------- 1 | # Welcome to Chainlit! 🚀🤖 2 | 3 | Hi there, Developer! 👋 We're excited to have you on board.
Chainlit is a powerful tool designed to help you prototype, debug and share applications built on top of LLMs. 4 | 5 | ## Useful Links 🔗 6 | 7 | - **Documentation:** Get started with our comprehensive [Chainlit Documentation](https://docs.chainlit.io) 📚 8 | - **Discord Community:** Join our friendly [Chainlit Discord](https://discord.gg/k73SQ3FyUh) to ask questions, share your projects, and connect with other developers! 💬 9 | 10 | We can't wait to see what you create with Chainlit! Happy coding! 💻😊 11 | 12 | ## Welcome screen 13 | 14 | To modify the welcome screen, edit the `chainlit.md` file at the root of your project. If you do not want a welcome screen, just leave this file empty. 15 | -------------------------------------------------------------------------------- /langfuse/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.10.18 2 | llama-index-llms-groq==0.1.3 3 | chainlit==1.0.401 4 | groq==0.4.2 5 | python-dotenv==1.0.1 6 | langfuse==2.33.0 7 | llama-index-callbacks-langfuse==0.1.4 8 | -------------------------------------------------------------------------------- /langfuse/storage_mini/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /langfuse/storage_mini/image__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /langfuse/storage_mini/index_store.json: -------------------------------------------------------------------------------- 1 | {"index_store/data": {"f42a6af7-0775-453f-9b3b-3a2ae2f23eab": {"__type__": "vector_store", "__data__": "{\"index_id\": \"f42a6af7-0775-453f-9b3b-3a2ae2f23eab\", \"summary\": null, \"nodes_dict\": {\"f719fbb6-e0ed-40ac-839a-c0d6a6505b4c\": \"f719fbb6-e0ed-40ac-839a-c0d6a6505b4c\", \"9c3dc423-ed76-48d2-b81c-0590b366d4ea\": \"9c3dc423-ed76-48d2-b81c-0590b366d4ea\", \"e8dc3a2b-45e9-4a4d-968f-df887afb4193\": \"e8dc3a2b-45e9-4a4d-968f-df887afb4193\", \"36830b4c-c762-45dc-bb77-2693fc244f1d\": \"36830b4c-c762-45dc-bb77-2693fc244f1d\", \"15b83726-755a-4f0c-b035-fe7b7be94d29\": \"15b83726-755a-4f0c-b035-fe7b7be94d29\", \"9c3c3031-2dfe-4b0c-ae9a-ec6fa4178520\": \"9c3c3031-2dfe-4b0c-ae9a-ec6fa4178520\", \"1e00f16c-0f83-4d24-9122-0e3c5dd43381\": \"1e00f16c-0f83-4d24-9122-0e3c5dd43381\", \"18302cce-5e0a-4808-8d63-8eab892c9136\": \"18302cce-5e0a-4808-8d63-8eab892c9136\", \"eb32dd9c-263c-4b5a-a3f1-2090aad558ce\": \"eb32dd9c-263c-4b5a-a3f1-2090aad558ce\", \"95fe93ab-e60b-471d-b80a-4dfa7d35d665\": \"95fe93ab-e60b-471d-b80a-4dfa7d35d665\", \"b3546102-2e25-4f07-a23b-a78da158ae34\": \"b3546102-2e25-4f07-a23b-a78da158ae34\", \"647ce2bb-cd94-42c8-b4c5-0f81509da4fb\": \"647ce2bb-cd94-42c8-b4c5-0f81509da4fb\", \"e8cff6e2-bd24-4282-b4e1-a22b95a53fd1\": \"e8cff6e2-bd24-4282-b4e1-a22b95a53fd1\", \"cd3b0c9a-43e9-482c-af84-493f6b5b98bb\": \"cd3b0c9a-43e9-482c-af84-493f6b5b98bb\", \"fd5d33a1-5db8-4f5f-991d-b54f13243cfe\": \"fd5d33a1-5db8-4f5f-991d-b54f13243cfe\", \"91bac189-20cf-4e89-bcd1-f9112e6b23f3\": \"91bac189-20cf-4e89-bcd1-f9112e6b23f3\", \"5b749bb1-b495-4f9b-9633-c5766aaff86f\": \"5b749bb1-b495-4f9b-9633-c5766aaff86f\", \"139a71be-fecd-4dfb-ad7a-96c5c73bbd13\": \"139a71be-fecd-4dfb-ad7a-96c5c73bbd13\", \"2812354c-1a5b-4d5a-9c70-26c5778bea37\": 
\"2812354c-1a5b-4d5a-9c70-26c5778bea37\", \"ac0adedf-2f5c-4dbb-8c37-cc3157afed4c\": \"ac0adedf-2f5c-4dbb-8c37-cc3157afed4c\", \"af88595e-fbc6-4b03-8bd1-f4f8e0eaacc3\": \"af88595e-fbc6-4b03-8bd1-f4f8e0eaacc3\", \"9467763a-ab87-450f-a2f6-2a3f29f4b3d0\": \"9467763a-ab87-450f-a2f6-2a3f29f4b3d0\", \"b8778ed1-69a6-486a-a6f9-6598ab89d0f2\": \"b8778ed1-69a6-486a-a6f9-6598ab89d0f2\", \"8a0938fe-5c32-428e-a07a-a327cee6ba9d\": \"8a0938fe-5c32-428e-a07a-a327cee6ba9d\", \"c7d5aa44-17ed-49cf-87df-ac9d21424f00\": \"c7d5aa44-17ed-49cf-87df-ac9d21424f00\", \"48dbadb9-f968-456f-abd6-c6dbfcc05af2\": \"48dbadb9-f968-456f-abd6-c6dbfcc05af2\", \"2a5d0268-38a3-4ae6-bd4a-a0b7d618a8b7\": \"2a5d0268-38a3-4ae6-bd4a-a0b7d618a8b7\", \"b256ea12-a69a-4aa7-9d82-8ad303f3def5\": \"b256ea12-a69a-4aa7-9d82-8ad303f3def5\", \"cc95ab32-410b-4556-a1f9-736e34e6224a\": \"cc95ab32-410b-4556-a1f9-736e34e6224a\", \"52bcb311-a69f-4c04-8cb7-b228d5967004\": \"52bcb311-a69f-4c04-8cb7-b228d5967004\", \"df0fdc8f-c6aa-4f17-a07a-4ea94ee15f5f\": \"df0fdc8f-c6aa-4f17-a07a-4ea94ee15f5f\", \"d10de204-884e-4e17-bbb1-878d487ae62b\": \"d10de204-884e-4e17-bbb1-878d487ae62b\", \"ff107060-ec0c-4609-b181-e3a3b57910b8\": \"ff107060-ec0c-4609-b181-e3a3b57910b8\", \"41a3320c-fc40-4e81-bef2-88c34b84b83a\": \"41a3320c-fc40-4e81-bef2-88c34b84b83a\", \"100de5f1-2721-44fb-b0fe-ecd640a54aca\": \"100de5f1-2721-44fb-b0fe-ecd640a54aca\", \"69fed107-290b-43a9-981c-043aab1d04ec\": \"69fed107-290b-43a9-981c-043aab1d04ec\", \"58d09af6-0af0-4e8f-bd66-b55d7b90082f\": \"58d09af6-0af0-4e8f-bd66-b55d7b90082f\", \"9e42dc7d-25c1-43b7-bf7e-1fdee1019e1a\": \"9e42dc7d-25c1-43b7-bf7e-1fdee1019e1a\", \"81d298c7-edda-410b-bb03-8802a18c5b86\": \"81d298c7-edda-410b-bb03-8802a18c5b86\", \"6e826612-dc68-4a98-9558-f8361108f178\": \"6e826612-dc68-4a98-9558-f8361108f178\", \"7ffbc337-6667-4c61-95a3-5b797123e5d2\": \"7ffbc337-6667-4c61-95a3-5b797123e5d2\", \"8b06e867-10ed-4cd8-ba0c-a94b88d73bfb\": \"8b06e867-10ed-4cd8-ba0c-a94b88d73bfb\", \"5461bd27-e097-49dc-b58e-530fe60a89e0\": \"5461bd27-e097-49dc-b58e-530fe60a89e0\", \"a6d784e3-cf5d-4b6a-ba9c-ec9c3a6cad30\": \"a6d784e3-cf5d-4b6a-ba9c-ec9c3a6cad30\", \"5b0a4e27-8f6b-40c6-9ad9-b7f09d653460\": \"5b0a4e27-8f6b-40c6-9ad9-b7f09d653460\", \"bff67aa8-cc12-4bc4-980e-bb45daed6560\": \"bff67aa8-cc12-4bc4-980e-bb45daed6560\", \"000d2343-ddea-45c1-a806-73a83c9f92f7\": \"000d2343-ddea-45c1-a806-73a83c9f92f7\", \"04acb165-b69a-4940-ba82-9affe6fcbcb7\": \"04acb165-b69a-4940-ba82-9affe6fcbcb7\", \"0b47d871-4134-404b-9b70-2ff8f888a887\": \"0b47d871-4134-404b-9b70-2ff8f888a887\", \"189d8c40-7d88-429f-89b1-d02354f18d8b\": \"189d8c40-7d88-429f-89b1-d02354f18d8b\", \"94a5d3bd-7f93-404e-a21d-3b34af89c622\": \"94a5d3bd-7f93-404e-a21d-3b34af89c622\", \"2ee8190e-003e-47e2-b3f9-11aa768559ce\": \"2ee8190e-003e-47e2-b3f9-11aa768559ce\", \"48a819d4-e040-4801-a003-6bd8077b43ad\": \"48a819d4-e040-4801-a003-6bd8077b43ad\", \"d561a2d0-8d62-43d4-aa05-3b9c9401d490\": \"d561a2d0-8d62-43d4-aa05-3b9c9401d490\", \"8ba8d825-e588-4909-865d-6e53f79d9fab\": \"8ba8d825-e588-4909-865d-6e53f79d9fab\", \"0af23475-0461-48f6-9845-9634d2fd30bf\": \"0af23475-0461-48f6-9845-9634d2fd30bf\", \"ca52491b-16c3-4c72-9699-37b1472443eb\": \"ca52491b-16c3-4c72-9699-37b1472443eb\", \"9a09946c-02f3-4f3d-b1f3-64f9e59d142c\": \"9a09946c-02f3-4f3d-b1f3-64f9e59d142c\", \"a702db54-ac43-452a-afc9-d2225d0e26ce\": \"a702db54-ac43-452a-afc9-d2225d0e26ce\", \"15f333b0-1ea8-4fc4-8554-09cd1587e42a\": \"15f333b0-1ea8-4fc4-8554-09cd1587e42a\", \"e6b8a52f-d0ac-42f0-8848-a499c359b5f5\": 
\"e6b8a52f-d0ac-42f0-8848-a499c359b5f5\", \"00dfc6ed-879f-4dfc-b3b9-2426d514a8d2\": \"00dfc6ed-879f-4dfc-b3b9-2426d514a8d2\", \"f7ee6304-f742-4583-93df-7d650bd51ec2\": \"f7ee6304-f742-4583-93df-7d650bd51ec2\", \"b21e8060-a593-444b-8b26-d9b49188ca63\": \"b21e8060-a593-444b-8b26-d9b49188ca63\", \"d538a328-8f77-4921-9b28-de7abe7405a2\": \"d538a328-8f77-4921-9b28-de7abe7405a2\", \"f362deff-e7a2-428f-9e0d-01d769ae16e1\": \"f362deff-e7a2-428f-9e0d-01d769ae16e1\", \"bc3b6df4-3140-4824-829f-cd3e614a5a11\": \"bc3b6df4-3140-4824-829f-cd3e614a5a11\", \"b93f1306-641d-4d02-b329-7ce6056646f4\": \"b93f1306-641d-4d02-b329-7ce6056646f4\", \"9a551eb1-05a3-4def-9ceb-5f137869efab\": \"9a551eb1-05a3-4def-9ceb-5f137869efab\", \"76cd02b4-85bc-4d6e-8e61-1446722e8649\": \"76cd02b4-85bc-4d6e-8e61-1446722e8649\", \"11a0886f-1d48-4f81-97ac-76d850ffc7b0\": \"11a0886f-1d48-4f81-97ac-76d850ffc7b0\", \"0e769f56-20ce-4b8e-8b1a-0f267b9fc806\": \"0e769f56-20ce-4b8e-8b1a-0f267b9fc806\", \"f90f8e75-8e99-42e9-b0c4-965389a7f205\": \"f90f8e75-8e99-42e9-b0c4-965389a7f205\", \"7450a420-5b0a-4fc8-9db8-1a900295f084\": \"7450a420-5b0a-4fc8-9db8-1a900295f084\", \"51b44fdd-9994-489d-9a44-50f2ff305d29\": \"51b44fdd-9994-489d-9a44-50f2ff305d29\", \"f8cf5a7b-6e73-47d3-8e68-497201fd7496\": \"f8cf5a7b-6e73-47d3-8e68-497201fd7496\", \"1dadcc93-645d-4e92-8587-73595a882f06\": \"1dadcc93-645d-4e92-8587-73595a882f06\", \"8e80dfd3-5774-4a52-ae0e-14c068b91047\": \"8e80dfd3-5774-4a52-ae0e-14c068b91047\", \"75851282-14cc-4073-962a-7a29cad95540\": \"75851282-14cc-4073-962a-7a29cad95540\", \"85b2d6d5-6af5-4971-b228-94c2d73fc2b7\": \"85b2d6d5-6af5-4971-b228-94c2d73fc2b7\", \"386d6c1a-4414-45b9-9102-58ecb7671cf5\": \"386d6c1a-4414-45b9-9102-58ecb7671cf5\", \"50256f45-02a3-4bd1-87fc-b1e841fb7750\": \"50256f45-02a3-4bd1-87fc-b1e841fb7750\", \"b42fffd1-bfd6-4e9b-9f9f-6a8b92ed3e92\": \"b42fffd1-bfd6-4e9b-9f9f-6a8b92ed3e92\", \"aba0ee12-426e-40fd-93a9-4d1a0996d9ab\": \"aba0ee12-426e-40fd-93a9-4d1a0996d9ab\", \"4ec8b241-2e4e-41e0-b30e-28b1b61d7778\": \"4ec8b241-2e4e-41e0-b30e-28b1b61d7778\", \"bcba2bc2-e4d1-44b3-8c8d-1489c2e93b90\": \"bcba2bc2-e4d1-44b3-8c8d-1489c2e93b90\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}} -------------------------------------------------------------------------------- /literalai/.env.example: -------------------------------------------------------------------------------- 1 | GROQ_API_KEY=ABC 2 | LITERAL_API_KEY=ABC 3 | LITERAL_API_URL=ABC -------------------------------------------------------------------------------- /literalai/README.md: -------------------------------------------------------------------------------- 1 | To access LiteralAI dashboard signin to - https://cloud.getliteral.ai/ 2 | 3 | create project on left sidebar option 4 | 5 | image 6 | 7 | Create API keys from Settings 8 | 9 | image 10 | 11 | -------------------------------------------------------------------------------- /literalai/requirements.txt: -------------------------------------------------------------------------------- 1 | python-dotenv>=1.0.1 2 | literalai>=0.0.503 3 | llama-index>=0.10.34 4 | llama-index-llms-groq==0.1.4 5 | groq==0.9.0 6 | llama-index-embeddings-huggingface==0.2.0 -------------------------------------------------------------------------------- /llama_agents/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY = "sk-proj..." 
-------------------------------------------------------------------------------- /llama_agents/README.md: -------------------------------------------------------------------------------- 1 | Reference: https://github.com/run-llama/llama-agents/blob/main/examples/agentic_rag_toolservice.ipynb 2 | -------------------------------------------------------------------------------- /llama_agents/agentic_rag_toolservice.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "49814a62-0201-4d41-a449-c2c6b4ad2ced", 6 | "metadata": {}, 7 | "source": [ 8 | "# Build an Agentic RAG Service\n", 9 | "\n", 10 | "Set up an agent service that can interact with a tool service (containing RAG tools over annual reports).\n", 11 | "\n", 12 | "In this notebook, we:\n", 13 | "- Set up our indexes and query engine tools.\n", 14 | "- Define our multi-agent framework:\n", 15 | " - A message queue.\n", 16 | " - An agentic orchestrator.\n", 17 | " - A tools service containing our query engine tools. This will act as a remote executor for tools.\n", 18 | " - Define meta-tools for our agents. These will make calls to the tools service instead of executing directly.\n", 19 | " - Our agent services. These wrap existing llama-index agents.\n", 20 | " - Put all this into a local launcher to simulate one task passing through the system at a time." 21 | ] 22 | }, 23 | { 24 | "cell_type": "code", 25 | "execution_count": 1, 26 | "id": "e33e5676-8d15-4372-bc32-8d43dd617b80", 27 | "metadata": {}, 28 | "outputs": [], 29 | "source": [ 30 | "import nest_asyncio\n", 31 | "\n", 32 | "nest_asyncio.apply()\n", 33 | "\n", 34 | "import os\n", 35 | "from dotenv import load_dotenv\n", 36 | "load_dotenv()\n", 37 | "\n", 38 | "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')" 39 | ] 40 | }, 41 | { 42 | "cell_type": "markdown", 43 | "id": "8b4be8b8-3d1c-4a82-98f5-ec94d97a679e", 44 | "metadata": {}, 45 | "source": [ 46 | "## Load Data" 47 | ] 48 | }, 49 | { 50 | "cell_type": "code", 51 | "execution_count": 2, 52 | "id": "1fe13aa4-55f7-4854-b2b7-b3ae02d8df75", 53 | "metadata": {}, 54 | "outputs": [], 55 | "source": [ 56 | "from llama_index.core import (\n", 57 | " SimpleDirectoryReader,\n", 58 | " VectorStoreIndex,\n", 59 | " StorageContext,\n", 60 | " load_index_from_storage,\n", 61 | " Settings\n", 62 | ")\n", 63 | "\n", 64 | "from llama_index.core.tools import QueryEngineTool, ToolMetadata" 65 | ] 66 | }, 67 | { 68 | "cell_type": "code", 69 | "execution_count": 3, 70 | "id": "dc27a6c2-2840-4705-b832-829a18c8571b", 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "try:\n", 75 | " storage_context = StorageContext.from_defaults(persist_dir=\"./storage/q1-23\")\n", 76 | " q1_23_index = load_index_from_storage(storage_context)\n", 77 | "\n", 78 | " storage_context = StorageContext.from_defaults(persist_dir=\"./storage/q1-24\")\n", 79 | " q1_24_index = load_index_from_storage(storage_context)\n", 80 | "\n", 81 | " index_loaded = True\n", 82 | "except Exception:\n", 83 | " index_loaded = False" 84 | ] 85 | }, 86 | { 87 | "cell_type": "code", 88 | "execution_count": 4, 89 | "id": "d17aba20-d21a-45c2-8c0d-4d7c2f52fc7b", 90 | "metadata": {}, 91 | "outputs": [], 92 | "source": [ 93 | "if not index_loaded:\n", 94 | " # load data\n", 95 | " q1_23_docs = SimpleDirectoryReader(\n", 96 | " input_files=[\"./data/GOOG-10-Q-Q1-2023.pdf\"]\n", 97 | " ).load_data()\n", 98 | " q1_24_docs = SimpleDirectoryReader(\n", 99 | " "
input_files=[\"./data/goog-10-q-q1-2024.pdf\"]\n", 100 | " ).load_data()\n", 101 | "\n", 102 | " # build index\n", 103 | " q1_23_index = VectorStoreIndex.from_documents(q1_23_docs)\n", 104 | " q1_24_index = VectorStoreIndex.from_documents(q1_24_docs)\n", 105 | "\n", 106 | " # persist index\n", 107 | " q1_23_index.storage_context.persist(persist_dir=\"./storage/q1-23\")\n", 108 | " q1_24_index.storage_context.persist(persist_dir=\"./storage/q1-24\")" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 5, 114 | "id": "d2ceb92d-5cb3-415e-aa25-027b5d335ad5", 115 | "metadata": {}, 116 | "outputs": [], 117 | "source": [ 118 | "q1_23_engine = q1_23_index.as_query_engine(similarity_top_k=3)\n", 119 | "q1_24_engine = q1_24_index.as_query_engine(similarity_top_k=3)" 120 | ] 121 | }, 122 | { 123 | "cell_type": "code", 124 | "execution_count": 6, 125 | "id": "b1358879-1530-4b4e-9204-0ca2ade4ef56", 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "query_engine_tools = [\n", 130 | " QueryEngineTool(\n", 131 | " query_engine=q1_23_engine,\n", 132 | " metadata=ToolMetadata(\n", 133 | " name=\"goog-q1-23\",\n", 134 | " description=(\n", 135 | " \"Provides information about google financials for first quarter of year 2023. \"\n", 136 | " \"Use a detailed plain text question as input to the tool.\"\n", 137 | " ),\n", 138 | " ),\n", 139 | " ),\n", 140 | " QueryEngineTool(\n", 141 | " query_engine=q1_24_engine,\n", 142 | " metadata=ToolMetadata(\n", 143 | " name=\"goog_q1_24\",\n", 144 | " description=(\n", 145 | " \"Provides information about Google financials for first quarter of year 2024. \"\n", 146 | " \"Use a detailed plain text question as input to the tool.\"\n", 147 | " ),\n", 148 | " ),\n", 149 | " ),\n", 150 | "]" 151 | ] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "id": "a8ed9ff6-7195-4213-b8a1-38dc06c3b25d", 156 | "metadata": {}, 157 | "source": [ 158 | "## Setup Agents\n", 159 | "\n", 160 | "Now that we've defined the query tools, we can wrap these under a `ToolService`." 
161 | ] 162 | }, 163 | { 164 | "cell_type": "code", 165 | "execution_count": 7, 166 | "id": "9adc290c-1650-480f-8cb7-b81a9f1e1b76", 167 | "metadata": {}, 168 | "outputs": [], 169 | "source": [ 170 | "from llama_agents import (\n", 171 | " AgentService,\n", 172 | " ToolService,\n", 173 | " LocalLauncher,\n", 174 | " MetaServiceTool,\n", 175 | " ControlPlaneServer,\n", 176 | " SimpleMessageQueue,\n", 177 | " AgentOrchestrator,\n", 178 | ")\n", 179 | "\n", 180 | "from llama_index.core.agent import FunctionCallingAgentWorker\n", 181 | "from llama_index.llms.openai import OpenAI\n", 182 | "\n", 183 | "\n", 184 | "# create our multi-agent framework components\n", 185 | "message_queue = SimpleMessageQueue()\n", 186 | "control_plane = ControlPlaneServer(\n", 187 | " message_queue=message_queue,\n", 188 | " orchestrator=AgentOrchestrator(llm=OpenAI(model=\"gpt-4o\")),\n", 189 | ")\n", 190 | "\n", 191 | "# define Tool Service\n", 192 | "tool_service = ToolService(\n", 193 | " message_queue=message_queue,\n", 194 | " tools=query_engine_tools,\n", 195 | " running=True,\n", 196 | " step_interval=0.5,\n", 197 | ")\n", 198 | "\n", 199 | "# define meta-tools here\n", 200 | "meta_tools = [\n", 201 | " await MetaServiceTool.from_tool_service(\n", 202 | " t.metadata.name,\n", 203 | " message_queue=message_queue,\n", 204 | " tool_service=tool_service,\n", 205 | " )\n", 206 | " for t in query_engine_tools\n", 207 | "]\n", 208 | "\n", 209 | "\n", 210 | "# define Agent and agent service\n", 211 | "worker1 = FunctionCallingAgentWorker.from_tools(\n", 212 | " meta_tools,\n", 213 | " llm=OpenAI(),\n", 214 | ")\n", 215 | "agent1 = worker1.as_agent()\n", 216 | "agent_server_1 = AgentService(\n", 217 | " agent=agent1,\n", 218 | " message_queue=message_queue,\n", 219 | " description=\"Used to answer questions over Google for financial quarters of year 2023 and 2024 documents\",\n", 220 | " service_name=\"goog_q1_23_q1_24_analyst_agent\",\n", 221 | ")" 222 | ] 223 | }, 224 | { 225 | "cell_type": "markdown", 226 | "id": "a19df0c2-7abe-4563-a4d8-3a47c13b65d0", 227 | "metadata": {}, 228 | "source": [ 229 | "## Launch agent \n", 230 | "\n", 231 | "With our services, orchestrator, control plane, and message queue defined, we can test our llama-agents network by passing in single messages, and observing the results.\n", 232 | "\n", 233 | "This is an excellent way to test, iterate, and debug your llama-agents system." 
234 | ] 235 | }, 236 | { 237 | "cell_type": "code", 238 | "execution_count": 8, 239 | "id": "7780a9e7", 240 | "metadata": {}, 241 | "outputs": [], 242 | "source": [ 243 | "import logging\n", 244 | "\n", 245 | "# change logging level to enable or disable more verbose logging\n", 246 | "logging.getLogger(\"llama_agents\").setLevel(logging.INFO)" 247 | ] 248 | }, 249 | { 250 | "cell_type": "code", 251 | "execution_count": 9, 252 | "id": "7b41c86a-fd43-4f82-85ea-90f1208e810f", 253 | "metadata": {}, 254 | "outputs": [], 255 | "source": [ 256 | "## Define Launcher\n", 257 | "launcher = LocalLauncher(\n", 258 | " [agent_server_1, tool_service],\n", 259 | " control_plane,\n", 260 | " message_queue,\n", 261 | ")" 262 | ] 263 | }, 264 | { 265 | "cell_type": "code", 266 | "execution_count": 10, 267 | "id": "6ddf856b-762c-4608-ac02-c1d5fc75bc63", 268 | "metadata": {}, 269 | "outputs": [ 270 | { 271 | "name": "stderr", 272 | "output_type": "stream", 273 | "text": [ 274 | "INFO:llama_agents.message_queues.simple - Consumer AgentService-ff09d4da-2e19-482f-9a20-7f2ecd736d73: goog_q1_23_q1_24_analyst_agent has been registered.\n", 275 | "INFO:llama_agents.message_queues.simple - Consumer ToolService-983c62e5-79b7-4f1e-b93f-e9cbadb4fc68: default_tool_service has been registered.\n", 276 | "INFO:llama_agents.message_queues.simple - Consumer 1c7a7c0a-65cf-43b3-9fb8-6045bc9320f0: human has been registered.\n", 277 | "INFO:llama_agents.message_queues.simple - Consumer ControlPlaneServer-4d05cde9-f47c-44e3-b55b-6749eb4fe1fc: control_plane has been registered.\n", 278 | "INFO:llama_agents.services.agent - goog_q1_23_q1_24_analyst_agent launch_local\n", 279 | "INFO:llama_agents.message_queues.base - Publishing message to 'control_plane' with action 'new_task'\n", 280 | "INFO:llama_agents.message_queues.simple - Launching message queue locally\n", 281 | "INFO:llama_agents.message_queues.base - Publishing message to 'goog_q1_23_q1_24_analyst_agent' with action 'new_task'\n", 282 | "INFO:llama_agents.message_queues.simple - Successfully published message 'control_plane' to consumer.\n", 283 | "INFO:llama_agents.services.agent - Created new task: eea128e9-a85e-4059-8e6c-cca8273fc1a3\n", 284 | "INFO:llama_agents.message_queues.simple - Successfully published message 'goog_q1_23_q1_24_analyst_agent' to consumer.\n", 285 | "INFO:llama_agents.message_queues.simple - Consumer MetaServiceTool-75cdbf61-3928-48b4-afe0-3b760adeda6b: MetaServiceTool-75cdbf61-3928-48b4-afe0-3b760adeda6b has been registered.\n", 286 | "INFO:llama_agents.message_queues.base - Publishing message to 'default_tool_service' with action 'new_tool_call'\n", 287 | "INFO:llama_agents.message_queues.simple - Consumer MetaServiceTool-e0a339aa-534e-4b19-afd6-34c18266539d: MetaServiceTool-e0a339aa-534e-4b19-afd6-34c18266539d has been registered.\n", 288 | "INFO:llama_agents.message_queues.base - Publishing message to 'default_tool_service' with action 'new_tool_call'\n", 289 | "INFO:llama_agents.message_queues.simple - Successfully published message 'default_tool_service' to consumer.\n", 290 | "INFO:llama_agents.message_queues.simple - Successfully published message 'default_tool_service' to consumer.\n", 291 | "INFO:llama_agents.services.tool - Processing tool call id 3139a817-e1b5-4ab6-bbfc-9a09ecf20939 with goog-q1-23\n", 292 | "INFO:llama_agents.message_queues.base - Publishing message to 'MetaServiceTool-75cdbf61-3928-48b4-afe0-3b760adeda6b' with action 'completed_tool_call'\n", 293 | "INFO:llama_agents.services.tool - Processing tool call id 
5bc92f90-67f7-4a39-8a7f-fb036ca5d182 with goog_q1_24\n", 294 | "INFO:llama_agents.message_queues.simple - Successfully published message 'MetaServiceTool-75cdbf61-3928-48b4-afe0-3b760adeda6b' to consumer.\n", 295 | "INFO:llama_agents.message_queues.base - Publishing message to 'MetaServiceTool-e0a339aa-534e-4b19-afd6-34c18266539d' with action 'completed_tool_call'\n", 296 | "INFO:llama_agents.message_queues.simple - Successfully published message 'MetaServiceTool-e0a339aa-534e-4b19-afd6-34c18266539d' to consumer.\n", 297 | "INFO:llama_agents.message_queues.base - Publishing message to 'control_plane' with action 'completed_task'\n", 298 | "INFO:llama_agents.message_queues.base - Publishing message to 'human' with action 'completed_task'\n", 299 | "INFO:llama_agents.message_queues.simple - Successfully published message 'control_plane' to consumer.\n", 300 | "INFO:llama_agents.message_queues.simple - Successfully published message 'human' to consumer.\n" 301 | ] 302 | } 303 | ], 304 | "source": [ 305 | "# query_str = \"What was Lyft's revenue growth in 2021?\"\n", 306 | "# gets stuck in a loop, should mostly be called once\n", 307 | "query_str = \"What are the risk factors for Google?\"\n", 308 | "result = launcher.launch_single(query_str)" 309 | ] 310 | }, 311 | { 312 | "cell_type": "code", 313 | "execution_count": 11, 314 | "id": "b0b3688f-cad4-450e-81fa-037dd1190260", 315 | "metadata": {}, 316 | "outputs": [ 317 | { 318 | "name": "stdout", 319 | "output_type": "stream", 320 | "text": [ 321 | "The risk factors for Google in the first quarter of 2023 include fluctuations in revenues and margins, changes in monetization trends, variability in foreign exchange rates, fluctuations in capital expenditures, potential increases in expenses, uncertainties related to compensation expenses, fluctuations in other income (expense), variations in the effective tax rate, seasonal fluctuations in internet usage and advertiser expenditures, and exposure to regulatory scrutiny and legal proceedings.\n", 322 | "\n", 323 | "In the first quarter of 2024, the risk factors include fluctuations in revenues due to changes in foreign currency exchange rates, pricing adjustments, general economic conditions, geopolitical events, regulations, new product and service launches, seasonality, and other external dynamics impacting advertiser, consumer, and enterprise spending.\n" 324 | ] 325 | } 326 | ], 327 | "source": [ 328 | "print(result)" 329 | ] 330 | }, 331 | { 332 | "cell_type": "code", 333 | "execution_count": 12, 334 | "id": "7bbe1896", 335 | "metadata": {}, 336 | "outputs": [ 337 | { 338 | "name": "stderr", 339 | "output_type": "stream", 340 | "text": [ 341 | "INFO:llama_agents.message_queues.simple - Consumer AgentService-ff09d4da-2e19-482f-9a20-7f2ecd736d73: goog_q1_23_q1_24_analyst_agent has been registered.\n", 342 | "INFO:llama_agents.message_queues.simple - Consumer ToolService-983c62e5-79b7-4f1e-b93f-e9cbadb4fc68: default_tool_service has been registered.\n", 343 | "INFO:llama_agents.message_queues.simple - Consumer 2f1e87c7-2553-4903-b174-8ff1e8562d13: human has been registered.\n", 344 | "INFO:llama_agents.message_queues.simple - Consumer ControlPlaneServer-4d05cde9-f47c-44e3-b55b-6749eb4fe1fc: control_plane has been registered.\n", 345 | "INFO:llama_agents.services.agent - goog_q1_23_q1_24_analyst_agent launch_local\n", 346 | "INFO:llama_agents.message_queues.base - Publishing message to 'control_plane' with action 'new_task'\n", 347 | "INFO:llama_agents.message_queues.simple - Launching message 
queue locally\n", 348 | "INFO:llama_agents.message_queues.base - Publishing message to 'goog_q1_23_q1_24_analyst_agent' with action 'new_task'\n", 349 | "INFO:llama_agents.message_queues.simple - Successfully published message 'control_plane' to consumer.\n", 350 | "INFO:llama_agents.services.agent - Created new task: a77e14c9-156c-452e-be7b-3b65b6de89e9\n", 351 | "INFO:llama_agents.message_queues.simple - Successfully published message 'goog_q1_23_q1_24_analyst_agent' to consumer.\n", 352 | "INFO:llama_agents.message_queues.base - Publishing message to 'default_tool_service' with action 'new_tool_call'\n", 353 | "INFO:llama_agents.message_queues.simple - Successfully published message 'default_tool_service' to consumer.\n", 354 | "INFO:llama_agents.services.tool - Processing tool call id ca105ccb-b2d0-420c-bf1c-dd1aeba90567 with goog_q1_24\n", 355 | "INFO:llama_agents.message_queues.base - Publishing message to 'MetaServiceTool-e0a339aa-534e-4b19-afd6-34c18266539d' with action 'completed_tool_call'\n", 356 | "INFO:llama_agents.message_queues.simple - Successfully published message 'MetaServiceTool-e0a339aa-534e-4b19-afd6-34c18266539d' to consumer.\n", 357 | "INFO:llama_agents.message_queues.base - Publishing message to 'control_plane' with action 'completed_task'\n", 358 | "INFO:llama_agents.message_queues.base - Publishing message to 'human' with action 'completed_task'\n", 359 | "INFO:llama_agents.message_queues.simple - Successfully published message 'control_plane' to consumer.\n", 360 | "INFO:llama_agents.message_queues.simple - Successfully published message 'human' to consumer.\n" 361 | ] 362 | } 363 | ], 364 | "source": [ 365 | "query_str = \"What was Google's revenue growth in q1 2024?\"\n", 366 | "result = launcher.launch_single(query_str)" 367 | ] 368 | }, 369 | { 370 | "cell_type": "code", 371 | "execution_count": 13, 372 | "id": "2aa57e7f", 373 | "metadata": {}, 374 | "outputs": [ 375 | { 376 | "name": "stdout", 377 | "output_type": "stream", 378 | "text": [ 379 | "The revenue growth for Google in the first quarter of 2024 was $10,752 million.\n" 380 | ] 381 | } 382 | ], 383 | "source": [ 384 | "print(result)" 385 | ] 386 | } 387 | ], 388 | "metadata": { 389 | "kernelspec": { 390 | "display_name": "Python 3 (ipykernel)", 391 | "language": "python", 392 | "name": "python3" 393 | }, 394 | "language_info": { 395 | "codemirror_mode": { 396 | "name": "ipython", 397 | "version": 3 398 | }, 399 | "file_extension": ".py", 400 | "mimetype": "text/x-python", 401 | "name": "python", 402 | "nbconvert_exporter": "python", 403 | "pygments_lexer": "ipython3", 404 | "version": "3.10.4" 405 | } 406 | }, 407 | "nbformat": 4, 408 | "nbformat_minor": 5 409 | } 410 | -------------------------------------------------------------------------------- /llama_agents/data/GOOG-10-Q-Q1-2023.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayita13/GenerativeAI/67ab29100b303220558f76b56a3861e0a464f09a/llama_agents/data/GOOG-10-Q-Q1-2023.pdf -------------------------------------------------------------------------------- /llama_agents/data/goog-10-q-q1-2024.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayita13/GenerativeAI/67ab29100b303220558f76b56a3861e0a464f09a/llama_agents/data/goog-10-q-q1-2024.pdf -------------------------------------------------------------------------------- /llama_agents/requirements.txt: 
-------------------------------------------------------------------------------- 1 | llama-index 2 | llama-agents 3 | python-dotenv==1.0.1 -------------------------------------------------------------------------------- /llama_agents/storage/q1-23/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /llama_agents/storage/q1-23/image__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /llama_agents/storage/q1-23/index_store.json: -------------------------------------------------------------------------------- 1 | {"index_store/data": {"fe22930a-d268-4077-abc1-a445cd951a82": {"__type__": "vector_store", "__data__": "{\"index_id\": \"fe22930a-d268-4077-abc1-a445cd951a82\", \"summary\": null, \"nodes_dict\": {\"e0053705-fffe-4705-9a59-b817663f67d9\": \"e0053705-fffe-4705-9a59-b817663f67d9\", \"5413bae9-b64c-4c80-9846-c26707e2e7e2\": \"5413bae9-b64c-4c80-9846-c26707e2e7e2\", \"06ae52ae-3dd8-4e64-a452-72ada3d284f0\": \"06ae52ae-3dd8-4e64-a452-72ada3d284f0\", \"2fac8c0e-6064-4651-939c-89290b97d9eb\": \"2fac8c0e-6064-4651-939c-89290b97d9eb\", \"acb55f80-6070-42eb-aaea-579e6857f1d2\": \"acb55f80-6070-42eb-aaea-579e6857f1d2\", \"df0b7476-3d0b-47d7-915d-f050d0033d6d\": \"df0b7476-3d0b-47d7-915d-f050d0033d6d\", \"6d006719-7e3a-4657-bc43-57ccc3f0aa04\": \"6d006719-7e3a-4657-bc43-57ccc3f0aa04\", \"9b1cc144-7488-489d-aeae-826a4431d39e\": \"9b1cc144-7488-489d-aeae-826a4431d39e\", \"6b5786c8-c598-4f3c-9625-0ea3b74a4934\": \"6b5786c8-c598-4f3c-9625-0ea3b74a4934\", \"b5e1c7b2-ceb9-4a6d-b1f8-8a0912b4d9ac\": \"b5e1c7b2-ceb9-4a6d-b1f8-8a0912b4d9ac\", \"85067a83-6348-4381-bc4a-96b30beab8f2\": \"85067a83-6348-4381-bc4a-96b30beab8f2\", \"f9890635-29f5-418d-b5af-d19117e0d30a\": \"f9890635-29f5-418d-b5af-d19117e0d30a\", \"6ff9e3fa-26aa-4d13-9a2b-3056c0d64c52\": \"6ff9e3fa-26aa-4d13-9a2b-3056c0d64c52\", \"74beeb98-138a-45f6-b24b-a5ae269cc2b9\": \"74beeb98-138a-45f6-b24b-a5ae269cc2b9\", \"414f4d3d-d7ae-47bc-93e4-6e819da368a8\": \"414f4d3d-d7ae-47bc-93e4-6e819da368a8\", \"8b0672a3-9f85-4aa3-ae31-70f5e81e2ad4\": \"8b0672a3-9f85-4aa3-ae31-70f5e81e2ad4\", \"96c49d69-3b5d-4bcd-9d6f-3d638a3ab130\": \"96c49d69-3b5d-4bcd-9d6f-3d638a3ab130\", \"3c1c076c-efaa-446f-8a19-00cf7422e610\": \"3c1c076c-efaa-446f-8a19-00cf7422e610\", \"c7581d69-f3e9-4aa5-8727-51cbf70a672b\": \"c7581d69-f3e9-4aa5-8727-51cbf70a672b\", \"7ae52445-9632-4afc-a521-e505af6d9342\": \"7ae52445-9632-4afc-a521-e505af6d9342\", \"ef82364f-32e4-43e7-9c72-71c74bbb4110\": \"ef82364f-32e4-43e7-9c72-71c74bbb4110\", \"f4c30d14-0961-4fd8-a429-7b920c51dd40\": \"f4c30d14-0961-4fd8-a429-7b920c51dd40\", \"58564963-8a4a-40d9-945f-43328185d1a2\": \"58564963-8a4a-40d9-945f-43328185d1a2\", \"d07532e9-760b-4b74-b2de-d6e355d80835\": \"d07532e9-760b-4b74-b2de-d6e355d80835\", \"1aea8ab9-f895-47ce-acea-ef9856ec4690\": \"1aea8ab9-f895-47ce-acea-ef9856ec4690\", \"1f9ab07b-21e3-4fa6-8892-1f18e71222c9\": \"1f9ab07b-21e3-4fa6-8892-1f18e71222c9\", \"0c12b4da-c68e-4079-b348-f472cce369c0\": \"0c12b4da-c68e-4079-b348-f472cce369c0\", \"5ca148d7-c432-48e8-8576-982757905e2f\": \"5ca148d7-c432-48e8-8576-982757905e2f\", \"fb1aa257-a5e4-46a2-ac32-3780d0642b30\": \"fb1aa257-a5e4-46a2-ac32-3780d0642b30\", \"df7647ed-68fb-4f4b-8bcb-0ee47a0b18d4\": 
\"df7647ed-68fb-4f4b-8bcb-0ee47a0b18d4\", \"08e244c9-948b-4404-b1c3-f48ad1928f95\": \"08e244c9-948b-4404-b1c3-f48ad1928f95\", \"d6ce7157-cbf2-45d8-a89b-656b73dd7c13\": \"d6ce7157-cbf2-45d8-a89b-656b73dd7c13\", \"132d9882-4216-422d-924d-d40255d56f7d\": \"132d9882-4216-422d-924d-d40255d56f7d\", \"faccfa0f-f523-4580-9274-48be7a56897c\": \"faccfa0f-f523-4580-9274-48be7a56897c\", \"aff88f94-2d96-4e71-8253-f21bb9fcca44\": \"aff88f94-2d96-4e71-8253-f21bb9fcca44\", \"48c7e741-c84e-43ac-9aea-0c225c10ac43\": \"48c7e741-c84e-43ac-9aea-0c225c10ac43\", \"3fd3e44a-0993-449e-91c8-a975f0adeef7\": \"3fd3e44a-0993-449e-91c8-a975f0adeef7\", \"7f25eab4-ca78-42c7-918d-fa8034668590\": \"7f25eab4-ca78-42c7-918d-fa8034668590\", \"7b10ef1c-a67b-4063-aa0c-139712c1ed80\": \"7b10ef1c-a67b-4063-aa0c-139712c1ed80\", \"9c60f346-9cc8-44f8-9dfe-73cab3a6cc27\": \"9c60f346-9cc8-44f8-9dfe-73cab3a6cc27\", \"60cb0e71-fa22-46c3-91a1-31ea948824d1\": \"60cb0e71-fa22-46c3-91a1-31ea948824d1\", \"3fca73e0-e575-405a-b5b1-0010c6c36e16\": \"3fca73e0-e575-405a-b5b1-0010c6c36e16\", \"fdd2819b-9179-4287-9dcc-482b5ac4c0f2\": \"fdd2819b-9179-4287-9dcc-482b5ac4c0f2\", \"fd4481e9-c7f5-4a85-b85e-198eabcac5bd\": \"fd4481e9-c7f5-4a85-b85e-198eabcac5bd\", \"7ef46893-fa31-4684-ab01-e608a4d789d5\": \"7ef46893-fa31-4684-ab01-e608a4d789d5\", \"0e3b7d02-2d46-4f09-b041-47e5671b8493\": \"0e3b7d02-2d46-4f09-b041-47e5671b8493\", \"81548ee2-6631-4ff0-948c-9585617dc4b7\": \"81548ee2-6631-4ff0-948c-9585617dc4b7\", \"ce9e2c91-bd3d-4556-940b-aaa6de338ae2\": \"ce9e2c91-bd3d-4556-940b-aaa6de338ae2\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}} -------------------------------------------------------------------------------- /llama_agents/storage/q1-24/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /llama_agents/storage/q1-24/image__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /llama_agents/storage/q1-24/index_store.json: -------------------------------------------------------------------------------- 1 | {"index_store/data": {"beda17de-7d87-4531-8d15-ff69b184b349": {"__type__": "vector_store", "__data__": "{\"index_id\": \"beda17de-7d87-4531-8d15-ff69b184b349\", \"summary\": null, \"nodes_dict\": {\"b02e9eb4-3743-49c3-b7af-a85c0557ca87\": \"b02e9eb4-3743-49c3-b7af-a85c0557ca87\", \"29a2fa84-ec0c-40fa-8f75-f58fe5c15a6f\": \"29a2fa84-ec0c-40fa-8f75-f58fe5c15a6f\", \"6aa1ed59-7e2f-4414-9d6c-3f34cfa105fa\": \"6aa1ed59-7e2f-4414-9d6c-3f34cfa105fa\", \"52eac221-ea06-4a23-a2f1-5e4fb06b219f\": \"52eac221-ea06-4a23-a2f1-5e4fb06b219f\", \"cdaa6d19-33ea-478d-a8d0-8230c496f0ce\": \"cdaa6d19-33ea-478d-a8d0-8230c496f0ce\", \"371fb58e-ab12-465c-988b-202f5def84e6\": \"371fb58e-ab12-465c-988b-202f5def84e6\", \"0c8df7c3-c2d3-4b31-aa0f-9faa8b99d45c\": \"0c8df7c3-c2d3-4b31-aa0f-9faa8b99d45c\", \"658f61cc-5088-432f-aca4-14f07705eaf6\": \"658f61cc-5088-432f-aca4-14f07705eaf6\", \"0739285a-a9ab-45c0-912e-999003a75a8a\": \"0739285a-a9ab-45c0-912e-999003a75a8a\", \"eed31a19-2a24-4624-9e2c-a3e2b06ef630\": \"eed31a19-2a24-4624-9e2c-a3e2b06ef630\", \"37b5cdf3-193f-4c24-923c-9edd8deb2aa6\": \"37b5cdf3-193f-4c24-923c-9edd8deb2aa6\", \"523b3cd1-945f-4017-bed9-36d2d6935006\": \"523b3cd1-945f-4017-bed9-36d2d6935006\", 
\"582094b2-978a-4086-ab91-428a1f234b11\": \"582094b2-978a-4086-ab91-428a1f234b11\", \"586c845b-181f-4c2a-b0ea-3f6eeb228986\": \"586c845b-181f-4c2a-b0ea-3f6eeb228986\", \"db70278d-f874-4826-a1ab-8de24e46a9c4\": \"db70278d-f874-4826-a1ab-8de24e46a9c4\", \"0280d1e4-2f86-4f7e-aa8b-901ccb314e25\": \"0280d1e4-2f86-4f7e-aa8b-901ccb314e25\", \"42dd0f03-7ca5-4348-8d63-8701b8dae4f6\": \"42dd0f03-7ca5-4348-8d63-8701b8dae4f6\", \"0bb06455-b006-4468-8337-81733b5b6d4a\": \"0bb06455-b006-4468-8337-81733b5b6d4a\", \"2ec471e4-46f8-4c81-8465-ceb9d960580c\": \"2ec471e4-46f8-4c81-8465-ceb9d960580c\", \"06fa852d-afa1-4710-8471-390ea7224e76\": \"06fa852d-afa1-4710-8471-390ea7224e76\", \"2ee5743a-48cb-4664-a7db-9654af0a91e0\": \"2ee5743a-48cb-4664-a7db-9654af0a91e0\", \"00953615-5c4b-4805-9185-4d9520c6c078\": \"00953615-5c4b-4805-9185-4d9520c6c078\", \"8699425d-8629-4981-9ee5-b530429f1f73\": \"8699425d-8629-4981-9ee5-b530429f1f73\", \"a3e5a8dc-1730-4396-9957-5e693e716915\": \"a3e5a8dc-1730-4396-9957-5e693e716915\", \"9c822f57-5faa-4dc7-8a22-3ed579bab223\": \"9c822f57-5faa-4dc7-8a22-3ed579bab223\", \"52d7a1a3-2a29-4241-a444-4bb89336195e\": \"52d7a1a3-2a29-4241-a444-4bb89336195e\", \"d6e2ec3c-f896-4cbb-9f64-552d803ad0b0\": \"d6e2ec3c-f896-4cbb-9f64-552d803ad0b0\", \"8ba7bccf-9646-48c8-8fd4-8c5affaab8f7\": \"8ba7bccf-9646-48c8-8fd4-8c5affaab8f7\", \"47e61675-5f7c-4e17-ac85-933b5a1c0afd\": \"47e61675-5f7c-4e17-ac85-933b5a1c0afd\", \"d4cb3d55-87c1-4ea3-a1b7-4b61cb4b1951\": \"d4cb3d55-87c1-4ea3-a1b7-4b61cb4b1951\", \"59923964-c9ab-4e97-b916-8b7cdcc85575\": \"59923964-c9ab-4e97-b916-8b7cdcc85575\", \"85903d66-9387-4471-bc53-e8238c2d898c\": \"85903d66-9387-4471-bc53-e8238c2d898c\", \"5f1bbef9-1ca9-4d10-98ba-eb5fa81fa366\": \"5f1bbef9-1ca9-4d10-98ba-eb5fa81fa366\", \"487db1f7-1325-4ee9-8841-0321af3c1ff0\": \"487db1f7-1325-4ee9-8841-0321af3c1ff0\", \"8433a9c3-4fee-4ce9-962c-ddbdac72ace5\": \"8433a9c3-4fee-4ce9-962c-ddbdac72ace5\", \"1901e6c1-8c70-4e02-b12c-3626215fc683\": \"1901e6c1-8c70-4e02-b12c-3626215fc683\", \"5ce48a0c-e8eb-455f-b79a-d6affa780b45\": \"5ce48a0c-e8eb-455f-b79a-d6affa780b45\", \"84ef95bd-8950-47f7-a7e9-2e534f58e519\": \"84ef95bd-8950-47f7-a7e9-2e534f58e519\", \"d43a8c45-6184-4156-b78d-183828272505\": \"d43a8c45-6184-4156-b78d-183828272505\", \"e28c163a-71c9-4b42-afdc-1caf80717d9b\": \"e28c163a-71c9-4b42-afdc-1caf80717d9b\", \"ff4b51ee-47cd-4803-94b5-3c6131494b3f\": \"ff4b51ee-47cd-4803-94b5-3c6131494b3f\", \"848dd921-b074-49c9-954f-8a2b627cbc8e\": \"848dd921-b074-49c9-954f-8a2b627cbc8e\", \"16ca378c-4fca-473f-b5d2-5fd3327eced3\": \"16ca378c-4fca-473f-b5d2-5fd3327eced3\", \"44c628b6-937f-4039-9b6b-5bb8924d7a9b\": \"44c628b6-937f-4039-9b6b-5bb8924d7a9b\", \"ed5417a1-f196-4235-9cc6-cf2ad7729946\": \"ed5417a1-f196-4235-9cc6-cf2ad7729946\", \"78ec76a4-f3c1-41b3-84d5-5b3bc92fbfce\": \"78ec76a4-f3c1-41b3-84d5-5b3bc92fbfce\", \"17baa2bf-7111-4543-b83e-c2860e8f46f5\": \"17baa2bf-7111-4543-b83e-c2860e8f46f5\", \"5a1bdd8a-7195-4d1a-bf0b-54aa7b22baaf\": \"5a1bdd8a-7195-4d1a-bf0b-54aa7b22baaf\"}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}} -------------------------------------------------------------------------------- /llamaindex/.env.example: -------------------------------------------------------------------------------- 1 | GEMINI_API_KEY=XYZ 2 | GROQ_API_KEY=XYZ 3 | COHERE_API_KEY=XYZ -------------------------------------------------------------------------------- /llamaindex/README.md: -------------------------------------------------------------------------------- 1 | RAG use-case implementation using 
Llamaindex 2 | 3 | [Updated Colab notebook with Llama3](https://colab.research.google.com/drive/10A9OeLQUQyHXf4KciA-VfWwa9Unv9nh9?usp=sharing) 4 | 5 | Supercharge your RAG pipeline with the following: 6 | 7 | - Framework -> [Llamaindex](https://docs.llamaindex.ai/en/stable/index.html) 8 | - Loader -> [SimpleDirectoryLoader](https://docs.llamaindex.ai/en/stable/module_guides/loading/simpledirectoryreader.html) 9 | - Chunking -> [Semantic Chunking](https://docs.llamaindex.ai/en/stable/examples/node_parsers/semantic_chunking.html) 10 | - Embeddings -> [Gemini Embeddings](https://docs.llamaindex.ai/en/stable/examples/node_parsers/semantic_chunking.html) 11 | - Vector DB -> [Llamaindex VectorStoreIndex](https://docs.llamaindex.ai/en/stable/module_guides/indexing/vector_store_index.html) 12 | - LLM -> [Groq Mistral](https://docs.llamaindex.ai/en/stable/examples/llm/groq.html#groq) 13 | - Reranking -> [Cohere Rerank Model](https://docs.llamaindex.ai/en/stable/examples/node_postprocessor/CohereRerank.html) 14 | 15 | For index creation follow notebook file ```rag.ipynb``` 16 | 17 | To run application: 18 | 19 | ``` 20 | pip install -r requirements.txt 21 | chainlit run app.py 22 | ``` 23 | 24 | [Follow-up Medium blog/article](https://itsjb13.medium.com/building-a-rag-chatbot-using-llamaindex-groq-with-llama3-chainlit-b1709f770f55) 25 | 26 | [Hosted Huggingface App on Spaces](https://huggingface.co/spaces/itsJB/Finance_Knowledge_Bot) 27 | -------------------------------------------------------------------------------- /llamaindex/app.py: -------------------------------------------------------------------------------- 1 | from llama_index.core import StorageContext, ServiceContext, load_index_from_storage 2 | from llama_index.core.callbacks.base import CallbackManager 3 | from llama_index.embeddings.gemini import GeminiEmbedding 4 | from llama_index.llms.groq import Groq 5 | from llama_index.postprocessor.cohere_rerank import CohereRerank 6 | import os 7 | from dotenv import load_dotenv 8 | load_dotenv() 9 | import chainlit as cl 10 | 11 | GOOGLE_API_KEY = os.getenv("GEMINI_API_KEY") 12 | GROQ_API_KEY = os.getenv("GROQ_API_KEY") 13 | COHERE_API_KEY = os.getenv("COHERE_API_KEY") 14 | 15 | @cl.on_chat_start 16 | async def factory(): 17 | storage_context = StorageContext.from_defaults(persist_dir="./storage") 18 | 19 | embed_model = GeminiEmbedding( 20 | model_name="models/embedding-001", api_key=GOOGLE_API_KEY 21 | ) 22 | 23 | llm = Groq(model="mixtral-8x7b-32768", api_key=GROQ_API_KEY) 24 | 25 | service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm, 26 | callback_manager=CallbackManager([cl.LlamaIndexCallbackHandler()]), 27 | ) 28 | cohere_rerank = CohereRerank(api_key=COHERE_API_KEY, top_n=2) 29 | 30 | index = load_index_from_storage(storage_context, service_context=service_context) 31 | 32 | query_engine = index.as_query_engine( 33 | service_context=service_context, 34 | similarity_top_k=10, 35 | node_postprocessors=[cohere_rerank], 36 | # streaming=True, 37 | ) 38 | 39 | cl.user_session.set("query_engine", query_engine) 40 | 41 | @cl.on_message 42 | async def main(message: cl.Message): 43 | query_engine = cl.user_session.get("query_engine") 44 | response = await cl.make_async(query_engine.query)(message.content) 45 | 46 | response_message = cl.Message(content="") 47 | 48 | for token in response.response: 49 | await response_message.stream_token(token=token) 50 | 51 | # if response.response_txt: 52 | # response_message.content = response.response_txt 53 | 54 | await 
response_message.send() 55 | -------------------------------------------------------------------------------- /llamaindex/rag.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "c:\\Users\\jayit\\GenerativeAI\\llamaindex\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 13 | " from .autonotebook import tqdm as notebook_tqdm\n", 14 | "None of PyTorch, TensorFlow >= 2.0, or Flax have been found. Models won't be available and only tokenizers, configuration and file/data utilities can be used.\n" 15 | ] 16 | }, 17 | { 18 | "data": { 19 | "text/plain": [ 20 | "True" 21 | ] 22 | }, 23 | "execution_count": 1, 24 | "metadata": {}, 25 | "output_type": "execute_result" 26 | } 27 | ], 28 | "source": [ 29 | "from llama_index.core import (\n", 30 | " VectorStoreIndex, \n", 31 | " SimpleDirectoryReader, \n", 32 | " StorageContext, \n", 33 | " ServiceContext, \n", 34 | " load_index_from_storage\n", 35 | ")\n", 36 | "from llama_index.core.node_parser import SemanticSplitterNodeParser\n", 37 | "from llama_index.embeddings.gemini import GeminiEmbedding\n", 38 | "from llama_index.llms.groq import Groq\n", 39 | "from llama_index.postprocessor.cohere_rerank import CohereRerank\n", 40 | "import os\n", 41 | "from dotenv import load_dotenv\n", 42 | "load_dotenv()" 43 | ] 44 | }, 45 | { 46 | "cell_type": "code", 47 | "execution_count": 2, 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [ 51 | "GOOGLE_API_KEY = os.getenv(\"GEMINI_API_KEY\")\n", 52 | "GROQ_API_KEY = os.getenv(\"GROQ_API_KEY\")\n", 53 | "COHERE_API_KEY = os.getenv(\"COHERE_API_KEY\")" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 3, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "reader = SimpleDirectoryReader(input_dir=\"path/to/directory\")\n", 63 | "documents = reader.load_data()" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 4, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "embed_model = GeminiEmbedding(\n", 73 | " model_name=\"models/embedding-001\", api_key=GOOGLE_API_KEY\n", 74 | ")\n", 75 | "splitter = SemanticSplitterNodeParser(\n", 76 | " buffer_size=1, \n", 77 | " breakpoint_percentile_threshold=95, \n", 78 | " embed_model=embed_model\n", 79 | ")\n", 80 | "nodes = splitter.get_nodes_from_documents(documents, show_progress=True)" 81 | ] 82 | }, 83 | { 84 | "cell_type": "code", 85 | "execution_count": 5, 86 | "metadata": {}, 87 | "outputs": [], 88 | "source": [ 89 | "llm = Groq(model=\"mixtral-8x7b-32768\", api_key=GROQ_API_KEY)" 90 | ] 91 | }, 92 | { 93 | "cell_type": "code", 94 | "execution_count": null, 95 | "metadata": {}, 96 | "outputs": [], 97 | "source": [ 98 | "service_context = ServiceContext.from_defaults(embed_model=embed_model, llm=llm)" 99 | ] 100 | }, 101 | { 102 | "cell_type": "code", 103 | "execution_count": 7, 104 | "metadata": {}, 105 | "outputs": [], 106 | "source": [ 107 | "vector_index = VectorStoreIndex.from_documents(documents, show_progress=True, \n", 108 | " service_context=service_context, node_parser=nodes)" 109 | ] 110 | }, 111 | { 112 | "cell_type": "code", 113 | "execution_count": 8, 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | 
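"# persist the index to ./storage so app.py can reload it without re-embedding\n",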
"vector_index.storage_context.persist(persist_dir=\"./storage\")" 118 | ] 119 | }, 120 | { 121 | "cell_type": "code", 122 | "execution_count": 9, 123 | "metadata": {}, 124 | "outputs": [], 125 | "source": [ 126 | "storage_context = StorageContext.from_defaults(persist_dir=\"./storage\")" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 10, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "index = load_index_from_storage(storage_context, service_context=service_context)" 136 | ] 137 | }, 138 | { 139 | "cell_type": "code", 140 | "execution_count": 11, 141 | "metadata": {}, 142 | "outputs": [], 143 | "source": [ 144 | "cohere_rerank = CohereRerank(api_key=COHERE_API_KEY, top_n=2)" 145 | ] 146 | }, 147 | { 148 | "cell_type": "code", 149 | "execution_count": 12, 150 | "metadata": {}, 151 | "outputs": [], 152 | "source": [ 153 | "query_engine = index.as_query_engine(service_context=service_context,\n", 154 | " similarity_top_k=10,\n", 155 | " node_postprocessors=[cohere_rerank],)" 156 | ] 157 | }, 158 | { 159 | "cell_type": "code", 160 | "execution_count": 13, 161 | "metadata": {}, 162 | "outputs": [], 163 | "source": [ 164 | "query = \"What is difference between double top and double bottom pattern?\"\n", 165 | "resp = query_engine.query(query)" 166 | ] 167 | }, 168 | { 169 | "cell_type": "code", 170 | "execution_count": 14, 171 | "metadata": {}, 172 | "outputs": [ 173 | { 174 | "data": { 175 | "text/plain": [ 176 | "\"A double top pattern is a bearish technical reversal pattern that forms after an asset reaches a high price twice with a moderate decline between the two highs, and is confirmed when the asset's price falls below a support level equal to the low between the two prior highs. On the other hand, a double bottom pattern is a bullish reversal pattern that occurs at the bottom of a downtrend, signaling that the sellers are losing momentum and resembles the letter “W” due to the two-touched low and a change in the trend direction from a downtrend to an uptrend. In summary, the key difference lies in the direction of the trend change and the shape of the pattern.\"" 177 | ] 178 | }, 179 | "execution_count": 14, 180 | "metadata": {}, 181 | "output_type": "execute_result" 182 | } 183 | ], 184 | "source": [ 185 | "resp.response" 186 | ] 187 | }, 188 | { 189 | "cell_type": "markdown", 190 | "metadata": {}, 191 | "source": [ 192 | "\"A double top pattern is a bearish technical reversal pattern that forms after an asset reaches a high price twice with a moderate decline between the two highs, and is confirmed when the asset's price falls below a support level equal to the low between the two prior highs. On the other hand, a double bottom pattern is a bullish reversal pattern that occurs at the bottom of a downtrend, signaling that the sellers are losing momentum and resembles the letter “W” due to the two-touched low and a change in the trend direction from a downtrend to an uptrend. 
In summary, the key difference lies in the direction of the trend change and the shape of the pattern.\"" 193 | ] 194 | }, 195 | { 196 | "cell_type": "code", 197 | "execution_count": null, 198 | "metadata": {}, 199 | "outputs": [], 200 | "source": [] 201 | } 202 | ], 203 | "metadata": { 204 | "kernelspec": { 205 | "display_name": ".venv", 206 | "language": "python", 207 | "name": "python3" 208 | }, 209 | "language_info": { 210 | "codemirror_mode": { 211 | "name": "ipython", 212 | "version": 3 213 | }, 214 | "file_extension": ".py", 215 | "mimetype": "text/x-python", 216 | "name": "python", 217 | "nbconvert_exporter": "python", 218 | "pygments_lexer": "ipython3", 219 | "version": "3.12.1" 220 | } 221 | }, 222 | "nbformat": 4, 223 | "nbformat_minor": 2 224 | } 225 | -------------------------------------------------------------------------------- /llamaindex/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.10.18 2 | llama-index-llms-groq==0.1.3 3 | chainlit==1.0.401 4 | groq==0.4.2 5 | python-dotenv==1.0.1 6 | llama-index-embeddings-gemini==0.1.4 7 | llama-index-postprocessor-cohere-rerank==0.1.2 -------------------------------------------------------------------------------- /mlflow/README.md: -------------------------------------------------------------------------------- 1 | https://mlflow.org/docs/latest/getting-started/index.html 2 | -------------------------------------------------------------------------------- /neo4j/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=sk-.... 2 | NEO4J_URI=ABC 3 | NEO4J_USERNAME=ABC 4 | NEO4J_PASSWORD=ABC 5 | AURA_INSTANCEID=ABC 6 | AURA_INSTANCENAME=ABC 7 | -------------------------------------------------------------------------------- /neo4j/README.md: -------------------------------------------------------------------------------- 1 | [Llamaindex Neo4j Hybrid RAG Implementation](https://neo4j.com/labs/genai-ecosystem/llamaindex/) 2 | 3 | #### P.S. -> I've used Google VertexAI Gemini as LLM & Embeddings.
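A minimal sketch of that wiring with LlamaIndex (the model names are illustrative; `credentials.json` is the service-account key generated as described below):

```python
from google.oauth2 import service_account
from llama_index.core import Settings
from llama_index.embeddings.vertex import VertexTextEmbedding
from llama_index.llms.vertex import Vertex

# load the service-account key downloaded from the Google Cloud console
creds = service_account.Credentials.from_service_account_file("credentials.json")

Settings.llm = Vertex(
    model="gemini-pro", project=creds.project_id, credentials=creds
)
Settings.embed_model = VertexTextEmbedding(
    model_name="textembedding-gecko@003",
    project=creds.project_id,
    credentials=creds,
)
```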
4 | #### To generate credentials.json navigate to 5 | #### console.cloud.google.com -> IAM -> Service Account -> Generate key -> Credentials.json will be downloaded 6 | #### You can choose any other LLM & embedding model of your choice 7 | -------------------------------------------------------------------------------- /neo4j/data/Graph_Retrieval-Augmented_Generation_A_Survey.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayita13/GenerativeAI/67ab29100b303220558f76b56a3861e0a464f09a/neo4j/data/Graph_Retrieval-Augmented_Generation_A_Survey.pdf -------------------------------------------------------------------------------- /neo4j/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.11.5 2 | python-dotenv==1.0.1 3 | llama-index-llms-vertex==0.3.3 4 | llama-index-embeddings-vertex==0.2.0 5 | llama-index-graph-stores-neo4j==0.3.1 6 | neo4j==5.24.0 7 | llama-index-vector-stores-neo4jvector==0.2.1 8 | -------------------------------------------------------------------------------- /neo4j/storage_hybrid_gemini/docstore.json: -------------------------------------------------------------------------------- 1 | {"docstore/metadata": {"b85e8335-f9bb-4c3c-8440-e517ad1445bb": {"doc_hash": "872ea36e8ff28cdb8d10f29b62895b1c7ea823f1e60478a842b2ef3222dcb11f"}, "f1d1925a-2951-48fb-99ea-fbe9588b8e50": {"doc_hash": "0f0779720fd7011eb0d8be22feae2a8ae223b9fea640c58d68ae63ccd3d7fe09"}, "35871227-e990-4a07-a8f8-540d38122abb": {"doc_hash": "b9cec75bd5d9c0c1eb9bc80d3147f5f2743c0c69f58f970e2e201341f21b046a"}, "35a77c37-31e6-4f00-bd75-87a81ac35ae7": {"doc_hash": "a28bb8ef187f9c5c4eb5e4c200937eed6d2eb0424e6b22646f277b4ba0b0fc58"}, "33d62f5c-5371-4996-a7ad-97aade1c3e4a": {"doc_hash": "a15cf986b19e310afe59792d444959364c95698bb1081275bc345ff8822f10d4"}, "4c9b8a40-95c8-4a8f-aa8f-78558519919c": {"doc_hash": "885f9104fb70c9f8e55947b76e110ec51493e5240bae1f6298ec22d46810a349"}, "a7d5e51e-9490-4806-96f7-83d45fde728b": {"doc_hash": "0a06a15822faac35dd75bab091eaf9b69f440a403c2a087376e6f406e5f034dd"}, "99714e4e-d406-483e-b65a-c31d82c40ca7": {"doc_hash": "e5eff05b1788c33d597d52e013c061e6a2f05395f628856d6def91328942ee84"}, "f75fd598-b307-4c6d-a39e-03d1f0858d74": {"doc_hash": "962610726fb1f2204ef76fe32c08560213189b24cf648525f5aafeda6b833e93"}, "0ad0dead-a46e-476f-83e5-c29469e60b53": {"doc_hash": "ea92403656fc149fd07123720be7de336b5701e26b01eb0ee11eb7c352448aab"}, "4bf11da5-17fd-4c79-bf04-5eca0ea4f70f": {"doc_hash": "26054fa8c264c4fc8af66843c975211b900b35b3bc6d4140e1fac068312b8e95"}, "b9c7f8ca-da85-451c-9a5e-48edb8bacb72": {"doc_hash": "76e2354d049a88eafd4a4037ff04a1b85559992795ba2c0601de22fb85ad480c"}, "30e99422-38f9-413a-9270-e770ee287c57": {"doc_hash": "9d8b4005a8585b71ab4c12995bde745c62e295c7b950885d492a23b18f65f279"}, "f1dd1a6a-9ddc-4bb7-849d-849352949773": {"doc_hash": "0c15a0f8f75efe6f0e40d5b1567dcb923cb710bc7ff8aeaa252bbc63fd13302c"}, "b8d6bb01-3788-4456-9a37-25cd0fd58b34": {"doc_hash": "5d46563204ff076d617b01a87e990c24a74f960a0f258be9cab5ab60041c6076"}, "0e49dc61-0826-4991-aa34-55aff47ca76e": {"doc_hash": "70cb7b890b4ff2ba16bbbb832189d317b0275321e20ce00338a84aae19f934c7"}, "07eecae1-b490-49e2-8ac4-9e7fa07324c2": {"doc_hash": "b7fc6b41778c0709fa21d4636586a7ac074c47852a23136c32318db823b6fa3d"}, "fb2cbe59-e456-4b73-a868-d5d491c7cf1f": {"doc_hash": "e7fc2f51662efc5d76eb2e8f0b0416dd7d82c40761fbe89c14e82e8754c417a3"}, "beaa49d6-f54b-4914-a084-80db501a4244": {"doc_hash": 
"e708ba6070fe75ba632b5e3026bd1d188241f3c92f354cce0c292427c254525f"}, "c7f6329e-8f2c-4377-86be-b70590adcbd6": {"doc_hash": "8d6420336dbcbd7523649fe161dfb898866806288eb5b83497df89bf59939105"}, "be54717a-1a6f-423f-aa16-5c6aeb788ac4": {"doc_hash": "16582d19258d505b3a29a9c3c9dfa8bc4943d273f9ca23eadca69a72ddf38c49"}, "e7784086-6b44-4f1e-84cc-2e91b642f461": {"doc_hash": "20b80fa920a0a46ab961acd64d87cac463d7781493aa3f3f557852b16c5b728e"}, "38eef5c1-3821-466b-8335-a5d73005cba6": {"doc_hash": "603e99fef491c2726525e5e7f593db32851a37cdd7fd4689b14674428d7a04fa"}, "5a8296db-e069-4646-96f5-51a82c4fdb99": {"doc_hash": "63362b21ebf50bfbf04cab24b1ea2e51e57a544a356b2e0e3bf2b835aad498ad"}, "fbee59b2-5b67-4777-9ea9-db8aececde99": {"doc_hash": "a2c3bb4852cc7aa6354cd6d8fbd89dbddffbf3ba44b2b48cc3622f61f2c043c7"}, "ec32dff5-d540-47de-82b4-9985276bcd22": {"doc_hash": "ea7b3a1aff1a481b3fed6b43facef8752e9a8b88dfad8ffe21f9f579120222bf"}, "949763ac-5f99-4bf3-b05a-9246c87a4f84": {"doc_hash": "5615844b3e31f852ad8ce07f3e1184b68cce022a3a37bf1eea05702b818ab820"}, "f0709d31-7de3-4fcd-8c6e-a9235bc247ff": {"doc_hash": "c86c31646fbbaa2b3c656f0d1bb3f731d914e1aa080658a881a99f6047d5fdb5"}, "a039ac2e-a1df-4980-af2b-63a8ffb0601d": {"doc_hash": "1c14a7b4b5f485751ab3cea8cf21856691f38c30aff101f9a23d90598108979e"}, "194a0057-8749-48ed-80e4-b3debd48ff7f": {"doc_hash": "d95cbd5a0ae17ef03c4f840e2dae8668e93516a070073a31958e37fe9049249d"}, "b3ced31d-096e-4093-8a92-e036e2a2bc48": {"doc_hash": "b1f264257250b470211c5a15a7f76b441014613e6e64a3574c7bed3529a3315e"}, "97a77f39-b167-464f-a85e-07d08f42194f": {"doc_hash": "9dc2ac1c6a3b749854d08953bfd6693eb969e345e545f1c7723df77005dbdd53"}, "1d7850dd-0410-4cb2-97b8-84289c132f54": {"doc_hash": "6590ebfed2322c9b8adefbfceb9bdd3aa77edae135eba0686919851ed60c238d"}, "c1862509-9174-4e51-8004-7c7851021adc": {"doc_hash": "e1a341c57c2a37a70af980810de456e606fe25257dce24e844e41d2558c74bd1"}, "2afdcf3e-2a29-4dfa-b5d1-0397c8281f2d": {"doc_hash": "1ade24486317c6e01ac6e7eaa85efa7f03fd41a3d81e7f5e01ae839cdf958694"}, "b23abb3e-12e8-4251-970a-308bfddb72da": {"doc_hash": "9cac4a5253a3a6c62b6d0e4a81378ed9ed16ffe5bb04f2ec6c134034845b4c8c"}, "b10f69b4-590d-4d6e-9fe8-dc3840a42ec2": {"doc_hash": "463d7dc2d713034c733b9b2c01774f933b2e64698105009e6f15c1441c2c8f16"}, "6967cc46-6434-4410-a337-f186e90569f9": {"doc_hash": "00eface5c412b53a4f80807fde1257e1a2a9b4522bc6a059f43589d2c686f6da"}, "eaa29b4e-fcc1-4303-845b-bdf5837d595f": {"doc_hash": "bb03bc0813148d8df72991060c11f504aa83fbd8ba8ba04840615cf88c64efc9"}, "fda68c16-cb79-43f4-972c-4c28cdb6c10e": {"doc_hash": "c36e09f26729da31fdc1eda92cf629fe115e55fecf112ce29ee00c2713b398a2"}}} -------------------------------------------------------------------------------- /neo4j/storage_hybrid_gemini/graph_store.json: -------------------------------------------------------------------------------- 1 | {"graph_dict": {}} -------------------------------------------------------------------------------- /neo4j/storage_hybrid_gemini/image__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /neo4j/storage_hybrid_gemini/index_store.json: -------------------------------------------------------------------------------- 1 | {"index_store/data": {"5c7b62a5-ce23-44c4-97d3-5762db6babcf": {"__type__": "vector_store", "__data__": "{\"index_id\": \"5c7b62a5-ce23-44c4-97d3-5762db6babcf\", \"summary\": null, 
\"nodes_dict\": {}, \"doc_id_dict\": {}, \"embeddings_dict\": {}}"}}} -------------------------------------------------------------------------------- /neo4j/vector_graph_rag.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from llama_index.core import SimpleDirectoryReader\n", 10 | "from dotenv import load_dotenv\n", 11 | "load_dotenv()\n", 12 | "from llama_index.core import Settings\n", 13 | "from IPython.display import Markdown, display\n", 14 | "from llama_index.core import StorageContext, load_index_from_storage\n", 15 | "from google.oauth2 import service_account\n", 16 | "from llama_index.llms.vertex import Vertex\n", 17 | "from llama_index.embeddings.vertex import VertexTextEmbedding\n", 18 | "import nest_asyncio\n", 19 | "nest_asyncio.apply()" 20 | ] 21 | }, 22 | { 23 | "cell_type": "code", 24 | "execution_count": 2, 25 | "metadata": {}, 26 | "outputs": [], 27 | "source": [ 28 | "from llama_index.vector_stores.neo4jvector import Neo4jVectorStore\n", 29 | "from llama_index.core import VectorStoreIndex\n", 30 | "import os\n", 31 | "\n", 32 | "username = os.getenv(\"NEO4J_USERNAME\")\n", 33 | "password = os.getenv(\"NEO4J_PASSWORD\")\n", 34 | "url = os.getenv(\"NEO4J_URI\")" 35 | ] 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 3, 40 | "metadata": {}, 41 | "outputs": [], 42 | "source": [ 43 | "documents = SimpleDirectoryReader(\n", 44 | " input_files=[\"data/Graph_Retrieval-Augmented_Generation_A_Survey.pdf\"]\n", 45 | ").load_data()" 46 | ] 47 | }, 48 | { 49 | "cell_type": "code", 50 | "execution_count": 4, 51 | "metadata": {}, 52 | "outputs": [], 53 | "source": [ 54 | "filename = \"credentials.json\"\n", 55 | "credentials: service_account.Credentials = (\n", 56 | " service_account.Credentials.from_service_account_file(filename)\n", 57 | ")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "code", 62 | "execution_count": 5, 63 | "metadata": {}, 64 | "outputs": [], 65 | "source": [ 66 | "llm = Vertex(\n", 67 | " model=\"gemini-pro\", \n", 68 | " project=credentials.project_id, credentials=credentials,\n", 69 | " max_tokens=8000\n", 70 | " )\n", 71 | " \n", 72 | "Settings.llm = llm\n", 73 | "Settings.chunk_size = 512" 74 | ] 75 | }, 76 | { 77 | "cell_type": "code", 78 | "execution_count": 6, 79 | "metadata": {}, 80 | "outputs": [], 81 | "source": [ 82 | "embed_model = VertexTextEmbedding(\n", 83 | " model_name=\"textembedding-gecko@003\",\n", 84 | " project=credentials.project_id, credentials=credentials\n", 85 | ")\n", 86 | "\n", 87 | "Settings.embed_model = embed_model" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 7, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stderr", 97 | "output_type": "stream", 98 | "text": [ 99 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: CALL subquery without a variable scope clause is now deprecated. Use CALL (row) { ... 
}} {position: line: 1, column: 21, offset: 20} for query: \"UNWIND $data AS row CALL { WITH row MERGE (c:`Chunk` {id: row.id}) WITH c, row CALL db.create.setVectorProperty(c, 'embedding', row.embedding) YIELD node SET c.`text` = row.text SET c += row.metadata } IN TRANSACTIONS OF 1000 ROWS\"\n", 100 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The query used a deprecated procedure. ('db.create.setVectorProperty' has been replaced by 'db.create.setNodeVectorProperty')} {position: line: 1, column: 80, offset: 79} for query: \"UNWIND $data AS row CALL { WITH row MERGE (c:`Chunk` {id: row.id}) WITH c, row CALL db.create.setVectorProperty(c, 'embedding', row.embedding) YIELD node SET c.`text` = row.text SET c += row.metadata } IN TRANSACTIONS OF 1000 ROWS\"\n" 101 | ] 102 | } 103 | ], 104 | "source": [ 105 | "# Build a hybrid (vector + full-text keyword) index over the documents in Neo4j\n", 106 | "embed_dim = 768\n", 107 | "\n", 108 | "neo4j_vector_hybrid = Neo4jVectorStore(\n", 109 | " username, password, url, embed_dim, hybrid_search=True\n", 110 | ")\n", 111 | "\n", 112 | "storage_context = StorageContext.from_defaults(\n", 113 | " vector_store=neo4j_vector_hybrid\n", 114 | ")\n", 115 | "\n", 116 | "index = VectorStoreIndex.from_documents(\n", 117 | " documents, storage_context=storage_context\n", 118 | ")" 119 | ] 120 | }, 121 | { 122 | "cell_type": "code", 123 | "execution_count": 8, 124 | "metadata": {}, 125 | "outputs": [], 126 | "source": [ 127 | "index.storage_context.persist(persist_dir=\"./storage_hybrid_gemini\")\n", 128 | "index = load_index_from_storage(storage_context)" 129 | ] 130 | }, 131 | { 132 | "cell_type": "code", 133 | "execution_count": 9, 134 | "metadata": {}, 135 | "outputs": [ 136 | { 137 | "name": "stderr", 138 | "output_type": "stream", 139 | "text": [ 140 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: CALL subquery without a variable scope clause is now deprecated. Use CALL () { ... }} {position: line: 1, column: 1, offset: 0} for query: 'CALL { CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score WITH collect({node:node, score:score}) AS nodes, max(score) AS max UNWIND nodes AS n RETURN n.node AS node, (n.score / max) AS score UNION CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) YIELD node, score WITH collect({node:node, score:score}) AS nodes, max(score) AS max UNWIND nodes AS n RETURN n.node AS node, (n.score / max) AS score } WITH node, max(score) AS score ORDER BY score DESC LIMIT $k RETURN node.`text` AS text, score, node.id AS id, node {.*, `text`: Null, `embedding`: Null, id: Null } AS metadata'\n" 141 | ] 142 | }, 143 | { 144 | "name": "stdout", 145 | "output_type": "stream", 146 | "text": [ 147 | "Graph-Based Indexing (G-Indexing). Graph-Based Indexing constitutes the initial phase of\n", 148 | "GraphRAG, aimed at identifying or constructing a graph database G that aligns with downstream\n", 149 | "tasks and establishing indices on it. 
The graph database can originate from public knowledge\n", 150 | "graphs [2, 7, 91, 131, 138, 151], graph data [112], or be constructed based on proprietary data\n", 151 | "sources such as textual [25, 43, 80, 160] or other forms of data [169]. The indexing process\n", 152 | "typically includes mapping node and edge properties, establishing pointers between connected nodes,\n", 153 | "and organizing data to support fast traversal and retrieval operations. Indexing determines the\n", 154 | "granularity of the subsequent retrieval stage, playing a crucial role in enhancing query efficiency.\n", 155 | "Graph-Guided Retrieval (G-Retrieval). Following graph-based indexing, the graph-guided retrieval\n", 156 | "phase focuses on extracting pertinent information from the graph database in response to user\n", 157 | "queries or input. Specifically, given a user query 𝑞, which is expressed in natural language, the\n", 158 | "retrieval stage aims to extract the most relevant elements (e.g., entities, triplets, paths, subgraphs)\n", 159 | "from knowledge graphs, which can be formulated as\n", 160 | "𝐺∗ = G-Retriever(𝑞, G)\n", 161 | "   = arg max_{𝐺 ⊆ R(G)} 𝑝𝜃(𝐺 | 𝑞, G)\n", 162 | "   = arg max_{𝐺 ⊆ R(G)} Sim(𝑞, 𝐺),    (5)\n", 163 | "where 𝐺∗ is the optimal retrieved graph elements and Sim(·, ·) is a function that\n", 164 | "measures the semantic similarity between user queries and the graph data. R(·)\n", 165 | "represents a function to narrow down the search range of subgraphs, considering\n", 166 | "the efficiency.\n", 167 | "\n", 168 | "Graph-Enhanced Generation (G-Generation). The graph-enhanced generation phase involves\n", 169 | "synthesizing meaningful outputs or responses based on the retrieved graph data. This could\n", 170 | "encompass answering user queries, generating reports, etc.\n", 171 | "2020. What Disease does this Patient Have? A Large-scale Open Domain Question Answering\n", 172 | "Dataset from Medical Exams.\n", 173 | "arXiv:2009.13081 [cs.CL] https://arxiv.org/abs/2009.13081\n", 174 | "[68] Mandar Joshi, Eunsol Choi, Daniel S. Weld, and Luke Zettlemoyer. 2017. TriviaQA: A Large Scale Distantly Supervised\n", 175 | "Challenge Dataset for Reading Comprehension. In Proceedings of the 55th Annual Meeting of the Association for\n", 176 | "Computational Linguistics, ACL 2017, Vancouver, Canada, July 30 - August 4, Volume 1: Long Papers. 1601–1611.\n", 177 | "[69] Vladimir Karpukhin, Barlas Oguz, Sewon Min, Patrick S. H. Lewis, Ledell Wu, Sergey Edunov, Danqi Chen, and Wen-\n", 178 | "tau Yih. 2020. Dense Passage Retrieval for Open-Domain Question Answering. In Proceedings of the 2020 Conference\n", 179 | "on Empirical Methods in Natural Language Processing, EMNLP 2020, Online, November 16-20, 2020. 
6769–6781.\n" 180 | ] 181 | } 182 | ], 183 | "source": [ 184 | "# Define retriever\n", 185 | "retriever = index.as_retriever(\n", 186 | " include_text=False, # include source text in returned nodes, default True\n", 187 | ")\n", 188 | "results = retriever.retrieve(\"What is Graph Retrieval-Augmented Generation?\")\n", 189 | "for record in results:\n", 190 | " print(record.text)" 191 | ] 192 | }, 193 | { 194 | "cell_type": "code", 195 | "execution_count": 10, 196 | "metadata": {}, 197 | "outputs": [ 198 | { 199 | "name": "stderr", 200 | "output_type": "stream", 201 | "text": [ 202 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: CALL subquery without a variable scope clause is now deprecated. Use CALL () { ... }} {position: line: 1, column: 1, offset: 0} for query: 'CALL { CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score WITH collect({node:node, score:score}) AS nodes, max(score) AS max UNWIND nodes AS n RETURN n.node AS node, (n.score / max) AS score UNION CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) YIELD node, score WITH collect({node:node, score:score}) AS nodes, max(score) AS max UNWIND nodes AS n RETURN n.node AS node, (n.score / max) AS score } WITH node, max(score) AS score ORDER BY score DESC LIMIT $k RETURN node.`text` AS text, score, node.id AS id, node {.*, `text`: Null, `embedding`: Null, id: Null } AS metadata'\n" 203 | ] 204 | }, 205 | { 206 | "data": { 207 | "text/markdown": [ 208 | "## Graph Retrieval-Augmented Generation (GraphRAG)\n", 209 | "\n", 210 | "Graph Retrieval-Augmented Generation (GraphRAG) is a framework that combines graph-based indexing, retrieval, and generation to process information and generate responses. It consists of three main phases:\n", 211 | "\n", 212 | "**1. Graph-Based Indexing (G-Indexing):**\n", 213 | "\n", 214 | "* This phase focuses on identifying or constructing a graph database (G) that aligns with the downstream tasks.\n", 215 | "* The graph database can be sourced from public knowledge graphs, graph data, or constructed from proprietary data sources like textual or other forms of data.\n", 216 | "* The indexing process involves mapping node and edge properties, establishing pointers between connected nodes, and organizing data for efficient traversal and retrieval.\n", 217 | "* This phase determines the granularity of the subsequent retrieval stage and plays a crucial role in enhancing query efficiency.\n", 218 | "\n", 219 | "**2. Graph-Guided Retrieval (G-Retrieval):**\n", 220 | "\n", 221 | "* This phase focuses on extracting relevant information from the graph database in response to user queries or input.\n", 222 | "* Given a user query (q) expressed in natural language, the retrieval stage aims to extract the most relevant elements (e.g., entities, triplets, paths, subgraphs) from the knowledge graph.\n", 223 | "* This can be formulated as finding the optimal retrieved graph elements (G*) that maximize the semantic similarity between the user query and the graph data.\n", 224 | "* The search range of subgraphs is narrowed down using a function R(·) to ensure efficiency.\n", 225 | "\n", 226 | "**3. 
Graph-Enhanced Generation (G-Generation):**\n", 227 | "\n", 228 | "* This phase involves synthesizing meaningful outputs or responses based on the retrieved graph data.\n", 229 | "* This could encompass answering user queries, generating reports, etc.\n", 230 | "\n", 231 | "Overall, GraphRAG leverages the power of graph databases to enhance information retrieval and generation tasks, leading to more accurate and efficient results." 232 | ], 233 | "text/plain": [ 234 | "<IPython.core.display.Markdown object>" 235 | ] 236 | }, 237 | "metadata": {}, 238 | "output_type": "display_data" 239 | } 240 | ], 241 | "source": [ 242 | "# Question answering\n", 243 | "query_engine = index.as_query_engine(include_text=True)\n", 244 | "response = query_engine.query(\"What is Graph Retrieval-Augmented Generation?\")\n", 245 | "display(Markdown(f\"{response}\"))" 246 | ] 247 | }, 248 | { 249 | "cell_type": "code", 250 | "execution_count": 11, 251 | "metadata": {}, 252 | "outputs": [ 253 | { 254 | "name": "stderr", 255 | "output_type": "stream", 256 | "text": [ 257 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: CALL subquery without a variable scope clause is now deprecated. Use CALL () { ... }} {position: line: 1, column: 1, offset: 0} for query: 'CALL { CALL db.index.vector.queryNodes($index, $k, $embedding) YIELD node, score WITH collect({node:node, score:score}) AS nodes, max(score) AS max UNWIND nodes AS n RETURN n.node AS node, (n.score / max) AS score UNION CALL db.index.fulltext.queryNodes($keyword_index, $query, {limit: $k}) YIELD node, score WITH collect({node:node, score:score}) AS nodes, max(score) AS max UNWIND nodes AS n RETURN n.node AS node, (n.score / max) AS score } WITH node, max(score) AS score ORDER BY score DESC LIMIT $k RETURN node.`text` AS text, score, node.id AS id, node {.*, `text`: Null, `embedding`: Null, id: Null } AS metadata'\n" 258 | ] 259 | }, 260 | { 261 | "data": { 262 | "text/markdown": [ 263 | "## Query-Focused Summarization (QFS)\n", 264 | "\n", 265 | "While the provided context doesn't directly address QFS techniques, it does mention Graph Retrieval-Augmented Generation (GraphRAG) as a solution for tasks like QFS. 
\n", 266 | "\n", 267 | "Here's what we can glean about QFS from the context:\n", 268 | "\n", 269 | "**Challenges:**\n", 270 | "\n", 271 | "* Traditional RAG models struggle with QFS due to limitations in retrieving relevant information.\n", 272 | "* They often rely on a subset of documents and fail to capture global information comprehensively.\n", 273 | "\n", 274 | "**GraphRAG as a solution:**\n", 275 | "\n", 276 | "* GraphRAG retrieves graph elements containing relational knowledge pertinent to a given query from a pre-constructed graph database.\n", 277 | "* This allows for a more accurate and comprehensive retrieval of information, including interconnections between texts.\n", 278 | "* Graph data offers abstraction and summarization of textual data, mitigating concerns of verbosity.\n", 279 | "* By retrieving subgraphs or graph communities, GraphRAG can access comprehensive information to effectively address the QFS challenge.\n", 280 | "\n", 281 | "**Further research:**\n", 282 | "\n", 283 | "The context highlights the need for further research in GraphRAG, particularly in areas like:\n", 284 | "\n", 285 | "* **Graph-Based Indexing (G-Indexing):** Optimizing methods for indexing and storing graph data for efficient retrieval.\n", 286 | "* **Graph-Guided Retrieval (G-Retrieval):** Developing advanced techniques for retrieving relevant graph elements based on the query and context.\n", 287 | "* **Graph-Enhanced Generation (G-Generation):** Utilizing retrieved graph information to generate summaries that are both informative and focused on the query.\n", 288 | "\n", 289 | "**Additional resources:**\n", 290 | "\n", 291 | "While the context doesn't provide specific QFS techniques, it offers valuable insights into the potential of GraphRAG for this task. \n", 292 | "\n", 293 | "For further information on QFS, you can explore the following resources:\n", 294 | "\n", 295 | "* **A Survey on Complex Knowledge Base Question Answering: Methods, Challenges and Solutions:** This paper provides a comprehensive overview of QFS methods, including those based on graph neural networks.\n", 296 | "* **Query Graph Generation for Answering Multi-hop Complex Questions from Knowledge Bases:** This paper explores the use of query graphs for QFS in the context of knowledge base question answering.\n", 297 | "* **Natural Questions: a Benchmark for Question Answering Research:** This paper introduces the Natural Questions dataset, a large-scale benchmark for QFS and other question answering tasks.\n", 298 | "\n", 299 | "By combining the insights from the provided context with these additional resources, you can gain a deeper understanding of QFS and explore the potential of GraphRAG for this task." 
300 | ], 301 | "text/plain": [ 302 | "<IPython.core.display.Markdown object>" 303 | ] 304 | }, 305 | "metadata": {}, 306 | "output_type": "display_data" 307 | } 308 | ], 309 | "source": [ 310 | "response = query_engine.query(\"How to do Query-Focused Summarization (QFS)?\")\n", 311 | "display(Markdown(f\"{response}\"))" 312 | ] 313 | } 314 | ], 315 | "metadata": { 316 | "kernelspec": { 317 | "display_name": "venv", 318 | "language": "python", 319 | "name": "python3" 320 | }, 321 | "language_info": { 322 | "codemirror_mode": { 323 | "name": "ipython", 324 | "version": 3 325 | }, 326 | "file_extension": ".py", 327 | "mimetype": "text/x-python", 328 | "name": "python", 329 | "nbconvert_exporter": "python", 330 | "pygments_lexer": "ipython3", 331 | "version": "3.10.4" 332 | } 333 | }, 334 | "nbformat": 4, 335 | "nbformat_minor": 2 336 | } 337 | -------------------------------------------------------------------------------- /pinecone+langchain/.env.example: -------------------------------------------------------------------------------- 1 | PINECONE_API_KEY=ABC 2 | PINECONE_ENV=ABC 3 | OPENAI_API_KEY=ABC 4 | COHERE_API_KEY=ABC 5 | -------------------------------------------------------------------------------- /pinecone+langchain/README.md: -------------------------------------------------------------------------------- 1 | PDF Question Answering bot 2 | -------------------------------------------------------------------------------- /pinecone+langchain/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.0.281 2 | pinecone-client==2.2.2 3 | cohere==4.22 4 | pypdf==3.15.5 5 | python-dotenv==1.0.0 6 | chromadb==0.4.9 7 | python-docx==0.8.11 8 | transformers==4.33.1 9 | gradio==3.44.4 10 | Pillow==10.0.0 11 | PyMuPDF==1.22.5 12 | beautifulsoup4==4.12.2 13 | requests==2.31.0 14 | selenium==4.14.0 15 | openai==0.28.1 16 | tiktoken==0.5.1 17 | pandas==2.1.4 18 | openpyxl==3.1.2 -------------------------------------------------------------------------------- /pinecone+langchain/rerank+llm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 11, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "import time\n", 10 | "start = time.time()" 11 | ] 12 | }, 13 | { 14 | "cell_type": "code", 15 | "execution_count": 12, 16 | "metadata": {}, 17 | "outputs": [], 18 | "source": [ 19 | "from langchain.vectorstores import Pinecone\n", 20 | "import pinecone\n", 21 | "from langchain.chat_models import ChatOpenAI\n", 22 | "from langchain.embeddings import OpenAIEmbeddings\n", 23 | "from langchain.chains import RetrievalQA\n", 24 | "from langchain.retrievers import ContextualCompressionRetriever\n", 25 | "from langchain.retrievers.document_compressors import CohereRerank\n", 26 | "from dotenv import load_dotenv\n", 27 | "import os\n", 28 | "load_dotenv()\n", 29 | "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n", 30 | "PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')\n", 31 | "PINECONE_ENV = os.getenv('PINECONE_ENV')\n", 32 | "\n", 33 | "# Initializing Pinecone Vector DB\n", 34 | "pinecone.init(\n", 35 | " api_key=PINECONE_API_KEY,\n", 36 | " environment=PINECONE_ENV\n", 37 | ")\n", 38 | "\n", 39 | "# Pinecone Vector DB index name\n", 40 | "# index_name = 'it-case-files'\n", 41 | "index_name = 'slp'\n", 42 | "index = pinecone.Index(index_name)\n", 43 | " " 44 | ] 45 | }, 46 | { 47 | "cell_type": "code", 48 | "execution_count": 13, 49 | "metadata": {}, 50 | "outputs": [], 51 | 
"source": [ 52 | "embeddings = OpenAIEmbeddings()\n", 53 | "docsearch = Pinecone.from_existing_index(index_name, embeddings)" 54 | ] 55 | }, 56 | { 57 | "cell_type": "code", 58 | "execution_count": 14, 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [ 62 | "compressor = CohereRerank(top_n=5)\n", 63 | "compression_retriever = ContextualCompressionRetriever(\n", 64 | " base_compressor=compressor, base_retriever=docsearch.as_retriever()\n", 65 | ")" 66 | ] 67 | }, 68 | { 69 | "cell_type": "code", 70 | "execution_count": 15, 71 | "metadata": {}, 72 | "outputs": [], 73 | "source": [ 74 | "def pretty_print_docs(docs):\n", 75 | " print(\n", 76 | " f\"\\n{'-' * 100}\\n\".join(\n", 77 | " [f\"Document {i+1}:\\n\" + d.page_content + \"\\n\" +str(d.metadata) for i, d in enumerate(docs)]\n", 78 | " )\n", 79 | " )" 80 | ] 81 | }, 82 | { 83 | "cell_type": "code", 84 | "execution_count": 16, 85 | "metadata": {}, 86 | "outputs": [], 87 | "source": [ 88 | "# completion llm\n", 89 | "llm = ChatOpenAI(\n", 90 | " openai_api_key=OPENAI_API_KEY,\n", 91 | " model_name='gpt-4',\n", 92 | " temperature=0.5\n", 93 | ")" 94 | ] 95 | }, 96 | { 97 | "cell_type": "code", 98 | "execution_count": 17, 99 | "metadata": {}, 100 | "outputs": [], 101 | "source": [ 102 | "query = \"What will be the consequences of not deducting/depositing TDS?\"" 103 | ] 104 | }, 105 | { 106 | "cell_type": "code", 107 | "execution_count": null, 108 | "metadata": {}, 109 | "outputs": [], 110 | "source": [ 111 | "compressed_docs = compression_retriever.get_relevant_documents(query)\n", 112 | "pretty_print_docs(compressed_docs)" 113 | ] 114 | }, 115 | { 116 | "cell_type": "code", 117 | "execution_count": 19, 118 | "metadata": {}, 119 | "outputs": [], 120 | "source": [ 121 | "qa = RetrievalQA.from_chain_type(llm=llm,\n", 122 | " chain_type=\"stuff\",\n", 123 | " retriever=compression_retriever )" 124 | ] 125 | }, 126 | { 127 | "cell_type": "code", 128 | "execution_count": 20, 129 | "metadata": {}, 130 | "outputs": [ 131 | { 132 | "data": { 133 | "text/plain": [ 134 | "'If the tax is not deducted or deposited under s. 194C or other provisions of the IT Act, the assessee will face the consequences provided under Chapter XVII of the Act. This could involve penalties or interest. If the TDS is not deposited, it is a clear contradiction of the provisions of the Act and the relevant provisions are attracted. Even if the assessee is entitled to claim a refund or get it adjusted against the tax liability, they cannot withhold the TDS deducted. If they do, the provisions of s. 40(a)(ia) are attracted. Furthermore, failure to deposit TDS into the Government account is not accepted as a reasonable cause for non-issue of certificate in Form No. 16A, and penalty under s. 272A(2)(g) is leviable for default in issuing TDS certificate even if tax is not deposited.'" 135 | ] 136 | }, 137 | "execution_count": 20, 138 | "metadata": {}, 139 | "output_type": "execute_result" 140 | } 141 | ], 142 | "source": [ 143 | "qa.run(query=query)" 144 | ] 145 | }, 146 | { 147 | "cell_type": "markdown", 148 | "metadata": {}, 149 | "source": [ 150 | "MAP_REDUCE\n", 151 | "'Yes, the deductee should get the credit of TDS even if the deductor did not deposit TDS with the government. 
The text states that the assessee should not be denied the benefit of TDS on the sole ground that the tax so deducted has not been paid to the credit of the Central Government.'" 152 | ] 153 | }, 154 | { 155 | "cell_type": "markdown", 156 | "metadata": {}, 157 | "source": [ 158 | "MAP_RERANK\n", 159 | "\n", 160 | "\"Yes, the deductee should still get the credit of TDS even if the deductor did not deposit the TDS with the government. It's the department's responsibility to recover such amount from the deductor.\"" 161 | ] 162 | }, 163 | { 164 | "cell_type": "markdown", 165 | "metadata": {}, 166 | "source": [ 167 | "REFINE\n", 168 | "'While in theory, a deductee should be able to claim the credit for Tax Deducted at Source (TDS) even if the deductor has not deposited the TDS with the government, this may not always be the case in practice. The credit for TDS is typically granted based on the record of TDS payments held by the government. Consequently, if the deductor fails to deposit the TDS, it will not appear in these records and the deductee may encounter difficulties in claiming the credit. Therefore, it is crucial for the deductee to ensure the deductor is making timely TDS deposits with the government.'\n" 169 | ] 170 | }, 171 | { 172 | "cell_type": "markdown", 173 | "metadata": {}, 174 | "source": [ 175 | "STUFF\n", 176 | "'Yes, the deductee should get the credit of TDS even if the deductor did not deposit the TDS with the government. The liability to pay income-tax if deducted at source is on the employer, or the deductor. If the deductor fails to deposit the TDS, the deductee should not be made to suffer for the illegalities committed by the deductor. It is the responsibility of the department to recover such amount from the deductor.'" 177 | ] 178 | } 179 | ], 180 | "metadata": { 181 | "kernelspec": { 182 | "display_name": "Python 3 (ipykernel)", 183 | "language": "python", 184 | "name": "python3" 185 | }, 186 | "language_info": { 187 | "codemirror_mode": { 188 | "name": "ipython", 189 | "version": 3 190 | }, 191 | "file_extension": ".py", 192 | "mimetype": "text/x-python", 193 | "name": "python", 194 | "nbconvert_exporter": "python", 195 | "pygments_lexer": "ipython3", 196 | "version": "3.9.7" 197 | } 198 | }, 199 | "nbformat": 4, 200 | "nbformat_minor": 2 201 | } 202 | -------------------------------------------------------------------------------- /pinecone+langchain/upsert.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "c:\\Users\\jayit\\freelance\\finance\\.venv\\lib\\site-packages\\pinecone\\index.py:4: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. 
See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 13 | " from tqdm.autonotebook import tqdm\n" 14 | ] 15 | } 16 | ], 17 | "source": [ 18 | "from langchain.document_loaders import PyPDFLoader, DirectoryLoader\n", 19 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", 20 | "from langchain.vectorstores import Pinecone\n", 21 | "import pinecone\n", 22 | "from langchain.embeddings import OpenAIEmbeddings\n", 23 | "from dotenv import load_dotenv\n", 24 | "import os\n", 25 | "load_dotenv()\n", 26 | "OPENAI_API_KEY = os.getenv('OPENAI_API_KEY')\n", 27 | "PINECONE_API_KEY = os.getenv('PINECONE_API_KEY')\n", 28 | "PINECONE_ENV = os.getenv('PINECONE_ENV')\n", 29 | "\n", 30 | "# Initializing Pinecone Vector DB\n", 31 | "pinecone.init(\n", 32 | " api_key=PINECONE_API_KEY,\n", 33 | " environment=PINECONE_ENV\n", 34 | ")\n", 35 | "\n", 36 | "# Pinecone Vector DB index name\n", 37 | "index_name = 'slp'\n", 38 | "index = pinecone.Index(index_name)\n", 39 | " " 40 | ] 41 | }, 42 | { 43 | "cell_type": "code", 44 | "execution_count": 2, 45 | "metadata": {}, 46 | "outputs": [], 47 | "source": [ 48 | "path = \"\"" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": 3, 54 | "metadata": {}, 55 | "outputs": [ 56 | { 57 | "name": "stderr", 58 | "output_type": "stream", 59 | "text": [ 60 | "100%|██████████| 4467/4467 [00:55<00:00, 80.85it/s] \n" 61 | ] 62 | } 63 | ], 64 | "source": [ 65 | "loader = DirectoryLoader(path, glob=\"**/*.pdf\", loader_cls=PyPDFLoader, show_progress=True, use_multithreading=True)\n", 66 | "documents = loader.load()" 67 | ] 68 | }, 69 | { 70 | "cell_type": "code", 71 | "execution_count": 4, 72 | "metadata": {}, 73 | "outputs": [], 74 | "source": [ 75 | "embeddings = OpenAIEmbeddings()\n", 76 | "text_splitter = RecursiveCharacterTextSplitter(\n", 77 | " chunk_size=1000, \n", 78 | " chunk_overlap=20,\n", 79 | " separators=[\"\\n\\n\", \"\\n\", \" \", \"\"],\n", 80 | " length_function=len\n", 81 | " )\n", 82 | "texts = text_splitter.split_documents(documents)\n", 83 | "db = Pinecone.from_documents(\n", 84 | " texts,\n", 85 | " embeddings,\n", 86 | " index_name=index_name\n", 87 | " )" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 10, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "data": { 97 | "text/plain": [ 98 | "{'dimension': 1536,\n", 99 | " 'index_fullness': 0.06484,\n", 100 | " 'namespaces': {'': {'vector_count': 6484}},\n", 101 | " 'total_vector_count': 6484}" 102 | ] 103 | }, 104 | "execution_count": 10, 105 | "metadata": {}, 106 | "output_type": "execute_result" 107 | } 108 | ], 109 | "source": [ 110 | "index.describe_index_stats()" 111 | ] 112 | }, 113 | { 114 | "cell_type": "code", 115 | "execution_count": 6, 116 | "metadata": {}, 117 | "outputs": [ 118 | { 119 | "data": { 120 | "text/plain": [ 121 | "WhoAmIResponse(username='', user_label='', projectname='30876aa')" 122 | ] 123 | }, 124 | "execution_count": 6, 125 | "metadata": {}, 126 | "output_type": "execute_result" 127 | } 128 | ], 129 | "source": [ 130 | "pinecone.whoami()" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 7, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "data": { 140 | "text/plain": [ 141 | "5303" 142 | ] 143 | }, 144 | "execution_count": 7, 145 | "metadata": {}, 146 | "output_type": "execute_result" 147 | } 148 | ], 149 | "source": [ 150 | "len(documents)" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 8, 156 | "metadata": {}, 157 | "outputs": [ 158 | 
{ 159 | "data": { 160 | "text/plain": [ 161 | "6484" 162 | ] 163 | }, 164 | "execution_count": 8, 165 | "metadata": {}, 166 | "output_type": "execute_result" 167 | } 168 | ], 169 | "source": [ 170 | "len(texts)" 171 | ] 172 | }, 173 | { 174 | "cell_type": "markdown", 175 | "metadata": {}, 176 | "source": [ 177 | "## Delete your index\n", 178 | "\n", 179 | "Once finished with the index, we can delete it to save resources." 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 9, 185 | "metadata": {}, 186 | "outputs": [], 187 | "source": [ 188 | "# my_index.delete(delete_all=True, namespace='')" 189 | ] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": ".venv", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.10.4" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 2 213 | } 214 | -------------------------------------------------------------------------------- /property-store-graph-rag/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=sk-.... 2 | NEO4J_URI=ABC 3 | NEO4J_USERNAME=ABC 4 | NEO4J_PASSWORD=ABC 5 | AURA_INSTANCEID=ABC 6 | AURA_INSTANCENAME=ABC 7 | -------------------------------------------------------------------------------- /property-store-graph-rag/README.md: -------------------------------------------------------------------------------- 1 | References -> https://docs.llamaindex.ai/en/stable/examples/property_graph/property_graph_basic/ 2 | 3 | Snippet -> https://neo4j.com/blog/what-is-knowledge-graph/ 4 | 5 | Property Graphs 6 | Native property graph databases, such as Neo4j, are a logical choice for implementing knowledge graphs. They natively store information as nodes, relationships, and properties, allowing for an intuitive visualization of highly interconnected data structures. The physical database matches the conceptual data model, making designing and developing the knowledge graph easier. When you use property graphs, you get: 7 | 8 | - Simplicity and ease of design: Property graphs allow for straightforward data modeling when designing the knowledge graph. Because the conceptual and physical models are very similar (often the same), the transition from design to implementation is more straightforward (and easy to explain to non-technical users). 9 | 10 | - Flexibility: It’s easy to add new data, properties, relationship types, and organizing principles without extensive refactoring or code rewrites. As needs change, you can iterate and incrementally expand the knowledge graph’s data, relationships, and organization. 11 | 12 | - Performance: Property graphs offer superior query performance compared to alternatives like RDF databases or relational databases, especially for complex traversals and many-to-many relationships. This performance comes from storing the relationships between entities directly in the database rather than re-generating them using joins in queries. A native property graph database traverses relationships by following pointers in memory, making queries that traverse even complex chains of many relationships very fast. 13 | 14 | - Developer-friendly Code: Property graphs support an intuitive and expressive ISO query language standard, GQL, which means you have less code to write, debug, and maintain than SQL or SPARQL. Neo4j’s Cypher is the most widely used implementation of GQL. 15 | 16 | Difference between Triplet Store (RDF) & Property Graph -> https://neo4j.com/blog/rdf-vs-property-graphs-knowledge-graphs/ 17 | 18 | More in depth -> https://docs.llamaindex.ai/en/latest/module_guides/indexing/lpg_index_guide/ 19 | -------------------------------------------------------------------------------- /property-store-graph-rag/data/Graph_Retrieval-Augmented_Generation_A_Survey.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayita13/GenerativeAI/67ab29100b303220558f76b56a3861e0a464f09a/property-store-graph-rag/data/Graph_Retrieval-Augmented_Generation_A_Survey.pdf -------------------------------------------------------------------------------- /property-store-graph-rag/pg-storage/default__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /property-store-graph-rag/pg-storage/image__vector_store.json: -------------------------------------------------------------------------------- 1 | {"embedding_dict": {}, "text_id_to_ref_doc_id": {}, "metadata_dict": {}} -------------------------------------------------------------------------------- /property-store-graph-rag/pg-storage/index_store.json: -------------------------------------------------------------------------------- 1 | {"index_store/data": {"604d6cde-87a3-431d-bb05-c7d1b54d3259": {"__type__": "simple_lpg", "__data__": "{\"index_id\": \"604d6cde-87a3-431d-bb05-c7d1b54d3259\", \"summary\": null}"}}} -------------------------------------------------------------------------------- /property-store-graph-rag/property-graph-rag.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [], 8 | "source": [ 9 | "from llama_index.core import SimpleDirectoryReader, StorageContext, load_index_from_storage, Settings\n", 10 | "import os\n", 11 | "from dotenv import load_dotenv\n", 12 | "load_dotenv()\n", 13 | "import nest_asyncio\n", 14 | "\n", 15 | "nest_asyncio.apply()" 16 | ] 17 | }, 18 | { 19 | "cell_type": "code", 20 | "execution_count": 3, 21 | "metadata": {}, 22 | "outputs": [], 23 | "source": [ 24 | "documents = SimpleDirectoryReader(\"data/\").load_data()" 25 | ] 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": 4, 30 | "metadata": {}, 31 | "outputs": [], 32 | "source": [ 33 | "username = os.getenv(\"NEO4J_USERNAME\")\n", 34 | "password = os.getenv(\"NEO4J_PASSWORD\")\n", 35 | "url = os.getenv(\"NEO4J_URI\")" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": 5, 41 | "metadata": {}, 42 | "outputs": [ 43 | { 44 | "name": "stderr", 45 | "output_type": "stream", 46 | "text": [ 47 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The procedure has a deprecated field. 
('config' used by 'apoc.meta.graphSample' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: \"CALL apoc.meta.graphSample() YIELD nodes, relationships RETURN nodes, [rel in relationships | {name:apoc.any.property(rel, 'type'), count: apoc.any.property(rel, 'count')}] AS relationships\"\n" 48 | ] 49 | } 50 | ], 51 | "source": [ 52 | "from llama_index.core import PropertyGraphIndex\n", 53 | "from llama_index.embeddings.openai import OpenAIEmbedding\n", 54 | "from llama_index.llms.openai import OpenAI\n", 55 | "from llama_index.core.indices.property_graph import SchemaLLMPathExtractor\n", 56 | "from llama_index.graph_stores.neo4j import Neo4jPropertyGraphStore\n", 57 | "from IPython.display import Markdown, display\n", 58 | "\n", 59 | "graph_store = Neo4jPropertyGraphStore(\n", 60 | " username=username,\n", 61 | " password=password,\n", 62 | " url=url,\n", 63 | ")" 64 | ] 65 | }, 66 | { 67 | "cell_type": "code", 68 | "execution_count": 6, 69 | "metadata": {}, 70 | "outputs": [], 71 | "source": [ 72 | "storage_context = StorageContext.from_defaults(\n", 73 | " graph_store=graph_store\n", 74 | ")" 75 | ] 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 7, 80 | "metadata": {}, 81 | "outputs": [], 82 | "source": [ 83 | "llm=OpenAI(model=\"gpt-4o-mini\", temperature=0.3)\n", 84 | "Settings.llm = llm\n", 85 | "\n", 86 | "embed_model=OpenAIEmbedding(model_name=\"text-embedding-3-small\")\n", 87 | "Settings.embed_model = embed_model\n" 88 | ] 89 | }, 90 | { 91 | "cell_type": "code", 92 | "execution_count": 8, 93 | "metadata": {}, 94 | "outputs": [ 95 | { 96 | "name": "stderr", 97 | "output_type": "stream", 98 | "text": [ 99 | "c:\\Users\\jayit\\GCCD\\triplet-store-graph-rag\\venv\\lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 100 | " from .autonotebook import tqdm as notebook_tqdm\n", 101 | "Parsing nodes: 100%|██████████| 40/40 [00:00<00:00, 318.77it/s]\n", 102 | "Extracting paths from text: 100%|██████████| 56/56 [00:30<00:00, 1.84it/s]\n", 103 | "Extracting implicit paths: 100%|██████████| 56/56 [00:00<00:00, 3585.86it/s]\n", 104 | "Generating embeddings: 100%|██████████| 1/1 [00:03<00:00, 3.34s/it]\n", 105 | "Generating embeddings: 100%|██████████| 11/11 [00:12<00:00, 1.14s/it]\n", 106 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: CALL subquery without a variable scope clause is now deprecated. Use CALL (e, row) { ... 
}} {position: line: 10, column: 21, offset: 397} for query: \"\\n UNWIND $data AS row\\n MERGE (e:__Node__ {id: row.id})\\n SET e += apoc.map.clean(row.properties, [], [])\\n SET e.name = row.name, e:`__Entity__`\\n WITH e, row\\n CALL apoc.create.addLabels(e, [row.label])\\n YIELD node\\n WITH e, row\\n CALL {\\n WITH e, row\\n WITH e, row\\n WHERE row.embedding IS NOT NULL\\n CALL db.create.setNodeVectorProperty(e, 'embedding', row.embedding)\\n RETURN count(*) AS count\\n }\\n WITH e, row WHERE row.properties.triplet_source_id IS NOT NULL\\n MERGE (c:__Node__ {id: row.properties.triplet_source_id})\\n MERGE (e)<-[:MENTIONS]-(c)\\n \"\n", 107 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: CALL subquery without a variable scope clause is now deprecated. Use CALL (e, row) { ... }} {position: line: 10, column: 21, offset: 397} for query: \"\\n UNWIND $data AS row\\n MERGE (e:__Node__ {id: row.id})\\n SET e += apoc.map.clean(row.properties, [], [])\\n SET e.name = row.name, e:`__Entity__`\\n WITH e, row\\n CALL apoc.create.addLabels(e, [row.label])\\n YIELD node\\n WITH e, row\\n CALL {\\n WITH e, row\\n WITH e, row\\n WHERE row.embedding IS NOT NULL\\n CALL db.create.setNodeVectorProperty(e, 'embedding', row.embedding)\\n RETURN count(*) AS count\\n }\\n WITH e, row WHERE row.properties.triplet_source_id IS NOT NULL\\n MERGE (c:__Node__ {id: row.properties.triplet_source_id})\\n MERGE (e)<-[:MENTIONS]-(c)\\n \"\n", 108 | "Received notification from DBMS server: {severity: WARNING} {code: Neo.ClientNotification.Statement.FeatureDeprecationWarning} {category: DEPRECATION} {title: This feature is deprecated and will be removed in future versions.} {description: The procedure has a deprecated field. 
('config' used by 'apoc.meta.graphSample' is deprecated.)} {position: line: 1, column: 1, offset: 0} for query: \"CALL apoc.meta.graphSample() YIELD nodes, relationships RETURN nodes, [rel in relationships | {name:apoc.any.property(rel, 'type'), count: apoc.any.property(rel, 'count')}] AS relationships\"\n" 109 | ] 110 | } 111 | ], 112 | "source": [ 113 | "index_pg = PropertyGraphIndex.from_documents(\n", 114 | " documents,\n", 115 | " llm=llm,\n", 116 | " embed_model=embed_model,\n", 117 | " show_progress=True,\n", 118 | " property_graph_store=graph_store,\n", 119 | " storage_context=storage_context,\n", 120 | ")" 121 | ] 122 | }, 123 | { 124 | "cell_type": "code", 125 | "execution_count": 9, 126 | "metadata": {}, 127 | "outputs": [], 128 | "source": [ 129 | "index_pg.storage_context.persist(persist_dir=\"./pg-storage\")\n", 130 | "index_pg = load_index_from_storage(storage_context)" 131 | ] 132 | }, 133 | { 134 | "cell_type": "code", 135 | "execution_count": 10, 136 | "metadata": {}, 137 | "outputs": [ 138 | { 139 | "name": "stdout", 140 | "output_type": "stream", 141 | "text": [ 142 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Combines ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> External knowledge ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'External knowledge', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'})\n", 143 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Lacks ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'}) -> Global information ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Global information', 'file_name': 
'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'})\n", 144 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Recounts ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'}) -> Content in textual snippets ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Content in textual snippets', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'})\n", 145 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Faces ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'}) -> Limitations in real-world scenarios ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Limitations in real-world scenarios', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'})\n", 146 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Has gained ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 
'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'}) -> Widespread attention ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Widespread attention', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'})\n", 147 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Ensures ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'}) -> Higher degree of factual accuracy ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Higher degree of factual accuracy', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'})\n", 148 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Enriches ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'}) -> Contextual depth of responses ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Contextual depth of responses', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'})\n", 149 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 
'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Incorporates ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'}) -> Relevant factual knowledge ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Relevant factual knowledge', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'})\n", 150 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Lies in ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'}) -> Ability to query text corpus ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Ability to query text corpus', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '1002bfc5-4b03-4cc6-a2dc-24890b329822', 'file_type': 'application/pdf'})\n", 151 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Integrates ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Domain-specific information ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Domain-specific information', 'file_name': 
'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'})\n", 152 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Integrates ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '952f0a60-6066-42dd-b765-9cc29fd176cf', 'file_type': 'application/pdf'}) -> Retrieval component ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Retrieval component', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '952f0a60-6066-42dd-b765-9cc29fd176cf', 'file_type': 'application/pdf'})\n", 153 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Aims to enhance ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '952f0a60-6066-42dd-b765-9cc29fd176cf', 'file_type': 'application/pdf'}) -> Quality of generated content ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Quality of generated content', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '2', 'triplet_source_id': '952f0a60-6066-42dd-b765-9cc29fd176cf', 'file_type': 'application/pdf'})\n", 154 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Mitigates issues such as ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 
'page_label': '1', 'triplet_source_id': 'b5299952-c574-46cf-b40e-6e7e9a4070fb', 'file_type': 'application/pdf'}) -> Hallucination ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Hallucination', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '1', 'triplet_source_id': 'b5299952-c574-46cf-b40e-6e7e9a4070fb', 'file_type': 'application/pdf'})\n", 155 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> Refines ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '1', 'triplet_source_id': 'b5299952-c574-46cf-b40e-6e7e9a4070fb', 'file_type': 'application/pdf'}) -> Llm outputs ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Llm outputs', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '1', 'triplet_source_id': 'b5299952-c574-46cf-b40e-6e7e9a4070fb', 'file_type': 'application/pdf'})\n", 156 | "Rag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Rag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '4', 'triplet_source_id': '77fa7ee5-02b6-4108-be84-de699704cffd', 'file_type': 'application/pdf'}) -> References ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '1', 'triplet_source_id': 'b5299952-c574-46cf-b40e-6e7e9a4070fb', 'file_type': 'application/pdf'}) -> External knowledge base ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'External knowledge base', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '1', 'triplet_source_id': 'b5299952-c574-46cf-b40e-6e7e9a4070fb', 'file_type': 'application/pdf'})\n", 157 | "Grag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Grag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '32', 'triplet_source_id': 
'1020a7c3-8d3e-4cf3-ac21-afa86da5023c', 'file_type': 'application/pdf'}) -> Is ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '32', 'triplet_source_id': '1020a7c3-8d3e-4cf3-ac21-afa86da5023c', 'file_type': 'application/pdf'}) -> Graph retrieval-augmented generation ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Graph retrieval-augmented generation', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '32', 'triplet_source_id': '1020a7c3-8d3e-4cf3-ac21-afa86da5023c', 'file_type': 'application/pdf'})\n", 158 | "Graph retrieval-augmented generation ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Graph retrieval-augmented generation', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '32', 'triplet_source_id': '1020a7c3-8d3e-4cf3-ac21-afa86da5023c', 'file_type': 'application/pdf'}) -> Is ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '7', 'triplet_source_id': '9cb1f2bc-c266-42fc-bc12-9b9c6d186c7f', 'file_type': 'application/pdf'}) -> Survey ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Survey', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '35', 'triplet_source_id': 'a6fe9888-a799-4168-a595-cae2578ff9dd', 'file_type': 'application/pdf'})\n", 159 | "Graph retrieval-augmented generation ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Graph retrieval-augmented generation', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '32', 'triplet_source_id': '1020a7c3-8d3e-4cf3-ac21-afa86da5023c', 'file_type': 'application/pdf'}) -> Is ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '3', 'triplet_source_id': 'b578a952-8332-4e48-90fa-d44accfc1009', 'file_type': 'application/pdf'}) -> Innovative solution ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Innovative solution', 'file_name': 
'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '3', 'triplet_source_id': 'b578a952-8332-4e48-90fa-d44accfc1009', 'file_type': 'application/pdf'})\n", 160 | "Knowledge graphs ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Knowledge graphs', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '29', 'triplet_source_id': '2a78ec44-12ea-46d7-b41a-790a17f15922', 'file_type': 'application/pdf'}) -> Lack ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '28', 'triplet_source_id': 'b851868d-92fa-4293-91a4-072d9c7aa84b', 'file_type': 'application/pdf'}) -> Inclusion of multi-modal data ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Inclusion of multi-modal data', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '28', 'triplet_source_id': 'b851868d-92fa-4293-91a4-072d9c7aa84b', 'file_type': 'application/pdf'})\n", 161 | "Knowledge graphs ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Knowledge graphs', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '29', 'triplet_source_id': '2a78ec44-12ea-46d7-b41a-790a17f15922', 'file_type': 'application/pdf'}) -> Encompass ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '29', 'triplet_source_id': '2a78ec44-12ea-46d7-b41a-790a17f15922', 'file_type': 'application/pdf'}) -> Millions of entities ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Millions of entities', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '29', 'triplet_source_id': '2a78ec44-12ea-46d7-b41a-790a17f15922', 'file_type': 'application/pdf'})\n", 162 | "Knowledge graphs ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Knowledge graphs', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '29', 'triplet_source_id': '2a78ec44-12ea-46d7-b41a-790a17f15922', 'file_type': 'application/pdf'}) -> Typical kind of ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 
'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '5', 'triplet_source_id': '9174cebf-7624-4f98-ac53-c84929ab640a', 'file_type': 'application/pdf'}) -> Text-attributed graphs ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Text-attributed graphs', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '5', 'triplet_source_id': '9174cebf-7624-4f98-ac53-c84929ab640a', 'file_type': 'application/pdf'})\n", 163 | "Graphrag ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Graphrag', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '30', 'triplet_source_id': 'd8bf6309-e2a6-4ee2-bf4e-651f334d564a', 'file_type': 'application/pdf'}) -> Enhances ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '30', 'triplet_source_id': 'd8bf6309-e2a6-4ee2-bf4e-651f334d564a', 'file_type': 'application/pdf'}) -> Information retrieval ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Information retrieval', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '30', 'triplet_source_id': 'd8bf6309-e2a6-4ee2-bf4e-651f334d564a', 'file_type': 'application/pdf'})\n", 164 | "Retrieval-augmented generation for ai-generated content: a survey ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Retrieval-augmented generation for ai-generated content: a survey', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '40', 'triplet_source_id': 'd8d43ccf-610c-4d11-ae9a-ec5d571de370', 'file_type': 'application/pdf'}) -> Arxiv ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '40', 'triplet_source_id': 'd8d43ccf-610c-4d11-ae9a-ec5d571de370', 'file_type': 'application/pdf'}) -> 2402.19473 ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': '2402.19473', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '40', 'triplet_source_id': 'd8d43ccf-610c-4d11-ae9a-ec5d571de370', 'file_type': 'application/pdf'})\n", 165 | "Peng et al. 
({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Peng et al.', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '40', 'triplet_source_id': 'd8d43ccf-610c-4d11-ae9a-ec5d571de370', 'file_type': 'application/pdf'}) -> Authored ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '40', 'triplet_source_id': 'd8d43ccf-610c-4d11-ae9a-ec5d571de370', 'file_type': 'application/pdf'}) -> Retrieval-augmented generation for ai-generated content: a survey ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Retrieval-augmented generation for ai-generated content: a survey', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '40', 'triplet_source_id': 'd8d43ccf-610c-4d11-ae9a-ec5d571de370', 'file_type': 'application/pdf'})\n", 166 | "Hao yu ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Hao yu', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '39', 'triplet_source_id': 'fa9c7c19-9506-47c8-bf63-ee0f7763bc7e', 'file_type': 'application/pdf'}) -> Authored ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '39', 'triplet_source_id': 'fa9c7c19-9506-47c8-bf63-ee0f7763bc7e', 'file_type': 'application/pdf'}) -> Evaluation of retrieval-augmented generation ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Evaluation of retrieval-augmented generation', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '39', 'triplet_source_id': 'fa9c7c19-9506-47c8-bf63-ee0f7763bc7e', 'file_type': 'application/pdf'})\n", 167 | "Atlantic ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Atlantic', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '36', 'triplet_source_id': '7ea0d842-e0e3-49a5-b94b-7fdd4fbd26b5', 'file_type': 'application/pdf'}) -> Is ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '36', 'triplet_source_id': 
'7ea0d842-e0e3-49a5-b94b-7fdd4fbd26b5', 'file_type': 'application/pdf'}) -> Structure-aware retrieval-augmented language model ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Structure-aware retrieval-augmented language model', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '36', 'triplet_source_id': '7ea0d842-e0e3-49a5-b94b-7fdd4fbd26b5', 'file_type': 'application/pdf'})\n", 168 | "Shangyu wu ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Shangyu wu', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '38', 'triplet_source_id': '44560e13-a048-4b9f-b3e0-7cf8636986a9', 'file_type': 'application/pdf'}) -> Authored ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '38', 'triplet_source_id': '44560e13-a048-4b9f-b3e0-7cf8636986a9', 'file_type': 'application/pdf'}) -> Retrieval-augmented generation for natural language processing ({'creation_date': '2024-09-04', 'last_modified_date': '2024-08-28', 'file_size': 1750518, 'file_path': 'c:\\\\Users\\\\jayit\\\\GCCD\\\\triplet-store-graph-rag\\\\data\\\\Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'name': 'Retrieval-augmented generation for natural language processing', 'file_name': 'Graph_Retrieval-Augmented_Generation_A_Survey.pdf', 'page_label': '38', 'triplet_source_id': '44560e13-a048-4b9f-b3e0-7cf8636986a9', 'file_type': 'application/pdf'})\n" 169 | ] 170 | } 171 | ], 172 | "source": [ 173 | "# Define retriever\n", 174 | "retriever = index_pg.as_retriever(\n", 175 | " include_text=False, # include source text in returned nodes, default True\n", 176 | ")\n", 177 | "results = retriever.retrieve(\"What is Graph Retrieval-Augmented Generation?\")\n", 178 | "for record in results:\n", 179 | " print(record.text)" 180 | ] 181 | }, 182 | { 183 | "cell_type": "code", 184 | "execution_count": 11, 185 | "metadata": {}, 186 | "outputs": [ 187 | { 188 | "data": { 189 | "text/markdown": [ 190 | "Graph Retrieval-Augmented Generation (GraphRAG) is a framework designed for enhancing question-answering tasks by integrating relational knowledge from graph databases. It operates in three main stages: G-Indexing, G-Retrieval, and G-Generation. This approach emphasizes the retrieval of structured graph data, distinguishing it from traditional text-based methods. GraphRAG incorporates various enhancement techniques, such as query and knowledge enhancements, to improve the relevance and accuracy of the generated responses. By transforming retrieved graph information into formats suitable for language model generators, it aims to leverage the relationships and structural information inherent in graph data to enhance overall task performance." 
191 | ], 192 | "text/plain": [ 193 | "" 194 | ] 195 | }, 196 | "metadata": {}, 197 | "output_type": "display_data" 198 | } 199 | ], 200 | "source": [ 201 | "# Question answering\n", 202 | "query_engine = index_pg.as_query_engine(include_text=True)\n", 203 | "response = query_engine.query(\"What is Graph Retrieval-Augmented Generation?\")\n", 204 | "display(Markdown(f\"{response}\"))" 205 | ] 206 | }, 207 | { 208 | "cell_type": "code", 209 | "execution_count": 14, 210 | "metadata": {}, 211 | "outputs": [ 212 | { 213 | "data": { 214 | "text/markdown": [ 215 | "Query-Focused Summarization is a process that involves generating summaries of documents or information that are specifically tailored to address a particular query or information need. This approach enhances the relevance of the summary by ensuring that it captures the most pertinent information related to the user's query, rather than providing a general overview of the entire content. It typically utilizes techniques such as knowledge graphs and retrieval-augmented generation to improve the quality and relevance of the summaries produced." 216 | ], 217 | "text/plain": [ 218 | "" 219 | ] 220 | }, 221 | "metadata": {}, 222 | "output_type": "display_data" 223 | } 224 | ], 225 | "source": [ 226 | "response = query_engine.query(\"What is Query-Focused Summarization?\")\n", 227 | "display(Markdown(f\"{response}\"))" 228 | ] 229 | }, 230 | { 231 | "cell_type": "code", 232 | "execution_count": null, 233 | "metadata": {}, 234 | "outputs": [], 235 | "source": [] 236 | } 237 | ], 238 | "metadata": { 239 | "kernelspec": { 240 | "display_name": "venv", 241 | "language": "python", 242 | "name": "python3" 243 | }, 244 | "language_info": { 245 | "codemirror_mode": { 246 | "name": "ipython", 247 | "version": 3 248 | }, 249 | "file_extension": ".py", 250 | "mimetype": "text/x-python", 251 | "name": "python", 252 | "nbconvert_exporter": "python", 253 | "pygments_lexer": "ipython3", 254 | "version": "3.10.4" 255 | } 256 | }, 257 | "nbformat": 4, 258 | "nbformat_minor": 2 259 | } 260 | -------------------------------------------------------------------------------- /property-store-graph-rag/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.11.5 2 | python-dotenv==1.0.1 3 | llama-index-graph-stores-neo4j==0.3.1 4 | neo4j==5.24.0 5 | -------------------------------------------------------------------------------- /qdrant+langchain/README.md: -------------------------------------------------------------------------------- 1 | Reference - https://rito.hashnode.dev/daily-portfolio-summarizer-with-langchain-qdrant-and-mistral-ai 2 | 3 | Tweaks vs. the reference -> LLM: Gemini, Embeddings: PaLM, Data: latest earnings reports 4 | 5 | To set up the application 6 | 7 | ``` 8 | cd portfolio_manager 9 | pip install -r requirements.txt 10 | ``` 11 | 12 | To run the Streamlit app 13 | ``` 14 | streamlit run streamlit_app.py 15 | ``` 16 | 17 | To run the Gradio app on its own -> uncomment the last line ```demo.launch()``` 18 | ``` 19 | python gradio_app.py 20 | ``` 21 | 22 | To run FastAPI with the Gradio app mounted 23 | ``` 24 | uvicorn app:app --reload 25 | ``` 26 | -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/.env.example: -------------------------------------------------------------------------------- 1 | QDRANT_URL=XYZ 2 | QDRANT_API_KEY=XYZ 3 | GOOGLE_API_KEY=XYZ -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/app.py:
-------------------------------------------------------------------------------- 1 | from fastapi import FastAPI 2 | import gradio as gr 3 | from gradio_app import demo 4 | 5 | app = FastAPI() 6 | 7 | @app.get("/") 8 | def read_main(): 9 | return {"message": "This is your main app"} 10 | 11 | app = gr.mount_gradio_app(app, demo, path="/gradio") -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/data_insertion.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "metadata": {}, 7 | "outputs": [ 8 | { 9 | "name": "stderr", 10 | "output_type": "stream", 11 | "text": [ 12 | "c:\\Users\\jayit\\portfolio_manager\\.venv\\Lib\\site-packages\\tqdm\\auto.py:21: TqdmWarning: IProgress not found. Please update jupyter and ipywidgets. See https://ipywidgets.readthedocs.io/en/stable/user_install.html\n", 13 | " from .autonotebook import tqdm as notebook_tqdm\n" 14 | ] 15 | }, 16 | { 17 | "data": { 18 | "text/plain": [ 19 | "True" 20 | ] 21 | }, 22 | "execution_count": 1, 23 | "metadata": {}, 24 | "output_type": "execute_result" 25 | } 26 | ], 27 | "source": [ 28 | "import os\n", 29 | "\n", 30 | "from langchain_core.prompts import ChatPromptTemplate\n", 31 | "from langchain.schema.document import Document\n", 32 | "\n", 33 | "from langchain.text_splitter import RecursiveCharacterTextSplitter\n", 34 | "\n", 35 | "from langchain_community.document_loaders import SeleniumURLLoader\n", 36 | "from langchain_community.vectorstores import Qdrant\n", 37 | "from langchain_google_genai import (\n", 38 | " ChatGoogleGenerativeAI,\n", 39 | " GoogleGenerativeAIEmbeddings,\n", 40 | " HarmBlockThreshold,\n", 41 | " HarmCategory,\n", 42 | ")\n", 43 | "from dotenv import load_dotenv\n", 44 | "load_dotenv()" 45 | ] 46 | }, 47 | { 48 | "cell_type": "code", 49 | "execution_count": 2, 50 | "metadata": {}, 51 | "outputs": [], 52 | "source": [ 53 | "# portfolio_constants is the constants file which contains \n", 54 | "# these 2 variables. 
The constants file can be found below.\n", 55 | "from portfolio_constants import VECTOR_DB_COLLECTION, portfolio\n", 56 | "\n", 57 | "QDRANT_URL = os.getenv(\"QDRANT_URL\")\n", 58 | "QDRANT_API_KEY = os.getenv(\"QDRANT_API_KEY\")\n", 59 | "GOOGLE_API_KEY = os.getenv(\"GOOGLE_API_KEY\")\n", 60 | "\n", 61 | "CHUNK_SIZE = 1000\n", 62 | "CHUNK_OVERLAP = 200\n", 63 | "\n", 64 | "llm = ChatGoogleGenerativeAI(\n", 65 | " model=\"gemini-pro\",\n", 66 | " safety_settings={\n", 67 | " HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,\n", 68 | " },\n", 69 | " )\n", 70 | "Embeddings = GoogleGenerativeAIEmbeddings(model=\"models/embedding-001\")\n", 71 | "\n", 72 | "cnbc_quarter_report = {\n", 73 | " 'GOOGL': {\"report_link\": 'https://www.cnbc.com/2024/01/30/alphabet-googl-q4-earnings-report-2023.html', \"quarter\": \"Q4\", \"year\": \"2023\"},\n", 74 | " 'AAPL': {\"report_link\": 'https://www.cnbc.com/2023/11/02/apple-aapl-earnings-report-q4-2023.html', \"quarter\": \"Q4\", \"year\": \"2023\"},\n", 75 | " 'TSLA': {\"report_link\": 'https://www.cnbc.com/2024/01/24/tesla-tsla-earnings-q4-2023.html', \"quarter\": \"Q4\", \"year\": \"2023\"},\n", 76 | " 'MSFT': {\"report_link\": 'https://www.cnbc.com/2024/01/30/microsoft-msft-q2-earnings-report-2024.html', \"quarter\": \"Q4\", \"year\": \"2023\"},\n", 77 | " 'WMT': {\"report_link\": 'https://www.cnbc.com/2024/02/20/walmart-wmt-q4-2024-earnings-.html', \"quarter\": \"Q4\", \"year\": \"2023\"}\n", 78 | "}" 79 | ] 80 | }, 81 | { 82 | "cell_type": "code", 83 | "execution_count": 3, 84 | "metadata": {}, 85 | "outputs": [], 86 | "source": [ 87 | "# From the earnings report webpage we extract the relevant information, such as financial data,\n", 88 | "# key metrics, and analyst commentary, while discarding distractions such as navigation menus,\n", 89 | "# page reference links, banner ads, social media widgets, contact information, and legal disclaimers.\n", 90 | "def extract_content(url):\n", 91 | " template = \"\"\"You are an experienced equity research analyst and you do a fantastic job of extracting a company's earnings information from the `company's earning report`. \n", 92 | "\n", 93 | " You are instructed that, if the given text does not belong to the `company's earning report`, then ignore the text and return only the text `**NA**`.\n", 94 | "\n", 95 | " You are instructed to extract the exact lines from the `company's earning report` as they are. 
Don't update or modify the extracted lines.\n", 96 | "\n", 97 | " Below is the `company's earning report`:\n", 98 | " {earning_report}\n", 99 | " \"\"\"\n", 100 | "\n", 101 | " chunked_docs = chunk_web_data(url)\n", 102 | " extracted_text_content = \"\"\n", 103 | " for doc in chunked_docs:\n", 104 | " prompt = ChatPromptTemplate.from_template(template)\n", 105 | " chain = prompt | llm\n", 106 | " data = chain.invoke({\"earning_report\": doc}).content\n", 107 | " if \"**NA**\" in data:\n", 108 | " continue\n", 109 | " extracted_text_content += data\n", 110 | "\n", 111 | " return extracted_text_content" 112 | ] 113 | }, 114 | { 115 | "cell_type": "code", 116 | "execution_count": 4, 117 | "metadata": {}, 118 | "outputs": [], 119 | "source": [ 120 | "# Breaking down the webpage content into small documents so that it can be passed to the LLM to remove the noise\n", 121 | "# from the financial data\n", 122 | "def chunk_web_data(url):\n", 123 | " documents = scrape_content(url)\n", 124 | " text_splitter = RecursiveCharacterTextSplitter(\n", 125 | " chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)\n", 126 | " return text_splitter.split_documents(documents)" 127 | ] 128 | }, 129 | { 130 | "cell_type": "code", 131 | "execution_count": 5, 132 | "metadata": {}, 133 | "outputs": [], 134 | "source": [ 135 | "# We are using Selenium to scrape the webpage content of the given URL\n", 136 | "def scrape_content(url):\n", 137 | " urls = [url]\n", 138 | " loader = SeleniumURLLoader(urls=urls)\n", 139 | " return loader.load()\n" 140 | ] 141 | }, 142 | { 143 | "cell_type": "code", 144 | "execution_count": 6, 145 | "metadata": {}, 146 | "outputs": [], 147 | "source": [ 148 | "# The LLM-filtered data is now broken down into smaller documents before being stored in the Qdrant vector store. In the\n", 149 | "# metadata we pass the company ticker and the quarter and year of the earning report. 
This will help in\n", 150 | "# fetching the relevant information.\n", 151 | "def chunk_text_data(text, ticker, quarter, year):\n", 152 | " text_splitter = RecursiveCharacterTextSplitter(\n", 153 | " chunk_size=CHUNK_SIZE, chunk_overlap=CHUNK_OVERLAP)\n", 154 | " metadata_source = ticker + \"-\" + quarter + \"-\" + year\n", 155 | " document = Document(page_content=text, metadata={\"source\": metadata_source})\n", 156 | " return text_splitter.split_documents([document])" 157 | ] 158 | }, 159 | { 160 | "cell_type": "code", 161 | "execution_count": 7, 162 | "metadata": {}, 163 | "outputs": [], 164 | "source": [ 165 | "# Using this function we insert the docs into the Qdrant DB\n", 166 | "def insert_data_to_vector_store(docs):\n", 167 | " Qdrant.from_documents(\n", 168 | " docs,\n", 169 | " Embeddings,\n", 170 | " url=QDRANT_URL,\n", 171 | " prefer_grpc=True,\n", 172 | " api_key=QDRANT_API_KEY,\n", 173 | " collection_name=VECTOR_DB_COLLECTION,\n", 174 | " )\n" 175 | ] 176 | }, 177 | { 178 | "cell_type": "code", 179 | "execution_count": 8, 180 | "metadata": {}, 181 | "outputs": [ 182 | { 183 | "name": "stdout", 184 | "output_type": "stream", 185 | "text": [ 186 | "Extracting content for: Tesla\n", 187 | "Chunking document for TSLA-Q4-2023\n", 188 | "Inserting Report to Qdrant for Tesla\n" 189 | ] 190 | } 191 | ], 192 | "source": [ 193 | "# This cell orchestrates the entire flow, from fetching content to storing it in the vector store.\n", 194 | "# def main():\n", 195 | "for entry in portfolio:\n", 196 | " ticker = entry[\"ticker\"]\n", 197 | " company_name = entry[\"company_name\"]\n", 198 | "\n", 199 | " report_dict = cnbc_quarter_report[ticker]\n", 200 | " report_link = report_dict[\"report_link\"]\n", 201 | " year = report_dict[\"year\"]\n", 202 | " quarter = report_dict[\"quarter\"]\n", 203 | "\n", 204 | " print(\"Extracting content for: \", company_name)\n", 205 | " extracted_text_content = extract_content(report_link)\n", 206 | "\n", 207 | " print(\"Chunking document for \" + ticker + \"-\" + quarter + \"-\" + year)\n", 208 | " chunked_docs = chunk_text_data(extracted_text_content, ticker, quarter, year)\n", 209 | "\n", 210 | " print(\"Inserting Report to Qdrant for \" + company_name)\n", 211 | " insert_data_to_vector_store(chunked_docs)\n" 212 | ] 213 | } 214 | ], 215 | "metadata": { 216 | "kernelspec": { 217 | "display_name": ".venv", 218 | "language": "python", 219 | "name": "python3" 220 | }, 221 | "language_info": { 222 | "codemirror_mode": { 223 | "name": "ipython", 224 | "version": 3 225 | }, 226 | "file_extension": ".py", 227 | "mimetype": "text/x-python", 228 | "name": "python", 229 | "nbconvert_exporter": "python", 230 | "pygments_lexer": "ipython3", 231 | "version": "3.12.1" 232 | } 233 | }, 234 | "nbformat": 4, 235 | "nbformat_minor": 2 236 | } 237 | -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/earning_report_analysis.py: -------------------------------------------------------------------------------- 1 | import os 2 | from dotenv import load_dotenv 3 | load_dotenv() 4 | from langchain_core.prompts import ChatPromptTemplate 5 | 6 | from langchain_google_genai import ( 7 | ChatGoogleGenerativeAI, 8 | GoogleGenerativeAIEmbeddings, 9 | HarmBlockThreshold, 10 | HarmCategory, 11 | ) 12 | from langchain_community.vectorstores import Qdrant 13 | 14 | import qdrant_client 15 | 16 | # portfolio_constants is the constants file which contains 17 | # these 2 variables. 
18 | from portfolio_constants import VECTOR_DB_COLLECTION, portfolio 19 | 20 | QDRANT_URL = os.getenv("QDRANT_URL") 21 | QDRANT_API_KEY = os.getenv("QDRANT_API_KEY") 22 | google_api_key = os.getenv("GOOGLE_API_KEY") 23 | 24 | CHUNK_SIZE = 1500 25 | CHUNK_OVERLAP = 300 26 | LATEST_EARNINGS_QUARTER = "Q4"  # must match the quarter in the metadata "source" written by data_insertion.ipynb (Q4-2023) 27 | LATEST_EARNINGS_YEAR = "2023" 28 | 29 | llm = ChatGoogleGenerativeAI( 30 | model="gemini-pro", 31 | safety_settings={ 32 | HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE, 33 | }, 34 | ) 35 | Embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001") 36 | 37 | # Queries the database to fetch the earnings report of the company 38 | def get_earnings_from_vector_store(portfolio_stock): 39 | company_name = portfolio_stock["company_name"] 40 | question = "Get the quarterly earning report of " + company_name + """ Get the information like Revenue, 41 | Earnings, Net Income, Expenses, Cash Flow, Guidance, Market Conditions, Growth Drivers, 42 | Challenges and Management Outlook""" 43 | 44 | source_info = generate_db_source_info(portfolio_stock) 45 | qdrant_retriever = get_qdrant_retriever(source_info) 46 | result_docs = qdrant_retriever.get_relevant_documents(question) 47 | print("Number of documents found: ", len(result_docs)) 48 | 49 | retrieved_text = "" 50 | for i in range(len(result_docs)): 51 | retrieved_text += result_docs[i].page_content 52 | 53 | return retrieved_text 54 | 55 | 56 | # Generates the source filter that is applied when querying the vector store, so results come only from the right report 57 | def generate_db_source_info(entry): 58 | ticker = entry["ticker"] 59 | return ticker + "-" + LATEST_EARNINGS_QUARTER + "-" + LATEST_EARNINGS_YEAR 60 | 61 | 62 | # Generates the Qdrant retriever object 63 | def get_qdrant_retriever(source_info): 64 | qdrant_client_obj = qdrant_client.QdrantClient( 65 | url=QDRANT_URL, 66 | prefer_grpc=True, 67 | api_key=QDRANT_API_KEY) 68 | qdrant = Qdrant(qdrant_client_obj, VECTOR_DB_COLLECTION, Embeddings) 69 | return qdrant.as_retriever(search_kwargs={'filter': {'source': source_info}, 'k': 2}) 70 | 71 | 72 | # This function orchestrates the flow: first it calls the Qdrant vector store to get the 73 | # earnings report of the company, then it passes the fetched report to the LLM for analysis. 74 | def report_analysis(portfolio_stock): 75 | company_name = portfolio_stock["company_name"] 76 | template = "You are a stock analyst analyzing " + company_name + "'s quarterly earnings report." + """ 77 | Based on the report, summarize the sentiment towards the company and its performance. Identify \ 78 | key positive and negative aspects, focusing on financial results, future outlook, and investment potential. 79 | 80 | Additionally, answer the following questions: 81 | 1/ Does the company meet or exceed analyst expectations? 82 | 2/ What are the main risks and opportunities facing the company in the coming year? 
83 | 84 | Below is the earnings report: 85 | 86 | {earnings_report} 87 | """ 88 | 89 | earnings_report = get_earnings_from_vector_store(portfolio_stock) 90 | print("Generating Analysis for " + company_name) 91 | 92 | prompt = ChatPromptTemplate.from_template(template) 93 | chain = prompt | llm 94 | return chain.invoke({"earnings_report": earnings_report}).content 95 | 96 | 97 | # Performs Earning Analysis of a single company 98 | def get_company_analysis(company_name): 99 | print("Selected company: ", company_name) 100 | portfolio_stock = None 101 | for entry in portfolio: 102 | if entry["company_name"] == company_name: 103 | portfolio_stock = entry 104 | break 105 | analysed_report = report_analysis(portfolio_stock) 106 | print("Analysis by LLM: ", analysed_report) 107 | return analysed_report -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/gradio_app.py: -------------------------------------------------------------------------------- 1 | import gradio as gr 2 | import earning_report_analysis 3 | import stock_price_evaluator 4 | 5 | def analyze_stock(selected_company): 6 | analysed_report = earning_report_analysis.get_company_analysis(selected_company) 7 | price_performance = stock_price_evaluator.get_daily_price_performance(selected_company) 8 | final_user_report = analysed_report + "\n\n" + price_performance 9 | return final_user_report 10 | 11 | dropdown = gr.Dropdown(['Alphabet', 'Apple', 'Tesla', 'Microsoft', 'Walmart'], 12 | label="Select your stock from portfolio list") 13 | output_text = gr.Textbox() 14 | 15 | def get_report(selected_company): 16 | return analyze_stock(selected_company) 17 | 18 | demo = gr.Interface(fn=get_report, inputs=[dropdown], 19 | outputs=output_text, 20 | title="Stock Performance Report") 21 | # demo.launch() 22 | -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/portfolio_constants.py: -------------------------------------------------------------------------------- 1 | VECTOR_DB_COLLECTION = "stock_earnings_report" 2 | 3 | portfolio = [ 4 | {"company_name": "Alphabet", "ticker": "GOOGL"}, 5 | {"company_name": "Apple", "ticker": "AAPL"}, 6 | {"company_name": "Tesla", "ticker": "TSLA"}, 7 | {"company_name": "Microsoft", "ticker": "MSFT"}, 8 | {"company_name": "Walmart", "ticker": "WMT"} 9 | ] 10 | -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/requirements.txt: -------------------------------------------------------------------------------- 1 | langchain==0.1.9 2 | langchain-community==0.0.24 3 | langchain-core==0.1.27 4 | yfinance==0.2.37 5 | streamlit==1.31.1 6 | qdrant_client==1.7.3 7 | python-dotenv==1.0.1 8 | langchain-google-genai==0.0.9 9 | google-generativeai==0.3.2 10 | selenium==4.18.1 11 | unstructured==0.11.8 12 | gradio==4.21.0 13 | fastapi==0.110.0 -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/stock_price_evaluator.py: -------------------------------------------------------------------------------- 1 | import yfinance as yf 2 | from datetime import datetime, timedelta 3 | 4 | from portfolio_constants import portfolio 5 | 6 | def get_daily_price_performance(company_name): 7 | portfolio_stock = None 8 | for entry in portfolio: 9 | if entry["company_name"] == company_name: 10 | portfolio_stock = entry 11 | break 12 | 13 | stock_symbol = portfolio_stock["ticker"] 14 | today = 
datetime.now() # Get today's date 15 | start_date = today - timedelta(days=4) # Calculate the start date 16 | start_date_str = start_date.strftime('%Y-%m-%d') 17 | 18 | end_date_str = today.strftime('%Y-%m-%d') 19 | 20 | # Fetch stock data for the given ticker 21 | stock_data = yf.download(stock_symbol, start=start_date_str, end=end_date_str) 22 | 23 | if not stock_data.empty: 24 | current_close_price = stock_data['Close'].iloc[-1] # .iloc for positional access 25 | previous_close_price = stock_data['Close'].iloc[-2] 26 | 27 | daily_profit_loss = current_close_price - previous_close_price 28 | profit_loss_percentage = (daily_profit_loss / previous_close_price) * 100 29 | 30 | if daily_profit_loss > 0: 31 | return f"{stock_symbol} went up by {profit_loss_percentage:.2f}% in the last trading session" 32 | elif daily_profit_loss < 0: 33 | return f"{stock_symbol} went down by {abs(profit_loss_percentage):.2f}% in the last trading session" 34 | else: 35 | return "No change in the stock price in the last trading session" 36 | else: 37 | return f"No data available for {stock_symbol} on {end_date_str}"  # returned (not printed) so callers can embed it in the report 38 | -------------------------------------------------------------------------------- /qdrant+langchain/portfolio_manager/streamlit_app.py: -------------------------------------------------------------------------------- 1 | import streamlit as st 2 | import earning_report_analysis 3 | import stock_price_evaluator 4 | 5 | selected_company = st.selectbox( 6 | 'Select your stock from portfolio list', 7 | ('Alphabet', 'Apple', 'Tesla', 'Microsoft', 'Walmart')) 8 | 9 | st.write('You selected:', selected_company) 10 | if st.button('Performance Report'): 11 | analysed_report = earning_report_analysis.get_company_analysis(selected_company) 12 | price_performance = stock_price_evaluator.get_daily_price_performance(selected_company) 13 | final_user_report = analysed_report + "\n\n" + price_performance 14 | st.write(final_user_report) -------------------------------------------------------------------------------- /rag_eval/.env.example: -------------------------------------------------------------------------------- 1 | OPENAI_API_KEY=ABC 2 | GROQ_API_KEY=ABC 3 | -------------------------------------------------------------------------------- /rag_eval/README.md: -------------------------------------------------------------------------------- 1 | ### Evaluating RAG Systems 2 | 3 | The evaluation modules manifest in the following categories: 4 | 5 | Faithfulness: Assesses whether the response remains true to the retrieved contexts, ensuring there's no distortion or "hallucination." 6 | 7 | Relevancy: Evaluates the relevance of both the retrieved context and the generated answer to the initial query. 8 | 9 | Correctness: Determines if the generated answer aligns with the reference answer based on the query (this does require labels). 10 | 11 | --------------------------------------------------------------------------------------------------------------------------- 12 | 13 | Faithfulness Evaluator - Measures if the response from a query engine matches any source nodes. This is useful for measuring whether the response was hallucinated. 14 | 15 | Relevancy Evaluator - Measures if the response + source nodes match the query. 16 | 17 | Correctness Evaluator - Evaluates the relevance and correctness of a generated answer against a reference answer. 18 | 19 | Retrieval Evaluation - Evaluates the quality of any Retriever module defined in LlamaIndex, using metrics like hit rate and MRR. These compare retrieved results to ground-truth context for any question. 
For simpler evaluation dataset creation, we utilize synthetic data generation. 20 | 21 | ------------------------------------------------------------------------------------------------------------------------------ 22 | 23 | Reference -> https://docs.llamaindex.ai/en/stable/examples/cookbooks/oreilly_course_cookbooks/Module-3/Evaluating_RAG_Systems/ 24 | 25 | 26 | -------------------------------------------------------------------------------- /rag_eval/data/Graph_Retrieval-Augmented_Generation_A_Survey.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/jayita13/GenerativeAI/67ab29100b303220558f76b56a3861e0a464f09a/rag_eval/data/Graph_Retrieval-Augmented_Generation_A_Survey.pdf -------------------------------------------------------------------------------- /rag_eval/requirements.txt: -------------------------------------------------------------------------------- 1 | llama-index==0.11.10 2 | python-dotenv==1.0.1 3 | spacy==3.7.6 4 | --------------------------------------------------------------------------------
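
Addendum (not a file in the repository): the `rag_eval` README above describes the evaluator categories in prose only, so here is a minimal sketch of how they are typically wired up with llama-index 0.11. The judge model (`gpt-4o-mini`), the `data` directory (which in this project holds the GraphRAG survey PDF), the query, and the reference answer are illustrative assumptions, not values taken from the repo:

```python
# Minimal sketch, assuming the llama-index 0.11 evaluation API; the judge model,
# data directory, query, and reference answer below are illustrative only.
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.evaluation import (
    CorrectnessEvaluator,
    FaithfulnessEvaluator,
    RelevancyEvaluator,
    RetrieverEvaluator,
)
from llama_index.llms.openai import OpenAI

llm = OpenAI(model="gpt-4o-mini")  # any llama-index LLM can act as the judge

# Build a plain RAG pipeline over the documents in data/.
documents = SimpleDirectoryReader("data").load_data()
index = VectorStoreIndex.from_documents(documents)
query_engine = index.as_query_engine()

query = "What is Graph Retrieval-Augmented Generation?"
response = query_engine.query(query)

# Faithfulness: is the answer grounded in the retrieved source nodes?
faithfulness = FaithfulnessEvaluator(llm=llm).evaluate_response(response=response)
print("faithful:", faithfulness.passing)

# Relevancy: do the answer and its sources actually address the query?
relevancy = RelevancyEvaluator(llm=llm).evaluate_response(query=query, response=response)
print("relevant:", relevancy.passing)

# Correctness: needs a labeled reference answer; scores on a 1-5 scale.
correctness = CorrectnessEvaluator(llm=llm).evaluate(
    query=query,
    response=str(response),
    reference="GraphRAG retrieves structured graph data to ground generation.",
)
print("correctness:", correctness.score)

# Retrieval quality (hit rate / MRR) is evaluated separately against a
# question/context dataset, e.g. one built with generate_question_context_pairs.
retriever_evaluator = RetrieverEvaluator.from_metric_names(
    ["mrr", "hit_rate"], retriever=index.as_retriever()
)
```

Run with `OPENAI_API_KEY` set (see the `.env.example` above); the judge `llm` can be swapped for any other model llama-index supports.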