├── .DS_Store
├── .gitignore
├── .vscode
    └── settings.json
├── =4.55.0
├── README.md
├── corrective-rag
    ├── __pycache__
    │   └── prompts.cpython-313.pyc
    ├── agent.ipynb
    ├── agent_graph.py
    └── prompts.py
├── debugging-workflow
    └── debug-agent.ipynb
├── deep-research
    ├── .DS_Store
    ├── __pycache__
    │   └── prompt.cpython-313.pyc
    ├── agent.ipynb
    ├── deep_research_agent.py
    ├── evals
    │   ├── chinese
    │   │   ├── race_result.txt
    │   │   └── raw_results.jsonl
    │   ├── english
    │   │   ├── race_result.txt
    │   │   └── raw_results.jsonl
    │   └── overall
    │   │   ├── race_result.txt
    │   │   └── raw_results.jsonl
    ├── prompt.py
    └── research_output
    │   ├── .DS_Store
    │   ├── model_output
    │       ├── gemini-2-5-pro-chinese.jsonl
    │       ├── gemini-2-5-pro-english.jsonl
    │       ├── gemini-2-5-pro-overall.jsonl
    │       ├── output.jsonl
    │       └── output_chinese.jsonl
    │   ├── output_final_report_1.md
    │   ├── output_final_report_10.md
    │   ├── output_final_report_100.md
    │   ├── output_final_report_11.md
    │   ├── output_final_report_12.md
    │   ├── output_final_report_13.md
    │   ├── output_final_report_14.md
    │   ├── output_final_report_15.md
    │   ├── output_final_report_16.md
    │   ├── output_final_report_17.md
    │   ├── output_final_report_18.md
    │   ├── output_final_report_19.md
    │   ├── output_final_report_2.md
    │   ├── output_final_report_20.md
    │   ├── output_final_report_21.md
    │   ├── output_final_report_22.md
    │   ├── output_final_report_23.md
    │   ├── output_final_report_24.md
    │   ├── output_final_report_25.md
    │   ├── output_final_report_26.md
    │   ├── output_final_report_27.md
    │   ├── output_final_report_28.md
    │   ├── output_final_report_29.md
    │   ├── output_final_report_3.md
    │   ├── output_final_report_30.md
    │   ├── output_final_report_31.md
    │   ├── output_final_report_32.md
    │   ├── output_final_report_33.md
    │   ├── output_final_report_34.md
    │   ├── output_final_report_35.md
    │   ├── output_final_report_36.md
    │   ├── output_final_report_37.md
    │   ├── output_final_report_38.md
    │   ├── output_final_report_39.md
    │   ├── output_final_report_4.md
    │   ├── output_final_report_40.md
    │   ├── output_final_report_41.md
    │   ├── output_final_report_42.md
    │   ├── output_final_report_43.md
    │   ├── output_final_report_44.md
    │   ├── output_final_report_45.md
    │   ├── output_final_report_46.md
    │   ├── output_final_report_47.md
    │   ├── output_final_report_48.md
    │   ├── output_final_report_49.md
    │   ├── output_final_report_5.md
    │   ├── output_final_report_50.md
    │   ├── output_final_report_51.md
    │   ├── output_final_report_52.md
    │   ├── output_final_report_53.md
    │   ├── output_final_report_54.md
    │   ├── output_final_report_55.md
    │   ├── output_final_report_56.md
    │   ├── output_final_report_57.md
    │   ├── output_final_report_58.md
    │   ├── output_final_report_59.md
    │   ├── output_final_report_6.md
    │   ├── output_final_report_60.md
    │   ├── output_final_report_61.md
    │   ├── output_final_report_62.md
    │   ├── output_final_report_63.md
    │   ├── output_final_report_64.md
    │   ├── output_final_report_65.md
    │   ├── output_final_report_66.md
    │   ├── output_final_report_67.md
    │   ├── output_final_report_68.md
    │   ├── output_final_report_69.md
    │   ├── output_final_report_7.md
    │   ├── output_final_report_70.md
    │   ├── output_final_report_71.md
    │   ├── output_final_report_72.md
    │   ├── output_final_report_73.md
    │   ├── output_final_report_74.md
    │   ├── output_final_report_75.md
    │   ├── output_final_report_76.md
    │   ├── output_final_report_77.md
    │   ├── output_final_report_78.md
    │   ├── output_final_report_79.md
    │   ├── output_final_report_8.md
    │   ├── output_final_report_80.md
    │   ├── output_final_report_81.md
    │   ├── output_final_report_82.md
    │   ├── output_final_report_83.md
    │   ├── output_final_report_84.md
    │   ├── output_final_report_85.md
    │   ├── output_final_report_86.md
    │   ├── output_final_report_87.md
    │   ├── output_final_report_88.md
    │   ├── output_final_report_89.md
    │   ├── output_final_report_9.md
    │   ├── output_final_report_90.md
    │   ├── output_final_report_91.md
    │   ├── output_final_report_92.md
    │   ├── output_final_report_93.md
    │   ├── output_final_report_94.md
    │   ├── output_final_report_95.md
    │   ├── output_final_report_96.md
    │   ├── output_final_report_97.md
    │   ├── output_final_report_98.md
    │   ├── output_final_report_99.md
    │   └── research_report_variations
    │       ├── note.ipynb
    │       ├── output_1.jsonl
    │       ├── output_2.jsonl
    │       ├── output_3.jsonl
    │       ├── output_4.jsonl
    │       ├── output_final_report_52_depth_1_breadth_2.md
    │       ├── output_final_report_52_depth_2_breadth_3.md
    │       ├── output_final_report_52_depth_2_breadth_5.md
    │       └── variation_metric.txt
├── few-shot-prompting
    ├── agent.py
    └── dataset
    │   └── linkedin-posts.json
├── gdg-agent-ai
    ├── notebook1.ipynb
    ├── notebook2.ipynb
    └── notebook3.ipynb
├── google-job-search-agent
    ├── .DS_Store
    ├── __pycache__
    │   └── prompts.cpython-313.pyc
    ├── agent.ipynb
    ├── agent_graph.py
    ├── knowledge_base
    │   ├── .DS_Store
    │   ├── muskan-resume.pdf
    │   └── resume.pdf
    ├── prompts.py
    └── report
    │   ├── Report.md
    │   └── sample-report.md
├── human-in-loop
    └── agent.ipynb
├── mcp
    ├── agent1.ipynb
    ├── math_server.py
    └── weather_server.py
├── media
    ├── .DS_Store
    ├── banner.png
    ├── banner1.png
    ├── c-rag.png
    ├── debug-workflow.png
    ├── few-shot.png
    ├── google-job-agent.png
    ├── human-in-loop.png
    ├── mcp.png
    ├── memory-agent.png
    ├── outreach-agent.png
    ├── parallel-execute.png
    ├── prompt-chaining.png
    ├── routing.png
    ├── self-rag.png
    ├── text2sql.png
    ├── tool-call.png
    └── vector-emb.png
├── memory-agent
    └── memory-agent.ipynb
├── orchestrator-worker-design-pattern
    └── agent.ipynb
├── outreach-agent
    ├── agent.ipynb
    ├── agent.py
    └── profiles
    │   ├── profile1.txt
    │   └── profile2.txt
├── parallel-execution
    ├── agent.ipynb
    ├── agent.py
    └── articles
    │   ├── bloom-filters.txt
    │   └── graph-db.txt
├── prompt-chaining-design-pattern
    ├── agent.ipynb
    └── design.png
├── reasoning-agent
    └── agent.ipynb
├── routing-design-pattern
    └── agent.ipynb
├── self-rag
    ├── __pycache__
    │   └── prompts.cpython-313.pyc
    ├── agent.ipynb
    ├── agent_graph.py
    └── prompts.py
├── text-to-sql
    ├── Chinook.db
    └── agent.ipynb
├── tool-calls
    ├── __pycache__
    │   ├── article.cpython-313.pyc
    │   └── articles.cpython-313.pyc
    ├── articles.py
    ├── newsletter-db-call.ipynb
    └── tool-call.ipynb
└── vector-embeddings
    └── vector_embed.ipynb


/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/.DS_Store


--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
 1 | saurav-env/*
 2 | outreach-agent/.env
 3 | parallel-execution/.env
 4 | tool-calls/.env
 5 | memory-agent/.env
 6 | debugging-workflow/.env
 7 | few-shot-prompting/.env
 8 | text-to-sql/.env
 9 | reasoning-agent/.env
10 | self-rag/.env
11 | google-job-search-agent/.env
12 | gpt-oss-20b/*
13 | corrective-rag/.env
14 | fine-tune/*
15 | human-in-loop/.env
16 | vector-embeddings/.env
17 | mcp/.env
18 | deep-research/.env
19 | gdg-agent-ai/.env
20 | prompt-chaining-design-pattern/.env
21 | routing-design-pattern/.env
22 | orchestrator-worker-design-pattern/.env
23 | evaluator-optimizer-agent/.env


--------------------------------------------------------------------------------
/.vscode/settings.json:
--------------------------------------------------------------------------------
1 | {
2 |     "editor.wordWrap": "on",
3 |     "chat.editor.wordWrap": "on",
4 |     "editor.wordWrapColumn": 50,
5 |     "notebook.output.wordWrap": true
6 | }


--------------------------------------------------------------------------------
/=4.55.0:
--------------------------------------------------------------------------------
 1 | Requirement already satisfied: transformers in ./saurav-env/lib/python3.13/site-packages (4.55.0)
 2 | Requirement already satisfied: kernels in ./saurav-env/lib/python3.13/site-packages (0.9.0)
 3 | Requirement already satisfied: torch==2.6.0 in ./saurav-env/lib/python3.13/site-packages (2.6.0)
 4 | Requirement already satisfied: filelock in ./saurav-env/lib/python3.13/site-packages (from torch==2.6.0) (3.18.0)
 5 | Requirement already satisfied: typing-extensions>=4.10.0 in ./saurav-env/lib/python3.13/site-packages (from torch==2.6.0) (4.14.1)
 6 | Requirement already satisfied: networkx in ./saurav-env/lib/python3.13/site-packages (from torch==2.6.0) (3.5)
 7 | Requirement already satisfied: jinja2 in ./saurav-env/lib/python3.13/site-packages (from torch==2.6.0) (3.1.6)
 8 | Requirement already satisfied: fsspec in ./saurav-env/lib/python3.13/site-packages (from torch==2.6.0) (2025.7.0)
 9 | Requirement already satisfied: setuptools in ./saurav-env/lib/python3.13/site-packages (from torch==2.6.0) (80.9.0)
10 | Requirement already satisfied: sympy==1.13.1 in ./saurav-env/lib/python3.13/site-packages (from torch==2.6.0) (1.13.1)
11 | Requirement already satisfied: mpmath<1.4,>=1.1.0 in ./saurav-env/lib/python3.13/site-packages (from sympy==1.13.1->torch==2.6.0) (1.3.0)
12 | Requirement already satisfied: huggingface-hub<1.0,>=0.34.0 in ./saurav-env/lib/python3.13/site-packages (from transformers) (0.34.4)
13 | Requirement already satisfied: numpy>=1.17 in ./saurav-env/lib/python3.13/site-packages (from transformers) (2.3.1)
14 | Requirement already satisfied: packaging>=20.0 in ./saurav-env/lib/python3.13/site-packages (from transformers) (25.0)
15 | Requirement already satisfied: pyyaml>=5.1 in ./saurav-env/lib/python3.13/site-packages (from transformers) (6.0.2)
16 | Requirement already satisfied: regex!=2019.12.17 in ./saurav-env/lib/python3.13/site-packages (from transformers) (2024.11.6)
17 | Requirement already satisfied: requests in ./saurav-env/lib/python3.13/site-packages (from transformers) (2.32.4)
18 | Requirement already satisfied: tokenizers<0.22,>=0.21 in ./saurav-env/lib/python3.13/site-packages (from transformers) (0.21.4)
19 | Requirement already satisfied: safetensors>=0.4.3 in ./saurav-env/lib/python3.13/site-packages (from transformers) (0.5.3)
20 | Requirement already satisfied: tqdm>=4.27 in ./saurav-env/lib/python3.13/site-packages (from transformers) (4.67.1)
21 | Requirement already satisfied: hf-xet<2.0.0,>=1.1.3 in ./saurav-env/lib/python3.13/site-packages (from huggingface-hub<1.0,>=0.34.0->transformers) (1.1.5)
22 | Requirement already satisfied: MarkupSafe>=2.0 in ./saurav-env/lib/python3.13/site-packages (from jinja2->torch==2.6.0) (3.0.2)
23 | Requirement already satisfied: charset_normalizer<4,>=2 in ./saurav-env/lib/python3.13/site-packages (from requests->transformers) (3.4.2)
24 | Requirement already satisfied: idna<4,>=2.5 in ./saurav-env/lib/python3.13/site-packages (from requests->transformers) (3.10)
25 | Requirement already satisfied: urllib3<3,>=1.21.1 in ./saurav-env/lib/python3.13/site-packages (from requests->transformers) (2.5.0)
26 | Requirement already satisfied: certifi>=2017.4.17 in ./saurav-env/lib/python3.13/site-packages (from requests->transformers) (2025.7.14)
27 | 


--------------------------------------------------------------------------------
/corrective-rag/__pycache__/prompts.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/corrective-rag/__pycache__/prompts.cpython-313.pyc


--------------------------------------------------------------------------------
/corrective-rag/agent_graph.py:
--------------------------------------------------------------------------------
  1 | from langchain.text_splitter import RecursiveCharacterTextSplitter
  2 | from langchain_community.document_loaders import WebBaseLoader
  3 | from langchain_community.vectorstores import Chroma
  4 | from langchain_openai import OpenAIEmbeddings
  5 | from langchain_core.prompts import ChatPromptTemplate
  6 | from pydantic import BaseModel, Field, SkipValidation
  7 | from langchain_openai import ChatOpenAI
  8 | from typing_extensions import TypedDict
  9 | from langchain import hub
 10 | from langchain_core.output_parsers import StrOutputParser
 11 | from langgraph.graph import END, StateGraph, START
 12 | from langchain_tavily import TavilySearch
 13 | 
 14 | from dotenv import load_dotenv
 15 | 
 16 | from prompts import GRADE_DOCUMENTS_PROMPT, QUESTION_REWRITER_PROMPT
 17 | 
 18 | 
 19 | KNOWLEDGE_BASE_URLS = [
 20 |     "https://www.linkedin.com/pulse/word-embeddings-how-neural-net-understands-words-space-prateek-sbl5c/",
 21 |     "https://www.linkedin.com/pulse/dissecting-backpropagation-neural-networks-saurav-prateek-krcvc/"
 22 | ]
 23 | 
 24 | 
 25 | # Data model
class SharedState(TypedDict):
    """ Shared state for the RAG system. """
    # The user's question; transform_query overwrites it with the rewritten question.
    question: str
    # Final answer text, written by generate_answer_from_documents.
    agent_response: str
    # NOTE(review): build_vector_store stores the result of `as_retriever()` under
    # this key, not the Chroma store itself, so the annotation looks inaccurate -- confirm.
    vector_store: Chroma
    # Text of the retrieved documents; replaced by the graded subset and, on the
    # fallback path, by the web-search result contents.
    relevant_documents: list[str]
    # Chat model instance created by get_model and reused by the later nodes.
    model: ChatOpenAI
 33 | 
# Schema handed to `model.with_structured_output(...)` by grade_and_filter_documents.
# NOTE(review): the docstring and the Field description below are presumably sent to
# the model as part of the schema, so treat their wording as runtime text -- confirm.
class GradeDocuments(BaseModel):
    """Binary score for relevance check on retrieved documents."""

    # Compared case-insensitively against "yes" by grade_and_filter_documents;
    # any other value is treated as "not relevant".
    binary_score: str = Field(
        description="Documents are relevant to the question, 'yes' or 'no'"
    )
 40 | 
 41 | 
 42 | def get_model(shared_state):
 43 |     shared_state['model'] = ChatOpenAI(model="gpt-4o-mini", temperature=0)
 44 |     return shared_state
 45 | 
 46 | 
 47 | def build_vector_store(shared_state):
 48 |     """
 49 |     Build a vector store from the knowledge base URLs.
 50 |     """
 51 |     docs = [WebBaseLoader(url).load() for url in KNOWLEDGE_BASE_URLS]
 52 |     docs_list = [item for sublist in docs for item in sublist]
 53 | 
 54 |     text_splitter = RecursiveCharacterTextSplitter.from_tiktoken_encoder(
 55 |         chunk_size=250, chunk_overlap=0
 56 |     )
 57 |     doc_splits = text_splitter.split_documents(docs_list)
 58 | 
 59 |     # Add to vectorDB
 60 |     vector_store = Chroma.from_documents(
 61 |         documents=doc_splits,
 62 |         collection_name="rag-chroma",
 63 |         embedding=OpenAIEmbeddings(),
 64 |     )
 65 |     shared_state['vector_store'] = vector_store.as_retriever()
 66 | 
 67 |     return shared_state
 68 | 
 69 | 
 70 | def get_relevant_documents(shared_state):
 71 |     """
 72 |     Get relevant documents from the vector store.
 73 |     """
 74 |     question = shared_state["question"]
 75 |     vector_store = shared_state["vector_store"]
 76 | 
 77 |     documents = vector_store.invoke(question)
 78 |     shared_state["relevant_documents"] = [doc.page_content for doc in documents]
 79 | 
 80 |     return shared_state
 81 | 
 82 | 
 83 | def grade_and_filter_documents(shared_state):
 84 |     """
 85 |     Grade the relevance of retrieved documents to a user question.
 86 |     """
 87 |     print("\n\n Grading documents for relevance... \n")
 88 |     question = shared_state['question']
 89 |     model = shared_state['model']
 90 |     documents = shared_state['relevant_documents']
 91 |     structured_llm_grader = model.with_structured_output(GradeDocuments)
 92 | 
 93 |     grade_prompt = ChatPromptTemplate.from_messages(
 94 |         [
 95 |             ("system", GRADE_DOCUMENTS_PROMPT),
 96 |             ("human", "Retrieved document: \n\n {document} \n\n User question: {question}"),
 97 |         ]
 98 |     )
 99 | 
100 |     retrieval_grader = grade_prompt | structured_llm_grader
101 |     filtered_documents = []
102 | 
103 |     for document in documents:
104 |         grader_response = retrieval_grader.invoke({"question": question, "document": document})
105 |         if grader_response.binary_score.lower() == "yes":
106 |             print("---GRADE: DOCUMENT RELEVANT---")
107 |             filtered_documents.append(document)
108 |         else:
109 |             print("---GRADE: DOCUMENT NOT RELEVANT---")
110 |     
111 |     print("Relevant documents left after filtering:", len(filtered_documents))
112 |     shared_state['relevant_documents'] = filtered_documents
113 | 
114 |     return shared_state
115 | 
116 | 
def generate_answer_from_documents(shared_state):
    """Answer the question from the documents currently held in the state.

    Pulls the "rlm/rag-prompt" prompt from the hub, pipes it into the model
    and a StrOutputParser, invokes the chain with the documents as "context"
    and the question as "question", and stores the output in
    shared_state['agent_response'].

    Returns the same shared_state object.
    """
    llm = shared_state['model']
    # Fetched on every call.
    prompt_template = hub.pull("rlm/rag-prompt")
    user_question = shared_state['question']
    context_documents = shared_state['relevant_documents']

    answer_chain = prompt_template | llm | StrOutputParser()
    chain_inputs = {"context": context_documents, "question": user_question}

    shared_state['agent_response'] = answer_chain.invoke(chain_inputs)
    return shared_state
130 | 
131 | 
def decide_to_generate(shared_state):
    """Route after grading: answer now, or rewrite the query and search the web.

    Returns "transform_query" when shared_state['relevant_documents'] is
    empty, otherwise "generate". Prints which route was chosen.
    """
    # Guard clause: nothing survived grading, so take the fallback route.
    if len(shared_state['relevant_documents']) == 0:
        print("\n No relevant documents found, transform query and performing web search... \n\n")
        return "transform_query"

    print("\n Generating answer from relevant documents... \n\n")
    return "generate"
140 | 
141 | 
def transform_query(shared_state):
    """
    Rewrite the question and store the rewritten version in the state.

    Builds a chain from a system prompt (QUESTION_REWRITER_PROMPT), a human
    turn containing the current question, the model and a StrOutputParser;
    the chain's output overwrites shared_state['question'].

    Returns the same shared_state object.
    """
    print("\n\n ---TRANSFORMING QUERY---")
    original_question = shared_state["question"]
    llm = shared_state["model"]

    system_turn = ("system", QUESTION_REWRITER_PROMPT)
    human_turn = (
        "human",
        "Here is the initial question: \n\n {question} \n Formulate an improved question.",
    )
    rewrite_chain = (
        ChatPromptTemplate.from_messages([system_turn, human_turn])
        | llm
        | StrOutputParser()
    )

    rewritten = rewrite_chain.invoke({"question": original_question})
    print("Transformed question: \n", rewritten)

    # Later nodes read the question from the state, so they see the rewrite.
    shared_state['question'] = rewritten
    return shared_state
167 | 
168 | 
def perform_web_search(shared_state):
    """Perform a web search as a fallback source of documents.

    Queries TavilySearch with shared_state["question"] and replaces
    shared_state["relevant_documents"] with the 'content' field of every
    returned result. A response whose 'results' entry is missing or empty
    produces an empty document list instead of raising.

    Returns the same shared_state object.
    """
    print("\n\n Performing a Web Search--- \n\n")

    question = shared_state["question"]
    web_search_tool = TavilySearch()

    web_results = web_search_tool.invoke({"query": question})

    # Tolerate a response without usable results: indexing [0] on an empty
    # list raises IndexError, and a missing 'results' key raises KeyError.
    results = web_results.get('results') or []
    if results:
        print("Web search result:", results[0])
    else:
        print("Web search returned no results.")

    shared_state['relevant_documents'] = [result['content'] for result in results]
    return shared_state
182 | 
183 | 
def build_graph():
    """Assemble and compile the corrective-RAG workflow.

    Flow: START -> get_model -> build_vector_store -> get_relevant_documents
    -> grade_and_filter_documents, then decide_to_generate routes either to
    generate_answer_from_documents, or to transform_query ->
    perform_web_search -> generate_answer_from_documents. The graph ends
    after generate_answer_from_documents.

    Returns the compiled graph.
    """
    graph = StateGraph(SharedState)

    # Register the nodes.
    for node_name, node_fn in (
        ("get_model", get_model),
        ("build_vector_store", build_vector_store),
        ("get_relevant_documents", get_relevant_documents),
        ("grade_and_filter_documents", grade_and_filter_documents),
        ("generate_answer_from_documents", generate_answer_from_documents),
        ("perform_web_search", perform_web_search),  # web search
        ("transform_query", transform_query),
    ):
        graph.add_node(node_name, node_fn)

    # Straight-line part of the pipeline, wired pairwise.
    retrieval_path = [
        START,
        "get_model",
        "build_vector_store",
        "get_relevant_documents",
        "grade_and_filter_documents",
    ]
    for source, target in zip(retrieval_path, retrieval_path[1:]):
        graph.add_edge(source, target)

    # Branch on the grading outcome.
    routes = {
        "transform_query": "transform_query",
        "generate": "generate_answer_from_documents",
    }
    graph.add_conditional_edges("grade_and_filter_documents", decide_to_generate, routes)

    # Fallback path and termination.
    for source, target in (
        ("transform_query", "perform_web_search"),
        ("perform_web_search", "generate_answer_from_documents"),
        ("generate_answer_from_documents", END),
    ):
        graph.add_edge(source, target)

    return graph.compile()
215 | 
216 | 
217 | 
# Run the agent workflow once.
# There is no __main__ guard, so this also runs when the module is imported.

load_dotenv()  # load environment variables via python-dotenv

# Sample questions:
#   Explain Binary Heap?
#   What are word embeddings and how do they work?
compiled_graph = build_graph()
# print(compiled_graph.get_graph().draw_mermaid())
initial_state = {'question': "Explain Binary Heap?"}
shared_state = compiled_graph.invoke(initial_state)

print("\n Agent Response \n")
print(shared_state['agent_response'])


--------------------------------------------------------------------------------
/corrective-rag/prompts.py:
--------------------------------------------------------------------------------
 1 | 
 2 | GRADE_DOCUMENTS_PROMPT = """
 3 |     You are a grader assessing relevance of a retrieved document to a user question. \n 
 4 |     If the document contains keyword(s) or semantic meaning related to the question, grade it as relevant. \n
 5 |     Give a binary score 'yes' or 'no' score to indicate whether the document is relevant to the question.
 6 | """
 7 | 
 8 | QUESTION_REWRITER_PROMPT = """
 9 |     You a question re-writer that converts an input question to a better version that is optimized \n 
10 |     for web search. Look at the input and try to reason about the underlying semantic intent / meaning.
11 | """


--------------------------------------------------------------------------------
/debugging-workflow/debug-agent.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from langgraph.graph import StateGraph, START, END\n",
 10 |     "from langgraph.checkpoint.memory import InMemorySaver\n",
 11 |     "from typing import Annotated\n",
 12 |     "from typing_extensions import TypedDict\n",
 13 |     "from operator import add\n",
 14 |     "from langchain_openai import ChatOpenAI\n",
 15 |     "\n",
 16 |     "from dotenv import load_dotenv"
 17 |    ]
 18 |   },
 19 |   {
 20 |    "cell_type": "code",
 21 |    "execution_count": 2,
 22 |    "metadata": {},
 23 |    "outputs": [
 24 |     {
 25 |      "data": {
 26 |       "text/plain": [
 27 |        "True"
 28 |       ]
 29 |      },
 30 |      "execution_count": 2,
 31 |      "metadata": {},
 32 |      "output_type": "execute_result"
 33 |     }
 34 |    ],
 35 |    "source": [
 36 |     "load_dotenv()"
 37 |    ]
 38 |   },
 39 |   {
 40 |    "cell_type": "code",
 41 |    "execution_count": 3,
 42 |    "metadata": {},
 43 |    "outputs": [],
 44 |    "source": [
 45 |     "class SharedState(TypedDict):\n",
 46 |     "    dsa_topics: list[str]\n",
 47 |     "    system_design_topics: list[str]\n",
 48 |     "\n",
 49 |     "def parse_model_response(response):\n",
 50 |     "    \"\"\" Parse the model response\"\"\"\n",
 51 |     "    return [topic.strip() for topic in response.split(\",\") if topic.strip()]"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 4,
 57 |    "metadata": {},
 58 |    "outputs": [],
 59 |    "source": [
 60 |     "def find_relevant_dsa_topics(shared_state: SharedState) -> SharedState:\n",
 61 |     "    \"\"\" Find relevant DSA topics for year 2025.\"\"\"\n",
 62 |     "    query = \"\"\"\n",
 63 |     "    Can you provide top 5 DSA topics to master for Software Engineering interviews in 2025?.\n",
 64 |     "    Please only return the DSA topics comma separated, no other detail is needed. \n",
 65 |     "    \"\"\"\n",
 66 |     "    model = ChatOpenAI(model=\"gpt-4o\")\n",
 67 |     "    \n",
 68 |     "    response = model.invoke(query)\n",
 69 |     "    # shared_state[\"dsa_topics\"] = parse_model_response(response.content)\n",
 70 |     "\n",
 71 |     "    return {\n",
 72 |     "        \"dsa_topics\": parse_model_response(response.content),\n",
 73 |     "        \"system_design_topics\": []\n",
 74 |     "    }"
 75 |    ]
 76 |   },
 77 |   {
 78 |    "cell_type": "code",
 79 |    "execution_count": 12,
 80 |    "metadata": {},
 81 |    "outputs": [],
 82 |    "source": [
 83 |     "def find_relevant_system_design_topics(shared_state: SharedState) -> SharedState:\n",
 84 |     "    \"\"\" Find relevant System Design topics for year 2025.\"\"\"\n",
 85 |     "    query = \"\"\"\n",
 86 |     "    Can you provide top 5 System Design topics to master for Software Engineering interviews in 2025?.\n",
 87 |     "    Please only return the System Design topics comma separated, no other detail is needed. \n",
 88 |     "    \"\"\"\n",
 89 |     "    model = ChatOpenAI(model=\"gpt-4o\")\n",
 90 |     "    \n",
 91 |     "    response = model.invoke(query)\n",
 92 |     "    shared_state[\"system_design_topics\"] = parse_model_response(response.content)\n",
 93 |     "\n",
 94 |     "    return shared_state"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 6,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "def build_graph():\n",
104 |     "    # Building a Graph\n",
105 |     "  # State of the Graph that will be shared among nodes.\n",
106 |     "  workflow = StateGraph(SharedState)\n",
107 |     "\n",
108 |     "  # Add nodes.\n",
109 |     "  workflow.add_node(\"find_relevant_dsa_topics\", find_relevant_dsa_topics)\n",
110 |     "  workflow.add_node(\"find_relevant_system_design_topics\", find_relevant_system_design_topics)\n",
111 |     "\n",
112 |     "  # Define the edges of the graph.\n",
113 |     "  workflow.add_edge(START, \"find_relevant_dsa_topics\")\n",
114 |     "  workflow.add_edge(\"find_relevant_dsa_topics\", \"find_relevant_system_design_topics\")\n",
115 |     "  workflow.add_edge(\"find_relevant_system_design_topics\", END)\n",
116 |     "\n",
117 |     "  checkpointer = InMemorySaver()\n",
118 |     "  graph = workflow.compile(checkpointer=checkpointer)\n",
119 |     "\n",
120 |     "  config = {\"configurable\": {\"thread_id\": \"1\"}}\n",
121 |     "  response = graph.invoke({}, config)\n",
122 |     "\n",
123 |     "  # print(graph.get_graph().draw_mermaid())\n",
124 |     "\n",
125 |     "  return response, graph, config"
126 |    ]
127 |   },
128 |   {
129 |    "cell_type": "code",
130 |    "execution_count": 13,
131 |    "metadata": {},
132 |    "outputs": [
133 |     {
134 |      "name": "stdout",
135 |      "output_type": "stream",
136 |      "text": [
137 |       "{'dsa_topics': ['Arrays', 'Strings', 'Trees', 'Graphs', 'Dynamic Programming'], 'system_design_topics': ['Scalability and Load Balancing', 'Data Storage and Management', 'Microservices Architecture', 'High Availability and Fault Tolerance', 'Real-Time Data Processing.']}\n"
138 |      ]
139 |     }
140 |    ],
141 |    "source": [
142 |     "response, graph, config = build_graph()\n",
143 |     "print(response)"
144 |    ]
145 |   },
146 |   {
147 |    "cell_type": "code",
148 |    "execution_count": 14,
149 |    "metadata": {},
150 |    "outputs": [],
151 |    "source": [
152 |     "snapshots = list(graph.get_state_history(config))"
153 |    ]
154 |   },
155 |   {
156 |    "cell_type": "code",
157 |    "execution_count": 15,
158 |    "metadata": {},
159 |    "outputs": [
160 |     {
161 |      "name": "stdout",
162 |      "output_type": "stream",
163 |      "text": [
164 |       "StateSnapshot(values={'dsa_topics': ['Arrays', 'Strings', 'Trees', 'Graphs', 'Dynamic Programming'], 'system_design_topics': []}, next=('find_relevant_system_design_topics',), config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f0621d7-4614-6ed8-8001-15c409bf22f5'}}, metadata={'source': 'loop', 'step': 1, 'parents': {}, 'thread_id': '1'}, created_at='2025-07-16T08:18:29.459290+00:00', parent_config={'configurable': {'thread_id': '1', 'checkpoint_ns': '', 'checkpoint_id': '1f0621d7-3c11-6b2a-8000-dc2ec3bde44b'}}, tasks=(PregelTask(id='200ba79c-7371-04c9-3118-9437577f3cce', name='find_relevant_system_design_topics', path=('__pregel_pull', 'find_relevant_system_design_topics'), error=None, interrupts=(), state=None, result={'dsa_topics': ['Arrays', 'Strings', 'Trees', 'Graphs', 'Dynamic Programming'], 'system_design_topics': ['Scalability and Load Balancing', 'Data Storage and Management', 'Microservices Architecture', 'High Availability and Fault Tolerance', 'Real-Time Data Processing.']}),), interrupts=())\n"
165 |      ]
166 |     }
167 |    ],
168 |    "source": [
169 |     "print(snapshots[1])"
170 |    ]
171 |   },
172 |   {
173 |    "cell_type": "code",
174 |    "execution_count": 16,
175 |    "metadata": {},
176 |    "outputs": [
177 |     {
178 |      "name": "stdout",
179 |      "output_type": "stream",
180 |      "text": [
181 |       "================== Snapshot details: ==================\n",
182 |       "{}\n",
183 |       "('__start__',)\n",
184 |       "\n",
185 |       "\n",
186 |       "\n",
187 |       "================== Snapshot details: ==================\n",
188 |       "{}\n",
189 |       "('find_relevant_dsa_topics',)\n",
190 |       "\n",
191 |       "\n",
192 |       "\n",
193 |       "================== Snapshot details: ==================\n",
194 |       "{'dsa_topics': ['Arrays', 'Strings', 'Trees', 'Graphs', 'Dynamic Programming'], 'system_design_topics': []}\n",
195 |       "('find_relevant_system_design_topics',)\n",
196 |       "\n",
197 |       "\n",
198 |       "\n",
199 |       "================== Snapshot details: ==================\n",
200 |       "{'dsa_topics': ['Arrays', 'Strings', 'Trees', 'Graphs', 'Dynamic Programming'], 'system_design_topics': ['Scalability and Load Balancing', 'Data Storage and Management', 'Microservices Architecture', 'High Availability and Fault Tolerance', 'Real-Time Data Processing.']}\n",
201 |       "()\n",
202 |       "\n",
203 |       "\n",
204 |       "\n"
205 |      ]
206 |     }
207 |    ],
208 |    "source": [
209 |     "list.reverse(snapshots)\n",
210 |     "\n",
211 |     "for snapshot in snapshots:\n",
212 |     "    print(\"================== Snapshot details: ==================\")\n",
213 |     "    print(snapshot.values)\n",
214 |     "    print(snapshot.next)\n",
215 |     "    print(\"\\n\\n\")"
216 |    ]
217 |   }
218 |  ],
219 |  "metadata": {
220 |   "kernelspec": {
221 |    "display_name": "saurav-env",
222 |    "language": "python",
223 |    "name": "python3"
224 |   },
225 |   "language_info": {
226 |    "codemirror_mode": {
227 |     "name": "ipython",
228 |     "version": 3
229 |    },
230 |    "file_extension": ".py",
231 |    "mimetype": "text/x-python",
232 |    "name": "python",
233 |    "nbconvert_exporter": "python",
234 |    "pygments_lexer": "ipython3",
235 |    "version": "3.13.1"
236 |   }
237 |  },
238 |  "nbformat": 4,
239 |  "nbformat_minor": 2
240 | }
241 | 


--------------------------------------------------------------------------------
/deep-research/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/deep-research/.DS_Store


--------------------------------------------------------------------------------
/deep-research/__pycache__/prompt.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/deep-research/__pycache__/prompt.cpython-313.pyc


--------------------------------------------------------------------------------
/deep-research/evals/chinese/race_result.txt:
--------------------------------------------------------------------------------
1 | Comprehensiveness: 0.3701
2 | Insight: 0.3425
3 | Instruction Following: 0.3952
4 | Readability: 0.3702
5 | Overall Score: 0.3680
6 | 


--------------------------------------------------------------------------------
/deep-research/evals/english/race_result.txt:
--------------------------------------------------------------------------------
1 | Comprehensiveness: 0.3323
2 | Insight: 0.2665
3 | Instruction Following: 0.3820
4 | Readability: 0.3387
5 | Overall Score: 0.3265
6 | 


--------------------------------------------------------------------------------
/deep-research/evals/overall/race_result.txt:
--------------------------------------------------------------------------------
1 | Comprehensiveness: 0.3512
2 | Insight: 0.3045
3 | Instruction Following: 0.3886
4 | Readability: 0.3544
5 | Overall Score: 0.3472


--------------------------------------------------------------------------------
/deep-research/prompt.py:
--------------------------------------------------------------------------------
 1 | INDEPENDENT_AGENT_PROMPT = """
 2 | You are a "Distributor" Node, a Master Research Strategist and Planner for a "Deep Research" multi-agent system.
 3 | 
 4 | Your sole purpose is to receive a single, high-level research query and create a list of discrete, non-overlapping sub-queries for independent investigation.
 5 | 
 6 | [TASK INPUTS]
 7 | 1.  **User_Research_Query:** "{query}"
 8 | 2. **Limit of Sub Queries:** "{limit}"
 9 | 
10 | [INSTRUCTIONS]
11 | 1.  **Analyze Query:** First, deeply analyze the `query`. Identify the core themes, entities, and implicit questions.
12 | 2. **Respect the Sub Query limit**: You can break a query into a limited number of sub queries mentioned as a **limit** in the input.
13 | 3.  **Identify Pillars:** Brainstorm the fundamental pillars or dimensions of the main topic. (e.g., for "The future of AI in healthcare," pillars might be: 1. Current Applications, 2. Key Technologies (ML/NLP/CV), 3. Regulatory Challenges, 4. Major Companies & Startups, 5. Ethical Implications, 6. Future Projections & Innovations).
14 | 4.  **Apply MECE Principle:** Convert these pillars into a set of sub-topics. These sub-topics MUST be **MECE** (Mutually Exclusive, Collectively Exhaustive).
15 |     * **Mutually Exclusive:** No two sub-topics should overlap. This prevents two worker agents from doing the same research.
16 |     * **Collectively Exhaustive:** All the sub-topics, when combined, must fully answer the original `query`.
17 | 
18 | Given the user research query return the sub queries.
19 | """
20 | 
21 | WORKER_PROMPT = """
22 | You are a Specialist Research Agent, a worker node in a decentralized "Deep Research" multi-agent system. Your purpose is to autonomously conduct exhaustive, in-depth research on a single, assigned sub-topic.
23 | 
24 | Your sole focus is to deeply investigate and report on your specific assigned sub-topic. You have also been provided with relevant web search results to aid your research.
25 | 
26 | A Web Search Result has the following format:
27 | 
28 |     "cited_url": "The URL of the web page where the information was found",
29 |     "content": "The content extracted from the web page",
30 |     "score": "The relevance score of the content to the query"
31 | 
32 | 
33 | [TASK INPUTS]
34 | 1. **Assigned_Sub_Topic:** "{query}"
35 | 2. **Web_Search_Results:** "{web_search_results}"
36 | 
37 | [INSTRUCTIONS]
38 | 1.  **Analyze Task:** Carefully review your `Assigned_Sub_Topic`.
39 | 2.  **Formulate Queries:** Generate a series of precise, deep-diving search queries to investigate your sub-topic. Go beyond superficial keywords.
40 | 3. **Take Web Search Results into Account:** Thoroughly examine the provided web search results. Identify and prioritize the most relevant and credible sources.
41 | 4.  **Synthesize & Analyze:** Do not just list search results. Read and synthesize the information you find. Extract key facts, figures, arguments, and counter-arguments.
42 | 5.  **Cite All Sources:** For every key fact or claim you report, you MUST provide an inline citation.
43 | 
44 | [RULES & CONSTRAINTS]
45 | * **Autonomy:** You must complete this task independently without asking for clarification.
46 | * **Focus:** Stick *strictly* to your `Assigned_Sub_Topic`. Do not deviate.
47 | * **Depth:** Superficial, top-level summaries are not acceptable. Your analysis must be detailed and well-supported.
48 | * **Objectivity:** Report findings factually.
49 | * **Verification:** If you find conflicting information, report the conflict and cite both perspectives.
50 | * **No Hallucination:** If you cannot find a definitive answer to a key question, state that the information is "inconclusive" or "not publicly available," and explain what you found. Do not invent an answer.
51 | """
52 | 
53 | QUERY_SPLITTER_PROMPT = """ 
54 | You are a query splitter and your goal is to determine whether the given query needs to be split into multiple sub queries or not. Based on the complexity of the query, you need to decide whether to split the query or not.
55 | """
56 | 
57 | UNIQUE_RESEARCH_TOPIC_PROMPT = """
58 | You are a Research Topics Reviewer and your goal is to determine whether the given research topic is semantically different from the previous research topics which have already been addressed.
59 | 
60 | [TASK INPUTS]
61 | 1. **New Research Topic:** "{research_topic}"
62 | 2. **Previous Research Topics:** "{previous_research_topics}"
63 | 
64 | [INSTRUCTIONS]
65 | 1. **Analyze New Topic:** Carefully review the `New_Research_Topic`.
66 | 2. **Compare with Previous Topics:** Compare the `New_Research_Topic` with the `Previous_Research_Topics` to determine if it is semantically unique.
67 | 3. **Decision Criteria:** A topic is considered semantically unique if it does not overlap significantly in scope, focus, or intent with any of the `Previous_Research_Topics`.
68 | """


--------------------------------------------------------------------------------
/deep-research/research_output/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/deep-research/research_output/.DS_Store


--------------------------------------------------------------------------------
/deep-research/research_output/output_final_report_15.md:
--------------------------------------------------------------------------------
  1 | # Deep Research Report
  2 | 
  3 | ## Table of Contents 
  4 | - 识别并列出欧洲（重点关注荷兰、德国、英国、瑞士、奥地利、法国等）在量子网络研究领域内，被广泛引用、具有高知名度或获得重要资助的顶尖研究课题组，并注明其所属机构和国家。
  5 | - 针对课题组名单中的第二部分（例如，中间1/3），基于其核心出版物，分析并总结每个课题组的主要研究方向。
  6 | - 针对课题组名单中的剩余部分（例如，后1/3），基于其核心出版物，提炼并总结每个课题组的关键技术贡献。
  7 | - 调查各个课题组从政府机构获得的科研基金详情，例如国家自然科学基金、科技部重点研发计划等。请列出具体的项目名称、批准号、起止时间及资助金额。
  8 | - 调查各个课题组与企业之间的横向合作项目。内容应包括合作企业名称、项目主题、合作模式（例如技术开发、咨询服务）、项目金额及执行周期。
  9 | - 研究各个课题组参与的其他形式的公私合作计划（PPP）或国际合作项目。请说明合作方的性质（例如非政府组织、国际机构）、资金来源构成、项目目标和已取得的成果。
 10 | 
 11 | ## Report 
 12 | ## 收集整理目前世界上关于量子网络的研究，横向比较各课题组的相关工作，从以下几个维度，也可以不局限于这些维度：文章发表期刊或会议的等级，课题组成员和领导者的技术背景或学术头衔，课题组经费来源，课题组横向或纵向项目等维度，并为我评估出最有潜力的可以引领未来量子网络发展的十个课题组
 13 | 
 14 | 
 15 | 
 16 | ## 识别并列出全球在量子网络研究领域内，被广泛引用、具有高知名度或获得重要资助的顶尖研究课题组。请提供一个包含至少20-30个候选课题组的初步名单，并注明其所属机构和国家。
 17 | 
 18 | 
 19 | 
 20 |  
 21 |  ### 识别并列出欧洲（重点关注荷兰、德国、英国、瑞士、奥地利、法国等）在量子网络研究领域内，被广泛引用、具有高知名度或获得重要资助的顶尖研究课题组，并注明其所属机构和国家。
 22 | 
 23 | 好的，根据您提供的任务和现有信息，由于提供的网络搜索结果与“量子网络”这一主题无关，我将基于我知识库中关于欧洲量子技术研究的既有信息，为您识别和列出在该领域内顶尖的研究课题组。
 24 | 
 25 | 以下是在欧洲量子网络研究领域内，被广泛引用、具有高知名度或获得重要资助的顶尖研究课题组列表，重点覆盖了荷兰、德国、英国、瑞士、奥地利和法国：
 26 | 
 27 | ### **荷兰 (Netherlands)**
 28 | 
 29 | *   **研究课题组/机构:** **QuTech**
 30 |     *   **所属机构:** 代尔夫特理工大学 (Delft University of Technology, TU Delft) 与荷兰国家应用科学院 (TNO) 联合创办。
 31 |     *   **国家:** 荷兰
 32 |     *   **简介:** QuTech 是全球量子技术研究的绝对领导者之一，尤其在量子互联网领域处于前沿地位。由 **Ronald Hanson** 教授领导的团队在量子纠缠和远距离量子通信方面取得了多项突破性成果，包括首次实现无漏洞贝尔不等式检验。另一位核心人物 **Stephanie Wehner** 教授则在量子网络协议和软件栈方面做出了开创性工作。QuTech 获得了大量国家和欧盟的资助，正在建设连接荷兰多个城市的国家级量子网络测试平台。
 33 | 
 34 | ### **奥地利 (Austria)**
 35 | 
 36 | *   **研究课题组/机构:** **Anton Zeilinger 研究组**
 37 |     *   **所属机构:** 奥地利科学院下属的量子光学与量子信息研究所 (IQOQI Vienna) 和维也纳大学。
 38 |     *   **国家:** 奥地利
 39 |     *   **简介:** 由2022年诺贝尔物理学奖得主 **Anton Zeilinger** 领导的团队是量子信息科学的奠基者之一。他们在量子纠缠分发、量子隐形传态和基于卫星的全球量子通信方面进行了大量开创性实验。该团队的研究被广泛引用，对整个量子网络领域的发展产生了深远影响。团队中的 **Rupert Ursin** 等研究员也在长距离量子通信实验中扮演着关键角色。
 40 | 
 41 | ### **瑞士 (Switzerland)**
 42 | 
 43 | *   **研究课题组/机构:** **Nicolas Gisin 研究组 (Group of Applied Physics)**
 44 |     *   **所属机构:** 日内瓦大学 (University of Geneva)。
 45 |     *   **国家:** 瑞士
 46 |     *   **简介:** **Nicolas Gisin** 教授是量子通信和量子密码学领域的先驱。他的团队在量子密钥分发 (QKD) 的理论和实验上做出了卓越贡献，并成功将技术商业化，创办了全球领先的量子安全公司 ID Quantique。该团队的研究成果是量子网络安全领域引用率最高的文献之一。
 47 | 
 48 | *   **研究课题组/机构:** **Andreas Wallraff 研究组 (Quantum Device Lab)**
 49 |     *   **所属机构:** 苏黎世联邦理工学院 (ETH Zurich)。
 50 |     *   **国家:** 瑞士
 51 |     *   **简介:** 该团队专注于基于超导电路的量子计算和量子光学研究。虽然其核心是量子计算，但他们开发的用于量子比特之间耦合和信息传输的技术，对于构建量子网络中的处理节点至关重要。他们的工作获得了瑞士国家科学基金和欧盟的大力资助。
 52 | 
 53 | ### **德国 (Germany)**
 54 | 
 55 | *   **研究课题组/机构:** **Gerhard Rempe 研究组 (Quantum Dynamics Division)**
 56 |     *   **所属机构:** 马克斯·普朗克量子光学研究所 (Max Planck Institute of Quantum Optics, MPQ)。
 57 |     *   **国家:** 德国
 58 |     *   **简介:** **Gerhard Rempe** 教授的团队在腔量子电动力学 (Cavity QED) 领域处于世界领先地位，这是实现光与物质（单个原子）强相互作用的关键技术，也是构建量子网络节点和量子中继器的核心物理系统之一。他们的基础性研究具有极高的知名度和引用率。
 59 | 
 60 | *   **研究课题组/机构:** **Harald Weinfurter 研究组**
 61 |     *   **所属机构:** 慕尼黑大学 (Ludwig Maximilian University of Munich, LMU)。
 62 |     *   **国家:** 德国
 63 |     *   **简介:** 该团队专注于实验量子密码学和长距离量子通信。他们在自由空间和基于光纤的量子密钥分发 (QKD) 方面取得了显著成就，并积极参与德国和欧洲的量子通信网络项目。
 64 | 
 65 | ### **法国 (France)**
 66 | 
 67 | *   **研究课题组/机构:** **Pascale Senellart 研究组**
 68 |     *   **所属机构:** 法国国家科学研究中心 (CNRS) 下属的纳米科学与纳米技术中心 (C2N)，隶属于巴黎萨克雷大学。
 69 |     *   **国家:** 法国
 70 |     *   **简介:** **Pascale Senellart** 的团队在半导体量子点单光子源的开发方面全球领先。高质量、按需式的单光子源是实现可扩展光量子网络和计算的关键瓶颈技术。他们的工作因其卓越的性能和高影响力而闻名，并获得了法国国家量子计划的重点资助。
 71 | 
 72 | *   **研究课题组/机构:** **Julien Laurat 研究组**
 73 |     *   **所属机构:** 索邦大学 (Sorbonne University) 卡斯特勒-布罗塞尔实验室 (LKB)。
 74 |     *   **国家:** 法国
 75 |     *   **简介:** 该团队的研究重点是量子存储器以及光与冷原子系综的量子接口。量子存储器是构建量子中继器、从而实现远距离量子网络的核心部件。他们在这一关键技术领域做出了重要贡献。
 76 | 
 77 | ### **英国 (United Kingdom)**
 78 | 
 79 | *   **研究课题组/机构:** **东芝欧洲研究有限公司剑桥研究实验室 (Toshiba Europe Research Laboratories, Cambridge Research Laboratory)**
 80 |     *   **所属机构:** 东芝公司 / 剑桥大学 (合作紧密)。
 81 |     *   **国家:** 英国
 82 |     *   **简介:** 由 **Andrew Shields** 领导的团队是量子密钥分发 (QKD) 领域的全球领导者，尤其是在商业化和实际应用方面。他们创造了多项 QKD 传输距离和密钥速率的世界纪录，并开发了稳定运行的商用QKD系统。
 83 | 
 84 | *   **研究课题组/机构:** **牛津大学量子光学与量子信息团队**
 85 |     *   **所属机构:** 牛津大学 (University of Oxford)。
 86 |     *   **国家:** 英国
 87 |     *   **简介:** 以 **Ian Walmsley** 教授等人为代表，牛津大学是英国国家量子技术计划的核心。他们曾领导“网络化量子信息技术中心 (NQIT)”，专注于构建模块化、可扩展的量子计算机和量子网络。研究涵盖了离子阱、光子学和固态系统等多种技术路径。
 88 | 
 89 | ## 针对第一步识别出的课题组名单，调研并整理每个课题组在过去五年内发表的核心出版物。重点关注发表期刊或会议的等级，并总结其主要研究方向和技术贡献。
 90 | 
 91 | 
 92 | 
 93 |  
 94 |  ### 针对课题组名单中的第二部分（例如，中间1/3），基于其核心出版物，分析并总结每个课题组的主要研究方向。
 95 | 
 96 | 根据提供的Web搜索结果，无法完成“针对课题组名单中的第二部分（例如，中间1/3），基于其核心出版物，分析并总结每个课题组的主要研究方向”这一任务。
 97 | 
 98 | 主要原因如下：
 99 | 1.  **缺少课题组名单**：提供的资料是一份关于国家社科基金项目申报的问答汇编，其中提到了来自扬州大学、南京林业大学、南京财经大学等多所高校的教师提问。然而，这些信息并不构成一个可供分析的、有序的“课题组名单”。因此，无法定位到“第二部分（中间1/3）”的课题组。
100 | 2.  **缺少核心出版物信息**：资料内容主要是秦兴方教授对项目申报策略、选题方向、书写技巧等问题的解答。虽然文中引用了一些关于“新质生产力”的期刊文章作为例子 (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)，但这些并非提问者所在课题组的核心出版物，而是用于说明观点的参考文献。
101 | 
102 | 尽管无法直接完成任务，但可以从提问内容中间接分析出部分提问者（可视为代表了某些研究方向）的**潜在研究兴趣**：
103 | 
104 | *   **南京工业大学的提问者**：关注“未来产业”作为国家社科基金的申报选题 (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)。
105 | *   **天津大学的提问者**：研究方向可能与“新质生产力”相关，并有一定前期研究成果 (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)。
106 | *   **扬州大学的提问者**：研究兴趣广泛，涉及“新质生产力”、“农民共同富裕”、“应用经济学研究方法创新”以及“新质消费力”等多个前沿经济学领域 (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)。
107 | *   **南京林业大学的提问者**：研究背景为农业经济，并考虑将研究拓展至林业经济领域，或从经济学与法学交叉的角度研究“林权制度改革赋能绿色发展”问题 (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)。
108 | *   **南京财经大学的提问者**：研究领域涉及“超大规模市场优势与区域知识集聚”、“粮食经济”和“财政学” (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)。
109 | 
110 | **结论**：由于核心信息的缺失（课题组名单及其核心出版物），无法对特定部分的课题组进行研究方向的分析和总结。现有材料只能揭示部分高校个别研究人员的潜在研究兴趣点。
111 | 
112 |  
113 |  ### 针对课题组名单中的剩余部分（例如，后1/3），基于其核心出版物，提炼并总结每个课题组的关键技术贡献。
114 | 
115 | 根据提供的网络搜索结果，无法完成“针对课题组名单中的剩余部分（例如，后1/3），基于其核心出版物，提炼并总结每个课题组的关键技术贡献”这一任务。
116 | 
117 | 主要原因如下：
118 | 1.  **缺乏课题组名单**：提供的资料是一篇关于如何申请国家社科基金项目的问答式文章，作者是扬州大学的秦兴方教授。文中虽然提到了多个大学（如扬州大学、南京林业大学、南京财经大学、中国矿业大学等），但这仅是提问者所在的单位，并未提供一个明确的、可供分析的“课题组名单” (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)。
119 | 2.  **缺乏核心出版物列表**：文中引用了一些关于“新质生产力”的期刊文章，例如发表在《经济研究》和《管理世界》上的论文。然而，这些文章是作为当前研究热点的例子被引用的，而不是作为特定课题组的核心研究成果来介绍的 (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)。
120 | 3.  **内容主旨不符**：该网页内容的核心是为科研人员，特别是人文社科领域的青年教师，提供关于国家社科基金项目申报的指导和建议。其内容涵盖了选题、论证、利用AI辅助工具、理解评审标准等方面，完全不涉及对任何课题组技术贡献的总结或评述 (http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm)。
121 | 
122 | 综上所述，所提供的单一网络搜索结果是一份关于科研项目申请策略的指南，无法从中提取出任何关于特定课题组及其关键技术贡献的信息。
123 | 
124 | ## 针对第一步识别出的课题组名单，深入研究各课题组的领导者（PI）和核心成员。收集整理他们的技术背景、学术头衔（例如，院士、IEEE Fellow）、教育经历以及过往的关键学术或工业界成就。
125 | 
126 | 
127 | 
128 | ## 针对第一步识别出的课题组名单，调查各个课题组的经费来源和项目情况。包括主要的政府科研基金、企业横向合作项目、以及其他形式的公私合作计划。
129 | 
130 | 
131 | 
132 |  
133 |  ### 调查各个课题组从政府机构获得的科研基金详情，例如国家自然科学基金、科技部重点研发计划等。请列出具体的项目名称、批准号、起止时间及资助金额。
134 | 
135 | 根据现有的公开信息，无法直接提供一个涵盖“各个课题组”从政府机构获得的所有科研基金的详尽清单。这类信息通常分散在各个资助机构的官方查询平台，并且需要针对特定的课题组（例如，通过负责人姓名、依托单位等信息）进行检索才能获得。
136 | 
137 | 不过，我们可以根据您提供的资料，指明查询这些信息的权威渠道和方法。
138 | 
139 | ### 主要政府科研基金查询途径
140 | 
141 | **1. 科技部重点研发计划等项目:**
142 | 
143 | *   **信息系统**: 科技部的项目，如国家重点研发计划，统一通过“国家科技管理信息系统公共服务平台”进行管理。该系统覆盖了从项目申报、立项、预算安排到验收结果的全过程信息管理 (cited_url: https://fuwu.most.gov.cn/html/bszx/kjjhxm/20181218/2898.html)。
144 | *   **信息公开**: 根据规定，该系统会主动向社会公开非涉密的项目信息，以接受社会监督 (cited_url: https://fuwu.most.gov.cn/html/bszx/kjjhxm/20181218/2898.html)。
145 | *   **查询方法**: 用户可以访问该公共服务平台（网址通常为 fuwu.most.gov.cn），利用其信息查询或项目公示功能，查找特定项目或单位的资助情况。在项目申报环节，系统要求填报的信息就包括项目名称、执行周期、预算总额等关键数据 (cited_url: https://fuwu.most.gov.cn/html/bszx/kjjhxm/20181218/2898.html)。
146 | 
147 | **2. 国家自然科学基金 (NSFC):**
148 | 
149 | *   **信息系统**: 国家自然科学基金委员会的项目有专门的查询渠道。根据自然科学基金委发布的通告，项目结题后，结题/成果报告全文会在“国家自然科学基金大数据知识管理服务平台”（网址：https://kd.nsfc.cn）上公布 (cited_url: https://www.nsfc.gov.cn/p1/3381/2824/66780.html)。
150 | *   **可查询信息**: 这些公开的报告或项目信息中通常会包含：
151 |     *   **项目名称**
152 |     *   **项目批准号**
153 |     *   **资助金额**
154 |     *   **项目起止时间** (研究期限)
155 |     *   项目负责人及参与者信息
156 | *   **查询方法**: 用户可以直接访问“国家自然科学基金大数据知识管理服务平台”，使用关键词（如项目负责人、单位名称、项目名称等）进行检索，以查找已获资助项目的详细信息。此外，“国家科技报告服务系统”（https://www.nstrs.cn）也是另一个公布受资助项目成果的平台 (cited_url: https://www.nsfc.gov.cn/p1/3381/2824/66780.html)。
157 | 
158 | ### 总结
159 | 
160 | 要获取特定课题组的政府科研基金详情，您需要：
161 | 1.  **确定资助机构**: 首先判断研究项目可能由哪个机构资助，是科技部还是国家自然科学基金委等。
162 | 2.  **访问官方平台**:
163 |     *   对于国家重点研发计划等，访问 **国家科技管理信息系统公共服务平台**。
164 |     *   对于国家自然科学基金，访问 **国家自然科学基金大数据知识管理服务平台 (https://kd.nsfc.cn)**。
165 | 3.  **进行检索**: 在相应的平台上，使用课题组负责人姓名、所在单位、研究方向等关键词进行精确检索。
166 | 
167 | 由于信息保密和公开范围的限制，并非所有项目的全部细节都会被公开，但对于已结题和非涉密的项目，上述平台是获取项目名称、批准号、起止时间和资助金额等核心信息的权威官方渠道。
168 | 
169 |  
170 |  ### 调查各个课题组与企业之间的横向合作项目。内容应包括合作企业名称、项目主题、合作模式（例如技术开发、咨询服务）、项目金额及执行周期。
171 | 
172 | 根据现有的搜索结果，无法提供关于各个课题组与企业之间横向合作项目的具体信息。
173 | 
174 | 唯一的搜索结果是一个来自东南大学科技处的PDF文件，该文件对横向科研项目进行了定义，指出这类项目包括“各类科技服务项目、专利许可实施项目以及...与企业合作研究开发高新技术产品及产业化项目” (https://kjc.seu.edu.cn/_upload/article/0f/91/86ca24d349c292497df863e5581b/eabfede1-bb71-4a5f-95ae-06d08b9c2808.pdf)。
175 | 
176 | 然而，该文件并未提供任何具体的合作案例，因此缺少以下关键信息：
177 | *   合作企业名称
178 | *   具体的项目主题
179 | *   详细的合作模式（例如是技术开发还是咨询服务）
180 | *   项目金额
181 | *   项目的执行周期
182 | 
183 | 综上所述，由于信息缺失，无法完成关于横向合作项目的详细调查。
184 | 
185 |  
186 |  ### 研究各个课题组参与的其他形式的公私合作计划（PPP）或国际合作项目。请说明合作方的性质（例如非政府组织、国际机构）、资金来源构成、项目目标和已取得的成果。
187 | 
188 | 根据现有的资料，公私合作伙伴关系（PPP）中的公共合作伙伴通常指的是政府机构，例如部委、司局、地方政府或国有企业。私营合作伙伴则可以是本地或国际的，通常是具备项目所需技术或金融领域专长的企业或投资者 [1](https://www.adb.org/sites/default/files/institutional-document/33996/files/public-private-partnership-zh.pdf)。
189 | 
190 | 然而，在当前提供的搜索结果中，并未包含任何关于特定课题组参与其他形式公私合作计划（PPP）或国际合作项目的具体案例。因此，无法详细说明这些合作中合作方的具体性质（如非政府组织、国际机构）、资金来源构成、项目目标以及已取得的成果。相关信息尚不明确。
191 | 
192 | ## 综合利用前述步骤收集的关于出版物、团队背景、经费项目的信息，对第一步识别出的课题组进行横向比较和综合评估，最终筛选并排序出十个最有潜力引领未来量子网络发展的课题组，并为每个入选的课题组提供简要的评估理由。
193 | 
194 | 
195 | 
196 | 
197 | ## Citations
198 | - https://www.adb.org/sites/default/files/institutional-document/33996/files/public-private-partnership-zh.pdf 
199 | - https://www.nsfc.gov.cn/p1/3381/2824/66780.html 
200 | - https://kjc.seu.edu.cn/_upload/article/0f/91/86ca24d349c292497df863e5581b/eabfede1-bb71-4a5f-95ae-06d08b9c2808.pdf 
201 | - https://www.chinaeuropeforum.org/skin/file/Conference_Plan.pdf 
202 | - http://teacher.yzu.edu.cn/QXF/zh_CN/article/388562/content/2434.htm 
203 | - https://fuwu.most.gov.cn/html/bszx/kjjhxm/20181218/2898.html 
204 | 


--------------------------------------------------------------------------------
/deep-research/research_output/output_final_report_24.md:
--------------------------------------------------------------------------------
 1 | # Deep Research Report
 2 | 
 3 | ## Table of Contents 
 4 | - 如何增强自闭症学生课堂参与度？有哪些有效的策略可供选择？
 5 | 
 6 | ## Report 
 7 |  
 8 |  ### 如何增强自闭症学生课堂参与度？有哪些有效的策略可供选择？
 9 | 
10 | 增强自闭症学生在课堂上的参与度，需要教育工作者、家长和专业团队采取一套全面且个体化的策略。这些策略旨在创造一个支持性的学习环境，并直接教授学生参与课堂所需的技能。
11 | 
12 | ### **有效的策略选择**
13 | 
14 | #### **1. 环境调整与结构化教学**
15 | 
16 | *   **提供个人空间和有利位置**：为自闭症学生提供一个属于自己的、可预测的物理空间。同时，为了减少分心，可以安排他们坐在靠近老师、远离门窗的位置 (https://blog.stageslearning.com/zh/blog/%E7%89%B9%E6%AE%8A%E9%9C%80%E8%A6%81%E5%84%BF%E7%AB%A5%E7%9A%84%E5%8D%81%E4%B8%80%E4%B8%AA%E8%AF%BE%E5%A0%82%E7%AE%A1%E7%90%86%E7%AD%96%E7%95%A5-11-classroom-management-strategies-for-children-with-special-needs)。
17 | *   **视觉支持**：利用视觉提示（如图片、符号、日程表）帮助学生理解课堂流程、活动内容和行为期望。这对于语言理解能力有限的学生尤其重要，可以有效减少因无法表达而产生的挫败感和不当行为 (https://iris.peabody.vanderbilt.edu/zh-CN/module/asd2/cresource/q2/p04/)。
18 | *   **结构化学习 (Structured Learning)**：这是一种有效的教学介入方式，通过清晰、一致的结构来组织学习环境和教学活动，帮助学生理解和预测接下来会发生什么，从而增强他们的安全感和参与度 (https://speccen.utaipei.edu.tw/var/file/64/1064/img/1633/687164071.pdf)。
19 | 
20 | #### **2. 教学与沟通策略**
21 | 
22 | *   **个体化教学**：充分了解并利用学生的个人兴趣、优势和弱点来调整教学内容。将学生的兴趣融入课程，可以显著提升他们的参与动机和投入度 (https://sense.edb.gov.hk/uploads/content/asd/aim_book/EDB_AIM_Full%20Book%20Manual%20(Pri).pdf)。
23 | *   **明确的沟通方式**：
24 |     *   **图片交换沟通系统 (PECS)**：对于语言能力有限的学生，PECS是一种有效的增强和替代沟通（AAC）方法。学生通过交换图片来表达自己的需求和愿望，分阶段学习如何沟通、组织句子并回答问题 (https://iris.peabody.vanderbilt.edu/zh-CN/module/asd2/cresource/q2/p04/)。
25 |     *   **社交故事 (Social Stories)**：通过简短的故事描述特定的社交情境、他人的想法和感受以及期望的行为，帮助学生理解和应对各种社交场景 (https://speccen.utaipei.edu.tw/var/file/64/1064/img/1633/687164071.pdf)。
26 | *   **循证教学实践**：采用被证明有效的教学方法，例如：
27 |     *   **明确尝试训练 (Discrete Trial Training)**
28 |     *   **关键反应训练 (Pivotal Response Training)**
29 |     *   **随机教学法 (Incidental Teaching)**
30 |     这些方法侧重于分解技能、在自然情境中教学，并强化学生的自发性沟通 (https://speccen.utaipei.edu.tw/var/file/64/1064/img/1633/687164071.pdf)。
31 | 
32 | #### **3. 行为与社交支持**
33 | 
34 | *   **正向行为支持 (Positive Behavior Support)**：这是一个系统性的框架，重点在于预防行为问题的发生。它包括功能性行为评量（分析行为背后的原因）、前事控制（调整环境以避免触发问题行为）以及教导替代行为 (https://speccen.utaipei.edu.tw/var/file/64/1064/img/1633/687164071.pdf)。
35 | *   **教导替代行为**：当学生出现不当行为时，关键在于教会他们一种功能相同但更合适的替代行为来满足自身需求。例如，如果学生因为想要逃避困难任务而大叫，可以教他使用“我需要帮助”的卡片 (https://speccen.utaipei.edu.tw/var/file/64/1064/img/1633/687164071.pdf)。
36 | *   **同伴支持与训练**：设计班级活动，增进同学间的了解和互动。可以对班上的同龄伙伴进行训练，指导他们如何与自闭症学生进行有效的沟通和互动，如何示范恰当的社交行为 (https://www.htjh.ntpc.edu.tw/var/file/0/1000/img/388609362.pdf)。
37 | 
38 | #### **4. 团队协作与家庭合作**
39 | 
40 | *   **建立专业团队**：有效的支持需要一个由教师、行政人员、心理师、治疗师等组成的专业团队共同协作，形成共识并有效介入 (https://speccen.utaipei.edu.tw/var/file/64/1064/img/1633/687164071.pdf)。
41 | *   **加强亲师合作**：家庭与学校的合作至关重要。通过有效的沟通与关怀加强亲师关系，确保家庭和学校对学生的行为支持策略保持一致。家长的积极配合是学生在校行为改善的重要保障 (https://www.htjh.ntpc.edu.tw/var/file/0/1000/img/388609362.pdf, https://speccen.utaipei.edu.tw/var/file/64/1064/img/1633/687164071.pdf)。
42 | 
43 | 总之，提升自闭症学生的课堂参与度没有单一的“万能”方法。最有效的方式是采取一种多层次、个体化的综合策略，结合环境改造、循证教学、正向行为支持以及紧密的团队合作，创造一个既有结构又能灵活适应学生需求的学习环境。
44 | 
45 | 
46 | ## Citations
47 | - https://speccen.utaipei.edu.tw/var/file/64/1064/img/1633/687164071.pdf 
48 | - https://blog.stageslearning.com/zh/blog/%E7%89%B9%E6%AE%8A%E9%9C%80%E8%A6%81%E5%84%BF%E7%AB%A5%E7%9A%84%E5%8D%81%E4%B8%80%E4%B8%AA%E8%AF%BE%E5%A0%82%E7%AE%A1%E7%90%86%E7%AD%96%E7%95%A5-11-classroom-management-strategies-for-children-with-special-needs 
49 | - https://iris.peabody.vanderbilt.edu/zh-CN/module/asd2/cresource/q2/p04/ 
50 | - https://sense.edb.gov.hk/uploads/content/asd/aim_book/EDB_AIM_Full%20Book%20Manual%20(Pri).pdf 
51 | - https://www.htjh.ntpc.edu.tw/var/file/0/1000/img/388609362.pdf 
52 | 


--------------------------------------------------------------------------------
/deep-research/research_output/output_final_report_7.md:
--------------------------------------------------------------------------------
 1 | # Deep Research Report
 2 | 
 3 | ## Table of Contents 
 4 | - 在当前中国房地产市场低迷的情况下，政府税收减少，这会多大程度上影响地方政府的财政收入
 5 | 
 6 | ## Report 
 7 |  
 8 |  ### 在当前中国房地产市场低迷的情况下，政府税收减少，这会多大程度上影响地方政府的财政收入
 9 | 
10 | ### 房地产市场低迷对地方政府财政收入的影响分析
11 | 
12 | 中国地方政府长期以来对房地产和土地相关收入存在较高依赖度。随着当前房地产市场进入深度调整期，相关税收及土地出让收入的显著下滑，对地方政府的财政收入造成了多方面的冲击，加剧了地方的财政收支矛盾。
13 | 
14 | #### **一、 核心财政收入来源的显著萎缩**
15 | 
16 | 地方政府的财政收入中，与房地产和土地直接相关的部分主要包括两大块：相关税收和国有土地使用权出让收入（即“卖地”收入）。
17 | 
18 | 1.  **房地产相关税收下降**：
19 |     与房地产行业直接相关的五大税种——契税、土地增值税、房产税、城镇土地使用税、耕地占用税，是地方财政的重要组成部分。根据财政部数据，2023年前10个月，这五项税收的总收入约为1.57万亿元人民币，同比下降约4% (证券时报)。虽然整体降幅看似不大，但其中土地增值税、城镇土地使用税和耕地占用税均出现下滑。这五大税收合计占地方税种总收入的比重约为10%，较去年全年下降了约2个百分点 (证券时报)。
20 | 
21 | 2.  **土地出让收入（“土地财政”）大幅锐减**：
22 |     对地方财政冲击最大的是土地出让收入的急剧下降。这部分收入是地方政府性基金预算的主要来源，也是过去支撑地方进行基础设施建设的重要资金。
23 |     *   **降幅巨大**：2023年前10个月，国有土地使用权出让收入约为3.5万亿元，同比大幅下降20.5% (证券时报)。这一收入已从2021年8.7万亿元的高峰，降至2022年的约6.7万亿元，预计2023年将进一步跌破6万亿元 (证券时报)。
24 |     *   **依赖度下降**：综合来看，上述五大房产相关税收和土地出让收入合计，在2023年前10个月约占地方财政总收入的35%。这一比例相较于2021年约53%的峰值，出现了非常明显的下滑，直观地反映了“土地财政”的萎缩 (证券时报)。
25 | 
26 | #### **二、 影响程度的官方评估与缓解因素**
27 | 
28 | 尽管数据下滑严重，但有观点认为，其对地方公共财政的实际冲击可能没有数字表面上那么剧烈。
29 | 
30 | *   **“毛收入”与“净收入”的差异**：前财政部部长刘昆曾指出，土地出让收入是“毛收入”，其中近八成需用于拆迁补偿等成本性支出。因此，当毛收入大幅减少时，相应的成本性支出也会随之下降，形成收支联动，从而缓冲了对财政的净冲击 (证券时报)。
31 | *   **对一般公共预算影响可控**：地方政府性基金预算中，大约只有15%的收入会调入一般公共预算使用。据此匡算，2022年土地出让收入减少约2万亿元，对地方一般公共预算财力的直接影响约为3000亿元。因此，官方认为，房地产市场下行带来的影响是“可控的、结构性的” (证券时报)。
32 | 
33 | #### **三、 地方政府的应对措施与挑战**
34 | 
35 | 面对财政收入的减少，各级政府已采取多种措施来应对挑战。
36 | 
37 | 1.  **中央加大转移支付**：为支持地方财政平稳运行，中央财政显著加大了对地方的转移支付力度。例如，2023年四季度增发1万亿元国债，全部通过转移支付给地方使用，使得当年中央对地方的转移支付总规模首次突破10万亿元，重点向中西部及欠发达地区倾斜 (证券时报)。
38 | 
39 | 2.  **地方政府“开源节流”**：地方层面，政府正努力盘活存量资产资源，优化支出结构，大力压减一般性和非紧急支出 (证券时报)。然而，在税收和土地出让收入双双下降的压力下，一些地方政府开始寻求非税收入。数据显示，2024年中国政府财政收入中，税收收入同比下降，但非税收入却大幅增长 (美国之音)。这在一定程度上反映了地方政府为弥补资金缺口所做的努力。
40 | 
41 | 3.  **长期改革方案（房地产税）的延迟**：开征房地产税曾被视为替代“土地财政”、为地方政府提供稳定税源的理想方案。然而，在当前需要刺激房地产市场复苏的背景下，推出房地产税被认为“不合时宜”，可能打断市场的回暖进程 (BBC News, 中文)。因此，这项备受关注的改革在近年内落地的可能性已经降低，使得地方政府寻找新税源的希望被推迟 (BBC News, 中文)。
42 | 
43 | 综上所述，房地产市场的低迷通过税收，特别是土地出让收入的大幅下滑，对地方政府的财政收入造成了显著的负面影响，直接导致其对“土地财政”的依赖度被动下降。虽然有成本联动效应和中央转移支付等因素进行缓冲，官方也认为影响可控，但这无疑加剧了地方的财政收支矛盾，并迫使地方政府采取包括增加非税收入在内的各种方式应对资金短缺的困境 (美国之音; 证券时报)。
44 | 
45 | 
46 | ## Citations
47 | - https://cn.nytimes.com/business/20250321/china-taxes-trump-tariffs/ 
48 | - https://www.bbc.com/zhongwen/simp/chinese-news-66797074 
49 | - https://www.stcn.com/article/detail/1059039.html 
50 | - https://www.voachinese.com/a/china-2024-fiscal-revenue-growth-shrinks-non-tax-income-jumps-20250124/7948819.html 
51 | - https://www.citieschina.org.cn/show/id/a1718868199555.html 
52 | 


--------------------------------------------------------------------------------
/deep-research/research_output/output_final_report_77.md:
--------------------------------------------------------------------------------
 1 | # Deep Research Report
 2 | 
 3 | ## Table of Contents 
 4 | - What is the role of need for closure on misinformation acceptance?
 5 | 
 6 | ## Report 
 7 |  
 8 |  ### What is the role of need for closure on misinformation acceptance?
 9 | 
10 | The need for cognitive closure (NfCC), which describes an individual's desire for a firm answer to a question and an aversion to ambiguity, plays a significant role in the acceptance of misinformation. Research indicates that individuals with a high need for closure are more susceptible to believing and spreading false information, particularly in situations involving memory and uncertainty.
11 | 
12 | ### **Impact on Eyewitness Memory and False Confessions**
13 | 
14 | A primary area where the influence of NfCC on misinformation is studied is eyewitness memory.
15 | 
16 | *   **Increased Susceptibility:** Studies show that a high need for cognitive closure is a determinant of susceptibility to misinformation. In one experiment, individuals with high NfCC who were presented with misinformation had a higher susceptibility score (mean of 1.84) compared to those with low NfCC (mean of 1.36) [https://arch.ies.gov.pl/images/PDF/2019/vol_118/118_Hejniak_m.pdf](https://arch.ies.gov.pl/images/PDF/2019/vol_118/118_Hejniak_m.pdf). This suggests that the desire for a clear, unambiguous narrative makes individuals more likely to accept and integrate false details into their memory.
17 | *   **Cognitive Mechanisms:** The mechanism behind this involves retrieval-induced forgetting. Research published in *Social Cognition* demonstrates that a high NfCC enhances retrieval-induced forgetting, a process where remembering certain information causes the forgetting of other related details. This cognitive side-effect, in turn, magnifies the effects of misinformation in eyewitness accounts [https://www.researchgate.net/publication/264342616_The_Role_of_Need_for_Cognitive_Closure_in_Retrieval-Induced_Forgetting_and_Misinformation_Effects_in_Eyewitness_Memory](https://www.researchgate.net/publication/264342616_The_Role_of_Need_for_Cognitive_Closure_in_Retrieval-Induced_Forgetting_and_Misinformation_Effects_in_Eyewitness_Memory), [https://www.kruglanskiarie.com/the-need-for-closure-and-conservati](https://www.kruglanskiarie.com/the-need-for-closure-and-conservati).
18 | *   **False Confessions:** The same study also examined NfCC as a factor in the proneness to false confessions, indicating a broader link between the need for closure and the acceptance of false narratives in forensic contexts [https://arch.ies.gov.pl/images/PDF/2019/vol_118/118_Hejniak_m.pdf](https://arch.ies.gov.pl/images/PDF/2019/vol_118/118_Hejniak_m.pdf).
19 | 
20 | ### **Broader Implications**
21 | 
22 | The influence of NfCC on misinformation extends beyond eyewitness testimony.
23 | 
24 | *   **Uncertainty and Crisis:** During periods of high uncertainty, such as the COVID-19 pandemic, the need for closure can drive people to accept misinformation as a way to resolve ambiguity and gain a sense of certainty [https://pmc.ncbi.nlm.nih.gov/articles/PMC9690611/](https://pmc.ncbi.nlm.nih.gov/articles/PMC9690611/).
25 | *   **Stereotypes and Information Discounting:** A high need for closure can lead to a pattern of discounting information that contradicts one's established beliefs. This cognitive shortcut helps maintain closure but can foster the formation of stereotypes and accelerate the spread of misinformation when it aligns with pre-existing conclusions [https://scc.sg/e/the-need-for-closure/](https://scc.sg/e/the-need-for-closure/).
26 | 
27 | In summary, a high need for cognitive closure acts as a vulnerability factor for misinformation acceptance. The strong desire to eliminate ambiguity can lead individuals to prematurely accept and defend information, even if it is false, particularly when their memory is suggestible or they are in a state of uncertainty.
28 | 
29 | 
30 | ## Citations
31 | - https://arch.ies.gov.pl/images/PDF/2019/vol_118/118_Hejniak_m.pdf 
32 | - https://www.researchgate.net/publication/264342616_The_Role_of_Need_for_Cognitive_Closure_in_Retrieval-Induced_Forgetting_and_Misinformation_Effects_in_Eyewitness_Memory 
33 | - https://pmc.ncbi.nlm.nih.gov/articles/PMC9690611/ 
34 | - https://www.kruglanskiarie.com/the-need-for-closure-and-conservati 
35 | - https://scc.sg/e/the-need-for-closure/ 
36 | 


--------------------------------------------------------------------------------
/deep-research/research_output/output_final_report_89.md:
--------------------------------------------------------------------------------
 1 | # Deep Research Report
 2 | 
 3 | ## Table of Contents 
 4 | - 1. Foundational Analysis of MDA: Detail the core principles of the Mechanics-Dynamics-Aesthetics (MDA) framework as originally conceived. Explain each component (Mechanics, Dynamics, Aesthetics) and its intended role in game analysis and design.
 5 | 
 6 | ## Report 
 7 | ## Research and analyze the latest advancements and cutting-edge theories within the field of game design. Specifically include recent developments, research, and practical design applications related to established frameworks like MDA (Mechanics-Dynamics-Aesthetics).
 8 | 
 9 | 
10 | 
11 |  
12 |  ### 1. Foundational Analysis of MDA: Detail the core principles of the Mechanics-Dynamics-Aesthetics (MDA) framework as originally conceived. Explain each component (Mechanics, Dynamics, Aesthetics) and its intended role in game analysis and design.
13 | 
14 | ### 1. Foundational Analysis of the Mechanics-Dynamics-Aesthetics (MDA) Framework
15 | 
16 | The Mechanics-Dynamics-Aesthetics (MDA) framework is a formal tool for the analysis and design of games, first presented by Robin Hunicke, Marc LeBlanc, and Robert Zubek at the Game Developers Conference between 2001 and 2004 [http://www.cs.northwestern.edu/~hunicke/MDA.pdf](http://www.cs.northwestern.edu/~hunicke/MDA.pdf). Its core principle is to deconstruct games into three distinct but interrelated components—Mechanics, Dynamics, and Aesthetics—to better understand the player's experience and provide a structured approach for game designers [https://www.researchgate.net/publication/360258656_GAME_DESIGN_FRAMEWORK_MDA_ANALYSIS](https://www.researchgate.net/publication/360258656_GAME_DESIGN_FRAMEWORK_MDA_ANALYSIS).
17 | 
18 | A central tenet of the MDA framework is its recognition that designers and players view games from opposite perspectives. Designers work from the bottom up: they create the **Mechanics**, which in turn give rise to systemic **Dynamics**, which ultimately evoke the desired **Aesthetics** (the player's emotional experience). Conversely, players experience the game from the top down: they are first engaged by the **Aesthetics**, from which they begin to discern the **Dynamics** at play, and only with deeper engagement do they understand the specific **Mechanics** driving the system [https://andrewfischergames.com/blog/mda-framework](https://andrewfischergames.com/blog/mda-framework). This creates a significant challenge for designers, as they can only directly control the mechanics and must use them to indirectly craft the intended dynamics and aesthetic experiences for the player [https://en.wikipedia.org/wiki/MDA_framework](https://en.wikipedia.org/wiki/MDA_framework).
19 | 
20 | #### **Components of the MDA Framework**
21 | 
22 | **1. Mechanics:**
23 | Mechanics are the fundamental rules, algorithms, and data structures of the game. They are the most concrete components, representing the "hard-coded" elements and actions the designer specifies. This includes aspects like the physics of the game world, the conditions for winning or losing, the actions available to the player, and the effects of those actions.
24 | 
25 | *   **Role in Design & Analysis:** From a design perspective, mechanics are the primary tools the creator uses to build the game. From an analysis standpoint, mechanics are the specific rules one would identify to understand how a game functions at its lowest level (e.g., the movement rules for a knight in chess, the damage calculation of a weapon in an RPG, or the speed of a falling block in Tetris).
26 | 
27 | **2. Dynamics:**
28 | Dynamics describe the run-time behavior of the mechanics as they are put into action by the player. They are the emergent behaviors and strategies that arise from the interaction between the player's inputs and the game's mechanics over time. Dynamics are not explicitly programmed by the designer but are a consequence of the mechanical systems.
29 | 
30 | *   **Role in Design & Analysis:** For a designer, understanding dynamics is about predicting how the mechanics will behave in play and how players will interact with them to create interesting situations. For an analyst, dynamics represent the strategic layer of the game. Examples include the emergence of "camping" behavior in a first-person shooter (a result of mechanics like map layout, weapon accuracy, and player vulnerability) or a "bluffing" strategy in poker (a result of mechanics like hidden cards and betting rounds).
31 | 
32 | **3. Aesthetics:**
33 | Aesthetics are the desirable emotional responses evoked in the player as they interact with the game. This component describes the "fun" and overall experience that emerges from the dynamics of the system. Aesthetics are the ultimate goal of the design process, as they define the player's takeaway feeling.
34 | 
35 | *   **Role in Design & Analysis:** In design, aesthetics are the target emotional experience. A designer might aim for a feeling of "Challenge," "Discovery," or "Fantasy." They then select mechanics that are likely to produce dynamics that evoke these feelings. In analysis, aesthetics provide a vocabulary for describing the player's experience and judging the success of the game's design. The original MDA framework outlines eight primary types of aesthetics:
36 |     1.  **Sensation:** Game as sense-pleasure.
37 |     2.  **Fantasy:** Game as make-believe.
38 |     3.  **Narrative:** Game as drama.
39 |     4.  **Challenge:** Game as an obstacle course.
40 |     5.  **Fellowship:** Game as a social framework.
41 |     6.  **Discovery:** Game as uncharted territory.
42 |     7.  **Expression:** Game as self-discovery.
43 |     8.  **Submission:** Game as a pastime.
44 | 
45 | By breaking a game down into these three layers, the MDA framework provides a clear methodology for both creating and critiquing interactive experiences, linking the designer's explicit rules to the player's ultimate emotional journey [http://www.cs.northwestern.edu/~hunicke/MDA.pdf](http://www.cs.northwestern.edu/~hunicke/MDA.pdf).
46 | 
47 | 
48 | ## Citations
49 | - https://en.wikipedia.org/wiki/MDA_framework 
50 | - http://www.cs.northwestern.edu/~hunicke/MDA.pdf 
51 | - https://andrewfischergames.com/blog/mda-framework 
52 | - https://www.researchgate.net/publication/360258656_GAME_DESIGN_FRAMEWORK_MDA_ANALYSIS 
53 | - https://www.reddit.com/r/RPGdesign/comments/1175tuy/this_paper_discusses_the_mechanics_dynamics_and/ 
54 | 


--------------------------------------------------------------------------------
/deep-research/research_output/output_final_report_9.md:
--------------------------------------------------------------------------------
 1 | # Deep Research Report
 2 | 
 3 | ## Table of Contents 
 4 | - 在计算化学这个领域，我们通常使用Gaussian软件模拟各种情况下分子的结构和性质计算，比如在关键词中加入'field=x+100'代表了在x方向增加了电场。但是，当体系是经典的单原子催化剂时，它属于分子催化剂，在反应环境中分子的朝向是不确定的，那么理论模拟的x方向电场和实际电场是不一致的。请问：通常情况下，理论计算是如何模拟外加电场存在的情况？
 5 | 
 6 | ## Report 
 7 |  
 8 |  ### 在计算化学这个领域，我们通常使用Gaussian软件模拟各种情况下分子的结构和性质计算，比如在关键词中加入'field=x+100'代表了在x方向增加了电场。但是，当体系是经典的单原子催化剂时，它属于分子催化剂，在反应环境中分子的朝向是不确定的，那么理论模拟的x方向电场和实际电场是不一致的。请问：通常情况下，理论计算是如何模拟外加电场存在的情况？
 9 | 
10 | 您提出的问题非常深刻，直指理论计算模拟与真实物理化学环境之间的一个关键差异。在真实的溶液或气相环境中，分子催化剂的朝向是不断变化的，因此它与外加电场的相互作用实际上是一个在所有空间取向上进行统计平均的结果。直接在Gaussian等软件中使用一个固定方向的电场（如 `Field=X+100`）来模拟一个固定取向的分子，确实只是一个简化的近似模型。
11 | 
12 | 为了更真实地模拟这种情况，理论计算化学领域发展出了几种不同的策略和方法，复杂度和计算成本各不相同。
13 | 
14 | ### 理论计算模拟外加电场的主要方法：
15 | 
16 | 1.  **方向平均法 (Orientational Averaging)**
17 |     *   **原理**：这是最直接、最符合物理直觉的方法。研究者会手动或通过脚本改变分子在坐标系中的取向，然后对每个取向都施加一个固定方向（例如X方向）的电场进行计算。例如，可以围绕Z轴和Y轴旋转分子，每隔一定角度（如10度）就进行一次能量和性质的计算。
18 |     *   **处理**：最后，将所有不同取向下的计算结果（如能量、偶极矩、极化率等）进行玻尔兹曼加权平均或简单算术平均，从而得到在各向同性环境中，分子与电场相互作用的宏观平均效应。
19 |     *   **优缺点**：这种方法的优点是概念清晰，易于理解和实现。缺点是计算量巨大，特别是对于不对称的分子，需要计算数百甚至数千个不同取向的构型才能得到收敛的结果。
20 | 
21 | 2.  **隐式溶剂模型 (Implicit Solvation Models) 与反应场**
22 |     *   **原理**：在很多催化反应中，所谓的“外加电场”效应实际上来自于极性溶剂分子或电解质离子在催化剂周围形成的局部微观电场。使用隐式溶剂模型（如PCM, SMD等）可以在一定程度上模拟这种平均的静电环境。
23 |     *   **处理**：在Gaussian计算中加入溶剂模型（例如 `SCRF=(Solvent=Water)`），溶剂被视为一个可极化的连续介质。溶质分子（催化剂）会使周围的溶剂介质极化，而极化后的介质会反过来产生一个“反应场”（Reaction Field），作用于溶质分子上。这个反应场是根据分子自身的电荷分布和形状自洽生成的，已经包含了来自各个方向的平均静电作用。
24 |     *   **优缺点**：计算成本相对较低，是目前研究溶液中化学反应最常规的方法之一。它能很好地模拟溶剂的平均静电效应，但它模拟的是由溶剂化产生的内建电场，而不是一个独立施加的、方向性的外场。
25 | 
26 | 3.  **分子动力学模拟 (Molecular Dynamics, MD)**
27 |     *   **原理**：这是最接近真实情况的模拟方法，尤其适用于研究动态过程。通过分子动力学，可以模拟催化剂分子在大量溶剂分子盒子中的平动和转动。
28 |     *   **处理**：可以对整个模拟盒子施加一个外电场。在模拟过程中，催化剂分子会因为热运动而自由旋转。通过对整个模拟轨迹进行长时间的采样，并对催化剂分子的某一性质（如能量、键长、原子电荷）进行统计平均，就可以得到该性质在电场作用下、考虑了所有分子取向后的期望值。如果需要高精度，还可以采用量子力学/分子力学(QM/MM)的方法，只对催化剂核心部分进行高精度的量子化学计算。
29 |     *   **优缺点**：能够提供包含温度和动力学效应在内的最全面的物理图像。但计算成本极高，设置复杂，通常用于研究反应机理的动态路径而非简单的静态性质。
30 | 
31 | ### 总结与建议
32 | 
33 | 对于您提到的单原子催化剂这类分子催化剂体系，不存在一个简单的关键词来完美解决分子取向不确定的问题。方法的选择取决于您想研究的具体问题：
34 | 
35 | *   **如果想了解电场对分子本征性质（如极化率）的影响**: 可以采用**方向平均法**，虽然繁琐但物理意义最明确。
36 | *   **如果想研究在极性环境中反应活性的变化**: 采用**隐式溶剂模型**是更常用且高效的选择，因为它直接模拟了核心的静电相互作用环境。
37 | *   **如果想深入理解电场和溶剂共同作用下的动态行为**: **分子动力学模拟**是最终极的工具，但需要大量的计算资源和专业知识。
38 | 
39 | 在实践中，很多研究会结合使用这些方法。例如，先用隐式溶剂模型优化结构和计算大致的能量，再选取关键结构用方向平均法来精确评估电场效应。正如您在问题中指出的，理解理论模型与真实体系的差异，并选择合适的计算策略，是进行高质量计算模拟的关键。
40 | 
41 | 
42 | ## Citations
43 | - https://www.researchgate.net/publication/360773643_fenzidonglixuemonijiqiLAMMPSshixian-jiangyi 
44 | - https://github.com/modelscope/ms-agent 
45 | 


--------------------------------------------------------------------------------
/deep-research/research_output/output_final_report_93.md:
--------------------------------------------------------------------------------
 1 | # Deep Research Report
 2 | 
 3 | ## Table of Contents 
 4 | - 1. What is the current global market size and projected growth rate (CAGR) for the video editing software market through 2028?
 5 | - 2. What are the key market drivers propelling the growth of the global video editing software market?
 6 | - 3. What are the primary market restraints and challenges impeding the growth of the global video editing software market?
 7 | 
 8 | ## Report 
 9 | ## Please prepare a market research analysis of the global video editing and creation software/tool market. Include major products like those from Adobe (Premiere Pro, After Effects), CapCut, DaVinci Resolve, Final Cut Pro, and others relevant in the current landscape.
10 | 
11 | 
12 | 
13 | ## 1. Conduct a detailed market overview of the global video editing software market, including its current size, projected growth rate (CAGR) through 2028, and key market drivers and restraints.
14 | 
15 | 
16 | 
17 |  
18 |  ### 1. What is the current global market size and projected growth rate (CAGR) for the video editing software market through 2028?
19 | 
20 | The global market size and projected growth rate for the video editing software market show significant variation across different market research reports. No specific data is available for the year 2028 in the provided results, but projections for 2025 and 2030 offer insight into the market's trajectory.
21 | 
22 | **Market Size:**
23 | 
24 | Estimates for the market size in 2025 vary considerably:
25 | *   **MarketsandMarkets** projects the market size to be **USD 7.50 billion** in 2025 (MarketsandMarkets).
26 | *   **Mordor Intelligence** forecasts a market generation of **USD 3.54 billion** in 2025 (Mordor Intelligence).
27 | *   **Cognitive Market Research** offers a similar figure, estimating the market at **USD 3.51 billion** in 2025 (Cognitive Market Research).
28 | 
29 | **Projected Growth Rate (CAGR) and Future Valuation:**
30 | 
31 | The projected Compound Annual Growth Rate (CAGR) also differs between sources, leading to different future valuations:
32 | *   **MarketsandMarkets** projects a **CAGR of 10.6%** between 2025 and 2030, expecting the market to reach **USD 12.40 billion** by 2030 (MarketsandMarkets).
33 | *   **Mordor Intelligence** projects a more modest **CAGR of 6.19%**, forecasting the market to reach **USD 4.78 billion** by 2030 (Mordor Intelligence).
34 | *   Another projection expects the market to reach approximately **$5.13 billion** by 2032 (tripleareview.com).
35 | 
36 | In summary, while the provided data does not contain a specific forecast for 2028, the market is expected to grow. Projections for 2025 range from approximately $3.5 billion to $7.5 billion, with forecasted CAGRs between 6.19% and 10.6% for the period leading up to 2030.
37 | 
38 |  
39 |  ### 2. What are the key market drivers propelling the growth of the global video editing software market?
40 | 
41 | The global video editing software market is experiencing robust growth, propelled by several key drivers. One report covering the broader audio and video editing system market estimates that market at $15 billion in 2025 and projects a Compound Annual Growth Rate (CAGR) of 12% between 2025 and 2033, while some segments, such as open-source video editing software, show a potential CAGR of 12.3% for 2025-2032 [https://www.archivemarketresearch.com/reports/audio-and-video-editing-system-561551, https://www.linkedin.com/pulse/market-report-years-2025-2032-123-cagrshowing-significant-growth-hgddc].
42 | 
43 | The primary factors fueling this expansion include:
44 | 
45 | *   **Growing Demand in Commercial and Personal Sectors:** A fundamental driver is the increasing demand for video editing software from both businesses and individual users [https://www.businessresearchinsights.com/market-reports/video-editing-software-market-102146].
46 | *   **Proliferation of Online Content Platforms:** The expansion of online video-sharing platforms such as YouTube and Vimeo has created a massive demand for new content. This directly fuels the need for video editing software to produce and refine this content [https://straitsresearch.com/report/video-editing-software-market].
47 | *   **Increased Demand for High-Quality Content:** As audiences become more sophisticated, the demand for high-quality video content has risen. This necessitates the use of advanced editing software to meet consumer expectations for professional-grade videos [https://straitsresearch.com/report/video-editing-software-market].
48 | *   **Rise of Cloud-Based and Mobile Editing:** The market is increasingly shifting towards cloud-based solutions, which offer greater accessibility and collaboration features. Concurrently, the growing use of mobile devices for shooting and editing videos on the go is a significant trend driving market growth [https://straitsresearch.com/report/video-editing-software-market].
49 | 
50 | The image and video editing segment is a dominant force within the broader creative software market, holding an estimated share of 42.2% in 2025 [https://www.coherentmarketinsights.com/industry-reports/creative-software-market].
51 | 
52 |  
53 |  ### 3. What are the primary market restraints and challenges impeding the growth of the global video editing software market?
54 | 
55 | Based on the sources reviewed, the primary market restraints and challenges impeding the growth of the global video editing software market are:
56 | 
57 | *   **High Initial Costs:** The expense associated with acquiring professional-grade video editing software is a significant restraint on the market (https://www.linkedin.com/pulse/evaluating-market-players-ai-video-editing-softwares-xnslf). This can be a barrier for individual creators, small businesses, and emerging professionals who may not have the budget for premium software licenses.
58 | 
59 | *   **High Competition from Low-Cost Providers:** The market is experiencing intense competition due to the influx of numerous low-cost and even free video editing software providers. These competitors offer basic editing tools at minimal prices, creating a challenging environment for established and premium software vendors (https://www.snsinsider.com/reports/video-editing-software-market-1543). This saturation can lead to price wars and pressure on profit margins.
60 | 
61 | While other reports allude to the existence of further market challenges, the available excerpts do not specify what they are (https://www.datainsightsmarket.com/reports/video-editing-software-1459830). The most clearly articulated restraints are the financial barriers to entry and the competitive landscape.
62 | 
63 | 
64 | ## Citations
65 | - https://www.coherentmarketinsights.com/industry-reports/creative-software-market 
66 | - https://www.snsinsider.com/reports/video-editing-software-market-1543 
67 | - https://www.precedenceresearch.com/software-market 
68 | - https://www.businessresearchinsights.com/market-reports/video-editing-software-market-102146 
69 | - https://straitsresearch.com/report/video-editing-software-market 
70 | - https://www.linkedin.com/pulse/evaluating-market-players-ai-video-editing-softwares-xnslf 
71 | - https://www.mordorintelligence.com/industry-reports/video-editing-market 
72 | - https://www.cognitivemarketresearch.com/audio-video-editing-software-market-report 
73 | - https://www.marketsandmarkets.com/Market-Reports/video-editing-software-market-600.html 
74 | - https://www.archivemarketresearch.com/reports/audio-and-video-editing-system-561551 
75 | - https://www.datainsightsmarket.com/reports/video-editing-software-1459830 
76 | - https://dataintelo.com/report/global-video-editing-software-market 
77 | - https://tripleareview.com/video-editing-statistics/ 
78 | - https://www.linkedin.com/pulse/market-report-years-2025-2032-123-cagrshowing-significant-growth-hgddc 
79 | 


--------------------------------------------------------------------------------
/deep-research/research_output/research_report_variations/note.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "id": "b595ea1a",
  7 |    "metadata": {},
  8 |    "outputs": [],
  9 |    "source": [
 10 |     "import json\n",
 11 |     "\n",
 12 |     "def append_to_model_output_file(task_id: int, prompt: str, research_report: str):\n",
 13 |     "    \"\"\"Append one model-output record to the JSON array stored in output_4.jsonl, starting a new array if the file does not exist\"\"\"\n",
 14 |     "    filename = \"output_4.jsonl\"\n",
 15 |     "    model_output = {\"id\": task_id, \"prompt\": prompt, \"article\": research_report}\n",
 16 |     "\n",
 17 |     "    try:\n",
 18 |     "        with open(filename, \"r\") as file:\n",
 19 |     "            existing_data = json.load(file)\n",
 20 |     "    except FileNotFoundError:\n",
 21 |     "        existing_data = []  # Initialize as an empty list if the file doesn't exist\n",
 22 |     "\n",
 23 |     "    # Append only when the loaded JSON is a list; any other top-level value is written back unchanged\n",
 24 |     "    if isinstance(existing_data, list):\n",
 25 |     "        existing_data.append(model_output)\n",
 26 |     "\n",
 27 |     "    with open(filename, \"w\") as file:\n",
 28 |     "        json.dump(existing_data, file, indent=4, ensure_ascii=False)\n"
 29 |    ]
 30 |   },
 31 |   {
 32 |    "cell_type": "code",
 33 |    "execution_count": 2,
 34 |    "id": "7b8fe65e",
 35 |    "metadata": {},
 36 |    "outputs": [],
 37 |    "source": [
 38 |     "with open('../output_final_report_1.md', \"r\") as file:\n",
 39 |     "    report = file.read()\n",
 40 |     "\n",
 41 |     "task_id = 52\n",
 42 |     "prompt = 'What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?'\n",
 43 |     "append_to_model_output_file(task_id=52, prompt=prompt, research_report=report)"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 23,
 49 |    "id": "f4b20e5d",
 50 |    "metadata": {},
 51 |    "outputs": [],
 52 |    "source": [
 53 |     "import json\n",
 54 |     "\n",
 55 |     "CHINESE_EVALS_PATH = \"../../evals/chinese/raw_results.jsonl\"\n",
 56 |     "ENGLISH_EVALS_PATH = \"../../evals/english/raw_results.jsonl\"\n",
 57 |     "OVERALL_EVALS_PATH = \"../../evals/overall/raw_results.jsonl\"\n",
 58 |     "\n",
 59 |     "def append_to_model_output_file(filepath):\n",
 60 |     "    \"\"\"Read a JSONL file and return its records as a list (empty list if the file is missing)\"\"\"\n",
 61 |     "    try:\n",
 62 |     "        existing_data = []\n",
 63 |     "        with open(filepath, 'r', encoding='utf-8') as f:\n",
 64 |     "            for line in f:\n",
 65 |     "                # Strip whitespace and parse the JSON string\n",
 66 |     "                json_object = json.loads(line.strip())\n",
 67 |     "                existing_data.append(json_object)\n",
 68 |     "    except FileNotFoundError as e:\n",
 69 |     "        print(\"File not found. Creating a new one.\", e)\n",
 70 |     "        existing_data = []  # Initialize as an empty list if the file doesn't exist\n",
 71 |     "\n",
 72 |     "    return existing_data\n"
 73 |    ]
 74 |   },
 75 |   {
 76 |    "cell_type": "code",
 77 |    "execution_count": 25,
 78 |    "id": "20832877",
 79 |    "metadata": {},
 80 |    "outputs": [
 81 |     {
 82 |      "name": "stdout",
 83 |      "output_type": "stream",
 84 |      "text": [
 85 |       "Comprehensiveness: 0.3512\n",
 86 |       "Insight: 0.3045\n",
 87 |       "Instruction Following: 0.3886\n",
 88 |       "Readability: 0.3544\n",
 89 |       "Overall Score: 0.3472\n"
 90 |      ]
 91 |     }
 92 |    ],
 93 |    "source": [
 94 |     "evals = append_to_model_output_file(OVERALL_EVALS_PATH)\n",
 95 |     "\n",
 96 |     "comprehensiveness = 0.0\n",
 97 |     "insight = 0.0\n",
 98 |     "instruction_following = 0.0\n",
 99 |     "readability = 0.0\n",
100 |     "overall_score = 0.0\n",
101 |     "\n",
102 |     "for eval in evals:\n",
103 |     "    overall_score += eval['overall_score']\n",
104 |     "    insight += eval['insight']\n",
105 |     "    comprehensiveness += eval['comprehensiveness']\n",
106 |     "    instruction_following += eval['instruction_following']\n",
107 |     "    readability += eval['readability']\n",
108 |     "\n",
109 |     "print(f'Comprehensiveness: {comprehensiveness/len(evals):.4f}')\n",
110 |     "print(f'Insight: {insight/len(evals):.4f}')\n",
111 |     "print(f'Instruction Following: {instruction_following/len(evals):.4f}')\n",
112 |     "print(f'Readability: {readability/len(evals):.4f}')\n",
113 |     "print(f'Overall Score: {overall_score/len(evals):.4f}')"
114 |    ]
115 |   }
116 |  ],
117 |  "metadata": {
118 |   "kernelspec": {
119 |    "display_name": "saurav-env",
120 |    "language": "python",
121 |    "name": "python3"
122 |   },
123 |   "language_info": {
124 |    "codemirror_mode": {
125 |     "name": "ipython",
126 |     "version": 3
127 |    },
128 |    "file_extension": ".py",
129 |    "mimetype": "text/x-python",
130 |    "name": "python",
131 |    "nbconvert_exporter": "python",
132 |    "pygments_lexer": "ipython3",
133 |    "version": "3.13.1"
134 |   }
135 |  },
136 |  "nbformat": 4,
137 |  "nbformat_minor": 5
138 | }
139 | 


--------------------------------------------------------------------------------
/deep-research/research_output/research_report_variations/output_1.jsonl:
--------------------------------------------------------------------------------
1 | [
2 |     {
3 |         "id": 52,
4 |         "prompt": "What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?",
5 |         "article": "# Deep Research Report\n\n## Table of Contents \n- \"What are the core tenets and principles of Warren Buffett's and Charlie Munger's investment philosophy?\",\n\n## Report \n## What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?\n\n\n\n \n ### \"What are the core tenets and principles of Warren Buffett's and Charlie Munger's investment philosophy?\",\n\n### The Core Tenets of Warren Buffett's and Charlie Munger's Investment Philosophy\n\nThe investment philosophy of Warren Buffett and Charlie Munger, the driving forces behind Berkshire Hathaway's monumental success, is a blend of disciplined principles that prioritize long-term value creation over short-term market speculation. Their shared approach is centered on quality, patience, rational thinking, and a deep understanding of business fundamentals.\n\n**1. A Philosophy of Quality, Patience, and Rationality**\nAt its heart, the Buffett-Munger investment strategy is a coherent philosophy built on three pillars: quality, patience, and rational thinking (https://www.newtraderu.com/2025/10/01/charlie-mungers-investment-strategy-8-principles-every-investor-should-know/). This approach eschews rapid trading in favor of methodical, well-reasoned investment decisions. Buffett himself has acknowledged the synergy of their views, which have been honed over a 45-year business partnership (https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a).\n\n**2. Investing in High-Quality Businesses**\nA primary driver of Berkshire Hathaway's success is its unwavering focus on buying and investing in high-quality businesses (https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/). Their strategy involves identifying companies that are undervalued but possess long-term growth potential (https://www.youtube.com/watch?v=dUgoUzxHD5Y). 
This means seeking out businesses with exceptional leadership and fundamental strength (https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a).\n\n**3. The \"Perpetuity\" Holding Period**\nA defining characteristic of their philosophy is an exceptionally long-term investment horizon. Munger has echoed Buffett's sentiment that \"our preferred duration of ownership is perpetuity\" (https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a). This principle is not absolute; they are prepared to sell if they lose confidence in a company's stability. However, the prerequisite for this long-term retention is that the core business and its management demonstrate exceptional and sustained prowess (https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a).\n\n**4. \"Extreme Patience and Extreme Decisiveness\"**\nCharlie Munger has famously described their investment strategy as \"extreme patience combined with extreme decisiveness\" (https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/). They do not feel compelled to act constantly. Instead, they wait for what Munger calls \"no-brainer decisions,\" opportunities where the value proposition is overwhelmingly clear.\n\n**5. Concentrated Capital Allocation**\nContrary to the common wisdom of broad diversification, Buffett and Munger advocate for a more concentrated approach. Munger argues that good investment ideas are rare, and therefore, \"it makes sense to load up on the very few good insights you have instead of pretending to know everything about everything at all times\" (https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/). 
Buffett reinforces this by stating that such a disciplined approach forces an investor to think very carefully about their decisions and to invest heavily in their most well-reasoned ideas (https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/). For them, the wise allocation of capital is the investor's primary job (https://www.businessinsider.com/charlie-munger-investing-success-from-mental-models-amp-checklists-2012-2).\n\n\n## Citations\n- https://www.businessinsider.com/charlie-munger-investing-success-from-mental-models-amp-checklists-2012-2 \n- https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/ \n- https://www.newtraderu.com/2025/10/01/charlie-mungers-investment-strategy-8-principles-every-investor-should-know/ \n- https://www.youtube.com/watch?v=dUgoUzxHD5Y \n- https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a \n"
6 |     }
7 | ]


--------------------------------------------------------------------------------
/deep-research/research_output/research_report_variations/output_2.jsonl:
--------------------------------------------------------------------------------
1 | [
2 |     {
3 |         "id": 52,
4 |         "prompt": "What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?",
5 |         "article": "# Deep Research Report\n\n## Table of Contents \n- What is the investment philosophy of Duan Yongping?\n- What is the investment philosophy of Warren Buffett?\n- What is the investment philosophy of Charlie Munger?\n\n## Report \n## What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?\n\n\n\n \n ### What is the investment philosophy of Duan Yongping?\n\nDuan Yongping's investment philosophy is rooted in **value investing**, with a focus on long-term, high-conviction bets in companies he thoroughly understands. His approach is often compared to that of Warren Buffett, emphasizing simplicity and a deep understanding of business fundamentals over chasing trends.\n\n### Core Principles of Duan Yongping's Investment Philosophy:\n\n*   **Value-Driven and Long-Term Horizon:** Yongping is known for his profound value investment philosophy, seeking out high-quality assets that are undervalued by the market (https://www.binance.com/en/square/post/13966098863626). His decisions are guided by long-term impacts rather than immediate benefits, embodying a \"do the right thing\" mentality with a long-term vision (https://www.binance.com/en/square/post/18537661287098). This strategy is characterized as \"high-conviction, value-driven, long-term investing\" (https://www.gainify.io/blog/duan-yongping-portfolio).\n\n*   **Concentrated Portfolio:** Unlike investors who diversify across hundreds of stocks, Yongping prefers a concentrated portfolio, typically holding only 8-12 stocks. This reflects his high-conviction approach, where he invests significantly in a few businesses he believes in (https://www.gainify.io/blog/duan-yongping-portfolio).\n\n*   **Deep Understanding Over Chasing Trends:** A cornerstone of his philosophy is to gain a deep, essential understanding of a company's business model, products, and user needs. 
He advocates for selective learning and avoids blindly chasing new things or imitating trends, a crucial discipline in an era of information overload (https://www.binance.com/en/square/post/18537661287098). His business philosophy also includes being cautious about the risks and uncertainties associated with new technologies and products (https://link.springer.com/content/pdf/10.1007/978-981-95-0545-6_10.pdf).\n\n*   **Adaptability within a Value Framework:** While a disciplined value investor, Yongping demonstrates adaptability. He is willing to embrace growth opportunities, as shown by his investments in companies like Alphabet and NVIDIA, when the long-term economics are favorable. This indicates that his value framework is flexible enough to incorporate growth assets (https://www.gainify.io/blog/duan-yongping-portfolio).\n\n*   **Simplicity and Consistency:** His investment style is consistent with his life philosophy, which centers on simplifying complexities and focusing on the essence (https://www.alphaexponent.net/p/22-duan-the-dilettante, https://www.binance.com/en/square/post/18537661287098). This approach involves focusing on doing one thing well and maintaining a clear, uncluttered mindset (https://www.binance.com/en/square/post/18537661287098).\n\n \n ### What is the investment philosophy of Warren Buffett?\n\nWarren Buffett's investment philosophy is a disciplined, principle-based approach to investing that has generated significant wealth over several decades (Simply Ethical). It is rooted in the value investing school of Benjamin Graham, but Buffett has expanded upon these initial principles (Investopedia). His strategy serves as a guide for long-term investors, emphasizing wealth creation while avoiding the pitfalls of short-term speculation (Simply Ethical).\n\nThe core tenets of his philosophy include:\n\n*   **Focus on Value Investing:** At its heart, Buffett's strategy is about value investing. 
This involves identifying a company's intrinsic value by analyzing its business fundamentals, such as earnings, revenue, and assets (Investopedia). The goal is to purchase these companies when they are undervalued, particularly during market downturns when prices become more attractive (IIFL Capital, Investopedia).\n\n*   **Invest in Quality Businesses:** Buffett's philosophy has evolved from just buying cheap stocks to investing in \"wonderful businesses at 'fair' valuations\" (Investor.fm, HBR). He describes a good business as a \"castle\" with a protective \"moat\" that management should continuously widen. These high-quality businesses are often unique franchises that consistently generate cash (HBR). However, finding such businesses that remain good investments over time can be difficult (Investor.fm).\n\n*   **Maintain a Long-Term Perspective:** Buffett's principles are designed for long-term success. He invests in a company based on its underlying business quality, not on whether the market will soon recognize its worth (Investopedia). This approach acts as a shield against \"the most common pitfalls of speculation and short-termism\" (Simply Ethical).\n\n*   **Demand Capable and Transparent Management:** The quality and transparency of a company's management are non-negotiable elements in Buffett's approach (Investor.fm). His proposition to managers of the businesses he invests in is that if their company generates cash, they can trust him to reinvest it wisely (HBR).\n\n*   **Emphasize Simplicity:** Buffett advocates for a simple investment strategy (IIFL Capital). 
For investors who may not have the time or expertise to analyze individual businesses, he has recommended periodically investing in an index fund, stating that a \"know-nothing investor can actually out-perform most investment professionals\" this way (IIFL Capital).\n\n*   **Practice Patience and Emotional Discipline:** A key takeaway from Buffett's philosophy is that extraordinary results can be achieved through patience and discipline (Simply Ethical). This involves managing emotions and having the patience to wait for the right opportunities (IIFL Capital). Part of this discipline includes keeping cash reserves available to deploy when market conditions are favorable (IIFL Capital).\n\nWhile timeless, implementing Buffett's philosophy today has its challenges. Finding wonderful businesses at fair prices in the current market is difficult, and Buffett's own massive portfolio puts him at a competitive disadvantage compared to those managing smaller sums (Investor.fm). Nonetheless, his core principles offer clear guidance: focus on quality, maintain a long-term view, and keep the strategy simple (IIFL Capital).\n\n \n ### What is the investment philosophy of Charlie Munger?\n\n### The Investment Philosophy of Charlie Munger\n\nCharlie Munger, the long-time business partner of Warren Buffett, was a legendary investor whose philosophy was built on a foundation of discipline, rationality, and a deep understanding of business fundamentals. His approach can be summarized as buying great companies at fair prices and holding them for the long term.\n\n**Core Tenets of Munger's Philosophy:**\n\n*   **Focus on Quality Businesses:** The cornerstone of Munger's philosophy is that the \"real money is in great companies\" (https://www.yapss.com/blogs/collection-charlie-munger/the-real-money-is-in-great-companies-charlie-mungers-investment-philosophy-final-interview-with-cnbc-2023-c-c-m-327?srsltid=AfmBOoq4JcL0DFNuDyKkGWzph9NfpmYt43J2I7DG5QtTJwnfx6ov-5c8). 
His strategy prioritized quality over quantity, seeking out high-quality businesses to invest in (https://www.fool.com/investing/how-to-invest/famous-investors/charlie-munger/, https://www.yapss.com/blogs/collection-charlie-munger/the-real-money-is-in-great-companies-charlie-mungers-investment-philosophy-final-interview-with-cnbc-2023-c-c-m-327?srsltid=AfmBOoq4JcL0DFNuDyKkGWzph9NfpmYt43J2I7DG5QtTJwnfx6ov-5c8).\n\n*   **Invest within Your \"Circle of Competence\":** Munger was a staunch advocate for only investing in businesses that you can thoroughly understand. He called this principle the \"circle of competence\" and made it clear that if you don't deeply understand a business, you should not buy its stock (https://www.ruleoneinvesting.com/blog/personal-development/5-charlie-munger-investing-lessons-every-value-investor-should-live-by). This approach isn't about picking stocks, but about buying into businesses you comprehend.\n\n*   **Patience and Long-Term Holding:** Munger firmly believed in the long-term holding of quality investments (https://www.investinassets.net/p/10-lessons-from-charlie-munger). His strategy was not focused on short-term market movements but on the long-term growth and value creation of the companies in his portfolio.\n\n*   **Concentrated Portfolio:** Unlike the common advice to diversify broadly, Munger preferred a non-diversified, concentrated portfolio (https://www.fool.com/investing/how-to-invest/famous-investors/charlie-munger/). This reflects his confidence in making a few high-conviction bets on excellent companies, often suggesting a portfolio of 10-20 high-quality compounders (https://www.investinassets.net/p/10-lessons-from-charlie-munger).\n\n*   **Rationality and Discipline:** A key element of his success was an \"unmatched rationality and discipline\" that he maintained through numerous market cycles (https://www.investinassets.net/p/10-lessons-from-charlie-munger). 
His philosophy emphasized staying calm during market volatility and steering clear of common behavioral mistakes that trap many investors (https://www.ruleoneinvesting.com/blog/personal-development/5-charlie-munger-investing-lessons-every-value-investor-should-live-by, https://finance.yahoo.com/news/charlie-mungers-timeless-investment-advice-113042652.html).\n\n*   **Capitalizing on Market Inefficiency:** Munger's approach was rooted in capitalizing on market inefficiencies (https://finance.yahoo.com/news/charlie-mungers-timeless-investment-advice-113042652.html). This involves identifying opportunities where the market has mispriced a great business, allowing for the purchase of a quality asset at a fair price.\n\n\n## Citations\n- https://investor.fm/unpacking-buffetts-investment-philosophy-a-personal-perspective/ \n- https://www.binance.com/en/square/post/18537661287098 \n- https://www.fool.com/investing/how-to-invest/famous-investors/charlie-munger/ \n- https://www.investinassets.net/p/10-lessons-from-charlie-munger \n- https://www.ruleoneinvesting.com/blog/personal-development/5-charlie-munger-investing-lessons-every-value-investor-should-live-by \n- https://www.gainify.io/blog/duan-yongping-portfolio \n- https://www.iiflcapital.com/blog/personal-finance/lessons-from-warren-buffetts-investment-philosophy \n- https://www.binance.com/en/square/post/13966098863626 \n- https://www.yapss.com/blogs/collection-charlie-munger/the-real-money-is-in-great-companies-charlie-mungers-investment-philosophy-final-interview-with-cnbc-2023-c-c-m-327?srsltid=AfmBOoq4JcL0DFNuDyKkGWzph9NfpmYt43J2I7DG5QtTJwnfx6ov-5c8 \n- https://simplyethical.com/blog/warren-buffetts-investment-tenets/ \n- https://hbr.org/1996/01/what-i-learned-from-warren-buffett \n- https://www.investopedia.com/articles/01/071801.asp \n- https://www.alphaexponent.net/p/22-duan-the-dilettante \n- https://finance.yahoo.com/news/charlie-mungers-timeless-investment-advice-113042652.html \n- 
https://link.springer.com/content/pdf/10.1007/978-981-95-0545-6_10.pdf \n"
6 |     }
7 | ]


--------------------------------------------------------------------------------
/deep-research/research_output/research_report_variations/output_final_report_52_depth_1_breadth_2.md:
--------------------------------------------------------------------------------
 1 | # Deep Research Report
 2 | 
 3 | ## Table of Contents 
 4 | - What are the core tenets and principles of Warren Buffett's and Charlie Munger's investment philosophy?
 5 | 
 6 | ## Report 
 7 | ## What are the investment philosophies of Duan Yongping, Warren Buffett, and Charlie Munger?
 8 | 
 9 | 
10 | 
11 |  
12 |  ### What are the core tenets and principles of Warren Buffett's and Charlie Munger's investment philosophy?
13 | 
14 | ### The Core Tenets of Warren Buffett's and Charlie Munger's Investment Philosophy
15 | 
16 | The investment philosophy of Warren Buffett and Charlie Munger, the driving forces behind Berkshire Hathaway's monumental success, is a blend of disciplined principles that prioritize long-term value creation over short-term market speculation. Their shared approach is centered on quality, patience, rational thinking, and a deep understanding of business fundamentals.
17 | 
18 | **1. A Philosophy of Quality, Patience, and Rationality**
19 | At its heart, the Buffett-Munger investment strategy is a coherent philosophy built on three pillars: quality, patience, and rational thinking (https://www.newtraderu.com/2025/10/01/charlie-mungers-investment-strategy-8-principles-every-investor-should-know/). This approach eschews rapid trading in favor of methodical, well-reasoned investment decisions. Buffett himself has acknowledged the synergy of their views, which have been honed over a 45-year business partnership (https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a).
20 | 
21 | **2. Investing in High-Quality Businesses**
22 | A primary driver of Berkshire Hathaway's success is its unwavering focus on buying and investing in high-quality businesses (https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/). Their strategy involves identifying companies that are undervalued but possess long-term growth potential (https://www.youtube.com/watch?v=dUgoUzxHD5Y). This means seeking out businesses with exceptional leadership and fundamental strength (https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a).
23 | 
24 | **3. The "Perpetuity" Holding Period**
25 | A defining characteristic of their philosophy is an exceptionally long-term investment horizon. Munger has echoed Buffett's sentiment that "our preferred duration of ownership is perpetuity" (https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a). This principle is not absolute; they are prepared to sell if they lose confidence in a company's stability. However, the prerequisite for this long-term retention is that the core business and its management demonstrate exceptional and sustained prowess (https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a).
26 | 
27 | **4. "Extreme Patience and Extreme Decisiveness"**
28 | Charlie Munger has famously described their investment strategy as "extreme patience combined with extreme decisiveness" (https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/). They do not feel compelled to act constantly. Instead, they wait for what Munger calls "no-brainer decisions," opportunities where the value proposition is overwhelmingly clear.
29 | 
30 | **5. Concentrated Capital Allocation**
31 | Contrary to the common wisdom of broad diversification, Buffett and Munger advocate for a more concentrated approach. Munger argues that good investment ideas are rare, and therefore, "it makes sense to load up on the very few good insights you have instead of pretending to know everything about everything at all times" (https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/). Buffett reinforces this by stating that such a disciplined approach forces an investor to think very carefully about their decisions and to invest heavily in their most well-reasoned ideas (https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/). For them, the wise allocation of capital is the investor's primary job (https://www.businessinsider.com/charlie-munger-investing-success-from-mental-models-amp-checklists-2012-2).
32 | 
33 | 
34 | ## Citations
35 | - https://www.businessinsider.com/charlie-munger-investing-success-from-mental-models-amp-checklists-2012-2 
36 | - https://risewithdrew.com/warren-buffetts-investment-philosophy-explained/ 
37 | - https://www.newtraderu.com/2025/10/01/charlie-mungers-investment-strategy-8-principles-every-investor-should-know/ 
38 | - https://www.youtube.com/watch?v=dUgoUzxHD5Y 
39 | - https://jamesrobertwer.medium.com/every-entrepreneur-should-keep-these-pearls-of-wisdom-in-mind-b11a5bf1145a 
40 | 
41 | 
42 | # Scores
43 |  - Comprehensiveness: 0.1408
44 |  - Insight: 0.1224
45 |  - Instruction Following: 0.2248
46 |  - Readability: 0.3373
47 |  - Overall Score: 0.1805


--------------------------------------------------------------------------------
/deep-research/research_output/research_report_variations/variation_metric.txt:
--------------------------------------------------------------------------------
 1 | 1. Depth = 1 and Breadth = 2
 2 |     - No. of Sub-Topics: 1
 3 |     - Size of Report: 4.5 Kb
 4 |     - Scores:  
 5 | 
 6 | 2. Depth = 2 and Breadth = 3
 7 |     - No. of Sub-Topics: 3
 8 |     - Size of Report: 11.3 Kb
 9 |     - Scores:  
10 | 
11 | 3. Depth = 2 and Breadth = 5
12 |     - No. of Sub-Topics: 13 (5 * 3 * 1)
13 |     - Size of Report: 74.5 Kb
14 |     - Scores:  


--------------------------------------------------------------------------------
/few-shot-prompting/agent.py:
--------------------------------------------------------------------------------
  1 | import json
  2 | from langchain_community.document_loaders import WebBaseLoader
  3 | from langchain_core.messages import AIMessage, HumanMessage, SystemMessage
  4 | from langchain_openai import ChatOpenAI
  5 | from typing_extensions import TypedDict, Annotated
  6 | from langgraph.graph import START, StateGraph, END
  7 | 
  8 | from dotenv import load_dotenv
  9 | 
# Upper bound on the number of few-shot (article, post) pairs kept in the
# shared state and therefore placed in the prompt.
MAX_FEW_SHOT_EXAMPLES = 3

# System prompt describing the persona and tone of the generated LinkedIn post.
# It is sent as the first (system) message; the few-shot example messages and
# the article to summarise are appended after it.
BASE_PROMPT = '''You are Saurav Prateek. 
You post highlighting information related to technical concepts on Linkedin.
Your field of interests are Generative AI and Software Engineering.
You use emojis. You use exclamation points but are not overly enthusiastic. 
You are not overly formal.
You are not "salesy". You are nice.

When given an article, write a summarized linkedin post about it. 
Make it relevant and specific to the article at hand.

Pay attention to the examples below. These are good examples. 
Generate future summarized posts in the style of the posts below.
'''
 25 | 
 26 | class SharedState(TypedDict):
 27 |     post_url: str
 28 |     post_content: str
 29 |     few_shot_examples: list
 30 |     linkedin_post_content: str
 31 | 
 32 | 
 33 | def get_article_content_for_post(shared_state: SharedState):
 34 |     post_content = get_content_from_url(shared_state['post_url'])
 35 |     shared_state['post_content'] = post_content
 36 |     return shared_state
 37 | 
 38 | 
 39 | def prepare_few_shot_data(shared_state: SharedState):
 40 |     print("Running prepare_few_shot_data...")
 41 |     linkedin_posts = get_linkedin_posts()
 42 |     few_shot_examples = []
 43 | 
 44 |     for linkedin_post in linkedin_posts:
 45 |         newsletter_article_url = linkedin_post['url']
 46 |         newsletter_article_content = get_content_from_url(newsletter_article_url)
 47 |         newsletter_linkedin_post = linkedin_post['post_content']
 48 |         
 49 |         few_shot_examples.append({
 50 |             'article_content': newsletter_article_content,
 51 |             'post_content': newsletter_linkedin_post
 52 |         })
 53 |     
 54 |     shared_state['few_shot_examples'] = few_shot_examples[:MAX_FEW_SHOT_EXAMPLES]
 55 |     return shared_state
 56 | 
 57 | 
 58 | def summarize_linkedin_post(shared_state: SharedState):
 59 |     print("Summarizing Linkedin Post for you...")
 60 |     messages = [SystemMessage(content=BASE_PROMPT)]
 61 | 
 62 |     for few_shot_example in shared_state['few_shot_examples']:
 63 |         messages.append(HumanMessage(content=few_shot_example['article_content']))
 64 |         messages.append(AIMessage(content=few_shot_example['post_content']))
 65 |     
 66 |     messages.append(HumanMessage(content=shared_state['post_content']))
 67 | 
 68 |     model = ChatOpenAI(model="gpt-3.5-turbo", temperature=0.0)
 69 |     response = model.invoke(messages)
 70 | 
 71 |     shared_state['linkedin_post_content'] = response.content.strip() if response else "No summary available."
 72 | 
 73 |     return shared_state
 74 | 
 75 | 
 76 | def get_linkedin_posts():
 77 |     linkedin_posts = open('./dataset/linkedin-posts.json', 'r')
 78 |     post_json_data = linkedin_posts.read()
 79 |     return json.loads(post_json_data)['examples']
 80 | 
 81 | 
 82 | def get_content_from_url(url:str):
 83 |     documents = WebBaseLoader(url).load()
 84 |     page_content = ''
 85 | 
 86 |     for document in documents:
 87 |         page_content += document.page_content
 88 |     
 89 |     return page_content.strip()
 90 | 
 91 | 
 92 | def build_graph():
 93 |     load_dotenv()
 94 |     # Building a Graph
 95 |     # State of the Graph that will be shared among nodes.
 96 |     workflow = StateGraph(SharedState)
 97 | 
 98 |     workflow.add_node("get_article_content_for_post", get_article_content_for_post)
 99 |     workflow.add_node("prepare_few_shot_data", prepare_few_shot_data)
100 |     workflow.add_node("summarize_linkedin_post", summarize_linkedin_post)
101 | 
102 |     workflow.add_edge(START, "get_article_content_for_post",)
103 |     workflow.add_edge("get_article_content_for_post", "prepare_few_shot_data")
104 |     workflow.add_edge("prepare_few_shot_data", "summarize_linkedin_post")
105 |     workflow.add_edge("summarize_linkedin_post", END)
106 | 
107 |     graph = workflow.compile()
108 | 
109 |     response = graph.invoke({
110 |         "post_url": "https://www.linkedin.com/pulse/parallel-execution-nodes-langgraph-enhancing-your-graph-prateek-qqwrc/"
111 |     })
112 | 
113 |     return response
114 | 
115 | 
# NOTE: these statements execute at import time, so importing this module
# builds and invokes the graph before printing the generated post.
state = build_graph()
print("Summarized Linkedin Post: \n")
print(state['linkedin_post_content'])


--------------------------------------------------------------------------------
/few-shot-prompting/dataset/linkedin-posts.json:
--------------------------------------------------------------------------------
 1 | {
 2 |     "name": "Training dataset for LangPost",
 3 |     "examples": [
 4 |         {
 5 |             "url": "https://www.linkedin.com/pulse/tool-calling-langchain-do-more-your-ai-agents-saurav-prateek-so20c",
 6 |             "post_content": "In this edition we discussed how the AI agents can perform a Tool call with LangChain. We defined our tools and further bounded them with the LLM model and saw them in action. 🔥 ⚡ Read On! ☀ "
 7 |         },
 8 |         {
 9 |             "url": "https://www.linkedin.com/pulse/evaluating-our-retrieval-augmented-generation-rag-saurav-prateek-7fksc",
10 |             "post_content": "Let's evaluate our RAG framework's performance in LangChain. 🔥 ⚡ In this edition we build a Performance Grader that evaluates the performance of our RAG framework. It checks whether the answer generated by the RAG was able to solve for the Human Question or not. 💯 Read On! ☀ "
11 |         },
12 |         {
13 |             "url": "https://www.linkedin.com/pulse/hallucination-our-retrieval-augmented-generation-rag-saurav-prateek-uuauc",
14 |             "post_content": "Let's evaluate our RAG framework for Hallucination! 🚀 In this article we have implemented a Hallucination Grader node that evaluates our RAG framework by assessing the Human Question and the external Knowledge Base. ⚡ 💯 Read On! 💚 "
15 |         },
16 |         {
17 |             "url": "https://www.linkedin.com/pulse/facade-pattern-simplifying-your-design-complexity-saurav-prateek-j0enc",
18 |             "post_content": "We discussed the concept of “Facade Pattern”. This is Part-7 in the Low Level Design Pattern series which I am covering under my newsletter. 🔥 I have covered the Code Implementation of this design pattern from scratch as well. Do check-out the full code implementation of Facade Pattern on my Github repo. ⚡ Link to the repo is in the comments. 🔗 Read On!"
19 |         },
20 |         {
21 |             "url": "https://www.linkedin.com/pulse/strategy-pattern-flexible-architecture-saurav-prateek-xmbwc",
22 |             "post_content": "We discussed the concept of “Strategy Pattern”. This is Part 6 in the Low Level Design Pattern series which I am covering under my newsletter. 🔥 I have covered the Code Implementation of this design pattern from scratch as well. Do check-out the full code implementation of Strategy Pattern on my Github repo. 💯 Link to the repo is in the comments. 🔗 Read On!"
23 |         },
24 |         {
25 |             "url": "https://www.linkedin.com/pulse/graph-database-trying-out-neo4j-saurav-prateek-vlgzc",
26 |             "post_content": "✅ We explored Neo4J a Graph Database that stores nodes and relationships instead of classic tables or documents. ✅ We also explored Cypher, a Neo4J’s graph query language that lets us retrieve data from the graph. Read On! 💯 "
27 |         },
28 |         {
29 |             "url": "https://www.linkedin.com/pulse/bloom-filters-probabilistic-data-structure-saurav-prateek-d8u9c",
30 |             "post_content": "We discussed Bloom Filters a probabilistic data structure in depth. This edition involve: Implementation of Standard Bloom Filters. Inserting an element into the Bloom Filters and Membership Queries. Code demonstration of the insertion and membership-query methods. Read On! 💯 "
31 |         }
32 |     ]
33 | }


--------------------------------------------------------------------------------
/gdg-agent-ai/notebook1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 6,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from langchain_openai import ChatOpenAI\n",
 10 |     "from dotenv import load_dotenv"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 7,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "data": {
 20 |       "text/plain": [
 21 |        "True"
 22 |       ]
 23 |      },
 24 |      "execution_count": 7,
 25 |      "metadata": {},
 26 |      "output_type": "execute_result"
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "load_dotenv()"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 8,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "MODEL_NAME = \"gpt-5\"\n",
 40 |     "\n",
 41 |     "# A Node in LangGraph\n",
 42 |     "def node(query):\n",
 43 |     "    model = ChatOpenAI(model=MODEL_NAME)\n",
 44 |     "    return model.invoke(query)\n"
 45 |    ]
 46 |   },
 47 |   {
 48 |    "cell_type": "code",
 49 |    "execution_count": 9,
 50 |    "metadata": {},
 51 |    "outputs": [
 52 |     {
 53 |      "name": "stdout",
 54 |      "output_type": "stream",
 55 |      "text": [
 56 |       "content='Large Language Models (LLMs) are neural networks trained on vast text to predict the next token. Using the transformer architecture, they learn statistical patterns of language and world knowledge. After pretraining, they can be prompted to generate, summarize, translate, code, and answer questions, often with few-shot examples. They rely on context windows, not memory, and may produce plausible but false outputs, reflect bias, or be sensitive to prompts. Fine-tuning and tools can specialize or extend them.' additional_kwargs={'refusal': None} response_metadata={'token_usage': {'completion_tokens': 492, 'prompt_tokens': 16, 'total_tokens': 508, 'completion_tokens_details': {'accepted_prediction_tokens': 0, 'audio_tokens': 0, 'reasoning_tokens': 384, 'rejected_prediction_tokens': 0}, 'prompt_tokens_details': {'audio_tokens': 0, 'cached_tokens': 0}}, 'model_name': 'gpt-5-2025-08-07', 'system_fingerprint': None, 'id': 'chatcmpl-CHlpNKZq5eXQTq0nRux9bRlBHssmZ', 'service_tier': 'default', 'finish_reason': 'stop', 'logprobs': None} id='run--ec564e8d-6ad5-41ce-a1e1-659f99b8f720-0' usage_metadata={'input_tokens': 16, 'output_tokens': 492, 'total_tokens': 508, 'input_token_details': {'audio': 0, 'cache_read': 0}, 'output_token_details': {'audio': 0, 'reasoning': 384}}\n"
 57 |      ]
 58 |     }
 59 |    ],
 60 |    "source": [
 61 |     "response = node('What are Large Language Models under 100 words?')\n",
 62 |     "print(response)"
 63 |    ]
 64 |   },
 65 |   {
 66 |    "cell_type": "code",
 67 |    "execution_count": 10,
 68 |    "metadata": {},
 69 |    "outputs": [
 70 |     {
 71 |      "name": "stdout",
 72 |      "output_type": "stream",
 73 |      "text": [
 74 |       "Large Language Models (LLMs) are neural networks trained on vast text to predict the next token. Using the transformer architecture, they learn statistical patterns of language and world knowledge. After pretraining, they can be prompted to generate, summarize, translate, code, and answer questions, often with few-shot examples. They rely on context windows, not memory, and may produce plausible but false outputs, reflect bias, or be sensitive to prompts. Fine-tuning and tools can specialize or extend them.\n"
 75 |      ]
 76 |     }
 77 |    ],
 78 |    "source": [
 79 |     "print(response.content)"
 80 |    ]
 81 |   }
 82 |  ],
 83 |  "metadata": {
 84 |   "kernelspec": {
 85 |    "display_name": "saurav-env",
 86 |    "language": "python",
 87 |    "name": "python3"
 88 |   },
 89 |   "language_info": {
 90 |    "codemirror_mode": {
 91 |     "name": "ipython",
 92 |     "version": 3
 93 |    },
 94 |    "file_extension": ".py",
 95 |    "mimetype": "text/x-python",
 96 |    "name": "python",
 97 |    "nbconvert_exporter": "python",
 98 |    "pygments_lexer": "ipython3",
 99 |    "version": "3.13.1"
100 |   }
101 |  },
102 |  "nbformat": 4,
103 |  "nbformat_minor": 2
104 | }
105 | 


--------------------------------------------------------------------------------
/gdg-agent-ai/notebook2.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 23,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from langchain_openai import ChatOpenAI\n",
 10 |     "from dotenv import load_dotenv\n",
 11 |     "from typing_extensions import TypedDict, Annotated\n",
 12 |     "from langgraph.graph import START, StateGraph, END\n",
 13 |     "from langchain_community.document_loaders import WebBaseLoader\n"
 14 |    ]
 15 |   },
 16 |   {
 17 |    "cell_type": "code",
 18 |    "execution_count": 24,
 19 |    "metadata": {},
 20 |    "outputs": [],
 21 |    "source": [
 22 |     "class SharedState(TypedDict):\n",
 23 |     "    github_handle: str\n",
 24 |     "    github_profile_content: str\n",
 25 |     "    github_profile_summary: str\n",
 26 |     "    model: ChatOpenAI"
 27 |    ]
 28 |   },
 29 |   {
 30 |    "cell_type": "code",
 31 |    "execution_count": 17,
 32 |    "metadata": {},
 33 |    "outputs": [],
 34 |    "source": [
 35 |     "def build_model(shared_state: SharedState) -> SharedState:\n",
 36 |     "    model = ChatOpenAI(model= \"gpt-5\")\n",
 37 |     "    shared_state['model'] = model\n",
 38 |     "\n",
 39 |     "    return shared_state"
 40 |    ]
 41 |   },
 42 |   {
 43 |    "cell_type": "code",
 44 |    "execution_count": 26,
 45 |    "metadata": {},
 46 |    "outputs": [],
 47 |    "source": [
 48 |     "def pull_github_profile_content(shared_state: SharedState) -> SharedState:\n",
 49 |     "    '''Pulls the GitHub profile content for the given GitHub handle.'''\n",
 50 |     "    print('Pulling GitHub profile content...')\n",
 51 |     "    github_profile_url = 'https://www.github.com/' + shared_state['github_handle']\n",
 52 |     "    documents = WebBaseLoader(github_profile_url).load()\n",
 53 |     "    page_content = ''\n",
 54 |     "\n",
 55 |     "    for document in documents:\n",
 56 |     "        page_content += document.page_content\n",
 57 |     "    \n",
 58 |     "    shared_state['github_profile_content'] = page_content.strip()\n",
 59 |     "    return shared_state"
 60 |    ]
 61 |   },
 62 |   {
 63 |    "cell_type": "code",
 64 |    "execution_count": 27,
 65 |    "metadata": {},
 66 |    "outputs": [],
 67 |    "source": [
 68 |     "def summarize_github_profile(shared_state: SharedState) -> SharedState:\n",
 69 |     "    '''Summarizes the GitHub profile content.'''\n",
 70 |     "    print('Summarizing GitHub profile content...')\n",
 71 |     "    model = shared_state['model']\n",
 72 |     "    github_profile_content = shared_state['github_profile_content']\n",
 73 |     "\n",
 74 |     "    prompt = f'''Summarize the following GitHub profile content in a concise manner\n",
 75 |     "    and highlight name, organization, followers, location, contact information key skills, \n",
 76 |     "    projects, and contributions.\n",
 77 |     "\n",
 78 |     "    Github Profile Content: {github_profile_content}\n",
 79 |     "    '''\n",
 80 |     "    response = model.invoke(prompt)\n",
 81 |     "    shared_state['github_profile_summary'] = response.content.strip()\n",
 82 |     "\n",
 83 |     "    return shared_state\n"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 28,
 89 |    "metadata": {},
 90 |    "outputs": [],
 91 |    "source": [
 92 |     "def build_serial_graph():\n",
 93 |     "  # Building a Graph\n",
 94 |     "  # State of the Graph that will be shared among nodes.\n",
 95 |     "  workflow = StateGraph(SharedState)\n",
 96 |     "\n",
 97 |     "  # Add nodes.\n",
 98 |     "  workflow.add_node(\"build_model\", build_model)\n",
 99 |     "  workflow.add_node(\"pull_github_profile_content\", pull_github_profile_content)\n",
100 |     "  workflow.add_node(\"summarize_github_profile\", summarize_github_profile)\n",
101 |     "\n",
102 |     "  # Define the edges of the graph.\n",
103 |     "  workflow.add_edge(START, \"build_model\")\n",
104 |     "  workflow.add_edge(\"build_model\", \"pull_github_profile_content\")\n",
105 |     "  workflow.add_edge(\"pull_github_profile_content\", \"summarize_github_profile\")\n",
106 |     "  workflow.add_edge(\"summarize_github_profile\", END)\n",
107 |     "\n",
108 |     "  graph = workflow.compile()\n",
109 |     "  \n",
110 |     "  return graph"
111 |    ]
112 |   },
113 |   {
114 |    "cell_type": "code",
115 |    "execution_count": 29,
116 |    "metadata": {},
117 |    "outputs": [
118 |     {
119 |      "name": "stdout",
120 |      "output_type": "stream",
121 |      "text": [
122 |       "Pulling GitHub profile content...\n",
123 |       "Summarizing GitHub profile content...\n",
124 |       "- Name: Saurav Prateek (username: SauravP97)\n",
125 |       "- Organization/Role: Web Solutions Engineer II at Google (gTech); ex-Software Engineer at GeeksForGeeks\n",
126 |       "- Followers: 163 (following 10)\n",
127 |       "- Location: Gurugram, India\n",
128 |       "- Contact: LinkedIn – https://www.linkedin.com/in/saurav-prateek-7b2096140/\n",
129 |       "- Tagline: “Practicing the subtle art of not giving a bug!”\n",
130 |       "- Key skills and interests: Web solutions engineering; Java; TypeScript; Python/Jupyter; AI/ML; LLMs; LangChain; RAG; agentic workflows; distributed systems; low-level design; data structures & algorithms; content/community building\n",
131 |       "- Notable projects (pinned):\n",
132 |       "  - Saurav-Low-Level-Design-Template (Java) — 121★, 35 forks\n",
133 |       "  - Saurav-s-DSA-Templates (Java) — 51★, 31 forks\n",
134 |       "  - LangPost (TypeScript) — 39★, 4 forks — Agent that summarizes LinkedIn articles into post content\n",
135 |       "  - AI-Engineering-101 (Jupyter Notebook) — 29★, 4 forks — Collection of agentic workflows\n",
136 |       "  - micrograd-java (Jupyter Notebook) — 9★, 3 forks — Deep neural network from scratch in Java\n",
137 |       "  - makemore (Jupyter Notebook) — 1★, 1 fork — Character-level language model\n",
138 |       "- Contributions/Activity:\n",
139 |       "  - 54 repositories; 0 projects; 0 packages; 18 starred repositories\n",
140 |       "  - Creator of “Systems That Scale” engineering newsletter (30,000+ subscribers)\n",
141 |       "  - YouTube content: “Distributed Systems 101” series; “RAG and LangChain – Building Multi-agent workflows” playlist\n"
142 |      ]
143 |     }
144 |    ],
145 |    "source": [
146 |     "load_dotenv()\n",
147 |     "\n",
148 |     "compiled_graph = build_serial_graph()\n",
149 |     "response = compiled_graph.invoke({\n",
150 |     "    'github_handle': 'SauravP97'\n",
151 |     "})\n",
152 |     "\n",
153 |     "print(response['github_profile_summary'])"
154 |    ]
155 |   },
156 |   {
157 |    "cell_type": "code",
158 |    "execution_count": null,
159 |    "metadata": {},
160 |    "outputs": [],
161 |    "source": [
162 |     "print(build_serial_graph().get_graph().draw_mermaid())"
163 |    ]
164 |   }
165 |  ],
166 |  "metadata": {
167 |   "kernelspec": {
168 |    "display_name": "saurav-env",
169 |    "language": "python",
170 |    "name": "python3"
171 |   },
172 |   "language_info": {
173 |    "codemirror_mode": {
174 |     "name": "ipython",
175 |     "version": 3
176 |    },
177 |    "file_extension": ".py",
178 |    "mimetype": "text/x-python",
179 |    "name": "python",
180 |    "nbconvert_exporter": "python",
181 |    "pygments_lexer": "ipython3",
182 |    "version": "3.13.1"
183 |   }
184 |  },
185 |  "nbformat": 4,
186 |  "nbformat_minor": 2
187 | }
188 | 


--------------------------------------------------------------------------------
/google-job-search-agent/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/google-job-search-agent/.DS_Store


--------------------------------------------------------------------------------
/google-job-search-agent/__pycache__/prompts.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/google-job-search-agent/__pycache__/prompts.cpython-313.pyc


--------------------------------------------------------------------------------
/google-job-search-agent/agent_graph.py:
--------------------------------------------------------------------------------
  1 | from langchain_community.document_loaders import WebBaseLoader
  2 | from bs4 import BeautifulSoup as Soup
  3 | from langchain_community.document_loaders.recursive_url_loader import RecursiveUrlLoader
  4 | from langchain_huggingface import ChatHuggingFace, HuggingFaceEndpoint, HuggingFacePipeline
  5 | from langchain_core.output_parsers import StrOutputParser
  6 | from langchain import hub
  7 | from langchain_openai import ChatOpenAI
  8 | from langchain_core.prompts import ChatPromptTemplate
  9 | from langchain_community.document_loaders import PyPDFLoader
 10 | 
 11 | from dotenv import load_dotenv
 12 | 
 13 | from prompts import RESUME_ANALYSIS_PROMPT, RELATABLE_JOB_ROLES_PROMPT
 14 | 
 15 | import requests
 16 | 
 17 | 
# Search-results URL that the crawler paginates over. The query string already
# filters for "software engineer", FULL_TIME, company Google and location India;
# get_all_urls() appends the page and target_level parameters to it.
BASE_URL = 'https://www.google.com/about/careers/applications/jobs/results?q=%22software%20engineer%22&employment_type=FULL_TIME&company=Google&location=India'

# Job links are recognised by FETCHED_GOOGLE_JOB_URL_PREFIX and made absolute by
# prepending GOOGLE_JOB_LINK_PREFIX (see get_google_job_urls).
GOOGLE_JOB_LINK_PREFIX = 'https://www.google.com/about/careers/applications/'
FETCHED_GOOGLE_JOB_URL_PREFIX = 'jobs/results/'

# Questions put to the RAG prompt, one per section of a job posting.
MINIMUM_QUALIFICATION_QUESTION = 'What are the Minimum Qualifications in the job?'
PREFERRED_QUALIFICATION_QUESTION = 'What are the Preferred Qualifications in the job?'
JOB_RESPONSIBILITIES_QUESTION = 'What are the Responsibilities in the job?'

# Upper bound on how many recommended roles get a full resume analysis; also
# quoted in the heading of the generated report.
MAX_ROLES = 3
 28 | 
 29 | ########## Helper Methods Start ##########
 30 | 
 31 | def get_all_urls(base_url, page=1):
 32 |     try:
 33 |         # Fetch the HTML content from the URL
 34 |         response = requests.get(base_url + f'&page={page}' + f'&target_level=MID')
 35 |         response.raise_for_status()  # This will raise an exception for bad responses (4xx or 5xx)
 36 | 
 37 |         # Parse the HTML content
 38 |         soup = Soup(response.text, 'html.parser')
 39 | 
 40 |         # Find all 'a' tags (anchor tags) which contain links
 41 |         links = soup.find_all('a')
 42 | 
 43 |         # Store the URLs in a list
 44 |         all_urls = []
 45 |         for link in links:
 46 |             href = link.get('href')  # Get the value of the 'href' attribute
 47 |             if href:
 48 |                 all_urls.append(href)
 49 | 
 50 |         return all_urls
 51 | 
 52 |     except requests.exceptions.RequestException as e:
 53 |         print(f"An error occurred: {e}")
 54 |         return []
 55 | 
 56 | 
 57 | def get_google_job_urls(urls):
 58 |     google_job_urls = []
 59 |     
 60 |     for url in urls:
 61 |         if url.startswith(FETCHED_GOOGLE_JOB_URL_PREFIX):
 62 |             google_job_urls.append(GOOGLE_JOB_LINK_PREFIX + url)
 63 |     
 64 |     return google_job_urls
 65 | 
 66 | 
 67 | def get_url_content(urls):
 68 |     return WebBaseLoader(urls, requests_per_second=10).load()
 69 | 
 70 | ########## Helper Methods End ##########
 71 | 
 72 | def get_google_jobs_content():
 73 |     page = 1
 74 |     google_job_urls = []
 75 | 
 76 |     while(True):
 77 |         print(f'Extracting Google open Roles. Page: {page}')
 78 |         urls = get_all_urls(BASE_URL, page)
 79 |         current_page_google_job_urls = get_google_job_urls(urls)
 80 |         google_job_urls.extend(current_page_google_job_urls)
 81 |         page += 1
 82 | 
 83 |         if len(current_page_google_job_urls) == 0:
 84 |             print('\n\n')
 85 |             break
 86 | 
 87 |     print(f"Crunching Job Content...")
 88 |     contents = get_url_content(google_job_urls)
 89 |     google_jobs_content = [
 90 |         {
 91 |             'link': content.metadata['source'], 
 92 |             'title': content.metadata['title'],
 93 |             'content': content.page_content,
 94 |         } 
 95 |         for content in contents
 96 |     ]
 97 | 
 98 |     return google_jobs_content
 99 | 
100 | 
def get_model(load_from_hugging_face=False):
    """Build the chat model used by the agent.

    With the default flag an OpenAI chat model ("gpt-4", temperature 0.0) is
    returned. When ``load_from_hugging_face`` is truthy a Hugging Face
    endpoint for "openai/gpt-oss-120b" is wrapped in a chat interface instead.
    """
    if not load_from_hugging_face:
        return ChatOpenAI(model="gpt-4", temperature=0.0)

    endpoint = HuggingFaceEndpoint(
        repo_id="openai/gpt-oss-120b",
        task="text-generation",
        provider="auto",  # set your provider here
    )
    return ChatHuggingFace(llm=endpoint)
112 | 
113 | 
def get_resume_content(resume_path='knowledge_base/muskan-resume.pdf'):
    """Read a resume PDF and return its text as one string.

    Args:
        resume_path: Location of the PDF to read. Defaults to the resume the
            script has always used, so existing calls without arguments keep
            their behaviour.

    Returns:
        The ``page_content`` of every page, in page order, joined with a
        newline.
    """
    loader = PyPDFLoader(resume_path)
    return '\n'.join(page.page_content for page in loader.lazy_load())
123 | 
124 | 
def get_related_job_roles(open_roles, resume_content):
    """Ask the model which open roles suit the resume and return their profiles.

    The titles of ``open_roles`` are joined with ", " and sent to the model
    together with the resume text. The answer is split into lines and handed
    to get_recommended_role_profiles(), which maps the lines back to entries
    of ``open_roles``.

    NOTE(review): job titles can themselves contain commas, so the
    comma-joined list may be ambiguous for the model - worth confirming.
    """
    chat_model = get_model(load_from_hugging_face=False)

    titles = [role['title'] for role in open_roles]
    joined_titles = ', '.join(titles)

    messages = [
        ("system", RELATABLE_JOB_ROLES_PROMPT),
        ("human", "Job Roles: \n\n {job_roles} \n\n Candidate's resume: {resume}"),
    ]
    selection_chain = ChatPromptTemplate.from_messages(messages) | chat_model | StrOutputParser()

    answer = selection_chain.invoke({"job_roles": joined_titles, "resume": resume_content})
    answer_lines = answer.split('\n')

    return get_recommended_role_profiles(answer_lines, open_roles)
140 | 
141 | 
def get_recommended_role_profiles(recommended_role_titles, open_roles):
    """Return the open roles whose title was recommended.

    Titles are compared case-insensitively and without surrounding
    whitespace. Because the recommended titles come from free-form model
    output, a leading list marker ("1. ", "2) ", "- ", "* ", a bullet) is
    also tolerated: each line is matched both as written and with the marker
    removed. Blank lines are ignored so they can never match a role whose
    title is empty.

    Args:
        recommended_role_titles: Lines of text, one recommended title each.
        open_roles: Dicts that carry at least a ``title`` key.

    Returns:
        The matching entries of ``open_roles``, in their original order.
    """
    import re  # local import keeps this block independent of the file header

    print(f"\n\n Recommended Role Titles: {recommended_role_titles}")

    list_marker = re.compile(r'^\s*(?:[-*\u2022]+|\d+[.)])\s+')

    wanted_titles = set()
    for raw_title in recommended_role_titles:
        as_written = raw_title.strip().lower()
        without_marker = list_marker.sub('', raw_title).strip().lower()
        # Skip empty strings: a blank response line is not a recommendation.
        wanted_titles.update(title for title in (as_written, without_marker) if title)

    return [
        open_role
        for open_role in open_roles
        if open_role['title'].strip().lower() in wanted_titles
    ]
157 | 
158 | 
def _ask_about_job_profile(job_profile_content, question):
    """Answer ``question`` about a job posting with the "rlm/rag-prompt" chain.

    The posting text is supplied as the prompt's ``context`` and the question
    as its ``question``; the model's answer is returned as a string.
    """
    prompt = hub.pull("rlm/rag-prompt")
    llm = get_model(load_from_hugging_face=False)
    rag_chain = prompt | llm | StrOutputParser()

    return rag_chain.invoke({"context": job_profile_content, "question": question})


def get_minimum_qualifications(job_profile_content):
    """Extract the minimum qualifications from the text of a job posting."""
    return _ask_about_job_profile(job_profile_content, MINIMUM_QUALIFICATION_QUESTION)


def get_preferred_qualification(job_profile_content):
    """Extract the preferred qualifications from the text of a job posting."""
    return _ask_about_job_profile(job_profile_content, PREFERRED_QUALIFICATION_QUESTION)


def get_job_responsibilities(job_profile_content):
    """Extract the responsibilities from the text of a job posting."""
    return _ask_about_job_profile(job_profile_content, JOB_RESPONSIBILITIES_QUESTION)
190 | 
191 | 
def analyze_recommended_roles(recommended_roles, resume_content):
    """Produce a resume-vs-role report for the first MAX_ROLES recommended roles.

    For each role the minimum qualifications, preferred qualifications and
    responsibilities are extracted from the posting text and sent to the
    model together with the resume.

    The model, the prompt template and the resulting chain do not depend on
    the role, so they are built once up front instead of once per iteration.

    Args:
        recommended_roles: Dicts with the keys ``title``, ``link`` and
            ``content``.
        resume_content: Resume text to evaluate against each role.

    Returns:
        A list of dicts with the keys ``title``, ``link`` and ``report``, in
        the order of the analysed roles. Empty when there is nothing to
        analyse (no model is created in that case).
    """
    selected_roles = recommended_roles[:MAX_ROLES]
    if not selected_roles:
        return []

    model = get_model(load_from_hugging_face=False)
    grade_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", RESUME_ANALYSIS_PROMPT),
            ("human", """
                    Minimum Qualification: {min_qual} \n\n 
                    Preferred Qualification: {preferred_qual} \n\n 
                    Role Responsibilities: {responsibilities} \n\n
                    Candidate's resume: {resume}"""),
        ]
    )
    grade_chain = grade_prompt | model | StrOutputParser()

    reports = []
    for recommended_role in selected_roles:
        print(f"\n\n Analyzing your Resume for: {recommended_role['title']} ...")

        role_content = recommended_role['content']
        response = grade_chain.invoke(
            {
                "min_qual": get_minimum_qualifications(role_content),
                "preferred_qual": get_preferred_qualification(role_content),
                "responsibilities": get_job_responsibilities(role_content),
                "resume": resume_content,
            }
        )

        reports.append({
            'title': recommended_role['title'],
            'link': recommended_role['link'],
            'report': response
        })

    return reports
231 | 
232 | 
# ---- Script entry: crawl the postings, match them to the resume, write the report ----
import os

load_dotenv()

google_jobs_content = get_google_jobs_content()
resume_content = get_resume_content()

related_job_roles = get_related_job_roles(google_jobs_content, resume_content)
print(f"\n\n Recommended Job Roles: {[related_job_role['title'] for related_job_role in related_job_roles]}")

if len(related_job_roles) > 0:
    generated_reports = analyze_recommended_roles(related_job_roles, resume_content)
    print('\n\n Report generated!')

    # Collect the pieces and join once instead of growing a string step by step.
    report_sections = [
        '# Google SWE Job Role Report (India) \n',
        f'## This report is generated after analyzing on your Resume for the top {MAX_ROLES} most suited open Google job roles.\n\n',
        '---\n\n',
    ]
    for generated_report in generated_reports:
        report_sections.append(f"## [{generated_report['title']}]({generated_report['link']})\n\n")
        report_sections.append(f"{generated_report['report']}\n\n")
        report_sections.append('---\n\n')
    final_report = ''.join(report_sections)

    # The target directory may not exist on a fresh checkout.
    os.makedirs("report", exist_ok=True)
    # Job titles and model output contain non-ASCII characters (e.g. an em
    # dash), so do not rely on the platform's default encoding.
    with open("report/Report.md", "w", encoding="utf-8") as file:
        file.write(final_report)

    print('\n Report saved!')
else:
    print('\n No related job roles found based on the resume content!')


--------------------------------------------------------------------------------
/google-job-search-agent/knowledge_base/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/google-job-search-agent/knowledge_base/.DS_Store


--------------------------------------------------------------------------------
/google-job-search-agent/knowledge_base/muskan-resume.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/google-job-search-agent/knowledge_base/muskan-resume.pdf


--------------------------------------------------------------------------------
/google-job-search-agent/knowledge_base/resume.pdf:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/google-job-search-agent/knowledge_base/resume.pdf


--------------------------------------------------------------------------------
/google-job-search-agent/prompts.py:
--------------------------------------------------------------------------------
# System prompt for the per-role resume review. agent_graph.py sends it together
# with a human message that carries the minimum qualifications, preferred
# qualifications, responsibilities and the resume text.
# The "\n" sequences sit in a regular (non-raw) string, so each one is a real
# newline in addition to the physical line breaks of the literal.
RESUME_ANALYSIS_PROMPT = """
    You are an expert in coaching new candidates for cracking interviews at Google. \n 
    You have been provided with the Minimum Qualification, Preferred Qualification and Responsibilities required for the role and the candidate's resume content. \n
    
    Please provide the candidate with the detailed report on the following:
      1. Does the candidate meet the minimum and preferred qualifications?
      2. If not, what are the gaps?
      3. How can the candidate improve to meet the minimum qualifications?
      4. How can the candidate improve to meet the preferred qualifications?
      5. How well does the candidate's resume align with the job responsibilities?
      6. Some actionable feedback for the candidate to improve their resume based on the job requirements.
      7. Additional Resources or skills the candidate should focus on to improve their chances of getting hired for this role.

    Please provide the feedback in a markdown format. You can use tables for clarity. \n
    Please keep the response within 2000 words.
"""

# System prompt for shortlisting roles. agent_graph.py sends it with a human
# message containing the comma-joined role titles and the resume text, then
# splits the answer on newlines and matches each line against the known titles.
# NOTE(review): the prompt asks for the top 5 roles while agent_graph.py only
# analyses the first MAX_ROLES (3) matches - confirm this is intended.
RELATABLE_JOB_ROLES_PROMPT = """
    You are an expert in coaching new candidates for cracking interviews at Google. \n
    You have been provided with the job roles (comma separated) and the candidate's resume content. \n
    Please select the top 5 relatable job roles (new line separated) among the provided ones for the candidate based on their resume. \n
    Please keep the Job Roles unchanged and do not modify them. \n
"""


--------------------------------------------------------------------------------
/google-job-search-agent/report/Report.md:
--------------------------------------------------------------------------------
  1 | # Google SWE Job Role Report (India) 
  2 | ## This report was generated by analyzing your resume against the top 3 best-suited open Google job roles.
  3 | 
  4 | ---
  5 | 
  6 | ## [Software Engineer, AI Infrastructure, Cloud ML — Google Careers](https://www.google.com/about/careers/applications/jobs/results/129754964472799942-software-engineer-ai-infrastructure-cloud-ml?q=%22software+engineer%22&employment_type=FULL_TIME&company=Google&location=India&page=1&target_level=MID)
  7 | 
  8 | # Candidate Feedback
  9 | 
 10 | ## Qualifications
 11 | 
 12 | ### Minimum Qualifications
 13 | 
 14 | | Qualification | Does the Candidate Meet the Qualification? | Notes |
 15 | | --- | --- | --- |
 16 | | Bachelor’s degree or equivalent practical experience | Yes | The candidate has a B.Tech degree in Electronics & Communication Engineering. |
 17 | | 5 years of experience with software development in one or more programming languages | No | The candidate has 4 years of experience in software development. |
 18 | | 3 years of experience with ML infrastructure | No | The candidate does not mention any experience with ML infrastructure. |
 19 | | 3 years of experience in testing, maintaining, or launching software products | Yes | The candidate has experience in developing, testing, and maintaining software products. |
 20 | | 3 years of experience with one or more of the following: Speech/audio technology, reinforcement learning, ML infrastructure, or specialization in a ML related field | No | The candidate does not mention any experience in these areas. |
 21 | 
 22 | ### Preferred Qualifications
 23 | 
 24 | | Qualification | Does the Candidate Meet the Qualification? | Notes |
 25 | | --- | --- | --- |
 26 | | Master's degree in Computer Science or a related field, or equivalent practical experience | No | The candidate has a Bachelor's degree but does not mention a Master's degree or equivalent experience. |
 27 | | 10 years of experience in the software industry creating distributed systems, tools, or services | No | The candidate has 4 years of experience in the software industry. |
 28 | | Experience working with Graphics Processing Unit (GPU), Tensor Processing Unit (TPU), or HPC clusters | No | The candidate does not mention any experience with these technologies. |
 29 | | Experience with orchestration tools like SLURM or Kubernetes | No | The candidate does not mention any experience with these tools. |
 30 | | Knowledge of GPUs and related ecosystems and technologies | No | The candidate does not mention any knowledge of GPUs or related technologies. |
 31 | 
 32 | ## Role Responsibilities
 33 | 
 34 | The candidate's resume does not align well with the job responsibilities. The candidate has experience in Java backend development, but the role requires experience with ML infrastructure, orchestration tools, and GPU technologies, which the candidate does not mention.
 35 | 
 36 | ## Actionable Feedback
 37 | 
 38 | 1. Gain experience with ML infrastructure, orchestration tools like SLURM or Kubernetes, and GPU technologies.
 39 | 2. Consider pursuing a Master's degree in Computer Science or a related field.
 40 | 3. Gain more experience in the software industry, particularly in creating distributed systems, tools, or services.
 41 | 4. Highlight any experience with Speech/audio technology, reinforcement learning, or other ML related fields.
 42 | 5. Tailor the resume to better align with the job responsibilities, emphasizing relevant skills and experiences.
 43 | 
 44 | ## Additional Resources
 45 | 
 46 | 1. Online courses on ML infrastructure, orchestration tools, and GPU technologies.
 47 | 2. Books or articles on distributed systems, tools, or services.
 48 | 3. Networking events or meetups in the software industry.
 49 | 4. Mentorship or coaching programs in the software industry.
 50 | 5. Internships or part-time jobs to gain more practical experience.
 51 | 
 52 | ---
 53 | 
 54 | ## [Software Engineer III, AI/ML, Photos Reminiscing — Google Careers](https://www.google.com/about/careers/applications/jobs/results/129350515992470214-software-engineer-iii-aiml-photos-reminiscing?q=%22software+engineer%22&employment_type=FULL_TIME&company=Google&location=India&page=1&target_level=MID)
 55 | 
 56 | # Candidate Feedback
 57 | 
 58 | ## 1. Does the candidate meet the minimum and preferred qualifications?
 59 | 
 60 | | Qualifications | Does the candidate meet the qualifications? |
 61 | | --- | --- |
 62 | | Minimum Qualifications | Partially |
 63 | | Preferred Qualifications | No |
 64 | 
 65 | ## 2. What are the gaps?
 66 | 
 67 | | Qualifications | Gaps |
 68 | | --- | --- |
 69 | | Minimum Qualifications | The candidate does not have experience with Large Language Model (LLM), Machine Learning, Distributed Systems, and Technical Design. |
 70 | | Preferred Qualifications | The candidate does not have a Master's degree or PhD in Computer Science or a related technical field, experience with performance, systems data analysis, visualization tools, debugging, data structures or algorithms in an academic or industry setting, and experience in developing accessible technologies. |
 71 | 
 72 | ## 3. How can the candidate improve to meet the minimum qualifications?
 73 | 
 74 | The candidate can improve by gaining experience in Large Language Model (LLM), Machine Learning, Distributed Systems, and Technical Design. They can do this by taking online courses, working on projects that involve these areas, or getting a job that requires these skills.
 75 | 
 76 | ## 4. How can the candidate improve to meet the preferred qualifications?
 77 | 
 78 | The candidate can improve by pursuing a Master's degree or PhD in Computer Science or a related technical field. They can also gain experience with performance, systems data analysis, visualization tools, debugging, data structures or algorithms in an academic or industry setting, and developing accessible technologies. They can do this by taking online courses, working on projects that involve these areas, or getting a job that requires these skills.
 79 | 
 80 | ## 5. How well does the candidate's resume align with the job responsibilities?
 81 | 
 82 | The candidate's resume aligns well with some of the job responsibilities such as writing product or system development code, reviewing code developed by other engineers and providing feedback to ensure best practices, and contributing to existing documentation or educational content. However, the candidate does not have experience with triaging product or system issues and debugging/tracking/resolving them, and leading design reviews with peers and stakeholders to select among available technologies.
 83 | 
 84 | ## 6. Some actionable feedback for the candidate to improve their resume based on the job requirements.
 85 | 
 86 | The candidate should highlight their experience with software development in one or more programming languages, and their experience with AI. They should also add any experience they have with Large Language Model (LLM), Machine Learning, Distributed Systems, and Technical Design. They should also consider gaining experience in these areas if they do not have any.
 87 | 
 88 | ## 7. Additional Resources or skills the candidate should focus on to improve their chances of getting hired for this role.
 89 | 
 90 | The candidate should focus on improving their skills in Large Language Model (LLM), Machine Learning, Distributed Systems, and Technical Design. They should also consider pursuing a Master's degree or PhD in Computer Science or a related technical field. They can use resources such as online courses, books, and tutorials to improve these skills. They should also consider working on projects that involve these areas to gain practical experience.
 91 | 
 92 | ---
 93 | 
 94 | ## [Software Engineer III, Full Stack, Core — Google Careers](https://www.google.com/about/careers/applications/jobs/results/92258380634038982-software-engineer-iii-full-stack-core?q=%22software+engineer%22&employment_type=FULL_TIME&company=Google&location=India&page=1&target_level=MID)
 95 | 
 96 | # Candidate Feedback
 97 | 
 98 | ## 1. Does the candidate meet the minimum and preferred qualifications?
 99 | 
100 | | Qualifications | Does the candidate meet the qualifications? |
101 | | --- | --- |
102 | | Minimum Qualifications | Yes |
103 | | Preferred Qualifications | Partially |
104 | 
105 | ## 2. What are the gaps?
106 | 
107 | The candidate meets the minimum qualifications as they have a Bachelor's degree and more than 2 years of experience in software development. However, they do not meet all the preferred qualifications. The candidate does not have a Master's degree or PhD in Computer Science or a related technical field. Also, there is no evidence of experience in developing accessible technologies.
108 | 
109 | ## 3. How can the candidate improve to meet the minimum qualifications?
110 | 
111 | The candidate already meets the minimum qualifications.
112 | 
113 | ## 4. How can the candidate improve to meet the preferred qualifications?
114 | 
115 | The candidate can consider pursuing a Master's degree or PhD in Computer Science or a related technical field. They can also gain experience in developing accessible technologies.
116 | 
117 | ## 5. How well does the candidate's resume align with the job responsibilities?
118 | 
119 | The candidate's resume aligns well with the job responsibilities. They have experience in writing product or system development code, participating in design reviews, reviewing code developed by other developers, contributing to existing documentation, and triaging product or system issues.
120 | 
121 | ## 6. Some actionable feedback for the candidate to improve their resume based on the job requirements.
122 | 
123 | The candidate should highlight their experience with data structures or algorithms in either an academic or industry setting. They should also mention any experience they have with full stack development, including back-end and front-end experience.
124 | 
125 | ## 7. Additional Resources or skills the candidate should focus on to improve their chances of getting hired for this role.
126 | 
127 | The candidate should focus on improving their skills in full stack development and accessible technologies. They can consider taking online courses or attending workshops to gain these skills. They should also consider pursuing a Master's degree or PhD in Computer Science or a related technical field.
128 | 
129 | ---
130 | 
131 | 


--------------------------------------------------------------------------------
/google-job-search-agent/report/sample-report.md:
--------------------------------------------------------------------------------
 1 | # Candidate Feedback Report
 2 | 
 3 | ## Qualifications
 4 | 
 5 | ### Minimum Qualifications
 6 | 
 7 | | Qualification | Does the Candidate Meet this Qualification? | 
 8 | | --- | --- |
 9 | | Bachelor’s degree or equivalent practical experience | Yes |
10 | | 5 years of experience with software development in one or more programming languages | Yes |
11 | | 3 years of experience with full stack development | No |
12 | | 3 years of experience testing, maintaining, or launching software products | Yes |
13 | | 1 year of experience with software design and architecture | Yes |
14 | 
15 | ### Preferred Qualifications
16 | 
17 | | Qualification | Does the Candidate Meet this Qualification? | 
18 | | --- | --- |
19 | | Master's degree or PhD in Computer Science or a related technical field | No |
20 | | 5 years of experience with data structures/algorithms | Yes |
21 | | 1 year of experience in a technical leadership role | Yes |
22 | | Experience developing accessible technologies | No |
23 | 
24 | ## Gaps in Qualifications
25 | 
26 | The candidate does not meet the minimum qualification of 3 years of experience with full stack development. In terms of preferred qualifications, the candidate does not have a Master's degree or PhD in Computer Science or a related technical field, and there is no evidence of experience developing accessible technologies.
27 | 
28 | ## Improvement Suggestions
29 | 
30 | To meet the minimum qualifications, the candidate should gain experience in full stack development. This could be achieved through further work experience, online courses, or personal projects.
31 | 
32 | To meet the preferred qualifications, the candidate could consider pursuing further education in Computer Science or a related field. Additionally, the candidate should seek opportunities to gain experience in developing accessible technologies, which could include volunteering, internships, or online courses.
33 | 
34 | ## Alignment with Job Responsibilities
35 | 
36 | The candidate's resume aligns well with the job responsibilities. The candidate has experience in writing and testing product or system development code, participating in design reviews, and reviewing code developed by other developers. The candidate also has experience contributing to existing documentation and triaging product or system issues.
37 | 
38 | ## Actionable Feedback
39 | 
40 | The candidate should highlight their full stack development experience, if any, on their resume. If the candidate does not have this experience, they should seek opportunities to gain it. The candidate should also consider pursuing further education in Computer Science or a related field, and seek opportunities to gain experience in developing accessible technologies.
41 | 
42 | ## Additional Resources
43 | 
44 | The candidate could benefit from online courses in full stack development and accessible technologies. Websites like Coursera, Udemy, and edX offer courses in these areas. Additionally, the candidate could consider pursuing a Master's degree or PhD in Computer Science or a related field.


--------------------------------------------------------------------------------
/mcp/agent1.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "# Create server parameters for stdio connection\n",
 10 |     "from langchain_core.messages import AIMessage, HumanMessage, ToolCall\n",
 11 |     "from langchain_mcp_adapters.tools import load_mcp_tools\n",
 12 |     "from langgraph.prebuilt import create_react_agent\n",
 13 |     "from dotenv import load_dotenv\n",
 14 |     "from langchain_mcp_adapters.client import MultiServerMCPClient\n",
 15 |     "from langgraph.prebuilt import create_react_agent"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 2,
 21 |    "metadata": {},
 22 |    "outputs": [
 23 |     {
 24 |      "data": {
 25 |       "text/plain": [
 26 |        "True"
 27 |       ]
 28 |      },
 29 |      "execution_count": 2,
 30 |      "metadata": {},
 31 |      "output_type": "execute_result"
 32 |     }
 33 |    ],
 34 |    "source": [
 35 |     "load_dotenv()"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 3,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "# Layer 2: Middleware between MCP Servers and Agent\n",
 45 |     "client = MultiServerMCPClient(\n",
 46 |     "    {\n",
 47 |     "        \"math\": {\n",
 48 |     "            \"command\": \"python\",\n",
 49 |     "            # Make sure to update to the full absolute path to your math_server.py file\n",
 50 |     "            \"args\": [\"./math_server.py\"],\n",
 51 |     "            \"transport\": \"stdio\",\n",
 52 |     "        },\n",
 53 |     "        # Add more MCP Servers here...\n",
 54 |     "    }\n",
 55 |     ")"
 56 |    ]
 57 |   },
 58 |   {
 59 |    "cell_type": "code",
 60 |    "execution_count": 4,
 61 |    "metadata": {},
 62 |    "outputs": [],
 63 |    "source": [
 64 |     "tools = await client.get_tools()\n",
 65 |     "agent = create_react_agent(\"openai:gpt-4.1\", tools)\n",
 66 |     "math_response = await agent.ainvoke({\"messages\": \"what's (3 + 5) x 12?\"})"
 67 |    ]
 68 |   },
 69 |   {
 70 |    "cell_type": "code",
 71 |    "execution_count": 5,
 72 |    "metadata": {},
 73 |    "outputs": [],
 74 |    "source": [
 75 |     "def parse_agent_response(response):\n",
 76 |     "    for message in response['messages']:\n",
 77 |     "        if isinstance(message, HumanMessage):\n",
 78 |     "            print(f'Human: {message.content}')\n",
 79 |     "        if isinstance(message, AIMessage):\n",
 80 |     "            if message.tool_calls:\n",
 81 |     "                print(f'Tool Call: {message.tool_calls}')\n",
 82 |     "            else:\n",
 83 |     "                print(f'Agent: {message.content}')"
 84 |    ]
 85 |   },
 86 |   {
 87 |    "cell_type": "code",
 88 |    "execution_count": 6,
 89 |    "metadata": {},
 90 |    "outputs": [
 91 |     {
 92 |      "name": "stdout",
 93 |      "output_type": "stream",
 94 |      "text": [
 95 |       "Human: what's (3 + 5) x 12?\n",
 96 |       "Tool Call: [{'name': 'add', 'args': {'a': 3, 'b': 5}, 'id': 'call_bHnJCosEr6tdxHve1QxSkBff', 'type': 'tool_call'}]\n",
 97 |       "Tool Call: [{'name': 'multiply', 'args': {'a': 8, 'b': 12}, 'id': 'call_h3DSXYBb08Ys5bQTz1tXTDee', 'type': 'tool_call'}]\n",
 98 |       "Agent: (3 + 5) × 12 = 8 × 12 = 96.\n"
 99 |      ]
100 |     }
101 |    ],
102 |    "source": [
103 |     "parse_agent_response(math_response)"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 7,
109 |    "metadata": {},
110 |    "outputs": [],
111 |    "source": [
112 |     "client = MultiServerMCPClient(\n",
113 |     "    {\n",
114 |     "        \"math\": {\n",
115 |     "            \"command\": \"python\",\n",
116 |     "            # Make sure to update to the full absolute path to your math_server.py file\n",
117 |     "            \"args\": [\"./math_server.py\"],\n",
118 |     "            \"transport\": \"stdio\",\n",
119 |     "        },\n",
120 |     "        \"weather\": {\n",
121 |     "            \"command\": \"python\",\n",
122 |     "            # Make sure to update to the full absolute path to your weather_server.py file\n",
123 |     "            \"args\": [\"./weather_server.py\"],\n",
124 |     "            \"transport\": \"stdio\",\n",
125 |     "        },\n",
126 |     "        # Add more MCP Servers here...\n",
127 |     "    }\n",
128 |     ")"
129 |    ]
130 |   },
131 |   {
132 |    "cell_type": "code",
133 |    "execution_count": 10,
134 |    "metadata": {},
135 |    "outputs": [],
136 |    "source": [
137 |     "tools = await client.get_tools()\n",
138 |     "agent = create_react_agent(\"openai:gpt-4.1\", tools)\n",
139 |     "agent_response = await agent.ainvoke({\"messages\": \"How's the weather in San Francisco\"})"
140 |    ]
141 |   },
142 |   {
143 |    "cell_type": "code",
144 |    "execution_count": 11,
145 |    "metadata": {},
146 |    "outputs": [
147 |     {
148 |      "name": "stdout",
149 |      "output_type": "stream",
150 |      "text": [
151 |       "Human: How's the weather in San Francisco\n",
152 |       "Tool Call: [{'name': 'get_weather_status', 'args': {'location': 'San Francisco'}, 'id': 'call_aeKxBFxNb7UNAQjd6BNFT1ZS', 'type': 'tool_call'}]\n",
153 |       "Agent: It's very sunny in San Francisco right now. If you need a detailed forecast or tips for enjoying the sunshine, let me know!\n"
154 |      ]
155 |     }
156 |    ],
157 |    "source": [
158 |     "parse_agent_response(agent_response)"
159 |    ]
160 |   },
161 |   {
162 |    "cell_type": "code",
163 |    "execution_count": null,
164 |    "metadata": {},
165 |    "outputs": [],
166 |    "source": []
167 |   }
168 |  ],
169 |  "metadata": {
170 |   "kernelspec": {
171 |    "display_name": "saurav-env",
172 |    "language": "python",
173 |    "name": "python3"
174 |   },
175 |   "language_info": {
176 |    "codemirror_mode": {
177 |     "name": "ipython",
178 |     "version": 3
179 |    },
180 |    "file_extension": ".py",
181 |    "mimetype": "text/x-python",
182 |    "name": "python",
183 |    "nbconvert_exporter": "python",
184 |    "pygments_lexer": "ipython3",
185 |    "version": "3.13.1"
186 |   }
187 |  },
188 |  "nbformat": 4,
189 |  "nbformat_minor": 2
190 | }
191 | 


--------------------------------------------------------------------------------
/mcp/math_server.py:
--------------------------------------------------------------------------------
 1 | from mcp.server.fastmcp import FastMCP
 2 | 
# Server instance named "Math"; the @mcp.tool() functions in this file register on it.
mcp = FastMCP("Math")
 4 | 
 5 | @mcp.tool()
 6 | def add(a: int, b: int) -> int:
 7 |     """Add two numbers"""
 8 |     return a + b
 9 | 
@mcp.tool()
def multiply(a: int, b: int) -> int:
    """Multiply two numbers"""
    # NOTE(review): the docstring above is kept verbatim; FastMCP presumably
    # publishes it as the tool description, so treat it as runtime text - confirm.
    product = a * b
    return product
14 | 
# Serve over the stdio transport, but only when this file is executed as a
# script (importing the module does not start the server).
if __name__ == "__main__":
    mcp.run(transport="stdio")


--------------------------------------------------------------------------------
/mcp/weather_server.py:
--------------------------------------------------------------------------------
 1 | from mcp.server.fastmcp import FastMCP
 2 | 
# Server instance named "Weather"; the @mcp.tool() function in this file registers on it.
mcp = FastMCP("Weather")
 4 | 
 5 | @mcp.tool()
 6 | def get_weather_status(location: str) -> str:
 7 |     """Get Weather details for the given location"""
 8 |     return "It's very sunny in San Francisco"
 9 | 
10 | 
# Serve over the stdio transport, but only when this file is executed as a
# script (importing the module does not start the server).
if __name__ == "__main__":
    mcp.run(transport="stdio")


--------------------------------------------------------------------------------
/media/.DS_Store:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/.DS_Store


--------------------------------------------------------------------------------
/media/banner.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/banner.png


--------------------------------------------------------------------------------
/media/banner1.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/banner1.png


--------------------------------------------------------------------------------
/media/c-rag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/c-rag.png


--------------------------------------------------------------------------------
/media/debug-workflow.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/debug-workflow.png


--------------------------------------------------------------------------------
/media/few-shot.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/few-shot.png


--------------------------------------------------------------------------------
/media/google-job-agent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/google-job-agent.png


--------------------------------------------------------------------------------
/media/human-in-loop.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/human-in-loop.png


--------------------------------------------------------------------------------
/media/mcp.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/mcp.png


--------------------------------------------------------------------------------
/media/memory-agent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/memory-agent.png


--------------------------------------------------------------------------------
/media/outreach-agent.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/outreach-agent.png


--------------------------------------------------------------------------------
/media/parallel-execute.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/parallel-execute.png


--------------------------------------------------------------------------------
/media/prompt-chaining.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/prompt-chaining.png


--------------------------------------------------------------------------------
/media/routing.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/routing.png


--------------------------------------------------------------------------------
/media/self-rag.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/self-rag.png


--------------------------------------------------------------------------------
/media/text2sql.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/text2sql.png


--------------------------------------------------------------------------------
/media/tool-call.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/tool-call.png


--------------------------------------------------------------------------------
/media/vector-emb.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/media/vector-emb.png


--------------------------------------------------------------------------------
/outreach-agent/agent.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from langchain_openai import ChatOpenAI\n",
 10 |     "from dotenv import load_dotenv"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 2,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "data": {
 20 |       "text/plain": [
 21 |        "True"
 22 |       ]
 23 |      },
 24 |      "execution_count": 2,
 25 |      "metadata": {},
 26 |      "output_type": "execute_result"
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "load_dotenv()"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 3,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "def get_profile_content(profile) -> str:\n",
 40 |     "    \"\"\" Get Profile data from Linkedin profile url or from a Profile file.\"\"\"\n",
 41 |     "    profile = open('./profiles/' + profile, 'r')\n",
 42 |     "    profile_content = profile.read()\n",
 43 |     "    return profile_content\n",
 44 |     "\n",
 45 |     "\n",
 46 |     "def extract_receiver_profile_information(profile_content) -> str:\n",
 47 |     "    \"\"\"Extract useful information from the profile content.\"\"\"\n",
 48 |     "\n",
 49 |     "    query = \"Get Profile details like name, organization and current role from the profile content.\"\n",
 50 |     "    model = ChatOpenAI(model=\"gpt-4o\")\n",
 51 |     "    response = model.invoke([\n",
 52 |     "            {\"role\": \"system\", \"content\": \"You are a helpful assistant that extracts profile details from the provided content.\"},\n",
 53 |     "            {\"role\": \"user\", \"content\": f\"Profile URL: {profile_content} \\n\\n Question: {query}\"}\n",
 54 |     "        ])\n",
 55 |     "    \n",
 56 |     "    return response.content\n",
 57 |     "\n",
 58 |     "def extract_candidate_profile_information(profile_content) -> str:\n",
 59 |     "    \"\"\"Extract useful information from the candidate profile content.\"\"\"\n",
 60 |     "\n",
 61 |     "    query = \"Get Profile details like name, organization, current role, experience and skill from the profile content.\"\n",
 62 |     "    model = ChatOpenAI(model=\"gpt-4o\")\n",
 63 |     "    response = model.invoke([\n",
 64 |     "            {\"role\": \"system\", \"content\": \"You are a helpful assistant that extracts profile details from the provided content.\"},\n",
 65 |     "            {\"role\": \"user\", \"content\": f\"Profile URL: {profile_content} \\n\\n Question: {query}\"}\n",
 66 |     "        ])\n",
 67 |     "    \n",
 68 |     "    return response.content"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 4,
 74 |    "metadata": {},
 75 |    "outputs": [
 76 |     {
 77 |      "name": "stdout",
 78 |      "output_type": "stream",
 79 |      "text": [
 80 |       "- Name: Zach Johnson\n",
 81 |       "- Organization: Mindera\n",
 82 |       "- Current Role: Co-Founder and Head of AI Product\n",
 83 |       "- Location: Zurich, Switzerland\n",
 84 |       "- Name: John Doe\n",
 85 |       "- Organization: UBS\n",
 86 |       "- Current Role: Senior Software Engineer\n",
 87 |       "- Experience:\n",
 88 |       "  1. **Senior Software Engineer at UBS, Zurich, Switzerland (2018 - 2025)**\n",
 89 |       "     - Designed and implemented multiple microservices using Java, Spring Boot, and AWS.\n",
 90 |       "     - Collaborated with cross-functional teams to deliver high-quality software solutions.\n",
 91 |       "     - Mentored junior engineers and contributed to the development of the company's technical strategy.\n",
 92 |       "\n",
 93 |       "  2. **Software Engineer at Easter Genetics, Uster, Switzerland (2015 - 2018)**\n",
 94 |       "     - Worked on a team to develop a genetic algorithm to predict the growth of a new species.\n",
 95 |       "     - Utilized C++ and OpenCV to analyze images and data.\n",
 96 |       "     - Contributed to the development of the company's software architecture and led the implementation of a new feature.\n",
 97 |       "\n",
 98 |       "  3. **Research Assistant at ETH Zurich, Zurich, Switzerland (2012 - 2015)**\n",
 99 |       "     - Assisted a research group in developing algorithms for real-time video processing.\n",
100 |       "     - Utilized MATLAB and C++ to implement and test new methods.\n",
101 |       "\n",
102 |       "- Skills:\n",
103 |       "  - Object-oriented programming\n",
104 |       "  - Data structures\n",
105 |       "  - Software design patterns\n",
106 |       "  - Java, Python, C++\n",
107 |       "  - Agile development methodologies\n",
108 |       "  - Version control systems (Git)\n",
109 |       "  - Microservices design and implementation\n",
110 |       "  - Spring Boot\n",
111 |       "  - AWS\n",
112 |       "  - C++, OpenCV\n",
113 |       "  - MATLAB\n",
114 |       "  - Excellent communication skills.\n"
115 |      ]
116 |     }
117 |    ],
118 |    "source": [
119 |     "CANDIDATE_PROFILE = 'profile2.txt'\n",
120 |     "RECEIVER_PROFILE = 'profile1.txt'\n",
121 |     "\n",
122 |     "receiver_profile_content = get_profile_content(RECEIVER_PROFILE)\n",
123 |     "receiver_profile_information = extract_receiver_profile_information(receiver_profile_content)\n",
124 |     "\n",
125 |     "candidate_profile_content = get_profile_content(CANDIDATE_PROFILE)\n",
126 |     "candidate_profile_information = extract_candidate_profile_information(candidate_profile_content)\n",
127 |     "\n",
128 |     "print(receiver_profile_information)\n",
129 |     "print(candidate_profile_information)"
130 |    ]
131 |   },
132 |   {
133 |    "cell_type": "code",
134 |    "execution_count": 5,
135 |    "metadata": {},
136 |    "outputs": [],
137 |    "source": [
138 |     "def write_a_referral_pitch(receiver_information, candidate_information):\n",
139 |     "    \"\"\" Write a referral pitch based on the receiver's and candidate profile information.\"\"\"\n",
140 |     "    query = '''\n",
141 |     "    You are a candidate applying to a role.\n",
142 |     "    Write a referral pitch for applying to an open position in the receiver's organization \n",
143 |     "    based on the receiver's profile information and the candidate profile information. \n",
144 |     "    The receiver profile information and candidate profile information is provided to you and \n",
145 |     "    the receiver information has the receiver name, organization, and current role.\n",
146 |     "    The candidate profile information has the candidate name, organization, current role, experience, and skills.\n",
147 |     "    \n",
148 |     "    The pitch should be concise, professional, and highlight the candidate's skills and\n",
149 |     "    experiences that make them a good fit for the position.\n",
150 |     "\n",
151 |     "    Keep the pitch message concise and to the point, under 100 words.\n",
152 |     "    '''\n",
153 |     "\n",
154 |     "    model = ChatOpenAI(model=\"gpt-4o\")\n",
155 |     "    response = model.invoke([\n",
156 |     "            {\n",
157 |     "                \"role\": \"system\", \n",
158 |     "                \"content\": \"You are the Candidate trying to write a referral pitch for applying to an open position in the receiver's organization.\"\n",
159 |     "            },\n",
160 |     "            {\n",
161 |     "                \"role\": \"user\", \n",
162 |     "                \"content\": f\"Receiver Information: {receiver_information} \\n\\n Candidate Information: {candidate_information} \\n\\n Question: {query}\"\n",
163 |     "            }\n",
164 |     "        ])\n",
165 |     "    \n",
166 |     "    return response.content"
167 |    ]
168 |   },
169 |   {
170 |    "cell_type": "code",
171 |    "execution_count": 6,
172 |    "metadata": {},
173 |    "outputs": [],
174 |    "source": [
175 |     "referral_pitch = write_a_referral_pitch(receiver_profile_information, candidate_profile_information)"
176 |    ]
177 |   },
178 |   {
179 |    "cell_type": "code",
180 |    "execution_count": 7,
181 |    "metadata": {},
182 |    "outputs": [
183 |     {
184 |      "name": "stdout",
185 |      "output_type": "stream",
186 |      "text": [
187 |       "Subject: Referral Opportunity at Mindera\n",
188 |       "\n",
189 |       "Hi Zach,\n",
190 |       "\n",
191 |       "I hope this message finds you well. I am reaching out to express my interest in joining Mindera. With over a decade of experience in software engineering, including designing scalable microservices at UBS and developing advanced algorithms, I believe my skills in Java, C++, and AI align well with your team’s objectives. My experience in leading projects and mentoring has prepared me to make a meaningful contribution under your leadership. I would be thrilled to explore potential opportunities at Mindera.\n",
192 |       "\n",
193 |       "Thank you for your consideration.\n",
194 |       "\n",
195 |       "Best regards,  \n",
196 |       "John Doe\n"
197 |      ]
198 |     }
199 |    ],
200 |    "source": [
201 |     "print(referral_pitch)"
202 |    ]
203 |   },
204 |   {
205 |    "cell_type": "code",
206 |    "execution_count": null,
207 |    "metadata": {},
208 |    "outputs": [],
209 |    "source": []
210 |   }
211 |  ],
212 |  "metadata": {
213 |   "kernelspec": {
214 |    "display_name": "saurav-env",
215 |    "language": "python",
216 |    "name": "python3"
217 |   },
218 |   "language_info": {
219 |    "codemirror_mode": {
220 |     "name": "ipython",
221 |     "version": 3
222 |    },
223 |    "file_extension": ".py",
224 |    "mimetype": "text/x-python",
225 |    "name": "python",
226 |    "nbconvert_exporter": "python",
227 |    "pygments_lexer": "ipython3",
228 |    "version": "3.13.1"
229 |   }
230 |  },
231 |  "nbformat": 4,
232 |  "nbformat_minor": 2
233 | }
234 | 


--------------------------------------------------------------------------------
/outreach-agent/agent.py:
--------------------------------------------------------------------------------
  1 | from langchain_openai import ChatOpenAI
  2 | from dotenv import load_dotenv
  3 | from typing_extensions import TypedDict, Annotated
  4 | from langgraph.graph import START, StateGraph, END
  5 | 
# Profile file names; get_profile_content resolves them under ./profiles/.
# build_graph passes them as the initial 'candidate_profile' and
# 'receiver_profile' state values.
CANDIDATE_PROFILE = 'profile2.txt'
RECEIVER_PROFILE = 'profile1.txt'
  8 | 
  9 | 
 10 | def custom_reducer(obj1, obj2):
 11 |   return obj2
 12 | 
 13 | 
class SharedState(TypedDict):
  """
  Represents the state of our graph.

  Every field is annotated with ``custom_reducer``, which returns its second
  argument. NOTE(review): presumably this means a newly written value simply
  replaces the previous one for that key - confirm against LangGraph's
  reducer convention.
  """
  # Profile file names; get_profile_content opens them under ./profiles/.
  candidate_profile: Annotated[str, custom_reducer]
  receiver_profile: Annotated[str, custom_reducer]

  # Raw text read from the two profile files.
  candidate_profile_content: Annotated[str, custom_reducer]
  receiver_profile_content: Annotated[str, custom_reducer]
  
  # Model replies holding the details extracted from the raw profile text.
  candidate_profile_information: Annotated[str, custom_reducer]
  receiver_profile_information: Annotated[str, custom_reducer]

  # Referral pitch text written by write_a_referral_pitch.
  pitch: Annotated[str, custom_reducer]
 28 | 
 29 | 
 30 | def get_profile_content(profile) -> str:
 31 |     """ Get Profile data from Linkedin profile url or from a Profile file."""
 32 |     profile = open('./profiles/' + profile, 'r')
 33 |     profile_content = profile.read()
 34 |     return profile_content
 35 | 
 36 | 
 37 | def get_candidate_profile_content(shared_state: SharedState) -> str:
 38 |     """ Get Candidate Profile data from Linkedin profile url or from a Profile file."""
 39 |     shared_state['candidate_profile_content'] = get_profile_content(shared_state['candidate_profile'])
 40 |     return shared_state
 41 | 
 42 | 
 43 | def get_receiver_profile_content(shared_state: SharedState) -> str:
 44 |     """ Get Receiver Profile data from Linkedin profile url or from a Profile file."""
 45 |     shared_state['receiver_profile_content'] = get_profile_content(shared_state['receiver_profile'])
 46 |     return shared_state
 47 | 
 48 | 
 49 | def extract_receiver_profile_information(shared_state: SharedState) -> str:
 50 |     """Extract useful information from the profile content."""
 51 | 
 52 |     query = "Get Profile details like name, organization and current role from the profile content."
 53 |     model = ChatOpenAI(model="gpt-4o")
 54 |     response = model.invoke([
 55 |             {"role": "system", "content": "You are a helpful assistant that extracts profile details from the provided content."},
 56 |             {"role": "user", "content": f"Profile URL: {shared_state['receiver_profile_content']} \n\n Question: {query}"}
 57 |         ])
 58 |     
 59 |     shared_state['receiver_profile_information'] = response.content
 60 |     return shared_state
 61 | 
 62 | 
 63 | def extract_candidate_profile_information(shared_state: SharedState) -> str:
 64 |     """Extract useful information from the candidate profile content."""
 65 | 
 66 |     query = "Get Profile details like name, organization, current role, experience and skill from the profile content."
 67 |     model = ChatOpenAI(model="gpt-4o")
 68 |     response = model.invoke([
 69 |             {"role": "system", "content": "You are a helpful assistant that extracts profile details from the provided content."},
 70 |             {"role": "user", "content": f"Profile URL: {shared_state['candidate_profile_content']} \n\n Question: {query}"}
 71 |         ])
 72 |     
 73 |     shared_state['candidate_profile_information'] = response.content
 74 |     return shared_state
 75 | 
 76 | 
 77 | def write_a_referral_pitch(shared_state: SharedState):
 78 |     """ Write a referral pitch based on the receiver's and candidate profile information."""
 79 |     query = '''
 80 |     You are a candidate applying for an AI Engineer role.
 81 |     Write a referral pitch for applying to an open position in the receiver's organization 
 82 |     based on the receiver's profile information and the candidate profile information. 
 83 |     The receiver profile information and candidate profile information is provided to you and 
 84 |     the receiver information has the receiver name, organization, and current role.
 85 |     The candidate profile information has the candidate name, organization, current role, experience, and skills.
 86 |     
 87 |     The pitch should be concise, professional, and highlight the candidate's skills and
 88 |     experiences that make them a good fit for the position.
 89 | 
 90 |     Keep the pitch message concise and to the point, under 100 words.
 91 |     '''
 92 |     print("Writing Referral Pitch...")
 93 |     print(f'Receiver Profile info {shared_state["receiver_profile_information"]}')
 94 |     print(f'Candidate Profile info {shared_state["candidate_profile_information"]}')
 95 | 
 96 |     model = ChatOpenAI(model="gpt-4o")
 97 |     response = model.invoke([
 98 |             {
 99 |                 "role": "system", 
100 |                 "content": f"""You are the Candidate trying to write a referral pitch for applying to 
101 |                 an open position in the receiver's organization.
102 |                 """
103 |             },
104 |             {
105 |                 "role": "user", 
106 |                 "content": f"""
107 |                 Receiver Information: {shared_state['receiver_profile_information']} 
108 |                 \n\n Candidate Information: {shared_state['candidate_profile_information']} 
109 |                 \n\n Question: {query}
110 |                 """
111 |             }
112 |         ])
113 |     
114 |     shared_state['pitch'] = response.content
115 |     return shared_state
116 | 
117 | 
def build_graph():
  """Assemble the outreach workflow, run it once, and return the final state.

  Loads environment variables, registers the five node functions on a
  ``StateGraph(SharedState)``, chains them linearly from START to END, compiles
  the graph and invokes it with the module-level profile file names.
  """
  load_dotenv()

  # State of the graph that will be shared among nodes.
  workflow = StateGraph(SharedState)

  # Nodes listed in execution order; the edges are derived from this order.
  pipeline = [
      ("get_candidate_profile_content", get_candidate_profile_content),
      ("get_receiver_profile_content", get_receiver_profile_content),
      ("extract_candidate_profile_information", extract_candidate_profile_information),
      ("extract_receiver_profile_information", extract_receiver_profile_information),
      ("write_a_referral_pitch", write_a_referral_pitch),
  ]
  for node_name, node_fn in pipeline:
    workflow.add_node(node_name, node_fn)

  # START -> first node -> ... -> last node -> END, one edge per adjacent pair.
  route = [START] + [node_name for node_name, _ in pipeline] + [END]
  for source, target in zip(route, route[1:]):
    workflow.add_edge(source, target)

  compiled = workflow.compile()

  # print(compiled.get_graph().draw_mermaid())

  initial_state = {
      'candidate_profile': CANDIDATE_PROFILE,
      'receiver_profile': RECEIVER_PROFILE,
  }
  return compiled.invoke(initial_state)
149 | 
150 | 
# Runs the whole workflow as soon as the module is executed or imported, then
# prints the generated pitch.
# NOTE(review): there is no `if __name__ == "__main__"` guard, so importing
# this module triggers the model calls - confirm that is intended.
agent_response = build_graph()
print('Referral Pitch {}'.format(agent_response["pitch"]))


--------------------------------------------------------------------------------
/outreach-agent/profiles/profile1.txt:
--------------------------------------------------------------------------------
 1 | Zach Johnson is a Co-Founder AI Product | Transforming Industries with Intelligent Solutions Results-driven 
 2 | technology entrepreneur with a passion for leveraging AI to drive innovation and growth. 
 3 | Proven track record of successfully co-founding and leading AI product teams in high-tech environments. 
 4 | Skilled in strategy development, team leadership, and technology evangelism. Mindera | 
 5 | Co-Founder and Head of AI Product | Zurich, Switzerland | 2018-2025 | 
 6 | Developed and implemented AI-powered solutions for clients across various industries, 
 7 | driving significant revenue growth and improving operational efficiency.;
 8 | EPFL | Research Assistant | Lausanne, Switzerland | 2015-2018 | Collaborated with professors and 
 9 | researchers on AI-related projects, contributing to the development of novel machine learning algorithms 
10 | and applications. EPFL | Master of Science in Computer Science | 2018; University of Lausanne | 
11 | Bachelor of Science in Computer Science | 2015 AI Strategy; Machine Learning; Deep Learning; 
12 | Computer Vision; 
13 | Natural Language Processing; Team Leadership; Technology Evangelism; Cloud Computing


--------------------------------------------------------------------------------
/outreach-agent/profiles/profile2.txt:
--------------------------------------------------------------------------------
 1 | John Doe is a Software Engineer | Passionate about building scalable and maintainable systems 
 2 | Results-driven software engineer with a strong background in object-oriented programming,
 3 |  data structures, and software design patterns. Skilled in Java, Python, and C++. 
 4 |  Proficient in agile development methodologies and version control systems like Git. 
 5 |  Collaborative team player with excellent communication skills. Senior Software Engineer 
 6 |  | UBS | Zurich, Switzerland | 2018 - 2025 | Designed and implemented multiple microservices using Java, 
 7 |  Spring Boot, and AWS. Collaborated with cross-functional teams to deliver high-quality software solutions. 
 8 |  Mentored junior engineers and contributed to the development of the company's technical strategy.; 
 9 |  Software Engineer | Easter Genetics | Uster, Switzerland | 2015 - 2018 | Worked on a team to develop a 
10 |  genetic algorithm to predict the growth of a new species. Utilized C++ and OpenCV to analyze images and data. 
11 |  Contributed to the development of the company's software architecture and led the implementation of a new 
12 |  feature.; Research Assistant | ETH Zurich | Zurich, Switzerland | 2012 - 2015 |
13 | Assisted a research group in developing algorithms for real-time video processing.
14 | Utilized MATLAB and C++ to implement and test new methods.
15 | 


--------------------------------------------------------------------------------
/parallel-execution/agent.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 6,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from langchain_openai import ChatOpenAI\n",
 10 |     "from dotenv import load_dotenv"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 7,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "data": {
 20 |       "text/plain": [
 21 |        "True"
 22 |       ]
 23 |      },
 24 |      "execution_count": 7,
 25 |      "metadata": {},
 26 |      "output_type": "execute_result"
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "load_dotenv()"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 21,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "def get_article_content(article_name: str) -> str:\n",
 40 |     "    \"\"\" Function to get the content of an article.\"\"\"\n",
 41 |     "    profile = open('./articles/' + article_name, 'r')\n",
 42 |     "    profile_content = profile.read()\n",
 43 |     "    return profile_content"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 23,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "def get_bloom_filter_article_summary():\n",
 53 |     "    \"\"\" Function to get the summary of the Bloom Filter article.\"\"\"\n",
 54 |     "    article_name = 'bloom-filters.txt'\n",
 55 |     "    article_content = get_article_content(article_name)\n",
 56 |     "\n",
 57 |     "    llm = ChatOpenAI(model=\"gpt-4\", temperature=0.0)\n",
 58 |     "    \n",
 59 |     "    response = llm.invoke(\n",
 60 |     "        f\"Summarize the following article:\\n\\n{article_content}\\n\\n\"\n",
 61 |     "        \"Please provide a concise summary that captures the main points. Please keep the summary under 200 words.\"\n",
 62 |     "    )\n",
 63 |     "    \n",
 64 |     "    return response.content.strip() if response else \"No summary available.\""
 65 |    ]
 66 |   },
 67 |   {
 68 |    "cell_type": "code",
 69 |    "execution_count": 28,
 70 |    "metadata": {},
 71 |    "outputs": [],
 72 |    "source": [
 73 |     "def get_graph_db_article_summary():\n",
 74 |     "    \"\"\" Function to get the summary of the Graph database article.\"\"\"\n",
 75 |     "    article_name = 'graph-db.txt'\n",
 76 |     "    article_content = get_article_content(article_name)\n",
 77 |     "\n",
 78 |     "    llm = ChatOpenAI(model=\"gpt-4\", temperature=0.0)\n",
 79 |     "    \n",
 80 |     "    response = llm.invoke(\n",
 81 |     "        f\"Summarize the following article:\\n\\n{article_content}\\n\\n\"\n",
 82 |     "        \"Please provide a concise summary that captures the main points. Please keep the summary under 200 words.\"\n",
 83 |     "    )\n",
 84 |     "    \n",
 85 |     "    return response.content.strip() if response else \"No summary available.\""
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 24,
 91 |    "metadata": {},
 92 |    "outputs": [],
 93 |    "source": [
 94 |     "bloom_filter_summary = get_bloom_filter_article_summary()"
 95 |    ]
 96 |   },
 97 |   {
 98 |    "cell_type": "code",
 99 |    "execution_count": 29,
100 |    "metadata": {},
101 |    "outputs": [],
102 |    "source": [
103 |     "graph_db_summary = get_graph_db_article_summary()"
104 |    ]
105 |   },
106 |   {
107 |    "cell_type": "code",
108 |    "execution_count": 27,
109 |    "metadata": {},
110 |    "outputs": [
111 |     {
112 |      "name": "stdout",
113 |      "output_type": "stream",
114 |      "text": [
115 |       "Bloom Filters are compact data structures used to determine if an element is part of a set. They can respond in two ways: \"Maybe\", indicating there is some probability the element is in the set, and \"Definitely Not\", indicating zero probability the element is in the set. Bloom Filters can always recognize true negatives but may generate false positives. They are used in scenarios where false positives are acceptable. A standard Bloom Filter can be implemented using a group of hash functions and works most efficiently when the size of the search space is known in advance. The process of inserting an element into the Bloom Filter involves computing the hash value of the element with all the hash functions and setting the corresponding addresses in the Bloom Filter vector to 1. To search for an element, the hash of the element is computed with all the hash functions and the corresponding locations in the Bloom Filter vector are checked. The run time complexity of both operations is O(K), where K is the number of distinct hash functions used.\n"
116 |      ]
117 |     }
118 |    ],
119 |    "source": [
120 |     "print(bloom_filter_summary)"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 30,
126 |    "metadata": {},
127 |    "outputs": [
128 |     {
129 |      "name": "stdout",
130 |      "output_type": "stream",
131 |      "text": [
132 |       "Neo4J is a popular graph database that stores data in nodes and relationships, rather than traditional tables or documents. The data model in Neo4J is composed of nodes, relationships, and properties. Nodes are the core entities in the database, each with a label that determines its type. Each node can also have multiple properties, described in key-value pairs. Nodes can be related to each other through relationships, which are equivalent to edges in the graph data structure. Unlike in the graph data structure, in Neo4J it is mandatory to add a direction when creating a relationship between two nodes. However, Neo4J allows querying an undirected relationship, making the queries flexible. Cypher is Neo4J's graph query language that allows users to create nodes with specific properties, create relationships between nodes, and query or visualize the nodes and their relationships.\n"
133 |      ]
134 |     }
135 |    ],
136 |    "source": [
137 |     "print(graph_db_summary)"
138 |    ]
139 |   }
140 |  ],
141 |  "metadata": {
142 |   "kernelspec": {
143 |    "display_name": "saurav-env",
144 |    "language": "python",
145 |    "name": "python3"
146 |   },
147 |   "language_info": {
148 |    "codemirror_mode": {
149 |     "name": "ipython",
150 |     "version": 3
151 |    },
152 |    "file_extension": ".py",
153 |    "mimetype": "text/x-python",
154 |    "name": "python",
155 |    "nbconvert_exporter": "python",
156 |    "pygments_lexer": "ipython3",
157 |    "version": "3.13.1"
158 |   }
159 |  },
160 |  "nbformat": 4,
161 |  "nbformat_minor": 2
162 | }
163 | 


--------------------------------------------------------------------------------
/parallel-execution/agent.py:
--------------------------------------------------------------------------------
  1 | from langchain_openai import ChatOpenAI
  2 | from dotenv import load_dotenv
  3 | from typing_extensions import TypedDict, Annotated
  4 | from langgraph.graph import START, StateGraph, END
  5 | 
  6 | import time
  7 | 
# File names of the two articles to summarize. Despite the "LINK" suffix these
# are not URLs: they are placed in the initial graph state and the summarizer
# nodes pass them to get_article_content(), which prepends './articles/'.
BLOOM_FILTER_ARTICLE_LINK = "bloom-filters.txt"
GRAPH_DB_ARTICLE_LINK = "graph-db.txt"
 10 | 
 11 | # To Handle: https://langchain-ai.github.io/langgraph/troubleshooting/errors/INVALID_CONCURRENT_GRAPH_UPDATE/
 12 | def custom_reducer(obj1: str, obj2: str):
 13 |   return obj1 if obj1.strip() else obj2
 14 | 
 15 | 
class SharedState(TypedDict):
  """
  Represents the shared state of our graph.

  Every key is annotated with ``custom_reducer``.
  """
  # Article file names that the summarizer nodes hand to get_article_content().
  bloom_filter_article_link: Annotated[str, custom_reducer]
  graph_db_article_link: Annotated[str, custom_reducer]

  # Per-article summaries written by the two summarizer nodes.
  bloom_filter_article_summary: Annotated[str, custom_reducer]
  graph_db_article_summary: Annotated[str, custom_reducer]

  # Combined text assembled by join_node from both article summaries.
  summary: Annotated[str, custom_reducer]
 27 | 
 28 | 
 29 | def get_article_content(article_name: str) -> str:
 30 |     """ Function to get the content of an article."""
 31 |     profile = open('./articles/' + article_name, 'r')
 32 |     profile_content = profile.read()
 33 |     return profile_content
 34 | 
 35 | 
 36 | def get_bloom_filter_article_summary(state: SharedState) -> SharedState:
 37 |     """ Function to get the summary of the Bloom Filter article."""
 38 |     print('Summarizing Bloom Filter article...')
 39 |     article_content = get_article_content(state['bloom_filter_article_link'])
 40 | 
 41 |     llm = ChatOpenAI(model="gpt-4", temperature=0.0)
 42 |     response = llm.invoke(
 43 |         f"Summarize the following article:\n\n{article_content}\n\n"
 44 |         "Please provide a concise summary that captures the main points. Please keep the summary under 200 words."
 45 |     )
 46 |     state['bloom_filter_article_summary'] = response.content.strip() if response else "No summary available."
 47 | 
 48 |     return state
 49 | 
 50 | 
 51 | def get_graph_db_article_summary(state: SharedState) -> SharedState:
 52 |     """ Function to get the summary of the Graph database article."""
 53 |     print('Summarizing Graph DB article...')
 54 |     article_content = get_article_content(state['graph_db_article_link'])
 55 | 
 56 |     llm = ChatOpenAI(model="gpt-4", temperature=0.0)
 57 |     response = llm.invoke(
 58 |         f"Summarize the following article:\n\n{article_content}\n\n"
 59 |         "Please provide a concise summary that captures the main points. Please keep the summary under 200 words."
 60 |     )
 61 |     state['graph_db_article_summary'] = response.content.strip() if response else "No summary available."
 62 | 
 63 |     return state
 64 | 
 65 | 
 66 | def join_node(state: SharedState) -> SharedState:
 67 |     """
 68 |     This node acts as a router/conditional check.
 69 |     It decides the next step based on whether A and B have completed.
 70 |     """
 71 |     print(f"Bloom Filter article summary fetched = {state.get('bloom_filter_article_summary') is not None}")
 72 |     print(f"Graph DB article summary fetched = {state.get('graph_db_article_summary') is not None}")
 73 | 
 74 |     if state.get("bloom_filter_article_summary") is not None and state.get("graph_db_article_summary") is not None:
 75 |         print("Articles summarized. Proceeding to final processing.")
 76 |         # Perform the work that requires both A and B's output
 77 |         summarized_result = f"Bloom Filter Article summary: \n\n {state['bloom_filter_article_summary']} \n\n\n"
 78 |         summarized_result += f"Graph DB Article summary: \n\n {state['graph_db_article_summary']} \n\n\n"
 79 |         state["summary"] = summarized_result
 80 | 
 81 |         return state
 82 | 
 83 |     print("Still waiting for one or more nodes...")
 84 |     return state
 85 | 
 86 | 
 87 | def build_parallel_graph():
 88 |   # Building a Graph
 89 |   # State of the Graph that will be shared among nodes.
 90 |   workflow = StateGraph(SharedState)
 91 | 
 92 |   # Add nodes.
 93 |   workflow.add_node("get_bloom_filter_article_summary", get_bloom_filter_article_summary)
 94 |   workflow.add_node("get_graph_db_article_summary", get_graph_db_article_summary)
 95 |   workflow.add_node("join_node", join_node)
 96 | 
 97 |   # Define the edges of the graph.
 98 |   workflow.add_edge(START, "get_bloom_filter_article_summary")
 99 |   workflow.add_edge(START, "get_graph_db_article_summary")
100 |   workflow.add_edge("get_bloom_filter_article_summary", "join_node")
101 |   workflow.add_edge("get_graph_db_article_summary", "join_node")
102 |   workflow.add_conditional_edges(
103 |     "join_node",
104 |     # This function determines the next node based on the state
105 |     lambda state: END if state.get("summary") is not None else "join_node"
106 |   )
107 | 
108 |   graph = workflow.compile()
109 | 
110 |   response = graph.invoke({
111 |       'bloom_filter_article_link': BLOOM_FILTER_ARTICLE_LINK,
112 |       'graph_db_article_link': GRAPH_DB_ARTICLE_LINK,
113 |   })
114 | 
115 |   # print(graph.get_graph().draw_mermaid())
116 | 
117 |   return response
118 | 
119 | 
def build_serial_graph():
  """Build, compile and run the graph with the nodes chained one after another.

  Execution order is START -> Bloom Filter summarizer -> Graph DB summarizer
  -> ``join_node`` -> END.

  Returns:
    The value returned by ``graph.invoke`` for the two article file names.
  """
  # The graph's nodes all share one SharedState.
  workflow = StateGraph(SharedState)

  # Register nodes.
  for node_name, node_fn in (
      ("get_bloom_filter_article_summary", get_bloom_filter_article_summary),
      ("get_graph_db_article_summary", get_graph_db_article_summary),
      ("join_node", join_node),
  ):
    workflow.add_node(node_name, node_fn)

  # Wire edges as a single chain, each stage feeding the next.
  pipeline = (
      START,
      "get_bloom_filter_article_summary",
      "get_graph_db_article_summary",
      "join_node",
      END,
  )
  for source, target in zip(pipeline, pipeline[1:]):
    workflow.add_edge(source, target)

  graph = workflow.compile()
  # print(graph.get_graph().draw_mermaid())

  initial_state = {
      'bloom_filter_article_link': BLOOM_FILTER_ARTICLE_LINK,
      'graph_db_article_link': GRAPH_DB_ARTICLE_LINK,
  }
  return graph.invoke(initial_state)
146 | 
147 | 
# Script body: load environment variables, run the graph once, report the
# combined summary and the wall-clock time of the run.
load_dotenv()

start_time = time.perf_counter()
agent_response = build_parallel_graph()
# agent_response = build_serial_graph()
end_time = time.perf_counter()

final_summary = agent_response["summary"]
elapsed_seconds = end_time - start_time

print(f'\n\n\n Summary generated: \n\n {final_summary}')
print(f'\n\n\n Total execution time: {elapsed_seconds:.2f} seconds')


--------------------------------------------------------------------------------
/parallel-execution/articles/bloom-filters.txt:
--------------------------------------------------------------------------------
 1 | Bloom Filter
 2 | 
 3 | Introduction
 4 | Bloom Filters are compact data-structures that are responsible for answering the membership queries. They successfully filter out the elements which are not a part of the set.
 5 | 
 6 | In summary the Bloom Filters are responsible for answering whether an element is a part of a given set or not. Suppose we have a list of elements present in a set, say S. Now we want to check whether an element X is a part of the set S or not. In this case Bloom Filters can answer in two ways:
 7 | 
 8 | Element X can or can not be a part of Set S: The Bloom Filters can respond with a Maybe! This means that there is some probability for the element X to be present in the set S. The more efficient the filter, the higher the probability that the element is actually present in the set.
 9 | 
10 | Element X is definitely not a part of Set S: The Bloom Filters can respond with a Definitely Not! This means that there is a 0 probability for the element X to be present in the set S. In this case we can be completely sure that the element is not present in the set.
11 | 
12 | In this way Bloom Filters are always able to recognise True Negatives but can not always recognize True Positives and hence can generate some False Positives. These data structures are generally used in those scenarios where False Positives are accepted.
13 | 
14 | 
15 | For Example we can raise a false Alarm for a Fire hazard which did not take place but we can never avoid raising an Alarm for an actual fire Hazard.
16 | 
17 | Implementing a Standard Bloom Filter
18 | A Standard Bloom Filter can be implemented through a group of Hash Functions.
19 | 
20 | Standard Bloom Filters work most efficiently when we know the size of our search space in advance. Suppose for us the size of our search space was N. Now let’s understand how we can insert an element in the Bloom Filter data-structure and how we can answer the Membership queries.
21 | 
22 | Inserting an element into Bloom Filter
23 | Suppose we have an element X which needs to be inserted into the Bloom Filter. We will take the Bloom Filter to be a vector of size N. Remember N was the size of our search space!
24 | 
25 | Initially the Bloom Filter vector has all its values set to 0 denoting an empty filter.
26 | 
27 | We will also have a set of K hash functions H where:
28 | 
29 | H = {h1, h2, h3, … , hk}
30 | 
31 | These hash functions will generate a distributed random value in the range from 0 to N-1.
32 | 
33 | Now, during insertion we will compute the hash value of X with all the K hash functions and set the corresponding addresses in the Bloom Filter vector to 1.
34 | 
35 | 
36 | The Algorithm for this method can be described as follows.
37 | 
38 | 
39 | Searching an element in the Bloom Filter
40 | Suppose we have an element X which needs to be checked if it is present in a set S or not. Given that all the elements of the set S are present in the Bloom Filter (BF), we will use the data-structure to answer the Membership queries.
41 | 
42 | To answer this we again need to perform the hash of element X with all the K hash functions we talked about in the previous section. Once computed, we check whether for all the hash values the corresponding locations in the Bloom Filter vector are set to 1 or not. If any one of the locations is set to 0, we can safely say that the element does not exist in the set.
43 | 
44 | The algorithm for the operation looks like this.
45 | 
46 | It’s safe to say that the run time complexity of both the operations for setting an element into the Bloom Filters and the Membership query is O(K) where K is the size of the set of Hash Functions i.e. the number of distinct hash functions used in the Bloom Filter.
47 | 
48 | Conclusion
49 | We discussed Bloom Filters in detail along with the implementation process of a Standard Bloom Filter. We also looked into how we can insert and search for an element in the Bloom Filter along with their run-time complexity and code demonstrations.
50 | 
51 | 
52 | 


--------------------------------------------------------------------------------
/parallel-execution/articles/graph-db.txt:
--------------------------------------------------------------------------------
 1 | Graph Database - Trying out Neo4J
 2 | Author: Saurav Prateek
 3 | 
 4 | Introduction
 5 | You must have heard of Graph data structures before. If not, a graph data structure stores data in the format of Nodes and Relationships between them which are also called Edges. A Graph database is similar to this structure which also stores relationship details between multiple entities which can be viewed in a graphical format. These structures can prove to be white-board friendly.
 6 | 
 7 | Neo4J is a popular Graph database that stores nodes and relationships instead of classic tables or documents. As we mentioned before, the stored data is white-board friendly which means we can easily sketch the structure and ideas out on a whiteboard.
 8 | 
 9 | Data Model for Neo4J
10 | Let’s explore what the Data Model in a Neo4J database is composed of. In Neo4J the information is organized as nodes, relationships and properties.
11 | 
12 | Nodes
13 | Nodes can be considered as the core entities in the Graph database. Each node can have a Label which determines the Type of node in our database. These nodes can also have multiple properties.
14 | 
15 | A basic Node can look like this in a Neo4J database.
16 | 
17 | Properties
18 | As we discussed, every Node can be associated with a set of properties. The properties associated with every node can be described in the form of key-value pairs.
19 | 
20 | Relationships
21 | Nodes can be related to each other through Relationships in the Neo4J database. This is equivalent to the Edges in the graph data structure. In the Graph data-structure we could add an undirected edge between two nodes but in Neo4J it’s mandatory to add a direction while creating a relationship between two nodes.
22 | 
23 | Although Neo4J doesn’t allow us to create an undirected relationship, we can query an undirected relationship which allows Neo4J to ignore any particular direction and retrieve the relationship and connected nodes, no matter what the physical direction is.
24 | 
25 | This allows the queries to be flexible and not force the user to know the physical direction of the relationship stored in the database.
26 | 
27 | Cypher - A query language for Neo4J database
28 | Cypher is Neo4J’s graph query language that lets you retrieve data from the graph. Let’s try out Cypher to:
29 | 
30 | Create Nodes with a specific set of Properties.
31 | Relationships between the Nodes.
32 | Query / Visualize the Nodes and their Relationships created.
33 | 
34 | Creating Nodes with a specific set of Properties
35 | Let’s write queries in Cypher to create some Nodes with a specific set of Properties.
36 | 
37 | Create 3 Nodes of Type: Person
38 | 
39 | The above Cypher query will create 3 Person nodes with the mentioned properties. I query the database to check if the nodes were created and it looks like this.
40 | 
41 | Create a Node of Type: Post
42 | 
43 | The above Cypher query will create 1 Post node with the mentioned properties. I query the database to check if the nodes were created and it looks like this.
44 | 
45 | Creating Relationships between the Nodes
46 | Let’s write queries in Cypher to create Relationships between the previously created nodes. We will be creating the following relationships.
47 | 
48 | Person: Max IS_FOLLOWED_BY Person: Dave
49 | Person: Max FOLLOWS Person: Alex
50 | Person: Max SHARED Post: “Hey I am Saurav!”
51 | Person: Dave LIKED Post: “Hey I am Saurav!”
52 | Person: Alex COMMENTED_ON Post: “Hey I am Saurav!”
53 | 
54 | Create 4 Relationships between Nodes
55 | 
56 | The above Cypher query will create 4 Relationships between the nodes. I query the database to check if the relationships were created and it looks like this.
57 | 
58 | 
59 | We created an entire Graph where we have multiple People who can follow each other and can Like, Share or Comment on a Post.
60 | 
61 | I hope this hands-on was helpful!
62 | 
63 | 
64 | 
65 | 


--------------------------------------------------------------------------------
/prompt-chaining-design-pattern/agent.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from typing_extensions import TypedDict\n",
 10 |     "from langchain_openai import ChatOpenAI\n",
 11 |     "from langchain_core.prompts import ChatPromptTemplate\n",
 12 |     "from langgraph.graph import END, StateGraph, START\n",
 13 |     "from langchain_core.output_parsers import StrOutputParser\n",
 14 |     "\n",
 15 |     "from dotenv import load_dotenv"
 16 |    ]
 17 |   },
 18 |   {
 19 |    "cell_type": "code",
 20 |    "execution_count": 2,
 21 |    "metadata": {},
 22 |    "outputs": [],
 23 |    "source": [
 24 |     "class SharedState(TypedDict):\n",
 25 |     "    query: str\n",
 26 |     "    model: ChatOpenAI\n",
 27 |     "    from_ml_topic: bool\n",
 28 |     "    ai_answer: str\n",
 29 |     "\n",
 30 |     "class GraderOutput(TypedDict):\n",
 31 |     "    from_machine_learning_topic: bool"
 32 |    ]
 33 |   },
 34 |   {
 35 |    "cell_type": "code",
 36 |    "execution_count": 3,
 37 |    "metadata": {},
 38 |    "outputs": [],
 39 |    "source": [
 40 |     "def build_model(shared_state: SharedState):\n",
 41 |     "    model = ChatOpenAI(model = 'gpt-4o-mini')\n",
 42 |     "    shared_state['model'] = model\n",
 43 |     "    return shared_state"
 44 |    ]
 45 |   },
 46 |   {
 47 |    "cell_type": "code",
 48 |    "execution_count": 5,
 49 |    "metadata": {},
 50 |    "outputs": [],
 51 |    "source": [
 52 |     "def get_query_topic(shared_state: SharedState):\n",
 53 |     "    print(\"Determining if the query is related to machine learning topics...\")\n",
 54 |     "    \n",
 55 |     "    prompt = \"\"\"\n",
 56 |     "You are a classifier that determines if a user's query is related to machine learning topics.\n",
 57 |     "Given the user's query, return True if it is related to machine learning topics, otherwise return False.\n",
 58 |     "    \"\"\"\n",
 59 |     "\n",
 60 |     "    model = shared_state['model']\n",
 61 |     "    structured_llm_grader = model.with_structured_output(GraderOutput)\n",
 62 |     "    query = shared_state['query']\n",
 63 |     "\n",
 64 |     "    grade_prompt = ChatPromptTemplate.from_messages(\n",
 65 |     "        [\n",
 66 |     "            (\"system\", prompt),\n",
 67 |     "            (\"human\", \"User's Query: \\n\\n {query}\"),\n",
 68 |     "        ]\n",
 69 |     "    )\n",
 70 |     "\n",
 71 |     "    retrieval_grader = grade_prompt | structured_llm_grader\n",
 72 |     "\n",
 73 |     "    result = retrieval_grader.invoke({\"query\": query})\n",
 74 |     "    shared_state['from_ml_topic'] = result['from_machine_learning_topic']\n",
 75 |     "\n",
 76 |     "    return shared_state\n"
 77 |    ]
 78 |   },
 79 |   {
 80 |    "cell_type": "code",
 81 |    "execution_count": 6,
 82 |    "metadata": {},
 83 |    "outputs": [],
 84 |    "source": [
 85 |     "def grader_node(shared_state: SharedState):\n",
 86 |     "    if shared_state['from_ml_topic']:\n",
 87 |     "        return \"continue\"\n",
 88 |     "\n",
 89 |     "    return \"exit\""
 90 |    ]
 91 |   },
 92 |   {
 93 |    "cell_type": "code",
 94 |    "execution_count": 7,
 95 |    "metadata": {},
 96 |    "outputs": [],
 97 |    "source": [
 98 |     "def answer_query(shared_state: SharedState):\n",
 99 |     "    print(\"Answering the user's query...\")\n",
100 |     "    prompt = \"\"\"\n",
101 |     "You are an expert in machine learning. Answer the user's query under 200 words.\n",
102 |     "    \"\"\"\n",
103 |     "    model = shared_state['model']\n",
104 |     "    query = shared_state['query']\n",
105 |     "    answer_prompt = ChatPromptTemplate.from_messages(\n",
106 |     "        [\n",
107 |     "            (\"system\", prompt),\n",
108 |     "            (\"human\", \"User's Query: \\n\\n {query}\"),\n",
109 |     "        ]\n",
110 |     "    )\n",
111 |     "    answer_chain = answer_prompt | model | StrOutputParser()\n",
112 |     "    result = answer_chain.invoke({\"query\": query})\n",
113 |     "    shared_state['ai_answer'] = result\n",
114 |     "\n",
115 |     "    return shared_state"
116 |    ]
117 |   },
118 |   {
119 |    "cell_type": "code",
120 |    "execution_count": 8,
121 |    "metadata": {},
122 |    "outputs": [],
123 |    "source": [
124 |     "def build_graph():\n",
125 |     "    workflow = StateGraph(SharedState)\n",
126 |     "\n",
127 |     "    # Add Nodes\n",
128 |     "    workflow.add_node(build_model, \"build_model\")\n",
129 |     "    workflow.add_node(get_query_topic, \"get_query_topic\")\n",
130 |     "    workflow.add_node(answer_query, \"answer_query\")\n",
131 |     "\n",
132 |     "    workflow.add_edge(START, \"build_model\")\n",
133 |     "    workflow.add_edge(\"build_model\", \"get_query_topic\")\n",
134 |     "    workflow.add_conditional_edges(\n",
135 |     "        \"get_query_topic\", \n",
136 |     "        grader_node, \n",
137 |     "        { \n",
138 |     "            \"continue\": \"answer_query\",\n",
139 |     "            \"exit\": END \n",
140 |     "        }\n",
141 |     "    )\n",
142 |     "    workflow.add_edge(\"answer_query\", END)\n",
143 |     "\n",
144 |     "    return workflow.compile()"
145 |    ]
146 |   },
147 |   {
148 |    "cell_type": "code",
149 |    "execution_count": 13,
150 |    "metadata": {},
151 |    "outputs": [],
152 |    "source": [
153 |     "# Query 1: \"What are the latest advancements in machine learning?\"\n",
154 |     "# Query 2: \"What is the capital of India?\"\n",
155 |     "def execute_prompt_chain_workflow():\n",
156 |     "    workflow = build_graph()\n",
157 |     "    initial_state: SharedState = {\n",
158 |     "        \"query\": \"What is the capital of India?\",\n",
159 |     "    }\n",
160 |     "\n",
161 |     "    agent_response = workflow.invoke(initial_state)\n",
162 |     "    print(agent_response)\n",
163 |     "\n",
164 |     "    if agent_response['from_ml_topic']:\n",
165 |     "        print(\"AI's Answer:\", agent_response['ai_answer'])\n",
166 |     "    else:\n",
167 |     "        print(\"The query is not related to machine learning topics.\")\n"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 14,
173 |    "metadata": {},
174 |    "outputs": [
175 |     {
176 |      "name": "stdout",
177 |      "output_type": "stream",
178 |      "text": [
179 |       "Determining if the query is related to machine learning topics...\n",
180 |       "{'query': 'What is the capital of India?', 'model': ChatOpenAI(client=<openai.resources.chat.completions.completions.Completions object at 0x121b37ce0>, async_client=<openai.resources.chat.completions.completions.AsyncCompletions object at 0x1225d11d0>, root_client=<openai.OpenAI object at 0x12231a490>, root_async_client=<openai.AsyncOpenAI object at 0x12231bed0>, model_name='gpt-4o-mini', model_kwargs={}, openai_api_key=SecretStr('**********')), 'from_ml_topic': False}\n",
181 |       "The query is not related to machine learning topics.\n"
182 |      ]
183 |     }
184 |    ],
185 |    "source": [
186 |     "load_dotenv()\n",
187 |     "execute_prompt_chain_workflow()"
188 |    ]
189 |   }
190 |  ],
191 |  "metadata": {
192 |   "kernelspec": {
193 |    "display_name": "saurav-env",
194 |    "language": "python",
195 |    "name": "python3"
196 |   },
197 |   "language_info": {
198 |    "codemirror_mode": {
199 |     "name": "ipython",
200 |     "version": 3
201 |    },
202 |    "file_extension": ".py",
203 |    "mimetype": "text/x-python",
204 |    "name": "python",
205 |    "nbconvert_exporter": "python",
206 |    "pygments_lexer": "ipython3",
207 |    "version": "3.13.1"
208 |   }
209 |  },
210 |  "nbformat": 4,
211 |  "nbformat_minor": 2
212 | }
213 | 


--------------------------------------------------------------------------------
/prompt-chaining-design-pattern/design.png:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/prompt-chaining-design-pattern/design.png


--------------------------------------------------------------------------------
/routing-design-pattern/agent.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [
  8 |     {
  9 |      "name": "stderr",
 10 |      "output_type": "stream",
 11 |      "text": [
 12 |       "USER_AGENT environment variable not set, consider setting it to identify your requests.\n"
 13 |      ]
 14 |     }
 15 |    ],
 16 |    "source": [
 17 |     "from pydantic import BaseModel\n",
 18 |     "from typing_extensions import Literal, TypedDict\n",
 19 |     "from pydantic import Field\n",
 20 |     "from langchain_openai import ChatOpenAI\n",
 21 |     "from langgraph.graph import END, StateGraph, START\n",
 22 |     "from langchain_core.prompts import ChatPromptTemplate\n",
 23 |     "from langchain_community.document_loaders import WebBaseLoader\n",
 24 |     "\n",
 25 |     "from dotenv import load_dotenv"
 26 |    ]
 27 |   },
 28 |   {
 29 |    "cell_type": "code",
 30 |    "execution_count": 2,
 31 |    "metadata": {},
 32 |    "outputs": [],
 33 |    "source": [
 34 |     "class RouterOutput(TypedDict):\n",
 35 |     "    output: Literal['github', 'medium', 'none'] = Field(\n",
 36 |     "        default='none',\n",
 37 |     "        description=\"The output destination, either 'github' or 'medium'. If neither is suitable, use 'none'.\"\n",
 38 |     "    ),\n",
 39 |     "    user_handle: str = Field(\n",
 40 |     "        default='none',\n",
 41 |     "        description=\"The user's profile handle for the specified output destination.\"\n",
 42 |     "    )"
 43 |    ]
 44 |   },
 45 |   {
 46 |    "cell_type": "code",
 47 |    "execution_count": 4,
 48 |    "metadata": {},
 49 |    "outputs": [],
 50 |    "source": [
 51 |     "class SharedState(TypedDict):\n",
 52 |     "    query: str\n",
 53 |     "    user_handle: str\n",
 54 |     "    destination: Literal['github', 'medium']\n",
 55 |     "    profile_summary: str\n",
 56 |     "    model: ChatOpenAI"
 57 |    ]
 58 |   },
 59 |   {
 60 |    "cell_type": "code",
 61 |    "execution_count": 5,
 62 |    "metadata": {},
 63 |    "outputs": [],
 64 |    "source": [
 65 |     "def build_model(shared_state: SharedState):\n",
 66 |     "    shared_state['model'] = ChatOpenAI(model=\"gpt-4o\", temperature=0)\n",
 67 |     "    return shared_state"
 68 |    ]
 69 |   },
 70 |   {
 71 |    "cell_type": "code",
 72 |    "execution_count": 6,
 73 |    "metadata": {},
 74 |    "outputs": [],
 75 |    "source": [
 76 |     "def router_agent(shared_state: SharedState):\n",
 77 |     "    prompt = f\"\"\"\n",
 78 |     "    You are an expert social media profile assistant. Based on the user's preference, you will determine whether to pull their GitHub or Medium profile details.\n",
 79 |     "    \n",
 80 |     "    Ensure your response is a valid JSON object matching the RouterOutput schema.\n",
 81 |     "    \"\"\"\n",
 82 |     "\n",
 83 |     "    model_with_structured_output = shared_state['model'].with_structured_output(RouterOutput)\n",
 84 |     "    grade_prompt = ChatPromptTemplate.from_messages(\n",
 85 |     "        [\n",
 86 |     "            (\"system\", prompt),\n",
 87 |     "            (\"human\", \"User's Query: \\n\\n {query}\"),\n",
 88 |     "        ]\n",
 89 |     "    )\n",
 90 |     "\n",
 91 |     "    retrieval_grader = grade_prompt | model_with_structured_output\n",
 92 |     "    response = retrieval_grader.invoke({\"query\": shared_state['query']})\n",
 93 |     "    shared_state['destination'] = response[\"output\"]\n",
 94 |     "    shared_state['user_handle'] = response[\"user_handle\"]\n",
 95 |     "\n",
 96 |     "    return shared_state"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 15,
102 |    "metadata": {},
103 |    "outputs": [
104 |     {
105 |      "name": "stdout",
106 |      "output_type": "stream",
107 |      "text": [
108 |       "medium SauravP97\n"
109 |      ]
110 |     }
111 |    ],
112 |    "source": [
113 |     "load_dotenv()\n",
114 |     "shared_state = build_model({\n",
115 |     "    \"query\": \"Can you summarize medium profile of SauravP97\",\n",
116 |     "})\n",
117 |     "router_agent(shared_state)\n",
118 |     "print(shared_state['destination'], shared_state['user_handle'])"
119 |    ]
120 |   },
121 |   {
122 |    "cell_type": "code",
123 |    "execution_count": 16,
124 |    "metadata": {},
125 |    "outputs": [],
126 |    "source": [
127 |     "# Conditional edge function to route to the appropriate node\n",
128 |     "def route_destination(shared_state: SharedState):\n",
129 |     "    # Return the node name you want to visit next\n",
130 |     "    if shared_state[\"destination\"] == \"github\":\n",
131 |     "        return \"summarize_github_profile\"\n",
132 |     "    elif shared_state[\"destination\"] == \"medium\":\n",
133 |     "        return \"summarize_medium_profile\"\n",
134 |     "    else:\n",
135 |     "        return \"end_node\""
136 |    ]
137 |   },
138 |   {
139 |    "cell_type": "code",
140 |    "execution_count": 17,
141 |    "metadata": {},
142 |    "outputs": [],
143 |    "source": [
144 |     "# Helper Functions\n",
145 |     "def read_github_profile(user_handle: str):\n",
146 |     "    '''Reads the GitHub profile content for the given GitHub handle.'''\n",
147 |     "    print('Reading GitHub profile content...')\n",
148 |     "    github_profile_url = 'https://www.github.com/' + user_handle\n",
149 |     "    documents = WebBaseLoader(github_profile_url).load()\n",
150 |     "    page_content = ''\n",
151 |     "\n",
152 |     "    for document in documents:\n",
153 |     "        page_content += document.page_content\n",
154 |     "    \n",
155 |     "    return page_content.strip()\n",
156 |     "\n",
157 |     "def read_medium_profile(user_handle: str):\n",
158 |     "    '''Reads the Medium profile content for the given Medium user handle.'''\n",
159 |     "    print('Reading Medium profile content...')\n",
160 |     "    medium_profile_url = f'https://{user_handle}.medium.com'\n",
161 |     "    documents = WebBaseLoader(medium_profile_url).load()\n",
162 |     "    page_content = ''\n",
163 |     "\n",
164 |     "    for document in documents:\n",
165 |     "        page_content += document.page_content\n",
166 |     "    \n",
167 |     "    return page_content.strip()"
168 |    ]
169 |   },
170 |   {
171 |    "cell_type": "code",
172 |    "execution_count": 18,
173 |    "metadata": {},
174 |    "outputs": [],
175 |    "source": [
176 |     "def summarize_github_profile(shared_state: SharedState) -> SharedState:\n",
177 |     "    '''Summarizes the GitHub profile content.'''\n",
178 |     "    print('Summarizing GitHub profile content...') \n",
179 |     "    model = shared_state['model']\n",
180 |     "    github_profile_content = read_github_profile(shared_state['user_handle'])   \n",
181 |     "\n",
182 |     "    prompt = f'''Summarize the following GitHub profile content in a concise manner\n",
183 |     "    and highlight name, organization, followers, location, contact information, key skills, \n",
184 |     "    projects, and contributions.\n",
185 |     "\n",
186 |     "    Github Profile Content: {github_profile_content}\n",
187 |     "    '''\n",
188 |     "    response = model.invoke(prompt)\n",
189 |     "    shared_state['profile_summary'] = response.content.strip()\n",
190 |     "\n",
191 |     "    return shared_state\n"
192 |    ]
193 |   },
194 |   {
195 |    "cell_type": "code",
196 |    "execution_count": 19,
197 |    "metadata": {},
198 |    "outputs": [],
199 |    "source": [
200 |     "def summarize_medium_profile(shared_state: SharedState) -> SharedState:\n",
201 |     "    '''Summarizes the Medium profile content.'''\n",
202 |     "    print('Summarizing Medium profile content...')\n",
203 |     "    model = shared_state['model']\n",
204 |     "    medium_profile_content = read_medium_profile(shared_state['user_handle'])\n",
205 |     "\n",
206 |     "    prompt = f'''Summarize the following Medium profile content in a concise manner\n",
207 |     "    and highlight name, followers, posted articles, and description.\n",
208 |     "\n",
209 |     "    Medium Profile Content: {medium_profile_content}\n",
210 |     "    '''\n",
211 |     "    response = model.invoke(prompt)\n",
212 |     "    shared_state['profile_summary'] = response.content.strip()\n",
213 |     "\n",
214 |     "    return shared_state\n"
215 |    ]
216 |   },
217 |   {
218 |    "cell_type": "code",
219 |    "execution_count": 20,
220 |    "metadata": {},
221 |    "outputs": [],
222 |    "source": [
223 |     "def build_graph():\n",
224 |     "    workflow = StateGraph(SharedState)\n",
225 |     "\n",
226 |     "    # Add Nodes\n",
227 |     "    workflow.add_node(build_model, \"build_model\")\n",
228 |     "    workflow.add_node(router_agent, \"router_agent\")\n",
229 |     "    workflow.add_node(summarize_github_profile, \"summarize_github_profile\")\n",
230 |     "    workflow.add_node(summarize_medium_profile, \"summarize_medium_profile\")\n",
231 |     "\n",
232 |     "    workflow.add_edge(START, \"build_model\")\n",
233 |     "    workflow.add_edge(\"build_model\", \"router_agent\")\n",
234 |     "    workflow.add_conditional_edges(\n",
235 |     "        \"router_agent\", \n",
236 |     "        route_destination, \n",
237 |     "        { \n",
238 |     "            \"summarize_github_profile\": \"summarize_github_profile\",\n",
239 |     "            \"summarize_medium_profile\": \"summarize_medium_profile\",\n",
240 |     "            \"end_node\": END\n",
241 |     "        }\n",
242 |     "    )\n",
243 |     "    workflow.add_edge(\"summarize_github_profile\", END)\n",
244 |     "    workflow.add_edge(\"summarize_medium_profile\", END)\n",
245 |     "\n",
246 |     "    return workflow.compile()"
247 |    ]
248 |   },
249 |   {
250 |    "cell_type": "code",
251 |    "execution_count": 23,
252 |    "metadata": {},
253 |    "outputs": [
254 |     {
255 |      "name": "stdout",
256 |      "output_type": "stream",
257 |      "text": [
258 |       "\n",
259 |       " Profile Type: none\n",
260 |       "\n",
261 |       "\n",
262 |       " No suitable profile found.\n"
263 |      ]
264 |     }
265 |    ],
266 |    "source": [
267 |     "load_dotenv()\n",
268 |     "compiled_graph = build_graph()\n",
269 |     "workflow_response: SharedState = compiled_graph.invoke(\n",
270 |     "    {\n",
271 |     "        \"query\": \"Summarize the linkedin profile for user handle Saurav\"\n",
272 |     "    }\n",
273 |     ")\n",
274 |     "\n",
275 |     "print(f\"\\n Profile Type: {workflow_response['destination']}\")\n",
276 |     "if workflow_response['destination'] != 'none':\n",
277 |     "    print(f\"\\n\\n Profile Summary: {workflow_response['profile_summary']}\")\n",
278 |     "else:\n",
279 |     "    print(\"\\n\\n No suitable profile found.\")"
280 |    ]
281 |   }
282 |  ],
283 |  "metadata": {
284 |   "kernelspec": {
285 |    "display_name": "saurav-env",
286 |    "language": "python",
287 |    "name": "python3"
288 |   },
289 |   "language_info": {
290 |    "codemirror_mode": {
291 |     "name": "ipython",
292 |     "version": 3
293 |    },
294 |    "file_extension": ".py",
295 |    "mimetype": "text/x-python",
296 |    "name": "python",
297 |    "nbconvert_exporter": "python",
298 |    "pygments_lexer": "ipython3",
299 |    "version": "3.13.1"
300 |   }
301 |  },
302 |  "nbformat": 4,
303 |  "nbformat_minor": 2
304 | }
305 | 


--------------------------------------------------------------------------------
/self-rag/__pycache__/prompts.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/self-rag/__pycache__/prompts.cpython-313.pyc


--------------------------------------------------------------------------------
/self-rag/prompts.py:
--------------------------------------------------------------------------------
 1 | DOCUMENT_GRADER_PROMPT = """
 2 |     You are a grader assessing relevance of a retrieved document to a user question. \n 
 3 |     It does not need to be a stringent test. The goal is to filter out erroneous retrievals. \n
 4 |     If the document contains keyword(s) or semantic meaning related to the user question, grade it as relevant. \n
 5 |     Give a binary score 'yes' or 'no' to indicate whether the document is relevant to the question.
 6 | """
 7 | 
 8 | HALLUCINATION_GRADER_PROMPT = """
 9 |     You are a grader assessing whether an LLM generation is grounded in / supported by a set of retrieved facts. \n 
10 | 
11 |     Give a binary score 'yes' or 'no'. 'Yes' means that the answer is grounded in / supported by the set of facts.
12 | """
13 | 
14 | ANSWER_GRADER_PROMPT = """
15 |     You are a grader assessing whether an answer addresses / resolves a question \n 
16 |     Give a binary score 'yes' or 'no'. 'Yes' means that the answer resolves the question.
17 | """


--------------------------------------------------------------------------------
/text-to-sql/Chinook.db:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/text-to-sql/Chinook.db


--------------------------------------------------------------------------------
/tool-calls/__pycache__/article.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/tool-calls/__pycache__/article.cpython-313.pyc


--------------------------------------------------------------------------------
/tool-calls/__pycache__/articles.cpython-313.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/SauravP97/AI-Engineering-101/69b0c825ea2cfc9a43696192230878f9ee3f1dd3/tool-calls/__pycache__/articles.cpython-313.pyc


--------------------------------------------------------------------------------
/tool-calls/articles.py:
--------------------------------------------------------------------------------
 1 | import datetime
 2 | 
 3 | ARTICLE_DB_RESULTS = [
 4 |     {
 5 |         "title": "Understanding AI in 2025",
 6 |         "link": "https://example.com/article1",
 7 |         "views": 1000,
 8 |         "likes": 102,
 9 |         "published_date": datetime.datetime(2025, 1, 10)
10 |     },
11 |     {
12 |         "title": "The Future of Quantum Computing",
13 |         "link": "https://example.com/article2",
14 |         "views": 1500,
15 |         "likes": 250,
16 |         "published_date": datetime.datetime(2025, 6, 15)
17 |     },
18 |     {
19 |         "title": "Advancements in Renewable Energy",
20 |         "link": "https://example.com/article3",
21 |         "views": 800,
22 |         "likes": 80,
23 |         "published_date": datetime.datetime(2025, 4, 20)
24 |     },
25 |     {
26 |         "title": "The Rise of Autonomous Vehicles",
27 |         "link": "https://example.com/article4",
28 |         "views": 1200,
29 |         "likes": 300,
30 |         "published_date": datetime.datetime(2025, 3, 5)
31 |     },
32 |     {
33 |         "title": "Exploring the Metaverse",
34 |         "link": "https://example.com/article5",
35 |         "views": 900,
36 |         "likes": 150,
37 |         "published_date": datetime.datetime(2025, 2, 28)
38 |     },
39 |     {
40 |         "title": "AI Ethics and Governance",
41 |         "link": "https://example.com/article6",
42 |         "views": 1100,
43 |         "likes": 200,
44 |         "published_date": datetime.datetime(2025, 5, 10)
45 |     },
46 |     {
47 |         "title": "The Impact of 5G Technology",
48 |         "link": "https://example.com/article7",
49 |         "views": 950,
50 |         "likes": 120,
51 |         "published_date": datetime.datetime(2024, 7, 1)
52 |     },
53 |     {
54 |         "title": "Blockchain Beyond Cryptocurrency",
55 |         "link": "https://example.com/article8",
56 |         "views": 1300,
57 |         "likes": 400,
58 |         "published_date": datetime.datetime(2024, 8, 20)
59 |     }
60 | ]


--------------------------------------------------------------------------------
/tool-calls/newsletter-db-call.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from langchain_openai import ChatOpenAI\n",
 10 |     "from dotenv import load_dotenv\n",
 11 |     "from langchain_core.tools import tool"
 12 |    ]
 13 |   },
 14 |   {
 15 |    "cell_type": "code",
 16 |    "execution_count": 2,
 17 |    "metadata": {},
 18 |    "outputs": [],
 19 |    "source": [
 20 |     "import datetime\n",
 21 |     "from articles import ARTICLE_DB_RESULTS"
 22 |    ]
 23 |   },
 24 |   {
 25 |    "cell_type": "code",
 26 |    "execution_count": 3,
 27 |    "metadata": {},
 28 |    "outputs": [
 29 |     {
 30 |      "data": {
 31 |       "text/plain": [
 32 |        "8"
 33 |       ]
 34 |      },
 35 |      "execution_count": 3,
 36 |      "metadata": {},
 37 |      "output_type": "execute_result"
 38 |     }
 39 |    ],
 40 |    "source": [
 41 |     "len(ARTICLE_DB_RESULTS)"
 42 |    ]
 43 |   },
 44 |   {
 45 |    "cell_type": "code",
 46 |    "execution_count": 4,
 47 |    "metadata": {},
 48 |    "outputs": [],
 49 |    "source": [
 50 |     "@tool\n",
 51 |     "def get_top_article_by_view(number_of_articles: int) -> list:\n",
 52 |     "    \"\"\"Get the top article by views. Return top `number_of_articles` articles\"\"\"\n",
 53 |     "    top_articles = sorted(ARTICLE_DB_RESULTS, key=lambda x: x['views'], reverse=True)\n",
 54 |     "    \n",
 55 |     "    return top_articles[:number_of_articles]\n",
 56 |     "\n",
 57 |     "\n",
 58 |     "@tool\n",
 59 |     "def get_top_article_by_likes(number_of_articles: int) -> list:\n",
 60 |     "    \"\"\"Get the top article by likes. Return top `number_of_articles` articles\"\"\"\n",
 61 |     "    top_articles = sorted(ARTICLE_DB_RESULTS, key=lambda x: x['likes'], reverse=True)\n",
 62 |     "    \n",
 63 |     "    return top_articles[:number_of_articles]\n",
 64 |     "\n",
 65 |     "\n",
 66 |     "@tool\n",
 67 |     "def get_most_recent_articles(number_of_articles: int) -> list:\n",
 68 |     "    \"\"\"Get the most recent articles. Return top `number_of_articles` articles\"\"\"\n",
 69 |     "    top_articles = sorted(\n",
 70 |     "        ARTICLE_DB_RESULTS, \n",
 71 |     "        key=lambda x: x['published_date'], \n",
 72 |     "        reverse=True)\n",
 73 |     "    \n",
 74 |     "    return top_articles[:number_of_articles]\n",
 75 |     "\n",
 76 |     "\n",
 77 |     "@tool\n",
 78 |     "def get_all_articles() -> list:\n",
 79 |     "    \"\"\"Get all articles.\"\"\"\n",
 80 |     "    return ARTICLE_DB_RESULTS"
 81 |    ]
 82 |   },
 83 |   {
 84 |    "cell_type": "code",
 85 |    "execution_count": 5,
 86 |    "metadata": {},
 87 |    "outputs": [],
 88 |    "source": [
 89 |     "def execute_tool_calls(tool_calls: list) -> list:\n",
 90 |     "    result = []\n",
 91 |     "\n",
 92 |     "    for tool_call in tool_calls:\n",
 93 |     "        print(f'Executing Tool call: {tool_call['name']}')\n",
 94 |     "        print(f'Arguments: {tool_call[\"args\"]}')\n",
 95 |     "        \n",
 96 |     "        if tool_call['name'] == 'get_top_article_by_view':\n",
 97 |     "            result.append({\n",
 98 |     "                'name': tool_call['name'],\n",
 99 |     "                'result': get_top_article_by_view.invoke(tool_call['args'])\n",
100 |     "            })\n",
101 |     "        \n",
102 |     "        if tool_call['name'] == 'get_top_article_by_likes':\n",
103 |     "            result.append({\n",
104 |     "                'name': tool_call['name'],\n",
105 |     "                'result': get_top_article_by_likes.invoke(tool_call['args'])\n",
106 |     "            }) \n",
107 |     "        \n",
108 |     "        if tool_call['name'] == 'get_most_recent_articles':\n",
109 |     "            result.append({\n",
110 |     "                'name': tool_call['name'],\n",
111 |     "                'result': get_most_recent_articles.invoke(tool_call['args'])\n",
112 |     "            })\n",
113 |     "        \n",
114 |     "        if tool_call['name'] == 'get_all_articles':\n",
115 |     "            result.append({\n",
116 |     "                'name': tool_call['name'],\n",
117 |     "                'result': get_all_articles.invoke(tool_call['args'])\n",
118 |     "            })\n",
119 |     "        \n",
120 |     "    return result"
121 |    ]
122 |   },
123 |   {
124 |    "cell_type": "code",
125 |    "execution_count": 6,
126 |    "metadata": {},
127 |    "outputs": [],
128 |    "source": [
129 |     "def initiate_the_agent(query, debug_mode = False) -> list:\n",
130 |     "    \"\"\"Initiate the agent with the given query.\"\"\"\n",
131 |     "    load_dotenv()\n",
132 |     "\n",
133 |     "    tools = [\n",
134 |     "        get_top_article_by_view,\n",
135 |     "        get_top_article_by_likes,\n",
136 |     "        get_most_recent_articles,\n",
137 |     "        get_all_articles\n",
138 |     "    ]\n",
139 |     "\n",
140 |     "    llm = ChatOpenAI(model=\"gpt-3.5-turbo\", temperature=0.0)\n",
141 |     "    llm_with_tools = llm.bind_tools(tools)\n",
142 |     "    \n",
143 |     "    response = llm_with_tools.invoke(query)\n",
144 |     "    \n",
145 |     "    if debug_mode:\n",
146 |     "        print(response.tool_calls)\n",
147 |     "    \n",
148 |     "    return execute_tool_calls(response.tool_calls)"
149 |    ]
150 |   },
151 |   {
152 |    "cell_type": "code",
153 |    "execution_count": 8,
154 |    "metadata": {},
155 |    "outputs": [
156 |     {
157 |      "name": "stdout",
158 |      "output_type": "stream",
159 |      "text": [
160 |       "[{'name': 'get_top_article_by_likes', 'args': {'number_of_articles': 3}, 'id': 'call_L7KUpn99X47sbRBqRraW9idA', 'type': 'tool_call'}, {'name': 'get_top_article_by_view', 'args': {'number_of_articles': 3}, 'id': 'call_BbSXbHFQ62FNy6rtEhFOFcNu', 'type': 'tool_call'}]\n",
161 |       "Executing Tool call: get_top_article_by_likes\n",
162 |       "Arguments: {'number_of_articles': 3}\n",
163 |       "Executing Tool call: get_top_article_by_view\n",
164 |       "Arguments: {'number_of_articles': 3}\n"
165 |      ]
166 |     },
167 |     {
168 |      "data": {
169 |       "text/plain": [
170 |        "[{'name': 'get_top_article_by_likes',\n",
171 |        "  'result': [{'title': 'Blockchain Beyond Cryptocurrency',\n",
172 |        "    'link': 'https://example.com/article8',\n",
173 |        "    'views': 1300,\n",
174 |        "    'likes': 400,\n",
175 |        "    'published_date': datetime.datetime(2024, 8, 20, 0, 0)},\n",
176 |        "   {'title': 'The Rise of Autonomous Vehicles',\n",
177 |        "    'link': 'https://example.com/article4',\n",
178 |        "    'views': 1200,\n",
179 |        "    'likes': 300,\n",
180 |        "    'published_date': datetime.datetime(2025, 3, 5, 0, 0)},\n",
181 |        "   {'title': 'The Future of Quantum Computing',\n",
182 |        "    'link': 'https://example.com/article2',\n",
183 |        "    'views': 1500,\n",
184 |        "    'likes': 250,\n",
185 |        "    'published_date': datetime.datetime(2025, 6, 15, 0, 0)}]},\n",
186 |        " {'name': 'get_top_article_by_view',\n",
187 |        "  'result': [{'title': 'The Future of Quantum Computing',\n",
188 |        "    'link': 'https://example.com/article2',\n",
189 |        "    'views': 1500,\n",
190 |        "    'likes': 250,\n",
191 |        "    'published_date': datetime.datetime(2025, 6, 15, 0, 0)},\n",
192 |        "   {'title': 'Blockchain Beyond Cryptocurrency',\n",
193 |        "    'link': 'https://example.com/article8',\n",
194 |        "    'views': 1300,\n",
195 |        "    'likes': 400,\n",
196 |        "    'published_date': datetime.datetime(2024, 8, 20, 0, 0)},\n",
197 |        "   {'title': 'The Rise of Autonomous Vehicles',\n",
198 |        "    'link': 'https://example.com/article4',\n",
199 |        "    'views': 1200,\n",
200 |        "    'likes': 300,\n",
201 |        "    'published_date': datetime.datetime(2025, 3, 5, 0, 0)}]}]"
202 |       ]
203 |      },
204 |      "execution_count": 8,
205 |      "metadata": {},
206 |      "output_type": "execute_result"
207 |     }
208 |    ],
209 |    "source": [
210 |     "# query1: \"Can you give me top 3 most liked and viewed articles?\"\n",
211 |     "# query 2: \"Can you give me top 5 most recent articles?\"\n",
212 |     "response = initiate_the_agent(\"Can you give me top 3 most liked and viewed articles?\", True)\n",
213 |     "response"
214 |    ]
215 |   }
216 |  ],
217 |  "metadata": {
218 |   "kernelspec": {
219 |    "display_name": "saurav-env",
220 |    "language": "python",
221 |    "name": "python3"
222 |   },
223 |   "language_info": {
224 |    "codemirror_mode": {
225 |     "name": "ipython",
226 |     "version": 3
227 |    },
228 |    "file_extension": ".py",
229 |    "mimetype": "text/x-python",
230 |    "name": "python",
231 |    "nbconvert_exporter": "python",
232 |    "pygments_lexer": "ipython3",
233 |    "version": "3.13.1"
234 |   }
235 |  },
236 |  "nbformat": 4,
237 |  "nbformat_minor": 2
238 | }
239 | 


--------------------------------------------------------------------------------
/tool-calls/tool-call.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 1,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from langchain_openai import ChatOpenAI\n",
 10 |     "from dotenv import load_dotenv"
 11 |    ]
 12 |   },
 13 |   {
 14 |    "cell_type": "code",
 15 |    "execution_count": 3,
 16 |    "metadata": {},
 17 |    "outputs": [
 18 |     {
 19 |      "data": {
 20 |       "text/plain": [
 21 |        "True"
 22 |       ]
 23 |      },
 24 |      "execution_count": 3,
 25 |      "metadata": {},
 26 |      "output_type": "execute_result"
 27 |     }
 28 |    ],
 29 |    "source": [
 30 |     "load_dotenv()"
 31 |    ]
 32 |   },
 33 |   {
 34 |    "cell_type": "code",
 35 |    "execution_count": 17,
 36 |    "metadata": {},
 37 |    "outputs": [],
 38 |    "source": [
 39 |     "from langchain_core.tools import tool\n",
 40 |     "\n",
 41 |     "@tool\n",
 42 |     "def multiply(x: float, y:float):\n",
 43 |     "    \"\"\" Multiplies two numbers x and y \"\"\"\n",
 44 |     "    return x * y\n",
 45 |     "\n",
 46 |     "@tool\n",
 47 |     "def add(x: float, y:float):\n",
 48 |     "    \"\"\" Adds two numbers x and y \"\"\"\n",
 49 |     "    return x + y\n",
 50 |     "\n",
 51 |     "@tool\n",
 52 |     "def subtract(x: float, y:float):\n",
 53 |     "    \"\"\" Subtracts y from x \"\"\"\n",
 54 |     "    return x - y\n",
 55 |     "\n",
 56 |     "@tool\n",
 57 |     "def divide(x:float, y:float):\n",
 58 |     "    \"\"\" Divides x by y \"\"\"\n",
 59 |     "    if y == 0:\n",
 60 |     "        return \"Infinity\"\n",
 61 |     "    return x / y"
 62 |    ]
 63 |   },
 64 |   {
 65 |    "cell_type": "code",
 66 |    "execution_count": 30,
 67 |    "metadata": {},
 68 |    "outputs": [],
 69 |    "source": [
 70 |     "tools = [add, multiply, subtract, divide]\n",
 71 |     "model = ChatOpenAI(model=\"gpt-4\", temperature=0.0)\n",
 72 |     "model_with_tools = model.bind_tools(tools)\n",
 73 |     "\n",
 74 |     "# What is 2 + 3 and then multiply the result by 4?\n",
 75 |     "# 711 = 3555 * x, Find x?\n",
 76 |     "# 5.9 = x + 5.11, Find x?\n",
 77 |     "response = model_with_tools.invoke(\n",
 78 |     "    \"5.9 = x + 5.11, Find x?\"\n",
 79 |     ")"
 80 |    ]
 81 |   },
 82 |   {
 83 |    "cell_type": "code",
 84 |    "execution_count": 31,
 85 |    "metadata": {},
 86 |    "outputs": [
 87 |     {
 88 |      "name": "stdout",
 89 |      "output_type": "stream",
 90 |      "text": [
 91 |       "[{'name': 'subtract', 'args': {'x': 5.9, 'y': 5.11}, 'id': 'call_SHQmC54oV4q2m7FVZCUfE68m', 'type': 'tool_call'}]\n"
 92 |      ]
 93 |     }
 94 |    ],
 95 |    "source": [
 96 |     "print(response.tool_calls)"
 97 |    ]
 98 |   },
 99 |   {
100 |    "cell_type": "code",
101 |    "execution_count": 32,
102 |    "metadata": {},
103 |    "outputs": [
104 |     {
105 |      "name": "stdout",
106 |      "output_type": "stream",
107 |      "text": [
108 |       "Tool call result: 0.79\n"
109 |      ]
110 |     }
111 |    ],
112 |    "source": [
113 |     "def execute_tool_calls(tool_calls):\n",
114 |     "    for tool_call in tool_calls:\n",
115 |     "        if tool_call[\"name\"] == 'add':\n",
116 |     "            result = add.invoke(tool_call[\"args\"])\n",
117 |     "        if tool_call[\"name\"] == 'multiply':\n",
118 |     "            result = multiply.invoke(tool_call[\"args\"])\n",
119 |     "        if tool_call[\"name\"] == 'subtract':\n",
120 |     "            result = subtract.invoke(tool_call[\"args\"])\n",
121 |     "        if tool_call[\"name\"] == 'divide':\n",
122 |     "            result = divide.invoke(tool_call[\"args\"])\n",
123 |     "        \n",
124 |     "        print(f\"Tool call result: {result}\")\n",
125 |     "\n",
126 |     "execute_tool_calls(response.tool_calls)"
127 |    ]
128 |   }
129 |  ],
130 |  "metadata": {
131 |   "kernelspec": {
132 |    "display_name": "saurav-env",
133 |    "language": "python",
134 |    "name": "python3"
135 |   },
136 |   "language_info": {
137 |    "codemirror_mode": {
138 |     "name": "ipython",
139 |     "version": 3
140 |    },
141 |    "file_extension": ".py",
142 |    "mimetype": "text/x-python",
143 |    "name": "python",
144 |    "nbconvert_exporter": "python",
145 |    "pygments_lexer": "ipython3",
146 |    "version": "3.13.1"
147 |   }
148 |  },
149 |  "nbformat": 4,
150 |  "nbformat_minor": 2
151 | }
152 | 


--------------------------------------------------------------------------------
/vector-embeddings/vector_embed.ipynb:
--------------------------------------------------------------------------------
  1 | {
  2 |  "cells": [
  3 |   {
  4 |    "cell_type": "code",
  5 |    "execution_count": 4,
  6 |    "metadata": {},
  7 |    "outputs": [],
  8 |    "source": [
  9 |     "from google.genai import types\n",
 10 |     "from google import genai\n",
 11 |     "from dotenv import load_dotenv\n",
 12 |     "\n",
 13 |     "import numpy as np\n",
 14 |     "import os"
 15 |    ]
 16 |   },
 17 |   {
 18 |    "cell_type": "code",
 19 |    "execution_count": 6,
 20 |    "metadata": {},
 21 |    "outputs": [],
 22 |    "source": [
 23 |     "load_dotenv()\n",
 24 |     "API_KEY = os.getenv(\"API_KEY\")"
 25 |    ]
 26 |   },
 27 |   {
 28 |    "cell_type": "code",
 29 |    "execution_count": 7,
 30 |    "metadata": {},
 31 |    "outputs": [],
 32 |    "source": [
 33 |     "client = genai.Client(\n",
 34 |     "    api_key=API_KEY, http_options=types.HttpOptions(api_version=\"v1alpha\")\n",
 35 |     ")"
 36 |    ]
 37 |   },
 38 |   {
 39 |    "cell_type": "code",
 40 |    "execution_count": 8,
 41 |    "metadata": {},
 42 |    "outputs": [],
 43 |    "source": [
 44 |     "word = \"health care\"\n",
 45 |     "\n",
 46 |     "response = client.models.embed_content(\n",
 47 |     "    model='gemini-embedding-001',\n",
 48 |     "    contents=word,\n",
 49 |     ")\n",
 50 |     "\n",
 51 |     "word_embedding = response.embeddings[0].values"
 52 |    ]
 53 |   },
 54 |   {
 55 |    "cell_type": "code",
 56 |    "execution_count": 9,
 57 |    "metadata": {},
 58 |    "outputs": [
 59 |     {
 60 |      "name": "stdout",
 61 |      "output_type": "stream",
 62 |      "text": [
 63 |       "[0.003963938, 0.028064894, -0.007084594, -0.038220156, -0.019706577, 0.0020031198, 0.030192053, 0.0043494836, 0.024036549, 0.013419201, -0.011595545, -0.01581009, -0.011495624, 0.03382001, 0.09447588, -0.0057725194, -0.0136032365, 0.006117389, -0.013846604, 0.011768127]\n"
 64 |      ]
 65 |     }
 66 |    ],
 67 |    "source": [
 68 |     "print(word_embedding[:20])"
 69 |    ]
 70 |   },
 71 |   {
 72 |    "cell_type": "code",
 73 |    "execution_count": 10,
 74 |    "metadata": {},
 75 |    "outputs": [],
 76 |    "source": [
 77 |     "def calculate_cosine_similarity_numpy(vec1, vec2):\n",
 78 |     "  dot_product = np.dot(vec1, vec2)\n",
 79 |     "  norm_vec1 = np.linalg.norm(vec1)\n",
 80 |     "  norm_vec2 = np.linalg.norm(vec2)\n",
 81 |     "\n",
 82 |     "  if norm_vec1 == 0 or norm_vec2 == 0:\n",
 83 |     "    return 0  # Handle cases where one or both vectors are zero vectors\n",
 84 |     "\n",
 85 |     "  return dot_product / (norm_vec1 * norm_vec2)"
 86 |    ]
 87 |   },
 88 |   {
 89 |    "cell_type": "code",
 90 |    "execution_count": 11,
 91 |    "metadata": {},
 92 |    "outputs": [],
 93 |    "source": [
 94 |     "# word2 = \"medical insurance\"\n",
 95 |     "word3 = \"hospital care\"\n",
 96 |     "\n",
 97 |     "response2 = client.models.embed_content(\n",
 98 |     "    model='gemini-embedding-001',\n",
 99 |     "    contents=word3,\n",
100 |     ")\n",
101 |     "\n",
102 |     "word_embedding2 = response2.embeddings[0].values"
103 |    ]
104 |   },
105 |   {
106 |    "cell_type": "code",
107 |    "execution_count": 12,
108 |    "metadata": {},
109 |    "outputs": [
110 |     {
111 |      "data": {
112 |       "text/plain": [
113 |        "np.float64(0.6905654135529404)"
114 |       ]
115 |      },
116 |      "execution_count": 12,
117 |      "metadata": {},
118 |      "output_type": "execute_result"
119 |     }
120 |    ],
121 |    "source": [
122 |     "#  \"medical insurance\",  \"health care\" => 64.27%\n",
123 |     "#  \"customer care\",  \"health care\" => 57.45%\n",
124 |     "#  \"hospital care\",  \"health care\" => 69.05%\n",
125 |     "calculate_cosine_similarity_numpy(word_embedding, word_embedding2)"
126 |    ]
127 |   }
128 |  ],
129 |  "metadata": {
130 |   "kernelspec": {
131 |    "display_name": "saurav-env",
132 |    "language": "python",
133 |    "name": "python3"
134 |   },
135 |   "language_info": {
136 |    "codemirror_mode": {
137 |     "name": "ipython",
138 |     "version": 3
139 |    },
140 |    "file_extension": ".py",
141 |    "mimetype": "text/x-python",
142 |    "name": "python",
143 |    "nbconvert_exporter": "python",
144 |    "pygments_lexer": "ipython3",
145 |    "version": "3.13.1"
146 |   }
147 |  },
148 |  "nbformat": 4,
149 |  "nbformat_minor": 2
150 | }
151 | 


--------------------------------------------------------------------------------