├── LICENSE
├── LLM with RAG Fundamentals
│   ├── 01 API Foundations
│   │   ├── api_foundations.py
│   │   └── README.md
│   ├── 02 Text Generation
│   │   ├── text_generation.py
│   │   └── README.md
│   ├── 03 Embeddings and Vector Stores
│   │   ├── embeddings_vector_stores.py
│   │   └── README.md
│   ├── 04 RAG Fundamentals
│   │   ├── rag_fundamentals.py
│   │   └── README.md
│   └── 05 Capstone RAG Application
│       ├── capstone_rag_application.py
│       └── README.md
├── README.md
└── LLM with RAG Interview Questions
    └── README.md
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 rohanmistry231
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/02 Text Generation/text_generation.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai requests matplotlib pandas nltk
2 | import os
3 | import openai
4 | import matplotlib.pyplot as plt
5 | import nltk
6 |
7 | def run_text_generation_demo():
8 | # Synthetic Query Data
9 | queries = [
10 | "Create a chatbot response for 'What is machine learning?'",
11 | "Write a 50-word sci-fi story about AI.",
12 | "Summarize blockchain technology in 3 sentences."
13 | ]
14 | print("Synthetic Data: Queries created")
15 | print(f"Queries: {queries}")
16 |
17 | # OpenAI API Configuration
18 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
19 |
20 | responses = []
21 | for query in queries:
22 | try:
23 | response = openai.ChatCompletion.create(
24 | model="gpt-3.5-turbo",
25 | messages=[{"role": "user", "content": query}],
26 | max_tokens=150
27 | )
28 | text = response.choices[0].message.content.strip()
29 | responses.append(text)
30 | print(f"Query: {query}")
31 | print(f"Response: {text}")
32 | except openai.error.OpenAIError as e:
33 | print(f"Error for {query}: {e}")
34 |
35 | # Visualization
36 | response_lengths = [len(nltk.word_tokenize(resp)) for resp in responses]
37 | plt.figure(figsize=(8, 4))
38 |     plt.bar(range(1, len(responses) + 1), response_lengths, color='blue')  # one bar per successful response
39 | plt.title("Text Generation Response Lengths")
40 | plt.xlabel("Query")
41 | plt.ylabel("Word Count")
42 | plt.savefig("text_generation_output.png")
43 | print("Visualization: Response lengths saved as text_generation_output.png")
44 |
45 | # Execute the demo
46 | if __name__ == "__main__":
47 | nltk.download('punkt', quiet=True)
48 | run_text_generation_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/01 API Foundations/api_foundations.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai requests matplotlib pandas nltk
2 | import os
3 | import requests
4 | import matplotlib.pyplot as plt
5 | from collections import Counter
6 | import nltk
7 |
8 | def run_api_foundations_demo():
9 | # Synthetic Query Data
10 | queries = [
11 | "Explain neural networks in simple terms.",
12 | "Write a short story about AI.",
13 | "Summarize the benefits of cloud computing."
14 | ]
15 | print("Synthetic Data: Queries created")
16 | print(f"Queries: {queries}")
17 |
18 | # OpenAI API Configuration
19 | api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
20 | url = "https://api.openai.com/v1/chat/completions"
21 | headers = {
22 | "Authorization": f"Bearer {api_key}",
23 | "Content-Type": "application/json"
24 | }
25 |
26 | # Track request success
27 | success_counts = {"Successful": 0, "Failed": 0}
28 |
29 | for query in queries:
30 | payload = {
31 | "model": "gpt-3.5-turbo",
32 | "messages": [{"role": "user", "content": query}],
33 | "max_tokens": 100
34 | }
35 | try:
36 | response = requests.post(url, headers=headers, json=payload)
37 | response.raise_for_status()
38 | success_counts["Successful"] += 1
39 | print(f"Query: {query}")
40 | print(f"Response: {response.json()['choices'][0]['message']['content'].strip()}")
41 | except requests.RequestException as e:
42 | success_counts["Failed"] += 1
43 | print(f"Error for {query}: {e}")
44 |
45 | # Visualization
46 | plt.figure(figsize=(8, 4))
47 | plt.bar(success_counts.keys(), success_counts.values(), color=['green', 'red'])
48 | plt.title("API Request Success Rates")
49 | plt.xlabel("Status")
50 | plt.ylabel("Count")
51 | plt.savefig("api_foundations_output.png")
52 | print("Visualization: Success rates saved as api_foundations_output.png")
53 |
54 | # Execute the demo
55 | if __name__ == "__main__":
56 | nltk.download('punkt', quiet=True)
57 | run_api_foundations_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/03 Embeddings and Vector Stores/embeddings_vector_stores.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai faiss-cpu requests matplotlib pandas nltk numpy
2 | import os
3 | import openai
4 | import faiss
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 |
8 |
9 | def run_embeddings_vector_stores_demo():
10 | # Synthetic Document Data
11 | documents = [
12 | "Machine learning is a subset of AI focusing on data-driven models.",
13 | "Deep learning uses neural networks for complex pattern recognition.",
14 | "Natural language processing enables computers to understand text.",
15 | "AI is transforming industries with automation and insights."
16 | ]
17 | query = "What is machine learning?"
18 | print("Synthetic Data: Documents and query created")
19 | print(f"Documents: {documents}")
20 | print(f"Query: {query}")
21 |
22 | # OpenAI API Configuration
23 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
24 |
25 | # Generate Embeddings
26 | embeddings = []
27 | for text in documents + [query]:
28 | try:
29 | response = openai.Embedding.create(
30 | model="text-embedding-ada-002",
31 | input=text
32 | )
33 | embeddings.append(response['data'][0]['embedding'])
34 | except openai.error.OpenAIError as e:
35 | print(f"Error for {text}: {e}")
36 | return
37 |
38 | # Store in FAISS
39 | dimension = len(embeddings[0])
40 | index = faiss.IndexFlatL2(dimension)
41 | index.add(np.array(embeddings[:-1]).astype('float32'))
42 |
43 | # Search with Query
44 | query_embedding = np.array([embeddings[-1]]).astype('float32')
45 | distances, indices = index.search(query_embedding, len(documents))
46 |     similarities = 1 - distances[0] / 2  # cosine similarity for unit-norm embeddings (IndexFlatL2 returns squared L2 distances)
47 | print("Similarities:", similarities)
48 |
49 | # Visualization
50 | plt.figure(figsize=(8, 4))
51 | plt.bar(range(1, len(documents) + 1), similarities, color='purple')
52 | plt.title("Semantic Search Similarity Scores")
53 | plt.xlabel("Document")
54 | plt.ylabel("Similarity")
55 | plt.savefig("embeddings_vector_stores_output.png")
56 | print("Visualization: Similarity scores saved as embeddings_vector_stores_output.png")
57 |
58 | # Execute the demo
59 | if __name__ == "__main__":
60 | run_embeddings_vector_stores_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/04 RAG Fundamentals/rag_fundamentals.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai langchain faiss-cpu requests matplotlib pandas nltk
2 | import os
3 | import openai
4 | from langchain.vectorstores import FAISS
5 | from langchain.embeddings import OpenAIEmbeddings
6 | from langchain.chat_models import ChatOpenAI
7 | from langchain.chains import RetrievalQA
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 |
11 | def run_rag_fundamentals_demo():
12 | # Synthetic Document Data
13 | documents = [
14 | "AI is transforming healthcare with predictive diagnostics.",
15 | "Machine learning models require large datasets for training.",
16 | "Deep learning excels in image and speech recognition.",
17 | "Natural language processing powers chatbots and translation."
18 | ]
19 | queries = [
20 | "How is AI used in healthcare?",
21 | "What is needed for machine learning?",
22 | "What does deep learning do?"
23 | ]
24 | print("Synthetic Data: Documents and queries created")
25 | print(f"Documents: {documents}")
26 | print(f"Queries: {queries}")
27 |
28 | # OpenAI API Configuration
29 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
30 |
31 | # Create Vector Store
32 | embeddings = OpenAIEmbeddings()
33 | vector_store = FAISS.from_texts(documents, embeddings)
34 |
35 | # RAG Pipeline
36 | llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=150)
37 | qa_chain = RetrievalQA.from_chain_type(
38 | llm=llm,
39 | chain_type="stuff",
40 | retriever=vector_store.as_retriever()
41 | )
42 |
43 | # Run Queries
44 | responses = []
45 | for query in queries:
46 | try:
47 | response = qa_chain.run(query)
48 | responses.append(response)
49 | print(f"Query: {query}")
50 | print(f"Response: {response.strip()}")
51 | except Exception as e:
52 | print(f"Error for {query}: {e}")
53 |
54 | # Visualization (Simulated Retrieval Accuracy)
55 | accuracy_scores = [0.9, 0.85, 0.95] # Simulated for demo
56 | plt.figure(figsize=(8, 4))
57 | plt.bar(range(1, len(queries) + 1), accuracy_scores, color='green')
58 | plt.title("Simulated Retrieval Accuracy")
59 | plt.xlabel("Query")
60 | plt.ylabel("Accuracy")
61 | plt.savefig("rag_fundamentals_output.png")
62 | print("Visualization: Retrieval accuracy saved as rag_fundamentals_output.png")
63 |
64 | # Execute the demo
65 | if __name__ == "__main__":
66 |     run_rag_fundamentals_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/05 Capstone RAG Application/capstone_rag_application.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai langchain faiss-cpu requests matplotlib pandas nltk
2 | import os
3 | import openai
4 | from langchain.vectorstores import FAISS
5 | from langchain.embeddings import OpenAIEmbeddings
6 | from langchain.chat_models import ChatOpenAI
7 | from langchain.chains import RetrievalQA
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | import time
11 |
12 | def run_capstone_rag_application_demo():
13 | # Synthetic Knowledge Base
14 | knowledge_base = [
15 | "AI in healthcare improves diagnostics with predictive models.",
16 | "Machine learning requires large, clean datasets for effective training.",
17 | "Deep learning uses neural networks for tasks like image recognition.",
18 | "NLP enables chatbots to understand and respond to human language.",
19 | "Cloud computing provides scalable infrastructure for AI applications."
20 | ]
21 | queries = [
22 | "How does AI improve healthcare?",
23 | "What are the requirements for machine learning?",
24 | "What is deep learning used for?",
25 | "How does NLP work in chatbots?"
26 | ]
27 | print("Synthetic Data: Knowledge base and queries created")
28 | print(f"Knowledge Base: {knowledge_base}")
29 | print(f"Queries: {queries}")
30 |
31 | # OpenAI API Configuration
32 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
33 |
34 | # Create Vector Store
35 | embeddings = OpenAIEmbeddings()
36 | vector_store = FAISS.from_texts(knowledge_base, embeddings)
37 |
38 | # RAG Pipeline
39 | llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=200)
40 | qa_chain = RetrievalQA.from_chain_type(
41 | llm=llm,
42 | chain_type="stuff",
43 | retriever=vector_store.as_retriever(search_kwargs={"k": 2})
44 | )
45 |
46 | # Run Queries and Measure Performance
47 | responses = []
48 | latencies = []
49 | for query in queries:
50 | start_time = time.time()
51 | try:
52 | response = qa_chain.run(query)
53 | responses.append(response)
54 | latencies.append(time.time() - start_time)
55 | print(f"Query: {query}")
56 | print(f"Response: {response.strip()}")
57 | except Exception as e:
58 | print(f"Error for {query}: {e}")
59 |
60 | # Visualization
61 | plt.figure(figsize=(8, 4))
62 | plt.plot(range(1, len(queries) + 1), latencies, marker='o', color='blue')
63 | plt.title("RAG Application Query Latencies")
64 | plt.xlabel("Query")
65 | plt.ylabel("Latency (seconds)")
66 | plt.savefig("capstone_rag_application_output.png")
67 | print("Visualization: Latencies saved as capstone_rag_application_output.png")
68 |
69 | # Execute the demo
70 | if __name__ == "__main__":
71 |     run_capstone_rag_application_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/02 Text Generation/README.md:
--------------------------------------------------------------------------------
1 | # 📝 Text Generation
2 |
3 | ## 📖 Introduction
4 |
5 | **Text Generation** uses OpenAI’s Chat and Completion APIs to create conversational and creative text, a core component for RAG applications. This guide provides Python examples, focusing on the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **Chat API**: Generating conversational responses.
10 | - **Completion API**: Creating structured or creative text.
11 | - **Prompt Design**: Crafting prompts for quality outputs.
12 | - **Response Metrics**: Evaluating length and relevance.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # text_generation.py
18 | # Setup: pip install openai requests matplotlib pandas nltk
19 | import os
20 | import openai
21 | import matplotlib.pyplot as plt
22 | import nltk
23 |
24 | def run_text_generation_demo():
25 | # Synthetic Query Data
26 | queries = [
27 | "Create a chatbot response for 'What is machine learning?'",
28 | "Write a 50-word sci-fi story about AI.",
29 | "Summarize blockchain technology in 3 sentences."
30 | ]
31 | print("Synthetic Data: Queries created")
32 | print(f"Queries: {queries}")
33 |
34 | # OpenAI API Configuration
35 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
36 |
37 | responses = []
38 | for query in queries:
39 | try:
40 | response = openai.ChatCompletion.create(
41 | model="gpt-3.5-turbo",
42 | messages=[{"role": "user", "content": query}],
43 | max_tokens=150
44 | )
45 | text = response.choices[0].message.content.strip()
46 | responses.append(text)
47 | print(f"Query: {query}")
48 | print(f"Response: {text}")
49 | except openai.error.OpenAIError as e:
50 | print(f"Error for {query}: {e}")
51 |
52 | # Visualization
53 | response_lengths = [len(nltk.word_tokenize(resp)) for resp in responses]
54 | plt.figure(figsize=(8, 4))
55 |     plt.bar(range(1, len(responses) + 1), response_lengths, color='blue')  # one bar per successful response
56 | plt.title("Text Generation Response Lengths")
57 | plt.xlabel("Query")
58 | plt.ylabel("Word Count")
59 | plt.savefig("text_generation_output.png")
60 | print("Visualization: Response lengths saved as text_generation_output.png")
61 |
62 | # Execute the demo
63 | if __name__ == "__main__":
64 | nltk.download('punkt', quiet=True)
65 | run_text_generation_demo()
66 | ```
67 |
68 | ## 📊 Visualization Output
69 |
70 | The code generates a bar chart (`text_generation_output.png`) showing response word counts, illustrating output consistency.
71 |
72 | ## 💡 Applications
73 |
74 | - **Chatbots**: Build conversational interfaces for RAG systems.
75 | - **Content Creation**: Generate summaries or stories.
76 | - **RAG Integration**: Provide LLM responses for retrieved data.
77 |
78 | ## 🏆 Practical Tasks
79 |
80 | 1. Build a chatbot prompt for user queries.
81 | 2. Generate creative text (e.g., story, summary).
82 | 3. Visualize response lengths for different queries.
83 |
84 | ## 💡 Interview Scenarios
85 |
86 | **Question**: How do you use OpenAI’s Chat API for text generation?
87 | **Answer**: The Chat API generates text using a model and messages array, guided by prompts.
88 | **Key**: Prompt design ensures relevant outputs.
89 | **Example**: `openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": query}])`
90 |
91 | **Coding Task**: Create a chatbot response using OpenAI’s Chat API.
92 | **Tip**: Use `ChatCompletion.create` with a user prompt.
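
A minimal sketch of this coding task, using the same pre-1.0 `openai` SDK as the demo above (the helper name is illustrative; assumes `OPENAI_API_KEY` is set):

```python
# Illustrative helper: one chat completion per user message.
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

def chatbot_reply(user_message):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": user_message}],
        max_tokens=100,
    )
    return response.choices[0].message.content.strip()

print(chatbot_reply("What is machine learning?"))
```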
93 |
94 | ## 📚 Resources
95 |
96 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
97 | - [NLTK Documentation](https://www.nltk.org/)
98 |
99 | ## 🤝 Contributions
100 |
101 | 1. Fork the repository.
102 | 2. Create a feature branch (`git checkout -b feature/text-generation`).
103 | 3. Commit changes (`git commit -m 'Add text generation content'`).
104 | 4. Push to the branch (`git push origin feature/text-generation`).
105 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/01 API Foundations/README.md:
--------------------------------------------------------------------------------
1 | # 🛠️ API Foundations
2 |
3 | ## 📖 Introduction
4 |
5 | **API Foundations** introduces integrating OpenAI’s API, covering authentication, request structure, and error handling. This guide provides hands-on Python examples, setting the stage for RAG applications, aligned with the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **API Concepts**: REST APIs, endpoints, JSON payloads.
10 | - **Authentication**: Securing OpenAI API with keys.
11 | - **Environment Setup**: Configuring Python for API use.
12 | - **Request Handling**: Sending and parsing responses.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # api_foundations.py
18 | # Setup: pip install openai requests matplotlib pandas nltk
19 | import os
20 | import requests
21 | import matplotlib.pyplot as plt
22 | from collections import Counter
23 | import nltk
24 |
25 | def run_api_foundations_demo():
26 | # Synthetic Query Data
27 | queries = [
28 | "Explain neural networks in simple terms.",
29 | "Write a short story about AI.",
30 | "Summarize the benefits of cloud computing."
31 | ]
32 | print("Synthetic Data: Queries created")
33 | print(f"Queries: {queries}")
34 |
35 | # OpenAI API Configuration
36 | api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
37 | url = "https://api.openai.com/v1/chat/completions"
38 | headers = {
39 | "Authorization": f"Bearer {api_key}",
40 | "Content-Type": "application/json"
41 | }
42 |
43 | # Track request success
44 | success_counts = {"Successful": 0, "Failed": 0}
45 |
46 | for query in queries:
47 | payload = {
48 | "model": "gpt-3.5-turbo",
49 | "messages": [{"role": "user", "content": query}],
50 | "max_tokens": 100
51 | }
52 | try:
53 | response = requests.post(url, headers=headers, json=payload)
54 | response.raise_for_status()
55 | success_counts["Successful"] += 1
56 | print(f"Query: {query}")
57 | print(f"Response: {response.json()['choices'][0]['message']['content'].strip()}")
58 | except requests.RequestException as e:
59 | success_counts["Failed"] += 1
60 | print(f"Error for {query}: {e}")
61 |
62 | # Visualization
63 | plt.figure(figsize=(8, 4))
64 | plt.bar(success_counts.keys(), success_counts.values(), color=['green', 'red'])
65 | plt.title("API Request Success Rates")
66 | plt.xlabel("Status")
67 | plt.ylabel("Count")
68 | plt.savefig("api_foundations_output.png")
69 | print("Visualization: Success rates saved as api_foundations_output.png")
70 |
71 | # Execute the demo
72 | if __name__ == "__main__":
73 | nltk.download('punkt', quiet=True)
74 | run_api_foundations_demo()
75 | ```
76 |
77 | ## 📊 Visualization Output
78 |
79 | The code generates a bar chart (`api_foundations_output.png`) showing successful and failed API requests, illustrating reliability.
80 |
81 | ## 💡 Applications
82 |
83 | - **Chatbots**: Authenticate APIs for conversational systems.
84 | - **Content Generation**: Set up APIs for text creation.
85 | - **RAG Preparation**: Establish API connectivity for retrieval systems.
86 |
87 | ## 🏆 Practical Tasks
88 |
89 | 1. Authenticate and test OpenAI API connectivity.
90 | 2. Handle basic API errors (e.g., invalid key, rate limits).
91 | 3. Visualize API request success and failure rates.
92 |
93 | ## 💡 Interview Scenarios
94 |
95 | **Question**: What are the key components of an OpenAI API request?
96 | **Answer**: Endpoint, headers (with API key), and JSON payload (model, messages).
97 | **Key**: Authentication ensures secure access.
98 | **Example**: `requests.post(url, headers={"Authorization": f"Bearer {api_key}"}, json=payload)`
99 |
100 | **Coding Task**: Authenticate and send a query to OpenAI API.
101 | **Tip**: Use `requests.post` with headers and payload.
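
A minimal sketch of this coding task, sending one authenticated request to the chat completions endpoint (assumes `OPENAI_API_KEY` is set in the environment):

```python
# Illustrative single request: authenticate via header, send JSON payload, parse reply.
import os
import requests

api_key = os.getenv("OPENAI_API_KEY")
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Explain neural networks in one sentence."}],
        "max_tokens": 60,
    },
    timeout=30,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"].strip())
```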
102 |
103 | ## 📚 Resources
104 |
105 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
106 | - [Python Requests Documentation](https://requests.readthedocs.io/)
107 |
108 | ## 🤝 Contributions
109 |
110 | 1. Fork the repository.
111 | 2. Create a feature branch (`git checkout -b feature/api-foundations`).
112 | 3. Commit changes (`git commit -m 'Add API foundations content'`).
113 | 4. Push to the branch (`git push origin feature/api-foundations`).
114 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/03 Embeddings and Vector Stores/README.md:
--------------------------------------------------------------------------------
1 | # 🔍 Embeddings and Vector Stores
2 |
3 | ## 📖 Introduction
4 |
5 | **Embeddings and Vector Stores** leverages OpenAI’s Embeddings API and vector databases (e.g., FAISS) to enable semantic search and data storage, critical for RAG systems. This guide provides Python examples, focusing on the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **Embeddings API**: Generating text vectors.
10 | - **Vector Stores**: Storing and querying embeddings (e.g., FAISS).
11 | - **Semantic Search**: Retrieving relevant texts by meaning.
12 | - **RAG Preparation**: Building retrieval components.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # embeddings_vector_stores.py
18 | # Setup: pip install openai faiss-cpu requests matplotlib pandas nltk numpy
19 | import os
20 | import openai
21 | import faiss
22 | import numpy as np
23 | import matplotlib.pyplot as plt
24 |
25 |
26 | def run_embeddings_vector_stores_demo():
27 | # Synthetic Document Data
28 | documents = [
29 | "Machine learning is a subset of AI focusing on data-driven models.",
30 | "Deep learning uses neural networks for complex pattern recognition.",
31 | "Natural language processing enables computers to understand text.",
32 | "AI is transforming industries with automation and insights."
33 | ]
34 | query = "What is machine learning?"
35 | print("Synthetic Data: Documents and query created")
36 | print(f"Documents: {documents}")
37 | print(f"Query: {query}")
38 |
39 | # OpenAI API Configuration
40 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
41 |
42 | # Generate Embeddings
43 | embeddings = []
44 | for text in documents + [query]:
45 | try:
46 | response = openai.Embedding.create(
47 | model="text-embedding-ada-002",
48 | input=text
49 | )
50 | embeddings.append(response['data'][0]['embedding'])
51 | except openai.error.OpenAIError as e:
52 | print(f"Error for {text}: {e}")
53 | return
54 |
55 | # Store in FAISS
56 | dimension = len(embeddings[0])
57 | index = faiss.IndexFlatL2(dimension)
58 | index.add(np.array(embeddings[:-1]).astype('float32'))
59 |
60 | # Search with Query
61 | query_embedding = np.array([embeddings[-1]]).astype('float32')
62 | distances, indices = index.search(query_embedding, len(documents))
63 |     similarities = 1 - distances[0] / 2  # cosine similarity for unit-norm embeddings (IndexFlatL2 returns squared L2 distances)
64 | print("Similarities:", similarities)
65 |
66 | # Visualization
67 | plt.figure(figsize=(8, 4))
68 | plt.bar(range(1, len(documents) + 1), similarities, color='purple')
69 | plt.title("Semantic Search Similarity Scores")
70 | plt.xlabel("Document")
71 | plt.ylabel("Similarity")
72 | plt.savefig("embeddings_vector_stores_output.png")
73 | print("Visualization: Similarity scores saved as embeddings_vector_stores_output.png")
74 |
75 | # Execute the demo
76 | if __name__ == "__main__":
77 | run_embeddings_vector_stores_demo()
78 | ```
79 |
80 | ## 📊 Visualization Output
81 |
82 | The code generates a bar chart (`embeddings_vector_stores_output.png`) showing similarity scores between the query and documents, illustrating retrieval relevance.
83 |
84 | ## 💡 Applications
85 |
86 | - **RAG Systems**: Retrieve context for LLM responses.
87 | - **Search Engines**: Implement semantic search for documents.
88 | - **Recommendations**: Suggest similar content based on embeddings.
89 |
90 | ## 🏆 Practical Tasks
91 |
92 | 1. Generate embeddings for documents.
93 | 2. Store embeddings in FAISS and perform semantic search.
94 | 3. Visualize similarity scores for search results.
95 |
96 | ## 💡 Interview Scenarios
97 |
98 | **Question**: How do embeddings and vector stores enable RAG?
99 | **Answer**: Embeddings convert text to vectors; vector stores like FAISS retrieve relevant documents for LLMs.
100 | **Key**: Enhances context-aware responses.
101 | **Example**: `openai.Embedding.create` and `faiss.IndexFlatL2`
102 |
103 | **Coding Task**: Implement semantic search with FAISS.
104 | **Tip**: Use `Embedding.create` and FAISS index.
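
A minimal sketch of this coding task, assuming the pre-1.0 `openai` SDK and document vectors already computed as in the demo above (the function name is illustrative):

```python
# Illustrative semantic search: index document vectors, embed the query, return top-k.
import faiss
import numpy as np
import openai

def semantic_search(query, doc_vectors, k=3):
    index = faiss.IndexFlatL2(doc_vectors.shape[1])  # exact L2 index
    index.add(doc_vectors.astype("float32"))
    response = openai.Embedding.create(model="text-embedding-ada-002", input=query)
    query_vector = np.array([response["data"][0]["embedding"]], dtype="float32")
    distances, indices = index.search(query_vector, k)
    return list(zip(indices[0].tolist(), distances[0].tolist()))  # (doc index, squared L2)
```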
105 |
106 | ## 📚 Resources
107 |
108 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
109 | - [FAISS Documentation](https://github.com/facebookresearch/faiss)
110 |
111 | ## 🤝 Contributions
112 |
113 | 1. Fork the repository.
114 | 2. Create a feature branch (`git checkout -b feature/embeddings-vector-stores`).
115 | 3. Commit changes (`git commit -m 'Add embeddings and vector stores content'`).
116 | 4. Push to the branch (`git push origin feature/embeddings-vector-stores`).
117 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/04 RAG Fundamentals/README.md:
--------------------------------------------------------------------------------
1 | # 📚 RAG Fundamentals
2 |
3 | ## 📖 Introduction
4 |
5 | **RAG Fundamentals** introduces Retrieval-Augmented Generation, combining OpenAI’s LLM with external knowledge bases for context-aware responses. This guide provides Python examples using LangChain and FAISS, focusing on the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **RAG Overview**: Integrating retrieval with generation.
10 | - **LangChain**: Building RAG pipelines with OpenAI and vector stores.
11 | - **Retrieval**: Fetching relevant documents for LLM context.
12 | - **Evaluation**: Assessing retrieval and response quality.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # rag_fundamentals.py
18 | # Setup: pip install openai langchain faiss-cpu requests matplotlib pandas nltk
19 | import os
20 | import openai
21 | from langchain.vectorstores import FAISS
22 | from langchain.embeddings import OpenAIEmbeddings
23 | from langchain.chat_models import ChatOpenAI
24 | from langchain.chains import RetrievalQA
25 | import matplotlib.pyplot as plt
26 | import numpy as np
27 |
28 | def run_rag_fundamentals_demo():
29 | # Synthetic Document Data
30 | documents = [
31 | "AI is transforming healthcare with predictive diagnostics.",
32 | "Machine learning models require large datasets for training.",
33 | "Deep learning excels in image and speech recognition.",
34 | "Natural language processing powers chatbots and translation."
35 | ]
36 | queries = [
37 | "How is AI used in healthcare?",
38 | "What is needed for machine learning?",
39 | "What does deep learning do?"
40 | ]
41 | print("Synthetic Data: Documents and queries created")
42 | print(f"Documents: {documents}")
43 | print(f"Queries: {queries}")
44 |
45 | # OpenAI API Configuration
46 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
47 |
48 | # Create Vector Store
49 | embeddings = OpenAIEmbeddings()
50 | vector_store = FAISS.from_texts(documents, embeddings)
51 |
52 | # RAG Pipeline
53 | llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=150)
54 | qa_chain = RetrievalQA.from_chain_type(
55 | llm=llm,
56 | chain_type="stuff",
57 | retriever=vector_store.as_retriever()
58 | )
59 |
60 | # Run Queries
61 | responses = []
62 | for query in queries:
63 | try:
64 | response = qa_chain.run(query)
65 | responses.append(response)
66 | print(f"Query: {query}")
67 | print(f"Response: {response.strip()}")
68 | except Exception as e:
69 | print(f"Error for {query}: {e}")
70 |
71 | # Visualization (Simulated Retrieval Accuracy)
72 | accuracy_scores = [0.9, 0.85, 0.95] # Simulated for demo
73 | plt.figure(figsize=(8, 4))
74 | plt.bar(range(1, len(queries) + 1), accuracy_scores, color='green')
75 | plt.title("Simulated Retrieval Accuracy")
76 | plt.xlabel("Query")
77 | plt.ylabel("Accuracy")
78 | plt.savefig("rag_fundamentals_output.png")
79 | print("Visualization: Retrieval accuracy saved as rag_fundamentals_output.png")
80 |
81 | # Execute the demo
82 | if __name__ == "__main__":
83 |     run_rag_fundamentals_demo()
85 | ```
86 |
87 | ## 📊 Visualization Output
88 |
89 | The code generates a bar chart (`rag_fundamentals_output.png`) showing simulated retrieval accuracy scores, illustrating RAG performance.
90 |
91 | ## 💡 Applications
92 |
93 | - **Question Answering**: Enhance LLM responses with external data.
94 | - **Chatbots**: Provide context-aware conversational agents.
95 | - **Knowledge Management**: Query internal documents intelligently.
96 |
97 | ## 🏆 Practical Tasks
98 |
99 | 1. Build a RAG pipeline with LangChain and FAISS.
100 | 2. Test RAG with sample queries and documents.
101 | 3. Visualize retrieval accuracy or response metrics.
102 |
103 | ## 💡 Interview Scenarios
104 |
105 | **Question**: What is Retrieval-Augmented Generation?
106 | **Answer**: RAG combines LLMs with retrieval from vector stores to provide context-aware responses.
107 | **Key**: Improves accuracy over standalone LLMs.
108 | **Example**: `RetrievalQA.from_chain_type` with FAISS retriever.
109 |
110 | **Coding Task**: Build a basic RAG pipeline.
111 | **Tip**: Use `LangChain` with `OpenAIEmbeddings` and `FAISS`.
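
A minimal sketch of this coding task, using the same legacy `langchain` imports as the demo above (assumes `OPENAI_API_KEY` is set; the function name is illustrative):

```python
# Illustrative RAG pipeline: embed texts into FAISS, wire a retriever into RetrievalQA.
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

def build_rag_chain(documents):
    vector_store = FAISS.from_texts(documents, OpenAIEmbeddings())
    llm = ChatOpenAI(model="gpt-3.5-turbo")
    return RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())

chain = build_rag_chain(["AI is transforming healthcare with predictive diagnostics."])
print(chain.run("How is AI used in healthcare?"))
```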
112 |
113 | ## 📚 Resources
114 |
115 | - [LangChain Documentation](https://python.langchain.com/docs/)
116 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
117 | - [FAISS Documentation](https://github.com/facebookresearch/faiss)
118 |
119 | ## 🤝 Contributions
120 |
121 | 1. Fork the repository.
122 | 2. Create a feature branch (`git checkout -b feature/rag-fundamentals`).
123 | 3. Commit changes (`git commit -m 'Add RAG fundamentals content'`).
124 | 4. Push to the branch (`git push origin feature/rag-fundamentals`).
125 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/05 Capstone RAG Application/README.md:
--------------------------------------------------------------------------------
1 | # 🌟 Capstone RAG Application
2 |
3 | ## 📖 Introduction
4 |
5 | **Capstone RAG Application** guides you through building a production-ready question-answering app using OpenAI’s API, LangChain, and FAISS. This capstone project integrates LLM and retrieval for a scalable AI solution, aligned with the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **RAG Application**: Combining LLM with vector store for Q&A.
10 | - **Scalability**: Handling large document sets and queries.
11 | - **Performance Optimization**: Balancing latency and accuracy.
12 | - **Evaluation**: Measuring response quality and retrieval accuracy.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # capstone_rag_application.py
18 | # Setup: pip install openai langchain faiss-cpu requests matplotlib pandas nltk
19 | import os
20 | import openai
21 | from langchain.vectorstores import FAISS
22 | from langchain.embeddings import OpenAIEmbeddings
23 | from langchain.chat_models import ChatOpenAI
24 | from langchain.chains import RetrievalQA
25 | import matplotlib.pyplot as plt
26 | import numpy as np
27 | import time
28 |
29 | def run_capstone_rag_application_demo():
30 | # Synthetic Knowledge Base
31 | knowledge_base = [
32 | "AI in healthcare improves diagnostics with predictive models.",
33 | "Machine learning requires large, clean datasets for effective training.",
34 | "Deep learning uses neural networks for tasks like image recognition.",
35 | "NLP enables chatbots to understand and respond to human language.",
36 | "Cloud computing provides scalable infrastructure for AI applications."
37 | ]
38 | queries = [
39 | "How does AI improve healthcare?",
40 | "What are the requirements for machine learning?",
41 | "What is deep learning used for?",
42 | "How does NLP work in chatbots?"
43 | ]
44 | print("Synthetic Data: Knowledge base and queries created")
45 | print(f"Knowledge Base: {knowledge_base}")
46 | print(f"Queries: {queries}")
47 |
48 | # OpenAI API Configuration
49 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
50 |
51 | # Create Vector Store
52 | embeddings = OpenAIEmbeddings()
53 | vector_store = FAISS.from_texts(knowledge_base, embeddings)
54 |
55 | # RAG Pipeline
56 | llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=200)
57 | qa_chain = RetrievalQA.from_chain_type(
58 | llm=llm,
59 | chain_type="stuff",
60 | retriever=vector_store.as_retriever(search_kwargs={"k": 2})
61 | )
62 |
63 | # Run Queries and Measure Performance
64 | responses = []
65 | latencies = []
66 | for query in queries:
67 | start_time = time.time()
68 | try:
69 | response = qa_chain.run(query)
70 | responses.append(response)
71 | latencies.append(time.time() - start_time)
72 | print(f"Query: {query}")
73 | print(f"Response: {response.strip()}")
74 | except Exception as e:
75 | print(f"Error for {query}: {e}")
76 |
77 | # Visualization
78 | plt.figure(figsize=(8, 4))
79 | plt.plot(range(1, len(queries) + 1), latencies, marker='o', color='blue')
80 | plt.title("RAG Application Query Latencies")
81 | plt.xlabel("Query")
82 | plt.ylabel("Latency (seconds)")
83 | plt.savefig("capstone_rag_application_output.png")
84 | print("Visualization: Latencies saved as capstone_rag_application_output.png")
85 |
86 | # Execute the demo
87 | if __name__ == "__main__":
88 |     run_capstone_rag_application_demo()
90 | ```
91 |
92 | ## 📊 Visualization Output
93 |
94 | The code generates a line plot (`capstone_rag_application_output.png`) showing query latencies, illustrating application performance.
95 |
96 | ## 💡 Applications
97 |
98 | - **Enterprise Q&A**: Query internal documents for insights.
99 | - **Customer Support**: Build context-aware chatbots.
100 | - **Research Tools**: Enhance research with knowledge-backed answers.
101 |
102 | ## 🏆 Practical Tasks
103 |
104 | 1. Build a RAG Q&A app with a large knowledge base.
105 | 2. Optimize retrieval and LLM response times.
106 | 3. Visualize performance metrics (e.g., latency, accuracy).
107 |
108 | ## 💡 Interview Scenarios
109 |
110 | **Question**: How do you build a scalable RAG application?
111 | **Answer**: Use LangChain with OpenAI and FAISS for retrieval and generation, optimizing for latency and accuracy.
112 | **Key**: Balances retrieval and LLM performance.
113 | **Example**: `RetrievalQA` with `FAISS` retriever.
114 |
115 | **Coding Task**: Develop a RAG Q&A system.
116 | **Tip**: Use `LangChain`, `OpenAIEmbeddings`, and `FAISS`.
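
A minimal sketch of this coding task, combining a top-2 retriever with per-query timing, using the same legacy `langchain` imports as the demo above (names are illustrative):

```python
# Illustrative Q&A chain with k=2 retrieval and a simple latency measurement.
import time
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

def build_qa(knowledge_base):
    store = FAISS.from_texts(knowledge_base, OpenAIEmbeddings())
    return RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model="gpt-3.5-turbo"),
        chain_type="stuff",
        retriever=store.as_retriever(search_kwargs={"k": 2}),
    )

qa = build_qa(["AI in healthcare improves diagnostics with predictive models."])
start = time.time()
print(qa.run("How does AI improve healthcare?"), f"({time.time() - start:.2f}s)")
```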
117 |
118 | ## 📚 Resources
119 |
120 | - [LangChain Documentation](https://python.langchain.com/docs/)
121 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
122 | - [FAISS Documentation](https://github.com/facebookresearch/faiss)
123 |
124 | ## 🤝 Contributions
125 |
126 | 1. Fork the repository.
127 | 2. Create a feature branch (`git checkout -b feature/capstone-rag`).
128 | 3. Commit changes (`git commit -m 'Add capstone RAG content'`).
129 | 4. Push to the branch (`git push origin feature/capstone-rag`).
130 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🧠 LLM API and RAG Integration with Python
2 |
3 |
9 | Your step-by-step guide to mastering LLM API integration and building Retrieval-Augmented Generation (RAG) applications with Python for AI-driven solutions and interview preparation
10 |
11 | ---
12 |
13 | ## 📖 Introduction
14 |
15 | Welcome to the **LLM API and RAG Integration with Python Roadmap**! 🚀 This roadmap guides you through integrating large language model (LLM) APIs, focusing on OpenAI, and building Retrieval-Augmented Generation (RAG) applications using Python. It progresses from API basics to creating a capstone RAG project—a sophisticated app leveraging LLMs and external knowledge bases. Designed for the AI-driven era (May 3, 2025), this roadmap prepares you for AI/ML interviews and equips you with practical skills for 6 LPA+ roles.
16 |
17 | ## 🌟 What’s Inside?
18 |
19 | - **API Foundations**: Mastering OpenAI API setup and authentication.
20 | - **Text Generation**: Creating conversational and creative text outputs.
21 | - **Embeddings and Vector Stores**: Using embeddings for semantic search and storage.
22 | - **RAG Fundamentals**: Combining LLMs with external knowledge for enhanced responses.
23 | - **Capstone RAG Application**: Building a production-ready RAG app for question answering.
24 | - **Hands-on Code**: Five modules with Python examples, visualizations, and a capstone project.
25 | - **Interview Scenarios**: Key questions and answers for LLM and RAG interviews.
26 |
27 | ## 🔍 Who Is This For?
28 |
29 | - AI Engineers building LLM and RAG-based applications.
30 | - Machine Learning Engineers mastering API-driven AI and retrieval systems.
31 | - AI Researchers exploring OpenAI and RAG frameworks.
32 | - Software Engineers deepening Python-based AI expertise.
33 | - Anyone preparing for AI/ML interviews in tech.
34 |
35 | ## 🗺️ Learning Roadmap
36 |
37 | This roadmap covers five key areas, each with a dedicated module (a Python script plus README), progressing from LLM API basics to a capstone RAG application:
38 |
39 | ### 🛠️ API Foundations (`api_foundations.py`)
40 | - API Concepts and OpenAI Authentication
41 | - Environment Setup and Testing
42 | - API Request Visualization
43 |
44 | ### 📝 Text Generation (`text_generation.py`)
45 | - OpenAI Chat and Completion APIs
46 | - Conversational and Creative Text Applications
47 | - Response Metrics Visualization
48 |
49 | ### 🔍 Embeddings and Vector Stores (`embeddings_vector_stores.py`)
50 | - OpenAI Embeddings API and Vector Databases
51 | - Semantic Search and Data Storage
52 | - Similarity Score Visualization
53 |
54 | ### 📚 RAG Fundamentals (`rag_fundamentals.py`)
55 | - Retrieval-Augmented Generation Concepts
56 | - Integrating LLMs with Knowledge Bases
57 | - Retrieval Accuracy Visualization
58 |
59 | ### 🌟 Capstone RAG Application (`capstone_rag_application.py`)
60 | - Building a Question-Answering RAG App
61 | - Scalable API and Retrieval Integration
62 | - Application Performance Visualization
63 |
64 | ## 💡 Why Master LLM API and RAG Integration?
65 |
66 | LLM APIs and RAG are game-changers in AI:
67 | 1. **Versatility**: Powers chatbots, Q&A systems, and analytics with context-aware responses.
68 | 2. **Interview Relevance**: Tested in coding challenges (e.g., API integration, RAG pipelines).
69 | 3. **Scalability**: Enables production-ready, knowledge-enhanced AI solutions.
70 | 4. **Industry Demand**: Critical for AI/ML roles in tech.
71 |
72 | ## 📆 Study Plan
73 |
74 | - **Week 1**:
75 | - Day 1-2: API Foundations
76 | - Day 3-4: Text Generation
77 | - Day 5-6: Embeddings and Vector Stores
78 | - Day 7: Review Week 1
79 | - **Week 2**:
80 | - Day 1-2: RAG Fundamentals
81 | - Day 3-4: Capstone RAG Application
82 |   - Day 5-7: Review the module READMEs and practice interview scenarios.
83 |
84 | ## 🛠️ Setup Instructions
85 |
86 | 1. **Python Environment**:
87 | - Install Python 3.8+ and pip.
88 | - Create a virtual environment: `python -m venv rag_env; source rag_env/bin/activate`.
89 | - Install dependencies: `pip install openai langchain faiss-cpu requests matplotlib pandas nltk`.
90 | 2. **API Keys**:
91 | - Obtain an OpenAI API key from [OpenAI](https://platform.openai.com/).
92 | - Set environment variable:
93 | ```bash
94 | export OPENAI_API_KEY="your-openai-api-key"
95 | ```
96 | 3. **Datasets**:
97 | - Uses synthetic data (e.g., queries, documents).
98 | - Optional: Download datasets from [Hugging Face Datasets](https://huggingface.co/datasets).
99 | - Note: Code uses simulated data to avoid file I/O constraints.
100 | 4. **Running Code**:
101 |    - Run the provided scripts (e.g., `api_foundations.py`) directly, or copy code from each module's README into a Python file.
102 | - Use Google Colab or local setup.
103 | - View outputs in terminal and Matplotlib visualizations (PNGs).
104 | - Check terminal for errors; ensure dependencies and API keys are set.
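
A quick sanity check, assuming the key was exported as shown above, before launching any demo script:

```python
# Fails fast if the API key is missing from the environment.
import os

if not os.getenv("OPENAI_API_KEY"):
    raise SystemExit("OPENAI_API_KEY is not set; export it before running the demos.")
```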
105 |
106 | ## 🏆 Practical Tasks
107 |
108 | 1. **API Foundations**:
109 | - Authenticate and test OpenAI API connectivity.
110 | - Visualize API request success rates.
111 | 2. **Text Generation**:
112 | - Build a conversational chatbot with OpenAI.
113 | - Plot response lengths and quality metrics.
114 | 3. **Embeddings and Vector Stores**:
115 | - Implement semantic search with FAISS.
116 | - Visualize similarity scores.
117 | 4. **RAG Fundamentals**:
118 | - Build a basic RAG pipeline with LangChain.
119 | - Visualize retrieval accuracy.
120 | 5. **Capstone RAG Application**:
121 | - Develop a Q&A app with LLM and vector store.
122 | - Visualize application performance metrics.
123 |
124 | ## 💡 Interview Tips
125 |
126 | - **Common Questions**:
127 | - What are the components of an OpenAI API request?
128 | - How do you integrate OpenAI with LangChain for RAG?
129 | - What is Retrieval-Augmented Generation, and how does it work?
130 | - How do you optimize API calls for a RAG application?
131 | - What are real-world use cases for RAG systems?
132 | - **Tips**:
133 | - Explain API and RAG setups with code (e.g., `openai.ChatCompletion.create`, `LangChain` pipelines).
134 | - Demonstrate use cases like Q&A or chatbots.
135 |   - Code tasks like error handling or retrieval optimization (a minimal retry sketch follows this section).
136 | - Discuss trade-offs (e.g., retrieval accuracy vs. latency).
137 | - **Coding Tasks**:
138 | - Integrate OpenAI API for a chatbot.
139 | - Build a RAG pipeline for document Q&A.
140 | - **Conceptual Clarity**:
141 | - Explain how RAG enhances LLM performance.
142 | - Describe optimization techniques for API and retrieval.
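
For the error-handling task above, a minimal retry sketch with exponential backoff, assuming the pre-1.0 `openai` SDK used by the module scripts (the wrapper name is illustrative):

```python
# Illustrative retry wrapper: back off 1s, 2s, 4s before giving up.
import time
import openai

def chat_with_retry(messages, model="gpt-3.5-turbo", retries=3, base_delay=1.0):
    for attempt in range(retries):
        try:
            return openai.ChatCompletion.create(model=model, messages=messages)
        except openai.error.OpenAIError:
            if attempt == retries - 1:
                raise  # out of retries
            time.sleep(base_delay * (2 ** attempt))
```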
143 |
144 | ## 📚 Resources
145 |
146 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
147 | - [LangChain Documentation](https://python.langchain.com/docs/)
148 | - [FAISS Documentation](https://github.com/facebookresearch/faiss)
149 | - [Matplotlib Documentation](https://matplotlib.org/stable/contents.html)
150 | - [“Prompt Engineering Guide” by DAIR.AI](https://www.promptingguide.ai/)
151 |
152 | ## 🤝 Contributions
153 |
154 | 1. Fork the repository.
155 | 2. Create a feature branch (`git checkout -b feature/amazing-addition`).
156 | 3. Commit changes (`git commit -m 'Add some amazing content'`).
157 | 4. Push to the branch (`git push origin feature/amazing-addition`).
158 | 5. Open a Pull Request.
159 |
160 | ---
161 |
162 |
163 | Happy Learning and Good Luck with Your Interviews! ✨
164 |
--------------------------------------------------------------------------------
/LLM with RAG Interview Questions/README.md:
--------------------------------------------------------------------------------
1 | # LLM API and RAG Integration Interview Questions for AI/ML Roles
2 |
3 | This README provides 170 interview questions tailored for AI/ML roles, focusing on integrating Large Language Model (LLM) APIs and Retrieval-Augmented Generation (RAG) using Python. The questions cover **core concepts** (e.g., LLM API usage, RAG pipeline setup, vector stores, embeddings, evaluation) and their applications in tasks like question answering, document search, and contextual text generation. Questions are categorized by topic and divided into **Basic**, **Intermediate**, and **Advanced** levels to support candidates preparing for roles requiring LLM and RAG integration in generative AI workflows.
4 |
5 | ## LLM API Usage
6 |
7 | ### Basic
8 | 1. **What is an LLM API, and why is it used in AI applications?**
9 | Provides access to pre-trained language models for tasks like text generation.
10 | ```python
11 | from openai import OpenAI
12 | client = OpenAI(api_key="your-api-key")
13 | response = client.chat.completions.create(model="gpt-4", messages=[{"role": "user", "content": "Hello!"}])
14 | ```
15 |
16 | 2. **How do you authenticate an LLM API in Python?**
17 | Uses API keys for secure access.
18 | ```python
19 | import os
20 | os.environ["OPENAI_API_KEY"] = "your-api-key"
21 | client = OpenAI()
22 | ```
23 |
24 | 3. **How do you make a basic API call to an LLM?**
25 | Sends a prompt and retrieves a response.
26 | ```python
27 | response = client.chat.completions.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "What is AI?"}])
28 | output = response.choices[0].message.content
29 | ```
30 |
31 | 4. **What is prompt engineering in the context of LLM APIs?**
32 | Crafts inputs to optimize model outputs.
33 | ```python
34 | prompt = "Summarize this text in 50 words: [text]"
35 | response = client.chat.completions.create(model="gpt-4", messages=[{"role": "user", "content": prompt}])
36 | ```
37 |
38 | 5. **How do you handle API rate limits in Python?**
39 | Implements retries or delays.
40 | ```python
41 | import time
42 | def safe_api_call(client, prompt, retries=3):
43 | for _ in range(retries):
44 | try:
45 | return client.chat.completions.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": prompt}])
46 | except Exception as e:
47 | time.sleep(1)
48 | raise Exception("API call failed")
49 | ```
50 |
51 | 6. **How do you visualize API response latency?**
52 | Plots latency metrics.
53 | ```python
54 | import matplotlib.pyplot as plt
55 | def plot_latency(latencies):
56 | plt.plot(latencies)
57 | plt.savefig("api_latency.png")
58 | ```
59 |
60 | ### Intermediate
61 | 7. **Write a function to call an LLM API with custom parameters.**
62 | Configures temperature, max tokens, etc.
63 | ```python
64 | def call_llm(client, prompt, model="gpt-3.5-turbo", temperature=0.7, max_tokens=100):
65 | response = client.chat.completions.create(
66 | model=model,
67 | messages=[{"role": "user", "content": prompt}],
68 | temperature=temperature,
69 | max_tokens=max_tokens
70 | )
71 | return response.choices[0].message.content
72 | ```
73 |
74 | 8. **How do you implement streaming responses from an LLM API?**
75 | Processes real-time outputs.
76 | ```python
77 | def stream_llm_response(client, prompt):
78 | stream = client.chat.completions.create(
79 | model="gpt-3.5-turbo",
80 | messages=[{"role": "user", "content": prompt}],
81 | stream=True
82 | )
83 | for chunk in stream:
84 | if chunk.choices[0].delta.content:
85 | print(chunk.choices[0].delta.content, end="")
86 | ```
87 |
88 | 9. **Write a function to handle batch API calls.**
89 | Processes multiple prompts efficiently.
90 | ```python
91 | def batch_llm_call(client, prompts, model="gpt-3.5-turbo"):
92 | responses = []
93 | for prompt in prompts:
94 | response = client.chat.completions.create(
95 | model=model,
96 | messages=[{"role": "user", "content": prompt}]
97 | )
98 | responses.append(response.choices[0].message.content)
99 | return responses
100 | ```
101 |
102 | 10. **How do you integrate xAI’s Grok API in Python?**
103 | Uses Grok for generative tasks.
104 | ```python
105 | from xai_sdk import XAIClient
106 | client = XAIClient(api_key="your-xai-key")
107 | response = client.generate_text(prompt="Explain AI", model="grok-3")
108 | ```
109 |
110 | 11. **Write a function to log LLM API usage.**
111 | Tracks API calls and costs.
112 | ```python
113 | import logging
114 | def log_api_call(prompt, response, model):
115 | logging.basicConfig(filename="llm_api.log", level=logging.INFO)
116 | logging.info(f"Model: {model}, Prompt: {prompt}, Response: {response}")
117 | ```
118 |
119 | 12. **How do you handle errors in LLM API calls?**
120 | Implements robust error handling.
121 | ```python
122 | def robust_api_call(client, prompt):
123 | try:
124 | return client.chat.completions.create(
125 | model="gpt-3.5-turbo",
126 | messages=[{"role": "user", "content": prompt}]
127 | )
128 | except Exception as e:
129 | print(f"Error: {e}")
130 | return None
131 | ```
132 |
133 | ### Advanced
134 | 13. **Write a function to implement rate-limited API calls.**
135 | Respects API quotas.
136 | ```python
137 | from ratelimit import limits, sleep_and_retry
138 | @sleep_and_retry
139 | @limits(calls=10, period=60)
140 | def rate_limited_call(client, prompt):
141 | return client.chat.completions.create(
142 | model="gpt-3.5-turbo",
143 | messages=[{"role": "user", "content": prompt}]
144 | )
145 | ```
146 |
147 | 14. **How do you optimize LLM API costs in Python?**
148 | Uses caching or smaller models.
149 | ```python
150 | from functools import lru_cache
151 | @lru_cache(maxsize=1000)
152 | def cached_llm_call(prompt, model="gpt-3.5-turbo"):
153 | client = OpenAI()
154 | return client.chat.completions.create(
155 | model=model,
156 | messages=[{"role": "user", "content": prompt}]
157 | ).choices[0].message.content
158 | ```
159 |
160 | 15. **Write a function to implement asynchronous LLM API calls.**
161 | Improves throughput.
162 | ```python
163 | import asyncio
164 | async def async_llm_call(client, prompt):
165 | loop = asyncio.get_event_loop()
166 | response = await loop.run_in_executor(None, lambda: client.chat.completions.create(
167 | model="gpt-3.5-turbo",
168 | messages=[{"role": "user", "content": prompt}]
169 | ))
170 | return response.choices[0].message.content
171 | ```
172 |
173 | 16. **How do you integrate multiple LLM APIs in Python?**
174 | Combines OpenAI and xAI’s Grok.
175 | ```python
176 | def multi_llm_call(prompt):
177 | openai_client = OpenAI()
178 | xai_client = XAIClient(api_key="your-xai-key")
179 | openai_response = openai_client.chat.completions.create(
180 | model="gpt-3.5-turbo",
181 | messages=[{"role": "user", "content": prompt}]
182 | )
183 | xai_response = xai_client.generate_text(prompt, model="grok-3")
184 | return {"openai": openai_response.choices[0].message.content, "xai": xai_response}
185 | ```
186 |
187 | 17. **Write a function to monitor API performance.**
188 | Tracks latency and success rates.
189 | ```python
190 | import time
191 | def monitor_api_performance(client, prompts):
192 | latencies = []
193 | for prompt in prompts:
194 | start = time.time()
195 | response = robust_api_call(client, prompt)
196 | latencies.append(time.time() - start if response else float("inf"))
197 | return {"avg_latency": sum(latencies) / len(latencies), "success_rate": sum(1 for l in latencies if l != float("inf")) / len(latencies)}
198 | ```
199 |
200 | 18. **How do you implement fallback mechanisms for LLM APIs?**
201 | Switches to alternative APIs on failure.
202 | ```python
203 | def fallback_llm_call(primary_client, fallback_client, prompt):
204 | try:
205 | return primary_client.chat.completions.create(
206 | model="gpt-3.5-turbo",
207 | messages=[{"role": "user", "content": prompt}]
208 | ).choices[0].message.content
209 | except:
210 | return fallback_client.generate_text(prompt, model="grok-3")
211 | ```
212 |
213 | ## RAG Pipeline Setup
214 |
215 | ### Basic
216 | 19. **What is Retrieval-Augmented Generation (RAG)?**
217 | Combines retrieval and generation for contextual responses.
218 | ```python
219 | from langchain.chains import RetrievalQA
220 | from langchain.vectorstores import FAISS
221 | qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
222 | ```
223 |
224 | 20. **How do you set up a basic RAG pipeline in Python?**
225 | Uses LangChain for RAG.
226 | ```python
227 | from langchain.llms import OpenAI
228 | from langchain.vectorstores import FAISS
229 | from langchain.embeddings import OpenAIEmbeddings
230 | embeddings = OpenAIEmbeddings()
231 | vector_store = FAISS.from_texts(["Sample text"], embeddings)
232 | llm = OpenAI()
233 | qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
234 | ```
235 |
236 | 21. **What is a vector store in RAG?**
237 | Stores document embeddings for retrieval.
238 | ```python
239 | vector_store = FAISS.from_texts(["Document 1", "Document 2"], embeddings)
240 | ```
241 |
242 | 22. **How do you create embeddings for RAG?**
243 | Converts text to vectors.
244 | ```python
245 | from langchain.embeddings import OpenAIEmbeddings
246 | embeddings = OpenAIEmbeddings()
247 | vectors = embeddings.embed_documents(["Hello, world!"])
248 | ```
249 |
250 | 23. **How do you perform retrieval in a RAG pipeline?**
251 | Fetches relevant documents.
252 | ```python
253 | docs = vector_store.similarity_search("What is AI?", k=3)
254 | ```
255 |
256 | 24. **How do you visualize document similarity scores?**
257 | Plots retrieval scores.
258 | ```python
259 | import matplotlib.pyplot as plt
260 | def plot_similarity_scores(scores):
261 | plt.bar(range(len(scores)), scores)
262 | plt.savefig("similarity_scores.png")
263 | ```
264 |
265 | ### Intermediate
266 | 25. **Write a function to set up a RAG pipeline with LangChain.**
267 | Configures LLM and vector store.
268 | ```python
269 | from langchain.chains import RetrievalQA
270 | from langchain.vectorstores import FAISS
271 | from langchain.llms import OpenAI
272 | def setup_rag_pipeline(documents, llm_model="text-davinci-003"):
273 | embeddings = OpenAIEmbeddings()
274 | vector_store = FAISS.from_texts(documents, embeddings)
275 | llm = OpenAI(model=llm_model)
276 | return RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
277 | ```
278 |
279 | 26. **How do you integrate Hugging Face models in a RAG pipeline?**
280 | Uses Hugging Face LLMs.
281 | ```python
282 | from langchain.llms import HuggingFacePipeline
283 | from transformers import pipeline
284 | llm = HuggingFacePipeline(pipeline=pipeline("text-generation", model="gpt2"))
285 | qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
286 | ```
287 |
288 | 27. **Write a function to load documents into a vector store.**
289 | Indexes documents for RAG.
290 | ```python
291 | from langchain.document_loaders import TextLoader
292 | def load_documents_to_vector_store(file_path, embeddings):
293 | loader = TextLoader(file_path)
294 | documents = loader.load()
295 | return FAISS.from_documents(documents, embeddings)
296 | ```
297 |
298 | 28. **How do you optimize retrieval in a RAG pipeline?**
299 | Uses efficient vector stores or indexing.
300 | ```python
301 | from langchain.vectorstores import FAISS
302 | vector_store = FAISS.from_texts(["Doc"], embeddings, index_type="hnsw")
303 | ```
304 |
305 | 29. **Write a function to visualize retrieved documents.**
306 | Displays document relevance.
307 | ```python
308 | import matplotlib.pyplot as plt
309 | def plot_retrieved_docs(docs, scores):
310 | plt.bar([doc.metadata["source"] for doc in docs], scores)
311 | plt.savefig("retrieved_docs.png")
312 | ```
313 |
314 | 30. **How do you handle large document sets in RAG?**
315 | Uses chunking or batch processing.
316 | ```python
317 | from langchain.text_splitter import RecursiveCharacterTextSplitter
318 | def chunk_documents(documents, chunk_size=1000):
319 | splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size)
320 | return splitter.split_documents(documents)
321 | ```
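     A short usage sketch, assuming `documents` came from a LangChain loader (as in question 27) and reusing the embeddings from earlier snippets:
   ```python
   chunks = chunk_documents(documents, chunk_size=500)
   vector_store = FAISS.from_documents(chunks, OpenAIEmbeddings())  # index the chunks rather than whole documents
   ```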
322 |
323 | #### Advanced
324 | 31. **Write a function to implement hybrid retrieval in RAG.**
325 | Combines dense and sparse retrieval.
326 | ```python
327 | from langchain.retrievers import BM25Retriever, EnsembleRetriever
328 | def hybrid_retriever(documents, embeddings):
329 | dense_retriever = FAISS.from_documents(documents, embeddings).as_retriever()
330 | sparse_retriever = BM25Retriever.from_documents(documents)
331 | return EnsembleRetriever(retrievers=[dense_retriever, sparse_retriever], weights=[0.5, 0.5])
332 | ```
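     The ensemble retriever is queried like any other retriever; the weights control how much each underlying retriever contributes. A usage sketch with the names from above:
   ```python
   retriever = hybrid_retriever(documents, embeddings)
   docs = retriever.get_relevant_documents("What is AI?")
   ```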
333 |
334 | 32. **How do you optimize RAG pipelines for latency?**
335 | Uses caching or smaller models.
336 | ```python
337 | import langchain
338 | from langchain.cache import InMemoryCache
338 | def enable_rag_caching():
339 | langchain.llm_cache = InMemoryCache()
340 | ```
341 |
342 | 33. **Write a function to implement multi-query retrieval in RAG.**
343 | Enhances retrieval with multiple queries.
344 | ```python
345 | from langchain.retrievers import MultiQueryRetriever
346 | def multi_query_rag(llm, vector_store):
347 | return MultiQueryRetriever.from_llm(retriever=vector_store.as_retriever(), llm=llm)
348 | ```
349 |
350 | 34. **How do you integrate external APIs in a RAG pipeline?**
351 | Uses xAI’s Grok for generation.
352 | ```python
353 | from langchain.llms.base import LLM
354 | class GrokLLM(LLM):
355 |     @property
356 |     def _llm_type(self):
357 |         return "grok"
358 |     def _call(self, prompt, stop=None, **kwargs):
359 |         client = XAIClient(api_key="your-xai-key")  # placeholder client; any HTTP wrapper around the Grok API works here
360 |         return client.generate_text(prompt, model="grok-3")
361 | qa_chain = RetrievalQA.from_chain_type(llm=GrokLLM(), retriever=vector_store.as_retriever())
359 | ```
360 |
361 | 35. **Write a function to evaluate RAG pipeline performance.**
362 | Measures retrieval and generation quality.
363 | ```python
364 | import evaluate  # datasets.load_metric is deprecated; the evaluate library replaces it
365 | def evaluate_rag(qa_chain, questions, references):
366 |     bleu = evaluate.load("bleu")
367 |     responses = [qa_chain.run(q) for q in questions]
368 |     return bleu.compute(predictions=responses, references=[[r] for r in references])  # BLEU expects a list of reference lists per prediction
369 | ```
370 |
371 | 36. **How do you implement dynamic document indexing in RAG?**
372 | Updates vector store incrementally.
373 | ```python
374 | def update_vector_store(vector_store, new_documents, embeddings):
375 | new_store = FAISS.from_documents(new_documents, embeddings)
376 | vector_store.merge_from(new_store)
377 | return vector_store
378 | ```
379 |
380 | ## Vector Stores and Embeddings
381 |
382 | ### Basic
383 | 37. **What is a vector store, and why is it used in RAG?**
384 | Stores embeddings for efficient retrieval.
385 | ```python
386 | from langchain.vectorstores import Chroma
387 | vector_store = Chroma.from_texts(["Sample text"], embeddings)
388 | ```
389 |
390 | 38. **How do you create embeddings with Hugging Face models?**
391 | Uses Sentence Transformers.
392 | ```python
393 | from langchain.embeddings import HuggingFaceEmbeddings
394 | embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
395 | ```
396 |
397 | 39. **How do you perform similarity search in a vector store?**
398 | Retrieves similar documents.
399 | ```python
400 | results = vector_store.similarity_search_with_score("Query", k=5)
401 | ```
402 |
403 | 40. **What is cosine similarity in the context of RAG?**
404 | Measures vector similarity.
405 | ```python
406 | from sklearn.metrics.pairwise import cosine_similarity
407 | similarity = cosine_similarity([vector1], [vector2])[0][0]
408 | ```
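     For intuition, vectors pointing the same way score 1.0 and orthogonal vectors score 0.0; a tiny worked example with toy 2-D vectors:
   ```python
   from sklearn.metrics.pairwise import cosine_similarity
   print(cosine_similarity([[1, 0]], [[1, 0]])[0][0])  # 1.0 (same direction)
   print(cosine_similarity([[1, 0]], [[0, 1]])[0][0])  # 0.0 (orthogonal)
   ```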
409 |
410 | 41. **How do you save a vector store in Python?**
411 | Persists embeddings to disk.
412 | ```python
413 | vector_store.save_local("vector_store")
414 | ```
415 |
416 | 42. **How do you visualize embedding distributions?**
417 | Plots embeddings in 2D.
418 | ```python
419 | from sklearn.manifold import TSNE
420 | import matplotlib.pyplot as plt
421 | def plot_embeddings(embeddings):
422 | tsne = TSNE(n_components=2)
423 | reduced = tsne.fit_transform(embeddings)
424 | plt.scatter(reduced[:, 0], reduced[:, 1])
425 | plt.savefig("embeddings.png")
426 | ```
427 |
428 | #### Intermediate
429 | 43. **Write a function to create a Chroma vector store.**
430 | Indexes documents with embeddings.
431 | ```python
432 | from langchain.vectorstores import Chroma
433 | def create_chroma_store(documents, embeddings, persist_dir="chroma"):
434 | return Chroma.from_documents(documents, embeddings, persist_directory=persist_dir)
435 | ```
436 |
437 | 44. **How do you integrate Pinecone with a RAG pipeline?**
438 | Uses cloud-based vector storage.
439 | ```python
440 | from langchain.vectorstores import Pinecone
441 | import pinecone
442 | pinecone.init(api_key="your-pinecone-key", environment="us-west1-gcp")
443 | vector_store = Pinecone.from_texts(["Doc"], embeddings, index_name="rag-index")
444 | ```
445 |
446 | 45. **Write a function to compare embedding models.**
447 | Evaluates embedding quality.
448 | ```python
449 | def compare_embeddings(texts, embedding_models):
450 | similarities = []
451 | for model in embedding_models:
452 | embeddings = model.embed_documents(texts)
453 | similarity = cosine_similarity(embeddings[0:1], embeddings[1:2])[0][0]
454 | similarities.append(similarity)
455 | return similarities
456 | ```
457 |
458 | 46. **How do you optimize vector store queries?**
459 | Uses approximate nearest neighbors.
460 | ```python
461 | import faiss
462 | # FAISS.from_texts builds an exact (flat) index; for approximate nearest neighbours, build an HNSW index directly
463 | index = faiss.IndexHNSWFlat(embedding_dim, 32)  # embedding_dim and doc_vectors are placeholders for your data
464 | index.add(doc_vectors)  # doc_vectors: float32 numpy array of document embeddings
462 | ```
463 |
464 | 47. **Write a function to visualize vector store query results.**
465 | Plots similarity scores.
466 | ```python
467 | import matplotlib.pyplot as plt
468 | def plot_query_results(results):
469 | scores = [score for _, score in results]
470 | plt.bar(range(len(scores)), scores)
471 | plt.savefig("query_results.png")
472 | ```
473 |
474 | 48. **How do you handle high-dimensional embeddings in RAG?**
475 | Uses dimensionality reduction.
476 | ```python
477 | from sklearn.decomposition import PCA
478 | def reduce_embeddings(embeddings, n_components=50):
479 | pca = PCA(n_components=n_components)
480 | return pca.fit_transform(embeddings)
481 | ```
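     Note that the same fitted PCA must be reused for query embeddings at search time; a sketch of that split, with `doc_embeddings` and `query_embedding` as placeholder arrays:
   ```python
   from sklearn.decomposition import PCA
   pca = PCA(n_components=50).fit(doc_embeddings)    # fit once on the document embeddings
   reduced_docs = pca.transform(doc_embeddings)
   reduced_query = pca.transform([query_embedding])  # project queries with the same transform
   ```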
482 |
483 | #### Advanced
484 | 49. **Write a function to implement custom vector store indexing.**
485 | Builds a custom index.
486 | ```python
487 | from sklearn.metrics.pairwise import cosine_similarity
488 | class CustomVectorStore:  # plain class; subclassing LangChain's abstract VectorStore would also require implementing from_texts and other abstract methods
489 | def __init__(self, embeddings):
490 | self.embeddings = embeddings
491 | self.index = {}
492 | def add_texts(self, texts):
493 | vectors = self.embeddings.embed_documents(texts)
494 |         offset = len(self.index)  # append after any existing entries instead of overwriting them
495 |         for i, vector in enumerate(vectors):
496 |             self.index[offset + i] = (texts[i], vector)
496 | def similarity_search(self, query, k=4):
497 | query_vector = self.embeddings.embed_query(query)
498 | return sorted(
499 | [(text, cosine_similarity([query_vector], [vector])[0][0]) for text, vector in self.index.values()],
500 | key=lambda x: x[1], reverse=True
501 | )[:k]
502 | ```
503 |
504 | 50. **How do you scale vector stores for large datasets?**
505 | Uses sharding or distributed stores.
506 | ```python
507 | import weaviate
508 | from langchain.vectorstores import Weaviate
508 | vector_store = Weaviate.from_texts(["Doc"], embeddings, client=weaviate.Client("http://localhost:8080"))
509 | ```
510 |
511 | 51. **Write a function to update embeddings dynamically.**
512 | Refreshes vector store incrementally.
513 | ```python
514 | def update_embeddings(vector_store, new_texts, embeddings):
515 | new_store = FAISS.from_texts(new_texts, embeddings)
516 | vector_store.merge_from(new_store)
517 | return vector_store
518 | ```
519 |
520 | 52. **How do you implement cross-lingual embeddings in RAG?**
521 | Uses multilingual models.
522 | ```python
523 | embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/LaBSE")
524 | vector_store = FAISS.from_texts(["Doc"], embeddings)
525 | ```
526 |
527 | 53. **Write a function to evaluate vector store retrieval quality.**
528 | Measures precision and recall.
529 | ```python
530 | def evaluate_retrieval(vector_store, queries, relevant_docs):
531 | precision = []
532 | for query, relevant in zip(queries, relevant_docs):
533 |         results = vector_store.similarity_search(query, k=5)  # returns Documents, not (doc, score) tuples
534 |         retrieved = [doc.page_content for doc in results]
535 | precision.append(sum(1 for doc in retrieved if doc in relevant) / len(retrieved))
536 | return {"avg_precision": sum(precision) / len(precision)}
537 | ```
538 |
539 | 54. **How do you integrate vector stores with real-time data?**
540 | Uses streaming updates.
541 | ```python
542 | def stream_vector_store(vector_store, data_stream, embeddings):
543 | for batch in data_stream:
544 |         vector_store.add_texts([doc["text"] for doc in batch])  # the store already holds its embedding function
545 | return vector_store
546 | ```
547 |
548 | ## Evaluation and Metrics
549 |
550 | ### Basic
551 | 55. **How do you evaluate LLM API responses?**
552 | Uses metrics like BLEU or ROUGE.
553 | ```python
554 | import evaluate  # replaces the deprecated datasets.load_metric
555 | bleu = evaluate.load("bleu")
556 | score = bleu.compute(predictions=["Hello"], references=[["Hello, world!"]])
557 | ```
558 |
559 | 56. **What is precision in the context of RAG retrieval?**
560 |      The fraction of retrieved documents that are actually relevant.
561 | ```python
562 | def compute_precision(retrieved, relevant):
563 | return sum(1 for doc in retrieved if doc in relevant) / len(retrieved)
564 | ```
565 |
566 | 57. **How do you evaluate RAG generation quality?**
567 | Uses ROUGE for text similarity.
568 | ```python
569 | rouge = evaluate.load("rouge")
570 | score = rouge.compute(predictions=["Generated text"], references=["Reference text"])
571 | ```
572 |
573 | 58. **How do you visualize evaluation metrics?**
574 | Plots metric scores.
575 | ```python
576 | import matplotlib.pyplot as plt
577 | def plot_metrics(metrics, metric_name):
578 | plt.plot(metrics)
579 | plt.savefig(f"{metric_name}.png")
580 | ```
581 |
582 | 59. **How do you measure latency in a RAG pipeline?**
583 | Tracks execution time.
584 | ```python
585 | import time
586 | def measure_rag_latency(qa_chain, query):
587 | start = time.time()
588 | qa_chain.run(query)
589 | return time.time() - start
590 | ```
591 |
592 | 60. **What is recall in the context of RAG retrieval?**
593 |      The fraction of relevant documents that are actually retrieved.
594 | ```python
595 | def compute_recall(retrieved, relevant):
596 | return sum(1 for doc in retrieved if doc in relevant) / len(relevant)
597 | ```
598 |
599 | #### Intermediate
600 | 61. **Write a function to evaluate RAG pipeline accuracy.**
601 | Compares outputs to ground truth.
602 | ```python
603 | def evaluate_rag_accuracy(qa_chain, questions, answers):
604 | correct = 0
605 | for q, a in zip(questions, answers):
606 | response = qa_chain.run(q)
607 | if response.strip() == a.strip():
608 | correct += 1
609 | return correct / len(questions)
610 | ```
611 |
612 | 62. **How do you implement human-in-the-loop evaluation for RAG?**
613 | Collects user feedback.
614 | ```python
615 | def human_eval_rag(qa_chain, query):
616 | response = qa_chain.run(query)
617 | feedback = input(f"Rate this response (1-5): {response}\n")
618 | return {"response": response, "score": int(feedback)}
619 | ```
620 |
621 | 63. **Write a function to compute F1 score for RAG retrieval.**
622 | Balances precision and recall.
623 | ```python
624 | def compute_f1(retrieved, relevant):
625 | precision = compute_precision(retrieved, relevant)
626 | recall = compute_recall(retrieved, relevant)
627 | return 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
628 | ```
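     A tiny worked example: if the retriever returns ["a", "b", "c"] and the relevant set is ["a", "c", "d"], precision and recall are both 2/3, so F1 is 2/3 as well:
   ```python
   retrieved, relevant = ["a", "b", "c"], ["a", "c", "d"]
   print(compute_precision(retrieved, relevant), compute_recall(retrieved, relevant), compute_f1(retrieved, relevant))  # all roughly 0.667
   ```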
629 |
630 | 64. **How do you evaluate contextual relevance in RAG?**
631 | Measures document alignment.
632 | ```python
633 | def evaluate_context_relevance(qa_chain, query, relevant_context):
634 | response = qa_chain.run(query)
635 | return 1 if relevant_context in response else 0
636 | ```
637 |
638 | 65. **Write a function to visualize RAG evaluation metrics.**
639 | Plots precision, recall, and F1.
640 | ```python
641 | import matplotlib.pyplot as plt
642 | def plot_rag_metrics(precisions, recalls, f1s):
643 | plt.plot(precisions, label="Precision")
644 | plt.plot(recalls, label="Recall")
645 | plt.plot(f1s, label="F1")
646 | plt.legend()
647 | plt.savefig("rag_metrics.png")
648 | ```
649 |
650 | 66. **How do you implement A/B testing for RAG pipelines?**
651 | Compares two configurations.
652 | ```python
653 | def ab_test_rag(qa_chain_a, qa_chain_b, questions, answers):
654 | metrics_a = evaluate_rag_accuracy(qa_chain_a, questions, answers)
655 | metrics_b = evaluate_rag_accuracy(qa_chain_b, questions, answers)
656 | return {"chain_a": metrics_a, "chain_b": metrics_b}
657 | ```
658 |
659 | #### Advanced
660 | 67. **Write a function to implement automated evaluation for RAG.**
661 | Uses multiple metrics.
662 | ```python
663 | def auto_evaluate_rag(qa_chain, questions, answers, relevant_docs):
664 |     bleu = evaluate.load("bleu")
665 | responses = [qa_chain.run(q) for q in questions]
666 |     bleu_score = bleu.compute(predictions=responses, references=[[a] for a in answers])  # wrap each answer as a reference list
667 | retrieval_metrics = evaluate_retrieval(qa_chain.retriever.vectorstore, questions, relevant_docs)
668 | return {"bleu": bleu_score, "retrieval": retrieval_metrics}
669 | ```
670 |
671 | 68. **How do you evaluate RAG robustness under noisy inputs?**
672 | Tests performance with perturbations.
673 | ```python
674 | import random
675 | def evaluate_robustness(qa_chain, questions, answers, noise_level=0.1):
676 |     noisy_questions = [q + " " + "".join(random.choices("abc", k=int(len(q) * noise_level))) for q in questions]
677 |     return evaluate_rag_accuracy(qa_chain, noisy_questions, answers)  # score the noisy queries against the reference answers, not the questions
677 | ```
678 |
679 | 69. **Write a function to implement cross-validation for RAG.**
680 | Validates pipeline stability.
681 | ```python
682 | from sklearn.model_selection import KFold
683 | def cross_validate_rag(documents, questions, answers, folds=5):
684 | kf = KFold(n_splits=folds)
685 | scores = []
686 | for train_idx, test_idx in kf.split(documents):
687 | train_docs = [documents[i] for i in train_idx]
688 | qa_chain = setup_rag_pipeline(train_docs)
689 | test_questions = [questions[i] for i in test_idx]
690 | test_answers = [answers[i] for i in test_idx]
691 | scores.append(evaluate_rag_accuracy(qa_chain, test_questions, test_answers))
692 | return sum(scores) / len(scores)
693 | ```
694 |
695 | 70. **How do you implement real-time evaluation for RAG?**
696 | Monitors performance during inference.
697 | ```python
698 | def realtime_evaluate_rag(qa_chain, query, reference):
699 | response = qa_chain.run(query)
700 |     bleu = evaluate.load("bleu")
701 | score = bleu.compute(predictions=[response], references=[[reference]])
702 | return {"response": response, "bleu": score}
703 | ```
704 |
705 | 71. **Write a function to evaluate RAG fairness.**
706 | Checks bias in responses.
707 | ```python
708 | def evaluate_fairness(qa_chain, questions, groups):
709 | responses = [qa_chain.run(q) for q in questions]
710 | group_scores = {g: [] for g in set(groups)}
711 | for response, group in zip(responses, groups):
712 | group_scores[group].append(len(response.split()))
713 | return {g: sum(scores) / len(scores) for g, scores in group_scores.items()}
714 | ```
715 |
716 | 72. **How do you visualize RAG performance over time?**
717 | Plots metrics across queries.
718 | ```python
719 | import matplotlib.pyplot as plt
720 | def plot_performance_over_time(metrics):
721 | plt.plot(metrics["bleu"], label="BLEU")
722 | plt.plot(metrics["retrieval"], label="Retrieval Precision")
723 | plt.legend()
724 | plt.savefig("performance_over_time.png")
725 | ```
726 |
727 | ## Debugging and Error Handling
728 |
729 | ### Basic
730 | 73. **How do you debug LLM API responses?**
731 | Logs inputs and outputs.
732 | ```python
733 | def debug_api_call(client, prompt):
734 | response = client.chat.completions.create(
735 | model="gpt-3.5-turbo",
736 | messages=[{"role": "user", "content": prompt}]
737 | )
738 | print(f"Prompt: {prompt}, Response: {response.choices[0].message.content}")
739 | return response
740 | ```
741 |
742 | 74. **What is a try-except block in RAG pipelines?**
743 | Handles runtime errors.
744 | ```python
745 | try:
746 | response = qa_chain.run("Query")
747 | except Exception as e:
748 | print(f"Error: {e}")
749 | ```
750 |
751 | 75. **How do you validate inputs in a RAG pipeline?**
752 | Ensures correct formats.
753 | ```python
754 | def validate_rag_input(query, vector_store):
755 | if not query or not vector_store:
756 | raise ValueError("Invalid query or vector store")
757 | return query
758 | ```
759 |
760 | 76. **How do you handle vector store errors in RAG?**
761 | Manages retrieval failures.
762 | ```python
763 | def safe_retrieval(vector_store, query):
764 | try:
765 | return vector_store.similarity_search(query)
766 | except Exception as e:
767 | print(f"Retrieval error: {e}")
768 | return []
769 | ```
770 |
771 | 77. **What is logging in the context of RAG pipelines?**
772 | Tracks operations and errors.
773 | ```python
774 | import logging
775 | logging.basicConfig(filename="rag.log", level=logging.INFO)
776 | logging.info("RAG pipeline started")
777 | ```
778 |
779 | 78. **How do you handle API timeouts in LLM calls?**
780 | Implements timeouts and retries.
781 | ```python
782 | import openai
783 | def handle_timeout(client, prompt, timeout=10):
784 | try:
785 | return client.chat.completions.create(
786 | model="gpt-3.5-turbo",
787 | messages=[{"role": "user", "content": prompt}],
788 | timeout=timeout
789 | )
790 |     except openai.APITimeoutError:
791 | print("API timeout")
792 | return None
793 | ```
794 |
795 | #### Intermediate
796 | 79. **Write a function to retry RAG pipeline queries.**
797 | Handles transient failures.
798 | ```python
799 | def retry_rag_query(qa_chain, query, max_attempts=3):
800 | for attempt in range(max_attempts):
801 | try:
802 | return qa_chain.run(query)
803 | except Exception as e:
804 | if attempt == max_attempts - 1:
805 | raise
806 | print(f"Attempt {attempt+1} failed: {e}")
807 | ```
808 |
809 | 80. **How do you debug vector store retrieval issues?**
810 | Inspects retrieved documents.
811 | ```python
812 | def debug_retrieval(vector_store, query):
813 | results = vector_store.similarity_search_with_score(query)
814 | print(f"Query: {query}, Results: {[(doc.page_content, score) for doc, score in results]}")
815 | return results
816 | ```
817 |
818 | 81. **Write a function to validate LLM API responses.**
819 | Ensures valid outputs.
820 | ```python
821 | def validate_response(response):
822 | if not response or not response.choices:
823 | raise ValueError("Invalid API response")
824 | return response.choices[0].message.content
825 | ```
826 |
827 | 82. **How do you profile RAG pipeline performance?**
828 | Measures component times.
829 | ```python
830 | import time
831 | def profile_rag(qa_chain, query):
832 | start = time.time()
833 | response = qa_chain.run(query)
834 | print(f"RAG took {time.time() - start}s")
835 | return response
836 | ```
837 |
838 | 83. **Write a function to handle embedding errors.**
839 | Manages embedding failures.
840 | ```python
841 | def safe_embedding(embeddings, texts):
842 | try:
843 | return embeddings.embed_documents(texts)
844 | except Exception as e:
845 | print(f"Embedding error: {e}")
846 | return [[] for _ in texts]
847 | ```
848 |
849 | 84. **How do you debug inconsistent RAG outputs?**
850 | Logs pipeline state.
851 | ```python
852 | def debug_rag_output(qa_chain, query):
853 | response = qa_chain.run(query)
854 | print(f"Query: {query}, Response: {response}, Retriever State: {qa_chain.retriever}")
855 | return response
856 | ```
857 |
858 | #### Advanced
859 | 85. **Write a function to implement a custom error handler for RAG.**
860 | Logs specific errors.
861 | ```python
862 | import logging
863 | def custom_rag_error_handler(operation, *args):
864 | logging.basicConfig(filename="rag_errors.log", level=logging.ERROR)
865 | try:
866 | return operation(*args)
867 | except Exception as e:
868 | logging.error(f"RAG error: {e}")
869 | raise
870 | ```
871 |
872 | 86. **How do you implement circuit breakers in RAG pipelines?**
873 | Prevents cascading failures.
874 | ```python
875 | from pybreaker import CircuitBreaker
876 | breaker = CircuitBreaker(fail_max=3, reset_timeout=60)
877 | @breaker
878 | def safe_rag_call(qa_chain, query):
879 | return qa_chain.run(query)
880 | ```
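     Once the breaker opens (after `fail_max` consecutive failures) further calls fail fast until the reset timeout elapses; callers can catch that state and degrade gracefully. A usage sketch:
   ```python
   import pybreaker
   try:
       answer = safe_rag_call(qa_chain, "What is RAG?")
   except pybreaker.CircuitBreakerError:
       answer = "Service temporarily unavailable"  # fallback while the breaker is open
   ```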
881 |
882 | 87. **Write a function to detect retrieval failures in RAG.**
883 | Checks for empty results.
884 | ```python
885 | def detect_retrieval_failure(vector_store, query):
886 | results = vector_store.similarity_search(query)
887 | if not results:
888 | print("Warning: No documents retrieved")
889 | return results
890 | ```
891 |
892 | 88. **How do you implement logging for distributed RAG pipelines?**
893 | Centralizes logs.
894 | ```python
895 | import logging.handlers
896 | def setup_distributed_logging():
897 | handler = logging.handlers.SocketHandler("log-server", 9090)
898 | logging.getLogger().addHandler(handler)
899 | logging.info("RAG pipeline started")
900 | ```
901 |
902 | 89. **Write a function to handle version compatibility in RAG.**
903 | Checks library versions.
904 | ```python
905 | from packaging.version import Version
906 | from langchain import __version__
907 | def check_langchain_version():
908 |     if Version(__version__) < Version("0.0.150"):  # plain string comparison orders versions incorrectly
908 | raise ValueError("Unsupported LangChain version")
909 | ```
910 |
911 | 90. **How do you debug RAG performance bottlenecks?**
912 | Profiles retrieval and generation.
913 | ```python
914 | import cProfile
915 | def debug_rag_bottlenecks(qa_chain, query):
916 | cProfile.runctx("qa_chain.run(query)", globals(), locals(), "rag_profile.prof")
917 | ```
918 |
919 | ## Visualization and Interpretation
920 |
921 | ### Basic
922 | 91. **How do you visualize LLM API response quality?**
923 | Plots BLEU scores.
924 | ```python
925 | import matplotlib.pyplot as plt
926 | def plot_bleu_scores(scores):
927 | plt.plot(scores)
928 | plt.savefig("bleu_scores.png")
929 | ```
930 |
931 | 92. **How do you create a word cloud for RAG outputs?**
932 | Visualizes word frequencies.
933 | ```python
934 | from wordcloud import WordCloud
935 | import matplotlib.pyplot as plt
936 | def plot_word_cloud(text):
937 | wc = WordCloud().generate(text)
938 | plt.imshow(wc, interpolation="bilinear")
939 | plt.savefig("word_cloud.png")
940 | ```
941 |
942 | 93. **How do you visualize retrieval scores in RAG?**
943 | Plots similarity scores.
944 | ```python
945 | import matplotlib.pyplot as plt
946 | def plot_retrieval_scores(results):
947 | scores = [score for _, score in results]
948 | plt.bar(range(len(scores)), scores)
949 | plt.savefig("retrieval_scores.png")
950 | ```
951 |
952 | 94. **How do you visualize RAG pipeline latency?**
953 | Plots execution times.
954 | ```python
955 | import matplotlib.pyplot as plt
956 | def plot_rag_latency(latencies):
957 | plt.plot(latencies)
958 | plt.savefig("rag_latency.png")
959 | ```
960 |
961 | 95. **How do you visualize document embeddings in RAG?**
962 | Projects embeddings to 2D.
963 | ```python
964 | from sklearn.manifold import TSNE
965 | import matplotlib.pyplot as plt
966 | def plot_doc_embeddings(embeddings):
967 | tsne = TSNE(n_components=2)
968 | reduced = tsne.fit_transform(embeddings)
969 | plt.scatter(reduced[:, 0], reduced[:, 1])
970 | plt.savefig("doc_embeddings.png")
971 | ```
972 |
973 | 96. **How do you visualize RAG response diversity?**
974 | Plots unique token counts.
975 | ```python
976 | import matplotlib.pyplot as plt
977 | def plot_response_diversity(responses):
978 | unique_tokens = [len(set(response.split())) for response in responses]
979 | plt.hist(unique_tokens, bins=20)
980 | plt.savefig("response_diversity.png")
981 | ```
982 |
983 | #### Intermediate
984 | 97. **Write a function to visualize RAG retrieval accuracy.**
985 | Plots precision over queries.
986 | ```python
987 | import matplotlib.pyplot as plt
988 | def plot_retrieval_accuracy(precisions):
989 | plt.plot(precisions)
990 | plt.savefig("retrieval_accuracy.png")
991 | ```
992 |
993 | 98. **How do you visualize LLM API usage patterns?**
994 | Plots API call frequency.
995 | ```python
996 | import matplotlib.pyplot as plt
997 | def plot_api_usage(calls):
998 | plt.hist(calls["timestamps"], bins=24)
999 | plt.savefig("api_usage.png")
1000 | ```
1001 |
1002 | 99. **Write a function to visualize RAG fairness metrics.**
1003 | Plots group-wise performance.
1004 | ```python
1005 | import matplotlib.pyplot as plt
1006 | def plot_fairness_metrics(metrics):
1007 | plt.bar(metrics.keys(), metrics.values())
1008 | plt.savefig("fairness_metrics.png")
1009 | ```
1010 |
1011 | 100. **How do you visualize RAG pipeline throughput?**
1012 | Plots queries per second.
1013 | ```python
1014 | import matplotlib.pyplot as plt
1015 | def plot_throughput(queries, times):
1016 | throughput = [1 / t for t in times]
1017 | plt.plot(throughput)
1018 | plt.savefig("throughput.png")
1019 | ```
1020 |
1021 | 101. **Write a function to visualize embedding clusters.**
1022 | Plots document clusters.
1023 | ```python
1024 | from sklearn.cluster import KMeans
1025 | from sklearn.manifold import TSNE
1025 | import matplotlib.pyplot as plt
1026 | def plot_embedding_clusters(embeddings, n_clusters=3):
1027 | kmeans = KMeans(n_clusters=n_clusters)
1028 | labels = kmeans.fit_predict(embeddings)
1029 | tsne = TSNE(n_components=2)
1030 | reduced = tsne.fit_transform(embeddings)
1031 | plt.scatter(reduced[:, 0], reduced[:, 1], c=labels)
1032 | plt.savefig("embedding_clusters.png")
1033 | ```
1034 |
1035 | 102. **How do you visualize RAG response consistency?**
1036 | Plots response similarity.
1037 | ```python
1038 | import matplotlib.pyplot as plt
1039 | from sklearn.metrics.pairwise import cosine_similarity
1040 | def plot_response_consistency(responses, embeddings):
1041 | vectors = embeddings.embed_documents(responses)
1042 | similarities = cosine_similarity(vectors)
1043 | plt.imshow(similarities, cmap="hot")
1044 | plt.savefig("response_consistency.png")
1045 | ```
1046 |
1047 | #### Advanced
1048 | 103. **Write a function to visualize RAG pipeline robustness.**
1049 | Plots performance under noise.
1050 | ```python
1051 | import matplotlib.pyplot as plt
1052 | def plot_robustness(metrics, noise_levels):
1053 | plt.plot(noise_levels, metrics)
1054 | plt.savefig("robustness.png")
1055 | ```
1056 |
1057 | 104. **How do you implement a dashboard for RAG metrics?**
1058 | Displays real-time stats.
1059 | ```python
1060 | from fastapi import FastAPI
1061 | app = FastAPI()
1062 | metrics = []
1063 | @app.get("/rag_metrics")
1064 | async def get_metrics():
1065 | return {"metrics": metrics}
1066 | ```
1067 |
1068 | 105. **Write a function to visualize data drift in RAG.**
1069 | Tracks document distribution changes.
1070 | ```python
1071 | from sklearn.manifold import TSNE
1072 | import matplotlib.pyplot as plt
1073 | def plot_data_drift(old_embeddings, new_embeddings):
1074 |     tsne = TSNE(n_components=2)
1075 |     reduced = tsne.fit_transform(list(old_embeddings) + list(new_embeddings))  # one joint fit so old and new share the same 2-D space
1076 |     old_reduced, new_reduced = reduced[:len(old_embeddings)], reduced[len(old_embeddings):]
1076 | plt.scatter(old_reduced[:, 0], old_reduced[:, 1], label="Old")
1077 | plt.scatter(new_reduced[:, 0], new_reduced[:, 1], label="New")
1078 | plt.legend()
1079 | plt.savefig("data_drift.png")
1080 | ```
1081 |
1082 | 106. **How do you visualize RAG retrieval latency distribution?**
1083 | Plots latency histogram.
1084 | ```python
1085 | import matplotlib.pyplot as plt
1086 | def plot_retrieval_latency(latencies):
1087 | plt.hist(latencies, bins=20)
1088 | plt.savefig("retrieval_latency.png")
1089 | ```
1090 |
1091 | 107. **Write a function to visualize LLM API cost trends.**
1092 | Plots API usage costs.
1093 | ```python
1094 | import matplotlib.pyplot as plt
1095 | def plot_api_costs(costs):
1096 | plt.plot(costs)
1097 | plt.savefig("api_costs.png")
1098 | ```
1099 |
1100 | 108. **How do you visualize RAG pipeline error rates?**
1101 | Plots error frequency.
1102 | ```python
1103 | import matplotlib.pyplot as plt
1104 | def plot_error_rates(errors):
1105 | plt.plot([1 if e else 0 for e in errors])
1106 | plt.savefig("error_rates.png")
1107 | ```
1108 |
1109 | ## Best Practices and Optimization
1110 |
1111 | ### Basic
1112 | 109. **What are best practices for LLM API integration?**
1113 | Includes secure key management and caching.
1114 | ```python
1115 | import os
1116 | os.environ["OPENAI_API_KEY"] = "your-api-key"
1117 | ```
1118 |
1119 | 110. **How do you ensure reproducibility in RAG pipelines?**
1120 | Sets random seeds and versions.
1121 | ```python
1122 | import random
1123 | random.seed(42)
1124 | ```
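     Seeding only covers Python-side randomness; LLM sampling is a separate source of nondeterminism, so pipelines typically also seed NumPy and decode with temperature 0. A sketch of that combination:
   ```python
   import numpy as np
   np.random.seed(42)
   llm = OpenAI(temperature=0)  # greedy-ish decoding removes most sampling variance
   ```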
1125 |
1126 | 111. **What is caching in the context of RAG pipelines?**
1127 | Stores query results for reuse.
1128 | ```python
1129 | import langchain
1130 | from langchain.cache import InMemoryCache
1130 | langchain.llm_cache = InMemoryCache()
1131 | ```
1132 |
1133 | 112. **How do you handle large-scale RAG pipelines?**
1134 | Uses efficient vector stores.
1135 | ```python
1136 | vector_store = FAISS.from_texts(["Doc"], embeddings)  # for very large corpora, swap in an ANN-backed or managed store (HNSW index, Weaviate, Pinecone)
1137 | ```
1138 |
1139 | 113. **What is the role of environment configuration in RAG?**
1140 | Manages API keys and settings.
1141 | ```python
1142 | import os
1143 | os.environ["PINECONE_API_KEY"] = "your-pinecone-key"
1144 | ```
1145 |
1146 | 114. **How do you document RAG pipeline code?**
1147 | Uses docstrings for clarity.
1148 | ```python
1149 | def setup_rag_pipeline(documents):
1150 | """Sets up a RAG pipeline with LangChain."""
1151 | embeddings = OpenAIEmbeddings()
1152 | vector_store = FAISS.from_texts(documents, embeddings)
1153 | return RetrievalQA.from_chain_type(llm=OpenAI(), retriever=vector_store.as_retriever())
1154 | ```
1155 |
1156 | #### Intermediate
1157 | 115. **Write a function to optimize RAG memory usage.**
1158 | Clears unused objects.
1159 | ```python
1160 | import gc
1161 | def optimize_rag_memory(qa_chain, query):
1162 | response = qa_chain.run(query)
1163 | gc.collect()
1164 | return response
1165 | ```
1166 |
1167 | 116. **How do you implement unit tests for RAG pipelines?**
1168 | Validates components.
1169 | ```python
1170 | import unittest
1171 | class TestRAG(unittest.TestCase):
1172 | def test_retrieval(self):
1173 | vector_store = FAISS.from_texts(["Test"], embeddings)
1174 | results = vector_store.similarity_search("Test")
1175 | self.assertGreater(len(results), 0)
1176 | ```
1177 |
1178 | 117. **Write a function to create reusable RAG templates.**
1179 | Standardizes pipeline setup.
1180 | ```python
1181 | def rag_template(documents, llm_model="gpt-3.5-turbo-instruct"):
1182 | embeddings = OpenAIEmbeddings()
1183 | vector_store = FAISS.from_texts(documents, embeddings)
1184 | llm = OpenAI(model=llm_model)
1185 | return RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
1186 | ```
1187 |
1188 | 118. **How do you optimize RAG for batch processing?**
1189 | Processes queries in batches.
1190 | ```python
1191 | def batch_rag_process(qa_chain, queries, batch_size=10):
1192 | results = []
1193 | for i in range(0, len(queries), batch_size):
1194 | batch = queries[i:i+batch_size]
1195 | results.extend([qa_chain.run(q) for q in batch])
1196 | return results
1197 | ```
1198 |
1199 | 119. **Write a function to handle RAG configuration.**
1200 | Centralizes settings.
1201 | ```python
1202 | def configure_rag():
1203 | return {
1204 | "llm_model": "text-davinci-003",
1205 | "embedding_model": "text-embedding-ada-002",
1206 | "vector_store": "faiss"
1207 | }
1208 | ```
1209 |
1210 | 120. **How do you ensure RAG pipeline consistency?**
1211 | Standardizes versions and settings.
1212 | ```python
1213 | from langchain import __version__
1214 | def check_rag_env():
1215 | print(f"LangChain version: {__version__}")
1216 | ```
1217 |
1218 | #### Advanced
1219 | 121. **Write a function to implement RAG pipeline caching.**
1220 | Reuses processed data.
1221 | ```python
1222 | import langchain
1223 | from langchain.cache import SQLiteCache
1223 | def enable_rag_pipeline_cache():
1224 | langchain.llm_cache = SQLiteCache(database_path="rag_cache.db")
1225 | ```
1226 |
1227 | 122. **How do you optimize RAG for high-throughput processing?**
1228 | Uses parallel execution.
1229 | ```python
1230 | from joblib import Parallel, delayed
1231 | def high_throughput_rag(qa_chain, queries):
1232 | return Parallel(n_jobs=-1)(delayed(qa_chain.run)(q) for q in queries)
1233 | ```
1234 |
1235 | 123. **Write a function to implement RAG pipeline versioning.**
1236 | Tracks changes in workflows.
1237 | ```python
1238 | import json
1239 | def version_rag_pipeline(config, version):
1240 | with open(f"rag_v{version}.json", "w") as f:
1241 | json.dump(config, f)
1242 | ```
1243 |
1244 | 124. **How do you implement RAG pipeline monitoring?**
1245 | Logs performance metrics.
1246 | ```python
1247 | import logging
1248 | import time
1248 | def monitored_rag(qa_chain, query):
1249 | logging.basicConfig(filename="rag.log", level=logging.INFO)
1250 | start = time.time()
1251 | response = qa_chain.run(query)
1252 | logging.info(f"Query: {query}, Latency: {time.time() - start}s")
1253 | return response
1254 | ```
1255 |
1256 | 125. **Write a function to handle RAG scalability.**
1257 | Processes large datasets efficiently.
1258 | ```python
1259 | def scalable_rag(qa_chain, queries, chunk_size=100):
1260 | results = []
1261 | for i in range(0, len(queries), chunk_size):
1262 | results.extend(batch_rag_process(qa_chain, queries[i:i+chunk_size]))
1263 | return results
1264 | ```
1265 |
1266 | 126. **How do you implement RAG pipeline automation?**
1267 | Scripts end-to-end workflows.
1268 | ```python
1269 | def automate_rag_pipeline(documents, queries):
1270 | qa_chain = setup_rag_pipeline(documents)
1271 | responses = batch_rag_process(qa_chain, queries)
1272 | with open("rag_outputs.json", "w") as f:
1273 | json.dump(responses, f)
1274 | return responses
1275 | ```
1276 |
1277 | ## Ethical Considerations in LLM and RAG
1278 |
1279 | ### Basic
1280 | 127. **What are ethical concerns in LLM API usage?**
1281 | Includes bias and privacy risks.
1282 | ```python
1283 | def check_response_bias(responses, groups):
1284 | return {g: len([r for r, g_ in zip(responses, groups) if g_ == g]) / len(responses) for g in set(groups)}
1285 | ```
1286 |
1287 | 128. **How do you detect bias in RAG outputs?**
1288 | Analyzes group disparities.
1289 | ```python
1290 | def detect_rag_bias(qa_chain, queries, groups):
1291 | responses = [qa_chain.run(q) for q in queries]
1292 | return {g: len([r for r, g_ in zip(responses, groups) if g_ == g]) / len(responses) for g in set(groups)}
1293 | ```
1294 |
1295 | 129. **What is data privacy in RAG pipelines?**
1296 | Protects sensitive documents.
1297 | ```python
1298 | def anonymize_documents(documents):
1299 | return [doc.replace("sensitive", "[REDACTED]") for doc in documents]
1300 | ```
1301 |
1302 | 130. **How do you ensure fairness in RAG pipelines?**
1303 | Balances retrieval across groups.
1304 | ```python
1305 | def fair_retrieval(vector_store, query, weights):
1306 | results = vector_store.similarity_search_with_score(query)
1307 | return [(doc, score * weights[doc.metadata["group"]]) for doc, score in results]
1308 | ```
1309 |
1310 | 131. **What is explainability in LLM and RAG applications?**
1311 | Clarifies model decisions.
1312 | ```python
1313 | def explain_rag_response(qa_chain, query):
1314 | response = qa_chain.run(query)
1315 | docs = qa_chain.retriever.get_relevant_documents(query)
1316 | return {"response": response, "retrieved_docs": [doc.page_content for doc in docs]}
1317 | ```
1318 |
1319 | 132. **How do you visualize bias in RAG outputs?**
1320 | Plots group-wise response distribution.
1321 | ```python
1322 | import matplotlib.pyplot as plt
1323 | def plot_rag_bias(bias_metrics):
1324 | plt.bar(bias_metrics.keys(), bias_metrics.values())
1325 | plt.savefig("rag_bias.png")
1326 | ```
1327 |
1328 | #### Intermediate
1329 | 133. **Write a function to mitigate bias in RAG pipelines.**
1330 | Reweights retrieved documents.
1331 | ```python
1332 | def mitigate_rag_bias(vector_store, query, group_weights):
1333 | results = vector_store.similarity_search_with_score(query)
1334 | return sorted([(doc, score * group_weights[doc.metadata["group"]]) for doc, score in results], key=lambda x: x[1], reverse=True)
1335 | ```
1336 |
1337 | 134. **How do you implement differential privacy in RAG?**
1338 | Adds noise to embeddings.
1339 | ```python
1340 | import numpy as np
1341 | def private_embeddings(embeddings, texts, noise_scale=0.1):  # Gaussian noise scale (not a formal DP epsilon, where larger epsilon means less noise)
1342 | vectors = embeddings.embed_documents(texts)
1343 |     return [v + np.random.normal(0, noise_scale, len(v)) for v in vectors]
1344 | ```
1345 |
1346 | 135. **Write a function to assess fairness in RAG pipelines.**
1347 | Computes group-wise metrics.
1348 | ```python
1349 | def fairness_metrics_rag(qa_chain, queries, groups, references):
1350 | responses = [qa_chain.run(q) for q in queries]
1351 | return {g: sum(1 for r, ref, g_ in zip(responses, references, groups) if r == ref and g_ == g) / sum(1 for g_ in groups if g_ == g) for g in set(groups)}
1352 | ```
1353 |
1354 | 136. **How do you ensure energy-efficient RAG pipelines?**
1355 | Optimizes resource usage.
1356 | ```python
1357 | def efficient_rag(qa_chain, query, max_docs=3):
1358 | qa_chain.retriever.search_kwargs["k"] = max_docs
1359 | return qa_chain.run(query)
1360 | ```
1361 |
1362 | 137. **Write a function to audit RAG pipeline decisions.**
1363 | Logs queries and responses.
1364 | ```python
1365 | import logging
1366 | def audit_rag(qa_chain, query):
1367 | logging.basicConfig(filename="rag_audit.log", level=logging.INFO)
1368 | response = qa_chain.run(query)
1369 | logging.info(f"Query: {query}, Response: {response}")
1370 | return response
1371 | ```
1372 |
1373 | 138. **How do you visualize fairness metrics in RAG?**
1374 | Plots group-wise performance.
1375 | ```python
1376 | import matplotlib.pyplot as plt
1377 | def plot_fairness_metrics_rag(metrics):
1378 | plt.bar(metrics.keys(), metrics.values())
1379 | plt.savefig("rag_fairness.png")
1380 | ```
1381 |
1382 | #### Advanced
1383 | 139. **Write a function to implement fairness-aware RAG.**
1384 | Uses balanced retrieval.
1385 | ```python
1386 | def fairness_aware_rag(qa_chain, query, group_weights):
1387 | results = qa_chain.retriever.get_relevant_documents(query)
1388 |     ranked = sorted(results, key=lambda doc: group_weights[doc.metadata["group"]], reverse=True)
1389 |     return qa_chain.combine_documents_chain.run(input_documents=ranked, question=query)  # RetrievalQA.run() does not accept a documents argument
1390 | ```
1391 |
1392 | 140. **How do you implement privacy-preserving RAG?**
1393 |      Perturbs the query embedding with random noise before retrieval (a privacy-preserving safeguard).
1394 | ```python
1395 | def private_rag(vector_store, query, noise_scale=0.1):
1396 | query_vector = embeddings.embed_query(query)
1397 |     noisy_vector = query_vector + np.random.normal(0, noise_scale, len(query_vector))
1398 | return vector_store.similarity_search_by_vector(noisy_vector)
1399 | ```
1400 |
1401 | 141. **Write a function to monitor ethical risks in RAG.**
1402 | Tracks bias and fairness metrics.
1403 | ```python
1404 | import logging
1405 | def monitor_rag_ethics(qa_chain, queries, groups, references):
1406 | logging.basicConfig(filename="rag_ethics.log", level=logging.INFO)
1407 | metrics = fairness_metrics_rag(qa_chain, queries, groups, references)
1408 | logging.info(f"Fairness metrics: {metrics}")
1409 | return metrics
1410 | ```
1411 |
1412 | 142. **How do you implement explainable RAG?**
1413 | Provides retrieval context.
1414 | ```python
1415 | def explainable_rag(qa_chain, query):
1416 | docs = qa_chain.retriever.get_relevant_documents(query)
1417 | response = qa_chain.run(query)
1418 | return {"response": response, "context": [doc.page_content for doc in docs]}
1419 | ```
1420 |
1421 | 143. **Write a function to ensure regulatory compliance in RAG.**
1422 | Logs pipeline metadata.
1423 | ```python
1424 | import json
1425 | def log_rag_compliance(qa_chain, metadata):
1426 | with open("rag_compliance.json", "w") as f:
1427 | json.dump({"pipeline": str(qa_chain), "metadata": metadata}, f)
1428 | ```
1429 |
1430 | 144. **How do you implement ethical evaluation in RAG?**
1431 | Assesses fairness and robustness.
1432 | ```python
1433 | def ethical_rag_evaluation(qa_chain, queries, groups, references):
1434 | fairness = fairness_metrics_rag(qa_chain, queries, groups, references)
1435 |     robustness = evaluate_robustness(qa_chain, queries, references)
1436 | return {"fairness": fairness, "robustness": robustness}
1437 | ```
1438 |
1439 | ## Integration with Other Libraries
1440 |
1441 | ### Basic
1442 | 145. **How do you integrate LLM APIs with LangChain?**
1443 | Uses LangChain for orchestration.
1444 | ```python
1445 | from langchain.llms import OpenAI
1446 | llm = OpenAI(model="text-davinci-003")
1447 | ```
1448 |
1449 | 146. **How do you integrate RAG with Hugging Face?**
1450 | Uses Hugging Face embeddings.
1451 | ```python
1452 | from langchain.embeddings import HuggingFaceEmbeddings
1453 | embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
1454 | ```
1455 |
1456 | 147. **How do you use RAG with Matplotlib?**
1457 | Visualizes pipeline metrics.
1458 | ```python
1459 | import matplotlib.pyplot as plt
1460 | def plot_rag_data(data):
1461 | plt.plot(data)
1462 | plt.savefig("rag_data.png")
1463 | ```
1464 |
1465 | 148. **How do you integrate LLM APIs with FastAPI?**
1466 | Serves responses via API.
1467 | ```python
1468 | from fastapi import FastAPI
1469 | app = FastAPI()
1470 | client = OpenAI()
1471 | @app.post("/llm")
1472 | async def llm_call(prompt: str):
1473 | return {"response": call_llm(client, prompt)}
1474 | ```
1475 |
1476 | 149. **How do you use RAG with Pandas?**
1477 | Preprocesses document data.
1478 | ```python
1479 | import pandas as pd
1480 | def preprocess_with_pandas(df, column="text"):
1481 | return FAISS.from_texts(df[column].tolist(), embeddings)
1482 | ```
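     Usage sketch with a toy DataFrame (assumes the `embeddings` object from the earlier snippets):
   ```python
   import pandas as pd
   df = pd.DataFrame({"text": ["RAG combines retrieval and generation.", "FAISS stores dense vectors."]})
   vector_store = preprocess_with_pandas(df, column="text")
   ```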
1483 |
1484 | 150. **How do you integrate RAG with SQLite?**
1485 | Stores document metadata.
1486 | ```python
1487 | import sqlite3
1488 | def store_metadata(documents, db_path="metadata.db"):
1489 | conn = sqlite3.connect(db_path)
1490 | c = conn.cursor()
1491 | c.execute("CREATE TABLE IF NOT EXISTS docs (id INTEGER PRIMARY KEY, text TEXT)")
1492 | c.executemany("INSERT INTO docs (text) VALUES (?)", [(doc,) for doc in documents])
1493 | conn.commit()
1494 | conn.close()
1495 | ```
1496 |
1497 | #### Intermediate
1498 | 151. **Write a function to integrate RAG with LlamaIndex.**
1499 | Uses LlamaIndex for indexing.
1500 | ```python
1501 | from llama_index import VectorStoreIndex, SimpleDirectoryReader
1502 | def setup_llama_rag(directory):
1503 | documents = SimpleDirectoryReader(directory).load_data()
1504 | index = VectorStoreIndex.from_documents(documents)
1505 | return index.as_query_engine()
1506 | ```
1507 |
1508 | 152. **How do you integrate LLM APIs with Streamlit?**
1509 | Builds interactive apps.
1510 | ```python
1511 | import streamlit as st
1512 | def llm_streamlit_app(client):
1513 | st.title("LLM Chat")
1514 | prompt = st.text_input("Enter prompt")
1515 | if prompt:
1516 | response = call_llm(client, prompt)
1517 | st.write(response)
1518 | ```
1519 |
1520 | 153. **Write a function to integrate RAG with Weaviate.**
1521 | Uses Weaviate for vector storage.
1522 | ```python
1523 | import weaviate
1524 | from langchain.vectorstores import Weaviate
1524 | def setup_weaviate_rag(documents, embeddings):
1525 | client = weaviate.Client("http://localhost:8080")
1526 | return Weaviate.from_documents(documents, embeddings, client=client)
1527 | ```
1528 |
1529 | 154. **How do you integrate RAG with SQL databases?**
1530 | Stores and queries metadata.
1531 | ```python
1532 | import sqlite3
1533 | def query_metadata(query, db_path="metadata.db"):
1534 | conn = sqlite3.connect(db_path)
1535 | c = conn.cursor()
1536 | c.execute("SELECT text FROM docs WHERE text LIKE ?", (f"%{query}%",))
1537 | results = c.fetchall()
1538 | conn.close()
1539 | return [r[0] for r in results]
1540 | ```
1541 |
1542 | 155. **Write a function to integrate LLM APIs with Celery.**
1543 | Runs asynchronous tasks.
1544 | ```python
1545 | from celery import Celery
1546 | app = Celery("llm_tasks", broker="redis://localhost:6379")
1547 | @app.task
1548 | def async_llm_task(prompt):
1549 | client = OpenAI()
1550 | return call_llm(client, prompt)
1551 | ```
1552 |
1553 | 156. **How do you integrate RAG with Elasticsearch?**
1554 | Uses Elasticsearch for retrieval.
1555 | ```python
1556 | from langchain.vectorstores import ElasticsearchStore
1557 | def setup_elasticsearch_rag(documents, embeddings):
1558 | return ElasticsearchStore.from_documents(documents, embeddings, es_url="http://localhost:9200")
1559 | ```
1560 |
1561 | #### Advanced
1562 | 157. **Write a function to integrate RAG with GraphQL.**
1563 | Exposes RAG via GraphQL API.
1564 | ```python
1565 | from ariadne import QueryType, gql, make_executable_schema
1566 | from ariadne.asgi import GraphQL
1567 | type_defs = gql("""
1568 | type Query {
1569 | rag(query: String!): String
1570 | }
1571 | """)
1572 | query = QueryType()
1573 | @query.field("rag")
1574 | def resolve_rag(_, info, query):
1575 | qa_chain = info.context["qa_chain"]
1576 | return qa_chain.run(query)
1577 | schema = make_executable_schema(type_defs, query)
1578 | app = GraphQL(schema, context_value={"qa_chain": qa_chain})
1579 | ```
1580 |
1581 | 158. **How do you integrate RAG with Kubernetes?**
1582 | Deploys scalable RAG services.
1583 | ```python
1584 | from kubernetes import client, config
1585 | def deploy_rag_service():
1586 | config.load_kube_config()
1587 | v1 = client.CoreV1Api()
1588 | service = client.V1Service(
1589 | metadata=client.V1ObjectMeta(name="rag-service"),
1590 | spec=client.V1ServiceSpec(
1591 | selector={"app": "rag"},
1592 | ports=[client.V1ServicePort(port=80)]
1593 | )
1594 | )
1595 | v1.create_namespaced_service(namespace="default", body=service)
1596 | ```
1597 |
1598 | 159. **Write a function to integrate RAG with Apache Kafka.**
1599 | Processes streaming data.
1600 | ```python
1601 | from kafka import KafkaConsumer
1602 | def stream_rag_data(qa_chain, topic="rag_queries"):
1603 | consumer = KafkaConsumer(topic, bootstrap_servers="localhost:9092")
1604 | for message in consumer:
1605 | query = message.value.decode("utf-8")
1606 | yield qa_chain.run(query)
1607 | ```
1608 |
1609 | 160. **How do you integrate LLM APIs with Airflow?**
1610 | Orchestrates LLM workflows.
1611 | ```python
1612 | from airflow import DAG
1613 | from airflow.operators.python import PythonOperator
1614 | from datetime import datetime
1615 | def llm_task():
1616 | client = OpenAI()
1617 | return call_llm(client, "Test prompt")
1618 | with DAG("llm_dag", start_date=datetime(2025, 1, 1)) as dag:
1619 | task = PythonOperator(task_id="llm_task", python_callable=llm_task)
1620 | ```
1621 |
1622 | 161. **Write a function to integrate RAG with Redis.**
1623 | Caches query results.
1624 | ```python
1625 | import redis
1626 | def cache_rag_results(qa_chain, query):
1627 | r = redis.Redis(host="localhost", port=6379)
1628 | cached = r.get(query)
1629 | if cached:
1630 | return cached.decode("utf-8")
1631 | response = qa_chain.run(query)
1632 | r.set(query, response)
1633 | return response
1634 | ```
1635 |
1636 | 162. **How do you integrate RAG with MLflow?**
1637 | Tracks pipeline experiments.
1638 | ```python
1639 | import mlflow
1640 | def log_rag_experiment(qa_chain, query, metrics):
1641 | with mlflow.start_run():
1642 | mlflow.log_param("query", query)
1643 | for metric, value in metrics.items():
1644 | mlflow.log_metric(metric, value)
1645 | ```
1646 |
1647 | ## Deployment and Scalability
1648 |
1649 | ### Basic
1650 | 163. **How do you deploy an LLM API service?**
1651 | Uses FastAPI for serving.
1652 | ```python
1653 | from fastapi import FastAPI
1654 | app = FastAPI()
1655 | client = OpenAI()
1656 | @app.post("/llm")
1657 | async def llm_endpoint(prompt: str):
1658 | return {"response": call_llm(client, prompt)}
1659 | ```
1660 |
1661 | 164. **How do you deploy a RAG pipeline?**
1662 | Serves RAG via API.
1663 | ```python
1664 | from fastapi import FastAPI
1665 | app = FastAPI()
1666 | qa_chain = setup_rag_pipeline(["Doc"])
1667 | @app.post("/rag")
1668 | async def rag_endpoint(query: str):
1669 | return {"response": qa_chain.run(query)}
1670 | ```
1671 |
1672 | 165. **What is model quantization in the context of LLM deployment?**
1673 | Reduces model size for efficiency.
1674 | ```python
1675 | from transformers import AutoModelForCausalLM
1676 | model = AutoModelForCausalLM.from_pretrained("distilgpt2", load_in_8bit=True, device_map="auto")  # 8-bit quantized load (requires bitsandbytes); torch_dtype does not accept "int8"
1677 | ```
1678 |
1679 | 166. **How do you save a RAG pipeline for deployment?**
1680 | Persists vector store and model.
1681 | ```python
1682 | def save_rag_pipeline(qa_chain, path="rag_pipeline"):
1683 | qa_chain.retriever.vectorstore.save_local(path)
1684 | ```
1685 |
1686 | 167. **How do you load a deployed RAG pipeline?**
1687 | Restores pipeline state.
1688 | ```python
1689 | from langchain.vectorstores import FAISS
1690 | def load_rag_pipeline(path="rag_pipeline"):
1691 | vector_store = FAISS.load_local(path, embeddings)
1692 | return RetrievalQA.from_chain_type(llm=OpenAI(), retriever=vector_store.as_retriever())
1693 | ```
1694 |
1695 | 168. **How do you visualize deployment metrics?**
1696 | Plots latency and throughput.
1697 | ```python
1698 | import matplotlib.pyplot as plt
1699 | def plot_deployment_metrics(latencies, throughputs):
1700 | plt.plot(latencies, label="Latency")
1701 | plt.plot(throughputs, label="Throughput")
1702 | plt.legend()
1703 | plt.savefig("deployment_metrics.png")
1704 | ```
1705 |
1706 | #### Intermediate
1707 | 169. **Write a function to deploy a RAG pipeline with Docker.**
1708 | Containerizes the service.
1709 | ```python
1710 | def create_dockerfile():
1711 | with open("Dockerfile", "w") as f:
1712 | f.write("""
1713 | FROM python:3.9
1714 | COPY . /app
1715 | WORKDIR /app
1716 | RUN pip install langchain openai faiss-cpu fastapi uvicorn
1717 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
1718 | """)
1719 | ```
1720 |
1721 | 170. **How do you scale a RAG pipeline for production?**
1722 | Uses distributed vector stores and load balancing.
1723 | ```python
1724 | import weaviate
1725 | from langchain.vectorstores import Weaviate
1725 | def scale_rag_pipeline(documents, embeddings):
1726 | client = weaviate.Client("http://weaviate-cluster:8080")
1727 | vector_store = Weaviate.from_documents(documents, embeddings, client=client)
1728 | return RetrievalQA.from_chain_type(llm=OpenAI(), retriever=vector_store.as_retriever())
1729 | ```
--------------------------------------------------------------------------------