├── LICENSE
├── LLM with RAG Fundamentals
│   ├── 01 API Foundations
│   │   ├── api_foundations.py
│   │   └── README.md
│   ├── 02 Text Generation
│   │   ├── text_generation.py
│   │   └── README.md
│   ├── 03 Embeddings and Vector Stores
│   │   ├── embeddings_vector_stores.py
│   │   └── README.md
│   ├── 04 RAG Fundamentals
│   │   ├── rag_fundamentals.py
│   │   └── README.md
│   └── 05 Capstone RAG Application
│       ├── capstone_rag_application.py
│       └── README.md
├── README.md
└── LLM with RAG Interview Questions
    └── README.md
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 |
3 | Copyright (c) 2025 rohanmistry231
4 |
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 |
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 |
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/02 Text Generation/text_generation.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai requests matplotlib pandas nltk
2 | import os
3 | import openai
4 | import matplotlib.pyplot as plt
5 | import nltk
6 |
7 | def run_text_generation_demo():
8 | # Synthetic Query Data
9 | queries = [
10 | "Create a chatbot response for 'What is machine learning?'",
11 | "Write a 50-word sci-fi story about AI.",
12 | "Summarize blockchain technology in 3 sentences."
13 | ]
14 | print("Synthetic Data: Queries created")
15 | print(f"Queries: {queries}")
16 |
17 | # OpenAI API Configuration
18 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
19 |
20 | responses = []
21 | for query in queries:
22 | try:
23 | response = openai.ChatCompletion.create(
24 | model="gpt-3.5-turbo",
25 | messages=[{"role": "user", "content": query}],
26 | max_tokens=150
27 | )
28 | text = response.choices[0].message.content.strip()
29 | responses.append(text)
30 | print(f"Query: {query}")
31 | print(f"Response: {text}")
32 | except openai.error.OpenAIError as e:
33 | print(f"Error for {query}: {e}")
34 |
35 | # Visualization
36 | response_lengths = [len(nltk.word_tokenize(resp)) for resp in responses]
37 | plt.figure(figsize=(8, 4))
38 |     plt.bar(range(1, len(responses) + 1), response_lengths, color='blue')  # one bar per successful response
39 | plt.title("Text Generation Response Lengths")
40 | plt.xlabel("Query")
41 | plt.ylabel("Word Count")
42 | plt.savefig("text_generation_output.png")
43 | print("Visualization: Response lengths saved as text_generation_output.png")
44 |
45 | # Execute the demo
46 | if __name__ == "__main__":
47 | nltk.download('punkt', quiet=True)
48 | run_text_generation_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/01 API Foundations/api_foundations.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai requests matplotlib pandas nltk
2 | import os
3 | import requests
4 | import matplotlib.pyplot as plt
5 | from collections import Counter
6 | import nltk
7 |
8 | def run_api_foundations_demo():
9 | # Synthetic Query Data
10 | queries = [
11 | "Explain neural networks in simple terms.",
12 | "Write a short story about AI.",
13 | "Summarize the benefits of cloud computing."
14 | ]
15 | print("Synthetic Data: Queries created")
16 | print(f"Queries: {queries}")
17 |
18 | # OpenAI API Configuration
19 | api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
20 | url = "https://api.openai.com/v1/chat/completions"
21 | headers = {
22 | "Authorization": f"Bearer {api_key}",
23 | "Content-Type": "application/json"
24 | }
25 |
26 | # Track request success
27 | success_counts = {"Successful": 0, "Failed": 0}
28 |
29 | for query in queries:
30 | payload = {
31 | "model": "gpt-3.5-turbo",
32 | "messages": [{"role": "user", "content": query}],
33 | "max_tokens": 100
34 | }
35 | try:
36 | response = requests.post(url, headers=headers, json=payload)
37 | response.raise_for_status()
38 | success_counts["Successful"] += 1
39 | print(f"Query: {query}")
40 | print(f"Response: {response.json()['choices'][0]['message']['content'].strip()}")
41 | except requests.RequestException as e:
42 | success_counts["Failed"] += 1
43 | print(f"Error for {query}: {e}")
44 |
45 | # Visualization
46 | plt.figure(figsize=(8, 4))
47 | plt.bar(success_counts.keys(), success_counts.values(), color=['green', 'red'])
48 | plt.title("API Request Success Rates")
49 | plt.xlabel("Status")
50 | plt.ylabel("Count")
51 | plt.savefig("api_foundations_output.png")
52 | print("Visualization: Success rates saved as api_foundations_output.png")
53 |
54 | # Execute the demo
55 | if __name__ == "__main__":
56 | nltk.download('punkt', quiet=True)
57 | run_api_foundations_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/03 Embeddings and Vector Stores/embeddings_vector_stores.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai faiss-cpu requests matplotlib pandas nltk numpy
2 | import os
3 | import openai
4 | import faiss
5 | import numpy as np
6 | import matplotlib.pyplot as plt
7 |
8 |
9 | def run_embeddings_vector_stores_demo():
10 | # Synthetic Document Data
11 | documents = [
12 | "Machine learning is a subset of AI focusing on data-driven models.",
13 | "Deep learning uses neural networks for complex pattern recognition.",
14 | "Natural language processing enables computers to understand text.",
15 | "AI is transforming industries with automation and insights."
16 | ]
17 | query = "What is machine learning?"
18 | print("Synthetic Data: Documents and query created")
19 | print(f"Documents: {documents}")
20 | print(f"Query: {query}")
21 |
22 | # OpenAI API Configuration
23 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
24 |
25 | # Generate Embeddings
26 | embeddings = []
27 | for text in documents + [query]:
28 | try:
29 | response = openai.Embedding.create(
30 | model="text-embedding-ada-002",
31 | input=text
32 | )
33 | embeddings.append(response['data'][0]['embedding'])
34 | except openai.error.OpenAIError as e:
35 | print(f"Error for {text}: {e}")
36 | return
37 |
38 | # Store in FAISS
39 | dimension = len(embeddings[0])
40 | index = faiss.IndexFlatL2(dimension)
41 | index.add(np.array(embeddings[:-1]).astype('float32'))
42 |
43 | # Search with Query
44 | query_embedding = np.array([embeddings[-1]]).astype('float32')
45 | distances, indices = index.search(query_embedding, len(documents))
46 |     similarities = 1 - distances[0] / 2  # cosine similarity for unit-norm embeddings (IndexFlatL2 returns squared L2 distances)
47 | print("Similarities:", similarities)
48 |
49 | # Visualization
50 | plt.figure(figsize=(8, 4))
51 | plt.bar(range(1, len(documents) + 1), similarities, color='purple')
52 | plt.title("Semantic Search Similarity Scores")
53 | plt.xlabel("Document")
54 | plt.ylabel("Similarity")
55 | plt.savefig("embeddings_vector_stores_output.png")
56 | print("Visualization: Similarity scores saved as embeddings_vector_stores_output.png")
57 |
58 | # Execute the demo
59 | if __name__ == "__main__":
60 | run_embeddings_vector_stores_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/04 RAG Fundamentals/rag_fundamentals.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai langchain faiss-cpu requests matplotlib pandas nltk
2 | import os
3 | import openai
4 | from langchain.vectorstores import FAISS
5 | from langchain.embeddings import OpenAIEmbeddings
6 | from langchain.chat_models import ChatOpenAI
7 | from langchain.chains import RetrievalQA
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 |
11 | def run_rag_fundamentals_demo():
12 | # Synthetic Document Data
13 | documents = [
14 | "AI is transforming healthcare with predictive diagnostics.",
15 | "Machine learning models require large datasets for training.",
16 | "Deep learning excels in image and speech recognition.",
17 | "Natural language processing powers chatbots and translation."
18 | ]
19 | queries = [
20 | "How is AI used in healthcare?",
21 | "What is needed for machine learning?",
22 | "What does deep learning do?"
23 | ]
24 | print("Synthetic Data: Documents and queries created")
25 | print(f"Documents: {documents}")
26 | print(f"Queries: {queries}")
27 |
28 | # OpenAI API Configuration
29 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
30 |
31 | # Create Vector Store
32 | embeddings = OpenAIEmbeddings()
33 | vector_store = FAISS.from_texts(documents, embeddings)
34 |
35 | # RAG Pipeline
36 | llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=150)
37 | qa_chain = RetrievalQA.from_chain_type(
38 | llm=llm,
39 | chain_type="stuff",
40 | retriever=vector_store.as_retriever()
41 | )
42 |
43 | # Run Queries
44 | responses = []
45 | for query in queries:
46 | try:
47 | response = qa_chain.run(query)
48 | responses.append(response)
49 | print(f"Query: {query}")
50 | print(f"Response: {response.strip()}")
51 | except Exception as e:
52 | print(f"Error for {query}: {e}")
53 |
54 | # Visualization (Simulated Retrieval Accuracy)
55 | accuracy_scores = [0.9, 0.85, 0.95] # Simulated for demo
56 | plt.figure(figsize=(8, 4))
57 | plt.bar(range(1, len(queries) + 1), accuracy_scores, color='green')
58 | plt.title("Simulated Retrieval Accuracy")
59 | plt.xlabel("Query")
60 | plt.ylabel("Accuracy")
61 | plt.savefig("rag_fundamentals_output.png")
62 | print("Visualization: Retrieval accuracy saved as rag_fundamentals_output.png")
63 |
64 | # Execute the demo
65 | if __name__ == "__main__":
66 |     run_rag_fundamentals_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/05 Capstone RAG Application/capstone_rag_application.py:
--------------------------------------------------------------------------------
1 | # Setup: pip install openai langchain faiss-cpu requests matplotlib pandas nltk
2 | import os
3 | import openai
4 | from langchain.vectorstores import FAISS
5 | from langchain.embeddings import OpenAIEmbeddings
6 | from langchain.chat_models import ChatOpenAI
7 | from langchain.chains import RetrievalQA
8 | import matplotlib.pyplot as plt
9 | import numpy as np
10 | import time
11 |
12 | def run_capstone_rag_application_demo():
13 | # Synthetic Knowledge Base
14 | knowledge_base = [
15 | "AI in healthcare improves diagnostics with predictive models.",
16 | "Machine learning requires large, clean datasets for effective training.",
17 | "Deep learning uses neural networks for tasks like image recognition.",
18 | "NLP enables chatbots to understand and respond to human language.",
19 | "Cloud computing provides scalable infrastructure for AI applications."
20 | ]
21 | queries = [
22 | "How does AI improve healthcare?",
23 | "What are the requirements for machine learning?",
24 | "What is deep learning used for?",
25 | "How does NLP work in chatbots?"
26 | ]
27 | print("Synthetic Data: Knowledge base and queries created")
28 | print(f"Knowledge Base: {knowledge_base}")
29 | print(f"Queries: {queries}")
30 |
31 | # OpenAI API Configuration
32 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
33 |
34 | # Create Vector Store
35 | embeddings = OpenAIEmbeddings()
36 | vector_store = FAISS.from_texts(knowledge_base, embeddings)
37 |
38 | # RAG Pipeline
39 | llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=200)
40 | qa_chain = RetrievalQA.from_chain_type(
41 | llm=llm,
42 | chain_type="stuff",
43 | retriever=vector_store.as_retriever(search_kwargs={"k": 2})
44 | )
45 |
46 | # Run Queries and Measure Performance
47 | responses = []
48 | latencies = []
49 | for query in queries:
50 | start_time = time.time()
51 | try:
52 | response = qa_chain.run(query)
53 | responses.append(response)
54 | latencies.append(time.time() - start_time)
55 | print(f"Query: {query}")
56 | print(f"Response: {response.strip()}")
57 | except Exception as e:
58 | print(f"Error for {query}: {e}")
59 |
60 | # Visualization
61 | plt.figure(figsize=(8, 4))
62 | plt.plot(range(1, len(queries) + 1), latencies, marker='o', color='blue')
63 | plt.title("RAG Application Query Latencies")
64 | plt.xlabel("Query")
65 | plt.ylabel("Latency (seconds)")
66 | plt.savefig("capstone_rag_application_output.png")
67 | print("Visualization: Latencies saved as capstone_rag_application_output.png")
68 |
69 | # Execute the demo
70 | if __name__ == "__main__":
71 |     run_capstone_rag_application_demo()
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/02 Text Generation/README.md:
--------------------------------------------------------------------------------
1 | # 📝 Text Generation
2 |
3 | ## 📖 Introduction
4 |
5 | **Text Generation** uses OpenAI’s Chat and Completion APIs to create conversational and creative text, a core component for RAG applications. This guide provides Python examples, focusing on the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **Chat API**: Generating conversational responses.
10 | - **Completion API**: Creating structured or creative text.
11 | - **Prompt Design**: Crafting prompts for quality outputs.
12 | - **Response Metrics**: Evaluating length and relevance.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # text_generation.py
18 | # Setup: pip install openai requests matplotlib pandas nltk
19 | import os
20 | import openai
21 | import matplotlib.pyplot as plt
22 | import nltk
23 |
24 | def run_text_generation_demo():
25 | # Synthetic Query Data
26 | queries = [
27 | "Create a chatbot response for 'What is machine learning?'",
28 | "Write a 50-word sci-fi story about AI.",
29 | "Summarize blockchain technology in 3 sentences."
30 | ]
31 | print("Synthetic Data: Queries created")
32 | print(f"Queries: {queries}")
33 |
34 | # OpenAI API Configuration
35 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
36 |
37 | responses = []
38 | for query in queries:
39 | try:
40 | response = openai.ChatCompletion.create(
41 | model="gpt-3.5-turbo",
42 | messages=[{"role": "user", "content": query}],
43 | max_tokens=150
44 | )
45 | text = response.choices[0].message.content.strip()
46 | responses.append(text)
47 | print(f"Query: {query}")
48 | print(f"Response: {text}")
49 | except openai.error.OpenAIError as e:
50 | print(f"Error for {query}: {e}")
51 |
52 | # Visualization
53 | response_lengths = [len(nltk.word_tokenize(resp)) for resp in responses]
54 | plt.figure(figsize=(8, 4))
55 |     plt.bar(range(1, len(responses) + 1), response_lengths, color='blue')  # one bar per successful response
56 | plt.title("Text Generation Response Lengths")
57 | plt.xlabel("Query")
58 | plt.ylabel("Word Count")
59 | plt.savefig("text_generation_output.png")
60 | print("Visualization: Response lengths saved as text_generation_output.png")
61 |
62 | # Execute the demo
63 | if __name__ == "__main__":
64 | nltk.download('punkt', quiet=True)
65 | run_text_generation_demo()
66 | ```
67 |
68 | ## 📊 Visualization Output
69 |
70 | The code generates a bar chart (`text_generation_output.png`) showing response word counts, illustrating output consistency.
71 |
72 | ## 💡 Applications
73 |
74 | - **Chatbots**: Build conversational interfaces for RAG systems.
75 | - **Content Creation**: Generate summaries or stories.
76 | - **RAG Integration**: Provide LLM responses for retrieved data.
77 |
78 | ## 🏆 Practical Tasks
79 |
80 | 1. Build a chatbot prompt for user queries.
81 | 2. Generate creative text (e.g., story, summary).
82 | 3. Visualize response lengths for different queries.
83 |
84 | ## 💡 Interview Scenarios
85 |
86 | **Question**: How do you use OpenAI’s Chat API for text generation?
87 | **Answer**: The Chat API generates text using a model and messages array, guided by prompts.
88 | **Key**: Prompt design ensures relevant outputs.
89 | **Example**: `openai.ChatCompletion.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": query}])`
90 |
91 | **Coding Task**: Create a chatbot response using OpenAI’s Chat API.
92 | **Tip**: Use `ChatCompletion.create` with a user prompt.
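
A minimal sketch of this coding task, using the same pre-1.0 `openai` SDK as the demo above (the helper name is illustrative; assumes `OPENAI_API_KEY` is set):

```python
# Illustrative helper: one chat completion per user message.
import os
import openai

openai.api_key = os.getenv("OPENAI_API_KEY")

def chatbot_reply(user_message):
    response = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=[{"role": "user", "content": user_message}],
        max_tokens=100,
    )
    return response.choices[0].message.content.strip()

print(chatbot_reply("What is machine learning?"))
```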
93 |
94 | ## 📚 Resources
95 |
96 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
97 | - [NLTK Documentation](https://www.nltk.org/)
98 |
99 | ## 🤝 Contributions
100 |
101 | 1. Fork the repository.
102 | 2. Create a feature branch (`git checkout -b feature/text-generation`).
103 | 3. Commit changes (`git commit -m 'Add text generation content'`).
104 | 4. Push to the branch (`git push origin feature/text-generation`).
105 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/01 API Foundations/README.md:
--------------------------------------------------------------------------------
1 | # 🛠️ API Foundations
2 |
3 | ## 📖 Introduction
4 |
5 | **API Foundations** introduces integrating OpenAI’s API, covering authentication, request structure, and error handling. This guide provides hands-on Python examples, setting the stage for RAG applications, aligned with the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **API Concepts**: REST APIs, endpoints, JSON payloads.
10 | - **Authentication**: Securing OpenAI API with keys.
11 | - **Environment Setup**: Configuring Python for API use.
12 | - **Request Handling**: Sending and parsing responses.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # api_foundations.py
18 | # Setup: pip install openai requests matplotlib pandas nltk
19 | import os
20 | import requests
21 | import matplotlib.pyplot as plt
22 | from collections import Counter
23 | import nltk
24 |
25 | def run_api_foundations_demo():
26 | # Synthetic Query Data
27 | queries = [
28 | "Explain neural networks in simple terms.",
29 | "Write a short story about AI.",
30 | "Summarize the benefits of cloud computing."
31 | ]
32 | print("Synthetic Data: Queries created")
33 | print(f"Queries: {queries}")
34 |
35 | # OpenAI API Configuration
36 | api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
37 | url = "https://api.openai.com/v1/chat/completions"
38 | headers = {
39 | "Authorization": f"Bearer {api_key}",
40 | "Content-Type": "application/json"
41 | }
42 |
43 | # Track request success
44 | success_counts = {"Successful": 0, "Failed": 0}
45 |
46 | for query in queries:
47 | payload = {
48 | "model": "gpt-3.5-turbo",
49 | "messages": [{"role": "user", "content": query}],
50 | "max_tokens": 100
51 | }
52 | try:
53 | response = requests.post(url, headers=headers, json=payload)
54 | response.raise_for_status()
55 | success_counts["Successful"] += 1
56 | print(f"Query: {query}")
57 | print(f"Response: {response.json()['choices'][0]['message']['content'].strip()}")
58 | except requests.RequestException as e:
59 | success_counts["Failed"] += 1
60 | print(f"Error for {query}: {e}")
61 |
62 | # Visualization
63 | plt.figure(figsize=(8, 4))
64 | plt.bar(success_counts.keys(), success_counts.values(), color=['green', 'red'])
65 | plt.title("API Request Success Rates")
66 | plt.xlabel("Status")
67 | plt.ylabel("Count")
68 | plt.savefig("api_foundations_output.png")
69 | print("Visualization: Success rates saved as api_foundations_output.png")
70 |
71 | # Execute the demo
72 | if __name__ == "__main__":
73 | nltk.download('punkt', quiet=True)
74 | run_api_foundations_demo()
75 | ```
76 |
77 | ## 📊 Visualization Output
78 |
79 | The code generates a bar chart (`api_foundations_output.png`) showing successful and failed API requests, illustrating reliability.
80 |
81 | ## 💡 Applications
82 |
83 | - **Chatbots**: Authenticate APIs for conversational systems.
84 | - **Content Generation**: Set up APIs for text creation.
85 | - **RAG Preparation**: Establish API connectivity for retrieval systems.
86 |
87 | ## 🏆 Practical Tasks
88 |
89 | 1. Authenticate and test OpenAI API connectivity.
90 | 2. Handle basic API errors (e.g., invalid key, rate limits).
91 | 3. Visualize API request success and failure rates.
92 |
93 | ## 💡 Interview Scenarios
94 |
95 | **Question**: What are the key components of an OpenAI API request?
96 | **Answer**: Endpoint, headers (with API key), and JSON payload (model, messages).
97 | **Key**: Authentication ensures secure access.
98 | **Example**: `requests.post(url, headers={"Authorization": f"Bearer {api_key}"}, json=payload)`
99 |
100 | **Coding Task**: Authenticate and send a query to OpenAI API.
101 | **Tip**: Use `requests.post` with headers and payload.
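
A minimal sketch of this coding task, sending one authenticated request to the chat completions endpoint (assumes `OPENAI_API_KEY` is set in the environment):

```python
# Illustrative single request: authenticate via header, send JSON payload, parse reply.
import os
import requests

api_key = os.getenv("OPENAI_API_KEY")
response = requests.post(
    "https://api.openai.com/v1/chat/completions",
    headers={"Authorization": f"Bearer {api_key}", "Content-Type": "application/json"},
    json={
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": "Explain neural networks in one sentence."}],
        "max_tokens": 60,
    },
    timeout=30,
)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"].strip())
```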
102 |
103 | ## 📚 Resources
104 |
105 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
106 | - [Python Requests Documentation](https://requests.readthedocs.io/)
107 |
108 | ## 🤝 Contributions
109 |
110 | 1. Fork the repository.
111 | 2. Create a feature branch (`git checkout -b feature/api-foundations`).
112 | 3. Commit changes (`git commit -m 'Add API foundations content'`).
113 | 4. Push to the branch (`git push origin feature/api-foundations`).
114 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/03 Embeddings and Vector Stores/README.md:
--------------------------------------------------------------------------------
1 | # 🔍 Embeddings and Vector Stores
2 |
3 | ## 📖 Introduction
4 |
5 | **Embeddings and Vector Stores** leverages OpenAI’s Embeddings API and vector databases (e.g., FAISS) to enable semantic search and data storage, critical for RAG systems. This guide provides Python examples, focusing on the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **Embeddings API**: Generating text vectors.
10 | - **Vector Stores**: Storing and querying embeddings (e.g., FAISS).
11 | - **Semantic Search**: Retrieving relevant texts by meaning.
12 | - **RAG Preparation**: Building retrieval components.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # embeddings_vector_stores.py
18 | # Setup: pip install openai faiss-cpu requests matplotlib pandas nltk numpy
19 | import os
20 | import openai
21 | import faiss
22 | import numpy as np
23 | import matplotlib.pyplot as plt
24 |
25 |
26 | def run_embeddings_vector_stores_demo():
27 | # Synthetic Document Data
28 | documents = [
29 | "Machine learning is a subset of AI focusing on data-driven models.",
30 | "Deep learning uses neural networks for complex pattern recognition.",
31 | "Natural language processing enables computers to understand text.",
32 | "AI is transforming industries with automation and insights."
33 | ]
34 | query = "What is machine learning?"
35 | print("Synthetic Data: Documents and query created")
36 | print(f"Documents: {documents}")
37 | print(f"Query: {query}")
38 |
39 | # OpenAI API Configuration
40 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
41 |
42 | # Generate Embeddings
43 | embeddings = []
44 | for text in documents + [query]:
45 | try:
46 | response = openai.Embedding.create(
47 | model="text-embedding-ada-002",
48 | input=text
49 | )
50 | embeddings.append(response['data'][0]['embedding'])
51 | except openai.error.OpenAIError as e:
52 | print(f"Error for {text}: {e}")
53 | return
54 |
55 | # Store in FAISS
56 | dimension = len(embeddings[0])
57 | index = faiss.IndexFlatL2(dimension)
58 | index.add(np.array(embeddings[:-1]).astype('float32'))
59 |
60 | # Search with Query
61 | query_embedding = np.array([embeddings[-1]]).astype('float32')
62 | distances, indices = index.search(query_embedding, len(documents))
63 |     similarities = 1 - distances[0] / 2  # cosine similarity for unit-norm embeddings (IndexFlatL2 returns squared L2 distances)
64 | print("Similarities:", similarities)
65 |
66 | # Visualization
67 | plt.figure(figsize=(8, 4))
68 | plt.bar(range(1, len(documents) + 1), similarities, color='purple')
69 | plt.title("Semantic Search Similarity Scores")
70 | plt.xlabel("Document")
71 | plt.ylabel("Similarity")
72 | plt.savefig("embeddings_vector_stores_output.png")
73 | print("Visualization: Similarity scores saved as embeddings_vector_stores_output.png")
74 |
75 | # Execute the demo
76 | if __name__ == "__main__":
77 | run_embeddings_vector_stores_demo()
78 | ```
79 |
80 | ## 📊 Visualization Output
81 |
82 | The code generates a bar chart (`embeddings_vector_stores_output.png`) showing similarity scores between the query and documents, illustrating retrieval relevance.
83 |
84 | ## 💡 Applications
85 |
86 | - **RAG Systems**: Retrieve context for LLM responses.
87 | - **Search Engines**: Implement semantic search for documents.
88 | - **Recommendations**: Suggest similar content based on embeddings.
89 |
90 | ## 🏆 Practical Tasks
91 |
92 | 1. Generate embeddings for documents.
93 | 2. Store embeddings in FAISS and perform semantic search.
94 | 3. Visualize similarity scores for search results.
95 |
96 | ## 💡 Interview Scenarios
97 |
98 | **Question**: How do embeddings and vector stores enable RAG?
99 | **Answer**: Embeddings convert text to vectors; vector stores like FAISS retrieve relevant documents for LLMs.
100 | **Key**: Enhances context-aware responses.
101 | **Example**: `openai.Embedding.create` and `faiss.IndexFlatL2`
102 |
103 | **Coding Task**: Implement semantic search with FAISS.
104 | **Tip**: Use `Embedding.create` and FAISS index.
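
A minimal sketch of this coding task, assuming the pre-1.0 `openai` SDK and document vectors already computed as in the demo above (the function name is illustrative):

```python
# Illustrative semantic search: index document vectors, embed the query, return top-k.
import faiss
import numpy as np
import openai

def semantic_search(query, doc_vectors, k=3):
    index = faiss.IndexFlatL2(doc_vectors.shape[1])  # exact L2 index
    index.add(doc_vectors.astype("float32"))
    response = openai.Embedding.create(model="text-embedding-ada-002", input=query)
    query_vector = np.array([response["data"][0]["embedding"]], dtype="float32")
    distances, indices = index.search(query_vector, k)
    return list(zip(indices[0].tolist(), distances[0].tolist()))  # (doc index, squared L2)
```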
105 |
106 | ## 📚 Resources
107 |
108 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
109 | - [FAISS Documentation](https://github.com/facebookresearch/faiss)
110 |
111 | ## 🤝 Contributions
112 |
113 | 1. Fork the repository.
114 | 2. Create a feature branch (`git checkout -b feature/embeddings-vector-stores`).
115 | 3. Commit changes (`git commit -m 'Add embeddings and vector stores content'`).
116 | 4. Push to the branch (`git push origin feature/embeddings-vector-stores`).
117 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/04 RAG Fundamentals/README.md:
--------------------------------------------------------------------------------
1 | # 📚 RAG Fundamentals
2 |
3 | ## 📖 Introduction
4 |
5 | **RAG Fundamentals** introduces Retrieval-Augmented Generation, combining OpenAI’s LLM with external knowledge bases for context-aware responses. This guide provides Python examples using LangChain and FAISS, focusing on the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **RAG Overview**: Integrating retrieval with generation.
10 | - **LangChain**: Building RAG pipelines with OpenAI and vector stores.
11 | - **Retrieval**: Fetching relevant documents for LLM context.
12 | - **Evaluation**: Assessing retrieval and response quality.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # rag_fundamentals.py
18 | # Setup: pip install openai langchain faiss-cpu requests matplotlib pandas nltk
19 | import os
20 | import openai
21 | from langchain.vectorstores import FAISS
22 | from langchain.embeddings import OpenAIEmbeddings
23 | from langchain.chat_models import ChatOpenAI
24 | from langchain.chains import RetrievalQA
25 | import matplotlib.pyplot as plt
26 | import numpy as np
27 |
28 | def run_rag_fundamentals_demo():
29 | # Synthetic Document Data
30 | documents = [
31 | "AI is transforming healthcare with predictive diagnostics.",
32 | "Machine learning models require large datasets for training.",
33 | "Deep learning excels in image and speech recognition.",
34 | "Natural language processing powers chatbots and translation."
35 | ]
36 | queries = [
37 | "How is AI used in healthcare?",
38 | "What is needed for machine learning?",
39 | "What does deep learning do?"
40 | ]
41 | print("Synthetic Data: Documents and queries created")
42 | print(f"Documents: {documents}")
43 | print(f"Queries: {queries}")
44 |
45 | # OpenAI API Configuration
46 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
47 |
48 | # Create Vector Store
49 | embeddings = OpenAIEmbeddings()
50 | vector_store = FAISS.from_texts(documents, embeddings)
51 |
52 | # RAG Pipeline
53 | llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=150)
54 | qa_chain = RetrievalQA.from_chain_type(
55 | llm=llm,
56 | chain_type="stuff",
57 | retriever=vector_store.as_retriever()
58 | )
59 |
60 | # Run Queries
61 | responses = []
62 | for query in queries:
63 | try:
64 | response = qa_chain.run(query)
65 | responses.append(response)
66 | print(f"Query: {query}")
67 | print(f"Response: {response.strip()}")
68 | except Exception as e:
69 | print(f"Error for {query}: {e}")
70 |
71 | # Visualization (Simulated Retrieval Accuracy)
72 | accuracy_scores = [0.9, 0.85, 0.95] # Simulated for demo
73 | plt.figure(figsize=(8, 4))
74 | plt.bar(range(1, len(queries) + 1), accuracy_scores, color='green')
75 | plt.title("Simulated Retrieval Accuracy")
76 | plt.xlabel("Query")
77 | plt.ylabel("Accuracy")
78 | plt.savefig("rag_fundamentals_output.png")
79 | print("Visualization: Retrieval accuracy saved as rag_fundamentals_output.png")
80 |
81 | # Execute the demo
82 | if __name__ == "__main__":
83 |     run_rag_fundamentals_demo()
85 | ```
86 |
87 | ## 📊 Visualization Output
88 |
89 | The code generates a bar chart (`rag_fundamentals_output.png`) showing simulated retrieval accuracy scores, illustrating RAG performance.
90 |
91 | ## 💡 Applications
92 |
93 | - **Question Answering**: Enhance LLM responses with external data.
94 | - **Chatbots**: Provide context-aware conversational agents.
95 | - **Knowledge Management**: Query internal documents intelligently.
96 |
97 | ## 🏆 Practical Tasks
98 |
99 | 1. Build a RAG pipeline with LangChain and FAISS.
100 | 2. Test RAG with sample queries and documents.
101 | 3. Visualize retrieval accuracy or response metrics.
102 |
103 | ## 💡 Interview Scenarios
104 |
105 | **Question**: What is Retrieval-Augmented Generation?
106 | **Answer**: RAG combines LLMs with retrieval from vector stores to provide context-aware responses.
107 | **Key**: Improves accuracy over standalone LLMs.
108 | **Example**: `RetrievalQA.from_chain_type` with FAISS retriever.
109 |
110 | **Coding Task**: Build a basic RAG pipeline.
111 | **Tip**: Use `LangChain` with `OpenAIEmbeddings` and `FAISS`.
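
A minimal sketch of this coding task, using the same legacy `langchain` imports as the demo above (assumes `OPENAI_API_KEY` is set; the function name is illustrative):

```python
# Illustrative RAG pipeline: embed texts into FAISS, wire a retriever into RetrievalQA.
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

def build_rag_chain(documents):
    vector_store = FAISS.from_texts(documents, OpenAIEmbeddings())
    llm = ChatOpenAI(model="gpt-3.5-turbo")
    return RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())

chain = build_rag_chain(["AI is transforming healthcare with predictive diagnostics."])
print(chain.run("How is AI used in healthcare?"))
```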
112 |
113 | ## 📚 Resources
114 |
115 | - [LangChain Documentation](https://python.langchain.com/docs/)
116 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
117 | - [FAISS Documentation](https://github.com/facebookresearch/faiss)
118 |
119 | ## 🤝 Contributions
120 |
121 | 1. Fork the repository.
122 | 2. Create a feature branch (`git checkout -b feature/rag-fundamentals`).
123 | 3. Commit changes (`git commit -m 'Add RAG fundamentals content'`).
124 | 4. Push to the branch (`git push origin feature/rag-fundamentals`).
125 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/LLM with RAG Fundamentals/05 Capstone RAG Application/README.md:
--------------------------------------------------------------------------------
1 | # 🌟 Capstone RAG Application
2 |
3 | ## 📖 Introduction
4 |
5 | **Capstone RAG Application** guides you through building a production-ready question-answering app using OpenAI’s API, LangChain, and FAISS. This capstone project integrates LLM and retrieval for a scalable AI solution, aligned with the AI-driven era (May 3, 2025).
6 |
7 | ## 🌟 Key Concepts
8 |
9 | - **RAG Application**: Combining LLM with vector store for Q&A.
10 | - **Scalability**: Handling large document sets and queries.
11 | - **Performance Optimization**: Balancing latency and accuracy.
12 | - **Evaluation**: Measuring response quality and retrieval accuracy.
13 |
14 | ## 🛠️ Practical Example
15 |
16 | ```python
17 | # capstone_rag_application.py
18 | # Setup: pip install openai langchain faiss-cpu requests matplotlib pandas nltk
19 | import os
20 | import openai
21 | from langchain.vectorstores import FAISS
22 | from langchain.embeddings import OpenAIEmbeddings
23 | from langchain.chat_models import ChatOpenAI
24 | from langchain.chains import RetrievalQA
25 | import matplotlib.pyplot as plt
26 | import numpy as np
27 | import time
28 |
29 | def run_capstone_rag_application_demo():
30 | # Synthetic Knowledge Base
31 | knowledge_base = [
32 | "AI in healthcare improves diagnostics with predictive models.",
33 | "Machine learning requires large, clean datasets for effective training.",
34 | "Deep learning uses neural networks for tasks like image recognition.",
35 | "NLP enables chatbots to understand and respond to human language.",
36 | "Cloud computing provides scalable infrastructure for AI applications."
37 | ]
38 | queries = [
39 | "How does AI improve healthcare?",
40 | "What are the requirements for machine learning?",
41 | "What is deep learning used for?",
42 | "How does NLP work in chatbots?"
43 | ]
44 | print("Synthetic Data: Knowledge base and queries created")
45 | print(f"Knowledge Base: {knowledge_base}")
46 | print(f"Queries: {queries}")
47 |
48 | # OpenAI API Configuration
49 | openai.api_key = os.getenv("OPENAI_API_KEY", "your-openai-api-key")
50 |
51 | # Create Vector Store
52 | embeddings = OpenAIEmbeddings()
53 | vector_store = FAISS.from_texts(knowledge_base, embeddings)
54 |
55 | # RAG Pipeline
56 | llm = ChatOpenAI(model="gpt-3.5-turbo", max_tokens=200)
57 | qa_chain = RetrievalQA.from_chain_type(
58 | llm=llm,
59 | chain_type="stuff",
60 | retriever=vector_store.as_retriever(search_kwargs={"k": 2})
61 | )
62 |
63 | # Run Queries and Measure Performance
64 | responses = []
65 | latencies = []
66 | for query in queries:
67 | start_time = time.time()
68 | try:
69 | response = qa_chain.run(query)
70 | responses.append(response)
71 | latencies.append(time.time() - start_time)
72 | print(f"Query: {query}")
73 | print(f"Response: {response.strip()}")
74 | except Exception as e:
75 | print(f"Error for {query}: {e}")
76 |
77 | # Visualization
78 | plt.figure(figsize=(8, 4))
79 | plt.plot(range(1, len(queries) + 1), latencies, marker='o', color='blue')
80 | plt.title("RAG Application Query Latencies")
81 | plt.xlabel("Query")
82 | plt.ylabel("Latency (seconds)")
83 | plt.savefig("capstone_rag_application_output.png")
84 | print("Visualization: Latencies saved as capstone_rag_application_output.png")
85 |
86 | # Execute the demo
87 | if __name__ == "__main__":
88 |     run_capstone_rag_application_demo()
90 | ```
91 |
92 | ## 📊 Visualization Output
93 |
94 | The code generates a line plot (`capstone_rag_application_output.png`) showing query latencies, illustrating application performance.
95 |
96 | ## 💡 Applications
97 |
98 | - **Enterprise Q&A**: Query internal documents for insights.
99 | - **Customer Support**: Build context-aware chatbots.
100 | - **Research Tools**: Enhance research with knowledge-backed answers.
101 |
102 | ## 🏆 Practical Tasks
103 |
104 | 1. Build a RAG Q&A app with a large knowledge base.
105 | 2. Optimize retrieval and LLM response times.
106 | 3. Visualize performance metrics (e.g., latency, accuracy).
107 |
108 | ## 💡 Interview Scenarios
109 |
110 | **Question**: How do you build a scalable RAG application?
111 | **Answer**: Use LangChain with OpenAI and FAISS for retrieval and generation, optimizing for latency and accuracy.
112 | **Key**: Balances retrieval and LLM performance.
113 | **Example**: `RetrievalQA` with `FAISS` retriever.
114 |
115 | **Coding Task**: Develop a RAG Q&A system.
116 | **Tip**: Use `LangChain`, `OpenAIEmbeddings`, and `FAISS`.
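
A minimal sketch of this coding task, combining a top-2 retriever with per-query timing, using the same legacy `langchain` imports as the demo above (names are illustrative):

```python
# Illustrative Q&A chain with k=2 retrieval and a simple latency measurement.
import time
from langchain.chains import RetrievalQA
from langchain.chat_models import ChatOpenAI
from langchain.embeddings import OpenAIEmbeddings
from langchain.vectorstores import FAISS

def build_qa(knowledge_base):
    store = FAISS.from_texts(knowledge_base, OpenAIEmbeddings())
    return RetrievalQA.from_chain_type(
        llm=ChatOpenAI(model="gpt-3.5-turbo"),
        chain_type="stuff",
        retriever=store.as_retriever(search_kwargs={"k": 2}),
    )

qa = build_qa(["AI in healthcare improves diagnostics with predictive models."])
start = time.time()
print(qa.run("How does AI improve healthcare?"), f"({time.time() - start:.2f}s)")
```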
117 |
118 | ## 📚 Resources
119 |
120 | - [LangChain Documentation](https://python.langchain.com/docs/)
121 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
122 | - [FAISS Documentation](https://github.com/facebookresearch/faiss)
123 |
124 | ## 🤝 Contributions
125 |
126 | 1. Fork the repository.
127 | 2. Create a feature branch (`git checkout -b feature/capstone-rag`).
128 | 3. Commit changes (`git commit -m 'Add capstone RAG content'`).
129 | 4. Push to the branch (`git push origin feature/capstone-rag`).
130 | 5. Open a Pull Request.
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # 🧠 LLM API and RAG Integration with Python
2 |
3 |
9 | Your step-by-step guide to mastering LLM API integration and building Retrieval-Augmented Generation (RAG) applications with Python for AI-driven solutions and interview preparation
10 |
11 | ---
12 |
13 | ## 📖 Introduction
14 |
15 | Welcome to the **LLM API and RAG Integration with Python Roadmap**! 🚀 This roadmap guides you through integrating large language model (LLM) APIs, focusing on OpenAI, and building Retrieval-Augmented Generation (RAG) applications using Python. It progresses from API basics to creating a capstone RAG project—a sophisticated app leveraging LLMs and external knowledge bases. Designed for the AI-driven era (May 3, 2025), this roadmap prepares you for AI/ML interviews and equips you with practical skills for 6 LPA+ roles.
16 |
17 | ## 🌟 What’s Inside?
18 |
19 | - **API Foundations**: Mastering OpenAI API setup and authentication.
20 | - **Text Generation**: Creating conversational and creative text outputs.
21 | - **Embeddings and Vector Stores**: Using embeddings for semantic search and storage.
22 | - **RAG Fundamentals**: Combining LLMs with external knowledge for enhanced responses.
23 | - **Capstone RAG Application**: Building a production-ready RAG app for question answering.
24 | - **Hands-on Code**: Five modules with Python examples, visualizations, and a capstone project.
25 | - **Interview Scenarios**: Key questions and answers for LLM and RAG interviews.
26 |
27 | ## 🔍 Who Is This For?
28 |
29 | - AI Engineers building LLM and RAG-based applications.
30 | - Machine Learning Engineers mastering API-driven AI and retrieval systems.
31 | - AI Researchers exploring OpenAI and RAG frameworks.
32 | - Software Engineers deepening Python-based AI expertise.
33 | - Anyone preparing for AI/ML interviews in tech.
34 |
35 | ## 🗺️ Learning Roadmap
36 |
37 | This roadmap covers five key areas, each with a dedicated module (a Python script plus README), progressing from LLM API basics to a capstone RAG application:
38 |
39 | ### 🛠️ API Foundations (`api_foundations.py`)
40 | - API Concepts and OpenAI Authentication
41 | - Environment Setup and Testing
42 | - API Request Visualization
43 |
44 | ### 📝 Text Generation (`text_generation.py`)
45 | - OpenAI Chat and Completion APIs
46 | - Conversational and Creative Text Applications
47 | - Response Metrics Visualization
48 |
49 | ### 🔍 Embeddings and Vector Stores (`embeddings_vector_stores.py`)
50 | - OpenAI Embeddings API and Vector Databases
51 | - Semantic Search and Data Storage
52 | - Similarity Score Visualization
53 |
54 | ### 📚 RAG Fundamentals (`rag_fundamentals.py`)
55 | - Retrieval-Augmented Generation Concepts
56 | - Integrating LLMs with Knowledge Bases
57 | - Retrieval Accuracy Visualization
58 |
59 | ### 🌟 Capstone RAG Application (`capstone_rag_application.py`)
60 | - Building a Question-Answering RAG App
61 | - Scalable API and Retrieval Integration
62 | - Application Performance Visualization
63 |
64 | ## 💡 Why Master LLM API and RAG Integration?
65 |
66 | LLM APIs and RAG are game-changers in AI:
67 | 1. **Versatility**: Powers chatbots, Q&A systems, and analytics with context-aware responses.
68 | 2. **Interview Relevance**: Tested in coding challenges (e.g., API integration, RAG pipelines).
69 | 3. **Scalability**: Enables production-ready, knowledge-enhanced AI solutions.
70 | 4. **Industry Demand**: Critical for AI/ML roles in tech.
71 |
72 | ## 📆 Study Plan
73 |
74 | - **Week 1**:
75 | - Day 1-2: API Foundations
76 | - Day 3-4: Text Generation
77 | - Day 5-6: Embeddings and Vector Stores
78 | - Day 7: Review Week 1
79 | - **Week 2**:
80 | - Day 1-2: RAG Fundamentals
81 | - Day 3-4: Capstone RAG Application
82 |   - Day 5-7: Review the module READMEs and practice interview scenarios.
83 |
84 | ## 🛠️ Setup Instructions
85 |
86 | 1. **Python Environment**:
87 | - Install Python 3.8+ and pip.
88 | - Create a virtual environment: `python -m venv rag_env; source rag_env/bin/activate`.
89 | - Install dependencies: `pip install openai langchain faiss-cpu requests matplotlib pandas nltk`.
90 | 2. **API Keys**:
91 | - Obtain an OpenAI API key from [OpenAI](https://platform.openai.com/).
92 | - Set environment variable:
93 | ```bash
94 | export OPENAI_API_KEY="your-openai-api-key"
95 | ```
96 | 3. **Datasets**:
97 | - Uses synthetic data (e.g., queries, documents).
98 | - Optional: Download datasets from [Hugging Face Datasets](https://huggingface.co/datasets).
99 | - Note: Code uses simulated data to avoid file I/O constraints.
100 | 4. **Running Code**:
101 |    - Run the provided scripts (e.g., `api_foundations.py`) directly, or copy code from each module's README into a Python file.
102 | - Use Google Colab or local setup.
103 | - View outputs in terminal and Matplotlib visualizations (PNGs).
104 | - Check terminal for errors; ensure dependencies and API keys are set.
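
A quick sanity check, assuming the key was exported as shown above, before launching any demo script:

```python
# Fails fast if the API key is missing from the environment.
import os

if not os.getenv("OPENAI_API_KEY"):
    raise SystemExit("OPENAI_API_KEY is not set; export it before running the demos.")
```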
105 |
106 | ## 🏆 Practical Tasks
107 |
108 | 1. **API Foundations**:
109 | - Authenticate and test OpenAI API connectivity.
110 | - Visualize API request success rates.
111 | 2. **Text Generation**:
112 | - Build a conversational chatbot with OpenAI.
113 | - Plot response lengths and quality metrics.
114 | 3. **Embeddings and Vector Stores**:
115 | - Implement semantic search with FAISS.
116 | - Visualize similarity scores.
117 | 4. **RAG Fundamentals**:
118 | - Build a basic RAG pipeline with LangChain.
119 | - Visualize retrieval accuracy.
120 | 5. **Capstone RAG Application**:
121 | - Develop a Q&A app with LLM and vector store.
122 | - Visualize application performance metrics.
123 |
124 | ## 💡 Interview Tips
125 |
126 | - **Common Questions**:
127 | - What are the components of an OpenAI API request?
128 | - How do you integrate OpenAI with LangChain for RAG?
129 | - What is Retrieval-Augmented Generation, and how does it work?
130 | - How do you optimize API calls for a RAG application?
131 | - What are real-world use cases for RAG systems?
132 | - **Tips**:
133 | - Explain API and RAG setups with code (e.g., `openai.ChatCompletion.create`, `LangChain` pipelines).
134 | - Demonstrate use cases like Q&A or chatbots.
135 |   - Code tasks like error handling or retrieval optimization (a minimal retry sketch follows this section).
136 | - Discuss trade-offs (e.g., retrieval accuracy vs. latency).
137 | - **Coding Tasks**:
138 | - Integrate OpenAI API for a chatbot.
139 | - Build a RAG pipeline for document Q&A.
140 | - **Conceptual Clarity**:
141 | - Explain how RAG enhances LLM performance.
142 | - Describe optimization techniques for API and retrieval.
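
For the error-handling task above, a minimal retry sketch with exponential backoff, assuming the pre-1.0 `openai` SDK used by the module scripts (the wrapper name is illustrative):

```python
# Illustrative retry wrapper: back off 1s, 2s, 4s before giving up.
import time
import openai

def chat_with_retry(messages, model="gpt-3.5-turbo", retries=3, base_delay=1.0):
    for attempt in range(retries):
        try:
            return openai.ChatCompletion.create(model=model, messages=messages)
        except openai.error.OpenAIError:
            if attempt == retries - 1:
                raise  # out of retries
            time.sleep(base_delay * (2 ** attempt))
```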
143 |
144 | ## 📚 Resources
145 |
146 | - [OpenAI API Documentation](https://platform.openai.com/docs/)
147 | - [LangChain Documentation](https://python.langchain.com/docs/)
148 | - [FAISS Documentation](https://github.com/facebookresearch/faiss)
149 | - [Matplotlib Documentation](https://matplotlib.org/stable/contents.html)
150 | - [“Prompt Engineering Guide” by DAIR.AI](https://www.promptingguide.ai/)
151 |
152 | ## 🤝 Contributions
153 |
154 | 1. Fork the repository.
155 | 2. Create a feature branch (`git checkout -b feature/amazing-addition`).
156 | 3. Commit changes (`git commit -m 'Add some amazing content'`).
157 | 4. Push to the branch (`git push origin feature/amazing-addition`).
158 | 5. Open a Pull Request.
159 |
160 | ---
161 |
162 |
163 | Happy Learning and Good Luck with Your Interviews! ✨
164 |
--------------------------------------------------------------------------------
/LLM with RAG Interview Questions/README.md:
--------------------------------------------------------------------------------
1 | # LLM API and RAG Integration Interview Questions for AI/ML Roles
2 |
3 | This README provides 170 interview questions tailored for AI/ML roles, focusing on integrating Large Language Model (LLM) APIs and Retrieval-Augmented Generation (RAG) using Python. The questions cover **core concepts** (e.g., LLM API usage, RAG pipeline setup, vector stores, embeddings, evaluation) and their applications in tasks like question answering, document search, and contextual text generation. Questions are categorized by topic and divided into **Basic**, **Intermediate**, and **Advanced** levels to support candidates preparing for roles requiring LLM and RAG integration in generative AI workflows.
4 |
5 | ## LLM API Usage
6 |
7 | ### Basic
8 | 1. **What is an LLM API, and why is it used in AI applications?**
9 | Provides access to pre-trained language models for tasks like text generation.
10 | ```python
11 | from openai import OpenAI
12 | client = OpenAI(api_key="your-api-key")
13 | response = client.chat.completions.create(model="gpt-4", messages=[{"role": "user", "content": "Hello!"}])
14 | ```
15 |
16 | 2. **How do you authenticate an LLM API in Python?**
17 | Uses API keys for secure access.
18 | ```python
19 | import os
20 | os.environ["OPENAI_API_KEY"] = "your-api-key"
21 | client = OpenAI()
22 | ```
23 |
24 | 3. **How do you make a basic API call to an LLM?**
25 | Sends a prompt and retrieves a response.
26 | ```python
27 | response = client.chat.completions.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": "What is AI?"}])
28 | output = response.choices[0].message.content
29 | ```
30 |
31 | 4. **What is prompt engineering in the context of LLM APIs?**
32 | Crafts inputs to optimize model outputs.
33 | ```python
34 | prompt = "Summarize this text in 50 words: [text]"
35 | response = client.chat.completions.create(model="gpt-4", messages=[{"role": "user", "content": prompt}])
36 | ```
37 |
38 | 5. **How do you handle API rate limits in Python?**
39 | Implements retries or delays.
40 | ```python
41 | import time
42 | def safe_api_call(client, prompt, retries=3):
43 | for _ in range(retries):
44 | try:
45 | return client.chat.completions.create(model="gpt-3.5-turbo", messages=[{"role": "user", "content": prompt}])
46 | except Exception as e:
47 | time.sleep(1)
48 | raise Exception("API call failed")
49 | ```
50 |
51 | 6. **How do you visualize API response latency?**
52 | Plots latency metrics.
53 | ```python
54 | import matplotlib.pyplot as plt
55 | def plot_latency(latencies):
56 | plt.plot(latencies)
57 | plt.savefig("api_latency.png")
58 | ```
59 |
60 | ### Intermediate
61 | 7. **Write a function to call an LLM API with custom parameters.**
62 | Configures temperature, max tokens, etc.
63 | ```python
64 | def call_llm(client, prompt, model="gpt-3.5-turbo", temperature=0.7, max_tokens=100):
65 | response = client.chat.completions.create(
66 | model=model,
67 | messages=[{"role": "user", "content": prompt}],
68 | temperature=temperature,
69 | max_tokens=max_tokens
70 | )
71 | return response.choices[0].message.content
72 | ```
73 |
74 | 8. **How do you implement streaming responses from an LLM API?**
75 | Processes real-time outputs.
76 | ```python
77 | def stream_llm_response(client, prompt):
78 | stream = client.chat.completions.create(
79 | model="gpt-3.5-turbo",
80 | messages=[{"role": "user", "content": prompt}],
81 | stream=True
82 | )
83 | for chunk in stream:
84 | if chunk.choices[0].delta.content:
85 | print(chunk.choices[0].delta.content, end="")
86 | ```
87 |
88 | 9. **Write a function to handle batch API calls.**
89 | Processes multiple prompts efficiently.
90 | ```python
91 | def batch_llm_call(client, prompts, model="gpt-3.5-turbo"):
92 | responses = []
93 | for prompt in prompts:
94 | response = client.chat.completions.create(
95 | model=model,
96 | messages=[{"role": "user", "content": prompt}]
97 | )
98 | responses.append(response.choices[0].message.content)
99 | return responses
100 | ```
101 |
102 | 10. **How do you integrate xAI’s Grok API in Python?**
103 | Uses Grok for generative tasks.
104 | ```python
105 | from xai_sdk import XAIClient
106 | client = XAIClient(api_key="your-xai-key")
107 | response = client.generate_text(prompt="Explain AI", model="grok-3")
108 | ```
109 |
110 | 11. **Write a function to log LLM API usage.**
111 | Tracks API calls and costs.
112 | ```python
113 | import logging
114 | def log_api_call(prompt, response, model):
115 | logging.basicConfig(filename="llm_api.log", level=logging.INFO)
116 | logging.info(f"Model: {model}, Prompt: {prompt}, Response: {response}")
117 | ```
118 |
119 | 12. **How do you handle errors in LLM API calls?**
120 | Implements robust error handling.
121 | ```python
122 | def robust_api_call(client, prompt):
123 | try:
124 | return client.chat.completions.create(
125 | model="gpt-3.5-turbo",
126 | messages=[{"role": "user", "content": prompt}]
127 | )
128 | except Exception as e:
129 | print(f"Error: {e}")
130 | return None
131 | ```
132 |
133 | ### Advanced
134 | 13. **Write a function to implement rate-limited API calls.**
135 | Respects API quotas.
136 | ```python
137 | from ratelimit import limits, sleep_and_retry
138 | @sleep_and_retry
139 | @limits(calls=10, period=60)
140 | def rate_limited_call(client, prompt):
141 | return client.chat.completions.create(
142 | model="gpt-3.5-turbo",
143 | messages=[{"role": "user", "content": prompt}]
144 | )
145 | ```
146 |
147 | 14. **How do you optimize LLM API costs in Python?**
148 | Uses caching or smaller models.
149 | ```python
150 | from functools import lru_cache
151 | @lru_cache(maxsize=1000)
152 | def cached_llm_call(prompt, model="gpt-3.5-turbo"):
153 | client = OpenAI()
154 | return client.chat.completions.create(
155 | model=model,
156 | messages=[{"role": "user", "content": prompt}]
157 | ).choices[0].message.content
158 | ```
159 |
160 | 15. **Write a function to implement asynchronous LLM API calls.**
161 | Improves throughput.
162 | ```python
163 | import asyncio
164 | async def async_llm_call(client, prompt):
165 | loop = asyncio.get_event_loop()
166 | response = await loop.run_in_executor(None, lambda: client.chat.completions.create(
167 | model="gpt-3.5-turbo",
168 | messages=[{"role": "user", "content": prompt}]
169 | ))
170 | return response.choices[0].message.content
171 | ```
172 |
173 | 16. **How do you integrate multiple LLM APIs in Python?**
174 | Combines OpenAI and xAI’s Grok.
175 | ```python
176 | def multi_llm_call(prompt):
177 | openai_client = OpenAI()
178 | xai_client = XAIClient(api_key="your-xai-key")
179 | openai_response = openai_client.chat.completions.create(
180 | model="gpt-3.5-turbo",
181 | messages=[{"role": "user", "content": prompt}]
182 | )
183 | xai_response = xai_client.generate_text(prompt, model="grok-3")
184 | return {"openai": openai_response.choices[0].message.content, "xai": xai_response}
185 | ```
186 |
187 | 17. **Write a function to monitor API performance.**
188 | Tracks latency and success rates.
189 | ```python
190 | import time
191 | def monitor_api_performance(client, prompts):
192 | latencies = []
193 | for prompt in prompts:
194 | start = time.time()
195 | response = robust_api_call(client, prompt)
196 | latencies.append(time.time() - start if response else float("inf"))
197 | return {"avg_latency": sum(latencies) / len(latencies), "success_rate": sum(1 for l in latencies if l != float("inf")) / len(latencies)}
198 | ```
199 |
200 | 18. **How do you implement fallback mechanisms for LLM APIs?**
201 | Switches to alternative APIs on failure.
202 | ```python
203 | def fallback_llm_call(primary_client, fallback_client, prompt):
204 | try:
205 | return primary_client.chat.completions.create(
206 | model="gpt-3.5-turbo",
207 | messages=[{"role": "user", "content": prompt}]
208 | ).choices[0].message.content
209 | except:
210 | return fallback_client.generate_text(prompt, model="grok-3")
211 | ```
212 |
213 | ## RAG Pipeline Setup
214 |
215 | ### Basic
216 | 19. **What is Retrieval-Augmented Generation (RAG)?**
217 | Combines retrieval and generation for contextual responses.
218 | ```python
219 | from langchain.chains import RetrievalQA
220 | from langchain.vectorstores import FAISS
221 | qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
222 | ```
223 |
224 | 20. **How do you set up a basic RAG pipeline in Python?**
225 | Uses LangChain for RAG.
226 | ```python
227 | from langchain.llms import OpenAI
228 | from langchain.vectorstores import FAISS
229 | from langchain.embeddings import OpenAIEmbeddings
230 | embeddings = OpenAIEmbeddings()
231 | vector_store = FAISS.from_texts(["Sample text"], embeddings)
232 | llm = OpenAI()
233 | qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
234 | ```
235 |
236 | 21. **What is a vector store in RAG?**
237 | Stores document embeddings for retrieval.
238 | ```python
239 | vector_store = FAISS.from_texts(["Document 1", "Document 2"], embeddings)
240 | ```
241 |
242 | 22. **How do you create embeddings for RAG?**
243 | Converts text to vectors.
244 | ```python
245 | from langchain.embeddings import OpenAIEmbeddings
246 | embeddings = OpenAIEmbeddings()
247 | vectors = embeddings.embed_documents(["Hello, world!"])
248 | ```
249 |
250 | 23. **How do you perform retrieval in a RAG pipeline?**
251 | Fetches relevant documents.
252 | ```python
253 | docs = vector_store.similarity_search("What is AI?", k=3)
254 | ```
255 |
256 | 24. **How do you visualize document similarity scores?**
257 | Plots retrieval scores.
258 | ```python
259 | import matplotlib.pyplot as plt
260 | def plot_similarity_scores(scores):
261 | plt.bar(range(len(scores)), scores)
262 | plt.savefig("similarity_scores.png")
263 | ```
264 |
265 | ### Intermediate
266 | 25. **Write a function to set up a RAG pipeline with LangChain.**
267 | Configures LLM and vector store.
268 | ```python
269 | from langchain.chains import RetrievalQA
270 | from langchain.vectorstores import FAISS
271 | from langchain.llms import OpenAI
272 | def setup_rag_pipeline(documents, llm_model="text-davinci-003"):
273 | embeddings = OpenAIEmbeddings()
274 | vector_store = FAISS.from_texts(documents, embeddings)
275 | llm = OpenAI(model=llm_model)
276 | return RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
277 | ```
278 |
279 | 26. **How do you integrate Hugging Face models in a RAG pipeline?**
280 | Uses Hugging Face LLMs.
281 | ```python
282 | from langchain.llms import HuggingFacePipeline
283 | from transformers import pipeline
284 | llm = HuggingFacePipeline(pipeline=pipeline("text-generation", model="gpt2"))
285 | qa_chain = RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
286 | ```
287 |
288 | 27. **Write a function to load documents into a vector store.**
289 | Indexes documents for RAG.
290 | ```python
291 | from langchain.document_loaders import TextLoader
292 | def load_documents_to_vector_store(file_path, embeddings):
293 | loader = TextLoader(file_path)
294 | documents = loader.load()
295 | return FAISS.from_documents(documents, embeddings)
296 | ```
297 |
298 | 28. **How do you optimize retrieval in a RAG pipeline?**
299 | Uses efficient vector stores or indexing.
300 | ```python
301 | from langchain.vectorstores import FAISS
302 | vector_store = FAISS.from_texts(["Doc"], embeddings, index_type="hnsw")
303 | ```
304 |
305 | 29. **Write a function to visualize retrieved documents.**
306 | Displays document relevance.
307 | ```python
308 | import matplotlib.pyplot as plt
309 | def plot_retrieved_docs(docs, scores):
310 | plt.bar([doc.metadata["source"] for doc in docs], scores)
311 | plt.savefig("retrieved_docs.png")
312 | ```
313 |
314 | 30. **How do you handle large document sets in RAG?**
315 | Uses chunking or batch processing.
316 | ```python
317 | from langchain.text_splitter import RecursiveCharacterTextSplitter
318 | def chunk_documents(documents, chunk_size=1000):
319 | splitter = RecursiveCharacterTextSplitter(chunk_size=chunk_size)
320 | return splitter.split_documents(documents)
321 | ```
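     A short usage sketch, assuming `documents` came from a LangChain loader (as in question 27) and reusing the embeddings from earlier snippets:
   ```python
   chunks = chunk_documents(documents, chunk_size=500)
   vector_store = FAISS.from_documents(chunks, OpenAIEmbeddings())  # index the chunks rather than whole documents
   ```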
322 |
323 | #### Advanced
324 | 31. **Write a function to implement hybrid retrieval in RAG.**
325 | Combines dense and sparse retrieval.
326 | ```python
327 | from langchain.retrievers import BM25Retriever, EnsembleRetriever
328 | def hybrid_retriever(documents, embeddings):
329 | dense_retriever = FAISS.from_documents(documents, embeddings).as_retriever()
330 | sparse_retriever = BM25Retriever.from_documents(documents)
331 | return EnsembleRetriever(retrievers=[dense_retriever, sparse_retriever], weights=[0.5, 0.5])
332 | ```
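     The ensemble retriever is queried like any other retriever; the weights control how much each underlying retriever contributes. A usage sketch with the names from above:
   ```python
   retriever = hybrid_retriever(documents, embeddings)
   docs = retriever.get_relevant_documents("What is AI?")
   ```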
333 |
334 | 32. **How do you optimize RAG pipelines for latency?**
335 | Uses caching or smaller models.
336 | ```python
337 | import langchain
338 | from langchain.cache import InMemoryCache
338 | def enable_rag_caching():
339 | langchain.llm_cache = InMemoryCache()
340 | ```
341 |
342 | 33. **Write a function to implement multi-query retrieval in RAG.**
343 | Enhances retrieval with multiple queries.
344 | ```python
345 | from langchain.retrievers import MultiQueryRetriever
346 | def multi_query_rag(llm, vector_store):
347 | return MultiQueryRetriever.from_llm(retriever=vector_store.as_retriever(), llm=llm)
348 | ```
349 |
350 | 34. **How do you integrate external APIs in a RAG pipeline?**
351 | Uses xAI’s Grok for generation.
352 | ```python
353 | from langchain.llms.base import LLM
354 | class GrokLLM(LLM):
355 |     @property
356 |     def _llm_type(self):
357 |         return "grok"
358 |     def _call(self, prompt, stop=None, **kwargs):
359 |         client = XAIClient(api_key="your-xai-key")  # placeholder client; any HTTP wrapper around the Grok API works here
360 |         return client.generate_text(prompt, model="grok-3")
361 | qa_chain = RetrievalQA.from_chain_type(llm=GrokLLM(), retriever=vector_store.as_retriever())
359 | ```
360 |
361 | 35. **Write a function to evaluate RAG pipeline performance.**
362 | Measures retrieval and generation quality.
363 | ```python
364 | import evaluate  # datasets.load_metric is deprecated; the evaluate library replaces it
365 | def evaluate_rag(qa_chain, questions, references):
366 |     bleu = evaluate.load("bleu")
367 |     responses = [qa_chain.run(q) for q in questions]
368 |     return bleu.compute(predictions=responses, references=[[r] for r in references])  # BLEU expects a list of reference lists per prediction
369 | ```
370 |
371 | 36. **How do you implement dynamic document indexing in RAG?**
372 | Updates vector store incrementally.
373 | ```python
374 | def update_vector_store(vector_store, new_documents, embeddings):
375 | new_store = FAISS.from_documents(new_documents, embeddings)
376 | vector_store.merge_from(new_store)
377 | return vector_store
378 | ```
379 |
380 | ## Vector Stores and Embeddings
381 |
382 | ### Basic
383 | 37. **What is a vector store, and why is it used in RAG?**
384 | Stores embeddings for efficient retrieval.
385 | ```python
386 | from langchain.vectorstores import Chroma
387 | vector_store = Chroma.from_texts(["Sample text"], embeddings)
388 | ```
389 |
390 | 38. **How do you create embeddings with Hugging Face models?**
391 | Uses Sentence Transformers.
392 | ```python
393 | from langchain.embeddings import HuggingFaceEmbeddings
394 | embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
395 | ```
396 |
397 | 39. **How do you perform similarity search in a vector store?**
398 | Retrieves similar documents.
399 | ```python
400 | results = vector_store.similarity_search_with_score("Query", k=5)
401 | ```
402 |
403 | 40. **What is cosine similarity in the context of RAG?**
404 | Measures vector similarity.
405 | ```python
406 | from sklearn.metrics.pairwise import cosine_similarity
407 | similarity = cosine_similarity([vector1], [vector2])[0][0]
408 | ```
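     For intuition, vectors pointing the same way score 1.0 and orthogonal vectors score 0.0; a tiny worked example with toy 2-D vectors:
   ```python
   from sklearn.metrics.pairwise import cosine_similarity
   print(cosine_similarity([[1, 0]], [[1, 0]])[0][0])  # 1.0 (same direction)
   print(cosine_similarity([[1, 0]], [[0, 1]])[0][0])  # 0.0 (orthogonal)
   ```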
409 |
410 | 41. **How do you save a vector store in Python?**
411 | Persists embeddings to disk.
412 | ```python
413 | vector_store.save_local("vector_store")
414 | ```
415 |
416 | 42. **How do you visualize embedding distributions?**
417 | Plots embeddings in 2D.
418 | ```python
419 | from sklearn.manifold import TSNE
420 | import matplotlib.pyplot as plt
421 | def plot_embeddings(embeddings):
422 | tsne = TSNE(n_components=2)
423 | reduced = tsne.fit_transform(embeddings)
424 | plt.scatter(reduced[:, 0], reduced[:, 1])
425 | plt.savefig("embeddings.png")
426 | ```
427 |
428 | #### Intermediate
429 | 43. **Write a function to create a Chroma vector store.**
430 | Indexes documents with embeddings.
431 | ```python
432 | from langchain.vectorstores import Chroma
433 | def create_chroma_store(documents, embeddings, persist_dir="chroma"):
434 | return Chroma.from_documents(documents, embeddings, persist_directory=persist_dir)
435 | ```
436 |
437 | 44. **How do you integrate Pinecone with a RAG pipeline?**
438 | Uses cloud-based vector storage.
439 | ```python
440 | from langchain.vectorstores import Pinecone
441 | import pinecone
442 | pinecone.init(api_key="your-pinecone-key", environment="us-west1-gcp")
443 | vector_store = Pinecone.from_texts(["Doc"], embeddings, index_name="rag-index")
444 | ```
445 |
446 | 45. **Write a function to compare embedding models.**
447 | Evaluates embedding quality.
448 | ```python
449 | def compare_embeddings(texts, embedding_models):
450 | similarities = []
451 | for model in embedding_models:
452 | embeddings = model.embed_documents(texts)
453 | similarity = cosine_similarity(embeddings[0:1], embeddings[1:2])[0][0]
454 | similarities.append(similarity)
455 | return similarities
456 | ```
457 |
458 | 46. **How do you optimize vector store queries?**
459 | Uses approximate nearest neighbors.
460 | ```python
461 | import faiss
462 | # FAISS.from_texts builds an exact (flat) index; for approximate nearest neighbours, build an HNSW index directly
463 | index = faiss.IndexHNSWFlat(embedding_dim, 32)  # embedding_dim and doc_vectors are placeholders for your data
464 | index.add(doc_vectors)  # doc_vectors: float32 numpy array of document embeddings
462 | ```
463 |
464 | 47. **Write a function to visualize vector store query results.**
465 | Plots similarity scores.
466 | ```python
467 | import matplotlib.pyplot as plt
468 | def plot_query_results(results):
469 | scores = [score for _, score in results]
470 | plt.bar(range(len(scores)), scores)
471 | plt.savefig("query_results.png")
472 | ```
473 |
474 | 48. **How do you handle high-dimensional embeddings in RAG?**
475 | Uses dimensionality reduction.
476 | ```python
477 | from sklearn.decomposition import PCA
478 | def reduce_embeddings(embeddings, n_components=50):
479 | pca = PCA(n_components=n_components)
480 | return pca.fit_transform(embeddings)
481 | ```
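     Note that the same fitted PCA must be reused for query embeddings at search time; a sketch of that split, with `doc_embeddings` and `query_embedding` as placeholder arrays:
   ```python
   from sklearn.decomposition import PCA
   pca = PCA(n_components=50).fit(doc_embeddings)    # fit once on the document embeddings
   reduced_docs = pca.transform(doc_embeddings)
   reduced_query = pca.transform([query_embedding])  # project queries with the same transform
   ```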
482 |
483 | #### Advanced
484 | 49. **Write a function to implement custom vector store indexing.**
485 | Builds a custom index.
486 | ```python
487 | from sklearn.metrics.pairwise import cosine_similarity
488 | class CustomVectorStore:  # plain class; subclassing LangChain's abstract VectorStore would also require implementing from_texts and other abstract methods
489 | def __init__(self, embeddings):
490 | self.embeddings = embeddings
491 | self.index = {}
492 | def add_texts(self, texts):
493 | vectors = self.embeddings.embed_documents(texts)
494 |         offset = len(self.index)  # append after any existing entries instead of overwriting them
495 |         for i, vector in enumerate(vectors):
496 |             self.index[offset + i] = (texts[i], vector)
496 | def similarity_search(self, query, k=4):
497 | query_vector = self.embeddings.embed_query(query)
498 | return sorted(
499 | [(text, cosine_similarity([query_vector], [vector])[0][0]) for text, vector in self.index.values()],
500 | key=lambda x: x[1], reverse=True
501 | )[:k]
502 | ```
503 |
504 | 50. **How do you scale vector stores for large datasets?**
505 | Uses sharding or distributed stores.
506 | ```python
507 | import weaviate
508 | from langchain.vectorstores import Weaviate
508 | vector_store = Weaviate.from_texts(["Doc"], embeddings, client=weaviate.Client("http://localhost:8080"))
509 | ```
510 |
511 | 51. **Write a function to update embeddings dynamically.**
512 | Refreshes vector store incrementally.
513 | ```python
514 | def update_embeddings(vector_store, new_texts, embeddings):
515 | new_store = FAISS.from_texts(new_texts, embeddings)
516 | vector_store.merge_from(new_store)
517 | return vector_store
518 | ```
519 |
520 | 52. **How do you implement cross-lingual embeddings in RAG?**
521 | Uses multilingual models.
522 | ```python
523 | embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/LaBSE")
524 | vector_store = FAISS.from_texts(["Doc"], embeddings)
525 | ```
526 |
527 | 53. **Write a function to evaluate vector store retrieval quality.**
528 | Measures precision and recall.
529 | ```python
530 | def evaluate_retrieval(vector_store, queries, relevant_docs):
531 | precision = []
532 | for query, relevant in zip(queries, relevant_docs):
533 |         results = vector_store.similarity_search(query, k=5)  # returns Documents, not (doc, score) tuples
534 |         retrieved = [doc.page_content for doc in results]
535 | precision.append(sum(1 for doc in retrieved if doc in relevant) / len(retrieved))
536 | return {"avg_precision": sum(precision) / len(precision)}
537 | ```
538 |
539 | 54. **How do you integrate vector stores with real-time data?**
540 | Uses streaming updates.
541 | ```python
542 | def stream_vector_store(vector_store, data_stream, embeddings):
543 | for batch in data_stream:
544 |         vector_store.add_texts([doc["text"] for doc in batch])  # the store already holds its embedding function
545 | return vector_store
546 | ```
547 |
548 | ## Evaluation and Metrics
549 |
550 | ### Basic
551 | 55. **How do you evaluate LLM API responses?**
552 | Uses metrics like BLEU or ROUGE.
553 | ```python
554 | import evaluate  # replaces the deprecated datasets.load_metric
555 | bleu = evaluate.load("bleu")
556 | score = bleu.compute(predictions=["Hello"], references=[["Hello, world!"]])
557 | ```
558 |
559 | 56. **What is precision in the context of RAG retrieval?**
560 |      The fraction of retrieved documents that are actually relevant.
561 | ```python
562 | def compute_precision(retrieved, relevant):
563 | return sum(1 for doc in retrieved if doc in relevant) / len(retrieved)
564 | ```
565 |
566 | 57. **How do you evaluate RAG generation quality?**
567 | Uses ROUGE for text similarity.
568 | ```python
569 | rouge = evaluate.load("rouge")
570 | score = rouge.compute(predictions=["Generated text"], references=["Reference text"])
571 | ```
572 |
573 | 58. **How do you visualize evaluation metrics?**
574 | Plots metric scores.
575 | ```python
576 | import matplotlib.pyplot as plt
577 | def plot_metrics(metrics, metric_name):
578 | plt.plot(metrics)
579 | plt.savefig(f"{metric_name}.png")
580 | ```
581 |
582 | 59. **How do you measure latency in a RAG pipeline?**
583 | Tracks execution time.
584 | ```python
585 | import time
586 | def measure_rag_latency(qa_chain, query):
587 | start = time.time()
588 | qa_chain.run(query)
589 | return time.time() - start
590 | ```
591 |
592 | 60. **What is recall in the context of RAG retrieval?**
593 |      The fraction of relevant documents that are actually retrieved.
594 | ```python
595 | def compute_recall(retrieved, relevant):
596 | return sum(1 for doc in retrieved if doc in relevant) / len(relevant)
597 | ```
598 |
599 | #### Intermediate
600 | 61. **Write a function to evaluate RAG pipeline accuracy.**
601 | Compares outputs to ground truth.
602 | ```python
603 | def evaluate_rag_accuracy(qa_chain, questions, answers):
604 | correct = 0
605 | for q, a in zip(questions, answers):
606 | response = qa_chain.run(q)
607 | if response.strip() == a.strip():
608 | correct += 1
609 | return correct / len(questions)
610 | ```
611 |
612 | 62. **How do you implement human-in-the-loop evaluation for RAG?**
613 | Collects user feedback.
614 | ```python
615 | def human_eval_rag(qa_chain, query):
616 | response = qa_chain.run(query)
617 | feedback = input(f"Rate this response (1-5): {response}\n")
618 | return {"response": response, "score": int(feedback)}
619 | ```
620 |
621 | 63. **Write a function to compute F1 score for RAG retrieval.**
622 | Balances precision and recall.
623 | ```python
624 | def compute_f1(retrieved, relevant):
625 | precision = compute_precision(retrieved, relevant)
626 | recall = compute_recall(retrieved, relevant)
627 | return 2 * (precision * recall) / (precision + recall) if precision + recall > 0 else 0
628 | ```
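     A tiny worked example: if the retriever returns ["a", "b", "c"] and the relevant set is ["a", "c", "d"], precision and recall are both 2/3, so F1 is 2/3 as well:
   ```python
   retrieved, relevant = ["a", "b", "c"], ["a", "c", "d"]
   print(compute_precision(retrieved, relevant), compute_recall(retrieved, relevant), compute_f1(retrieved, relevant))  # all roughly 0.667
   ```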
629 |
630 | 64. **How do you evaluate contextual relevance in RAG?**
631 | Measures document alignment.
632 | ```python
633 | def evaluate_context_relevance(qa_chain, query, relevant_context):
634 | response = qa_chain.run(query)
635 | return 1 if relevant_context in response else 0
636 | ```
637 |
638 | 65. **Write a function to visualize RAG evaluation metrics.**
639 | Plots precision, recall, and F1.
640 | ```python
641 | import matplotlib.pyplot as plt
642 | def plot_rag_metrics(precisions, recalls, f1s):
643 | plt.plot(precisions, label="Precision")
644 | plt.plot(recalls, label="Recall")
645 | plt.plot(f1s, label="F1")
646 | plt.legend()
647 | plt.savefig("rag_metrics.png")
648 | ```
649 |
650 | 66. **How do you implement A/B testing for RAG pipelines?**
651 | Compares two configurations.
652 | ```python
653 | def ab_test_rag(qa_chain_a, qa_chain_b, questions, answers):
654 | metrics_a = evaluate_rag_accuracy(qa_chain_a, questions, answers)
655 | metrics_b = evaluate_rag_accuracy(qa_chain_b, questions, answers)
656 | return {"chain_a": metrics_a, "chain_b": metrics_b}
657 | ```
658 |
659 | #### Advanced
660 | 67. **Write a function to implement automated evaluation for RAG.**
661 | Uses multiple metrics.
662 | ```python
663 | def auto_evaluate_rag(qa_chain, questions, answers, relevant_docs):
664 |     bleu = evaluate.load("bleu")
665 | responses = [qa_chain.run(q) for q in questions]
666 |     bleu_score = bleu.compute(predictions=responses, references=[[a] for a in answers])  # wrap each answer as a reference list
667 | retrieval_metrics = evaluate_retrieval(qa_chain.retriever.vectorstore, questions, relevant_docs)
668 | return {"bleu": bleu_score, "retrieval": retrieval_metrics}
669 | ```
670 |
671 | 68. **How do you evaluate RAG robustness under noisy inputs?**
672 | Tests performance with perturbations.
673 | ```python
674 | import random
675 | def evaluate_robustness(qa_chain, questions, answers, noise_level=0.1):
676 |     noisy_questions = [q + " " + "".join(random.choices("abc", k=int(len(q) * noise_level))) for q in questions]
677 |     return evaluate_rag_accuracy(qa_chain, noisy_questions, answers)  # score the noisy queries against the reference answers, not the questions
677 | ```
678 |
679 | 69. **Write a function to implement cross-validation for RAG.**
680 | Validates pipeline stability.
681 | ```python
682 | from sklearn.model_selection import KFold
683 | def cross_validate_rag(documents, questions, answers, folds=5):
684 | kf = KFold(n_splits=folds)
685 | scores = []
686 | for train_idx, test_idx in kf.split(documents):
687 | train_docs = [documents[i] for i in train_idx]
688 | qa_chain = setup_rag_pipeline(train_docs)
689 | test_questions = [questions[i] for i in test_idx]
690 | test_answers = [answers[i] for i in test_idx]
691 | scores.append(evaluate_rag_accuracy(qa_chain, test_questions, test_answers))
692 | return sum(scores) / len(scores)
693 | ```
694 |
695 | 70. **How do you implement real-time evaluation for RAG?**
696 | Monitors performance during inference.
697 | ```python
698 | def realtime_evaluate_rag(qa_chain, query, reference):
699 | response = qa_chain.run(query)
700 |     bleu = evaluate.load("bleu")
701 | score = bleu.compute(predictions=[response], references=[[reference]])
702 | return {"response": response, "bleu": score}
703 | ```
704 |
705 | 71. **Write a function to evaluate RAG fairness.**
706 | Checks bias in responses.
707 | ```python
708 | def evaluate_fairness(qa_chain, questions, groups):
709 | responses = [qa_chain.run(q) for q in questions]
710 | group_scores = {g: [] for g in set(groups)}
711 | for response, group in zip(responses, groups):
712 | group_scores[group].append(len(response.split()))
713 | return {g: sum(scores) / len(scores) for g, scores in group_scores.items()}
714 | ```
715 |
716 | 72. **How do you visualize RAG performance over time?**
717 | Plots metrics across queries.
718 | ```python
719 | import matplotlib.pyplot as plt
720 | def plot_performance_over_time(metrics):
721 | plt.plot(metrics["bleu"], label="BLEU")
722 | plt.plot(metrics["retrieval"], label="Retrieval Precision")
723 | plt.legend()
724 | plt.savefig("performance_over_time.png")
725 | ```
726 |
727 | ## Debugging and Error Handling
728 |
729 | ### Basic
730 | 73. **How do you debug LLM API responses?**
731 | Logs inputs and outputs.
732 | ```python
733 | def debug_api_call(client, prompt):
734 | response = client.chat.completions.create(
735 | model="gpt-3.5-turbo",
736 | messages=[{"role": "user", "content": prompt}]
737 | )
738 | print(f"Prompt: {prompt}, Response: {response.choices[0].message.content}")
739 | return response
740 | ```
741 |
742 | 74. **What is a try-except block in RAG pipelines?**
743 | Handles runtime errors.
744 | ```python
745 | try:
746 | response = qa_chain.run("Query")
747 | except Exception as e:
748 | print(f"Error: {e}")
749 | ```
750 |
751 | 75. **How do you validate inputs in a RAG pipeline?**
752 | Ensures correct formats.
753 | ```python
754 | def validate_rag_input(query, vector_store):
755 | if not query or not vector_store:
756 | raise ValueError("Invalid query or vector store")
757 | return query
758 | ```
759 |
760 | 76. **How do you handle vector store errors in RAG?**
761 | Manages retrieval failures.
762 | ```python
763 | def safe_retrieval(vector_store, query):
764 | try:
765 | return vector_store.similarity_search(query)
766 | except Exception as e:
767 | print(f"Retrieval error: {e}")
768 | return []
769 | ```
770 |
771 | 77. **What is logging in the context of RAG pipelines?**
772 | Tracks operations and errors.
773 | ```python
774 | import logging
775 | logging.basicConfig(filename="rag.log", level=logging.INFO)
776 | logging.info("RAG pipeline started")
777 | ```
778 |
779 | 78. **How do you handle API timeouts in LLM calls?**
780 | Implements timeouts and retries.
781 | ```python
782 | import openai
783 | def handle_timeout(client, prompt, timeout=10):
784 | try:
785 | return client.chat.completions.create(
786 | model="gpt-3.5-turbo",
787 | messages=[{"role": "user", "content": prompt}],
788 | timeout=timeout
789 | )
790 |     except openai.APITimeoutError:
791 | print("API timeout")
792 | return None
793 | ```
794 |
795 | #### Intermediate
796 | 79. **Write a function to retry RAG pipeline queries.**
797 | Handles transient failures.
798 | ```python
799 | def retry_rag_query(qa_chain, query, max_attempts=3):
800 | for attempt in range(max_attempts):
801 | try:
802 | return qa_chain.run(query)
803 | except Exception as e:
804 | if attempt == max_attempts - 1:
805 | raise
806 | print(f"Attempt {attempt+1} failed: {e}")
807 | ```
808 |
809 | 80. **How do you debug vector store retrieval issues?**
810 | Inspects retrieved documents.
811 | ```python
812 | def debug_retrieval(vector_store, query):
813 | results = vector_store.similarity_search_with_score(query)
814 | print(f"Query: {query}, Results: {[(doc.page_content, score) for doc, score in results]}")
815 | return results
816 | ```
817 |
818 | 81. **Write a function to validate LLM API responses.**
819 | Ensures valid outputs.
820 | ```python
821 | def validate_response(response):
822 | if not response or not response.choices:
823 | raise ValueError("Invalid API response")
824 | return response.choices[0].message.content
825 | ```
826 |
827 | 82. **How do you profile RAG pipeline performance?**
828 | Measures component times.
829 | ```python
830 | import time
831 | def profile_rag(qa_chain, query):
832 | start = time.time()
833 | response = qa_chain.run(query)
834 | print(f"RAG took {time.time() - start}s")
835 | return response
836 | ```
837 |
838 | 83. **Write a function to handle embedding errors.**
839 | Manages embedding failures.
840 | ```python
841 | def safe_embedding(embeddings, texts):
842 | try:
843 | return embeddings.embed_documents(texts)
844 | except Exception as e:
845 | print(f"Embedding error: {e}")
846 | return [[] for _ in texts]
847 | ```
848 |
849 | 84. **How do you debug inconsistent RAG outputs?**
850 | Logs pipeline state.
851 | ```python
852 | def debug_rag_output(qa_chain, query):
853 | response = qa_chain.run(query)
854 | print(f"Query: {query}, Response: {response}, Retriever State: {qa_chain.retriever}")
855 | return response
856 | ```
857 |
858 | #### Advanced
859 | 85. **Write a function to implement a custom error handler for RAG.**
860 | Logs specific errors.
861 | ```python
862 | import logging
863 | def custom_rag_error_handler(operation, *args):
864 | logging.basicConfig(filename="rag_errors.log", level=logging.ERROR)
865 | try:
866 | return operation(*args)
867 | except Exception as e:
868 | logging.error(f"RAG error: {e}")
869 | raise
870 | ```
871 |
872 | 86. **How do you implement circuit breakers in RAG pipelines?**
873 | Prevents cascading failures.
874 | ```python
875 | from pybreaker import CircuitBreaker
876 | breaker = CircuitBreaker(fail_max=3, reset_timeout=60)
877 | @breaker
878 | def safe_rag_call(qa_chain, query):
879 | return qa_chain.run(query)
880 | ```
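     Once the breaker opens (after `fail_max` consecutive failures) further calls fail fast until the reset timeout elapses; callers can catch that state and degrade gracefully. A usage sketch:
   ```python
   import pybreaker
   try:
       answer = safe_rag_call(qa_chain, "What is RAG?")
   except pybreaker.CircuitBreakerError:
       answer = "Service temporarily unavailable"  # fallback while the breaker is open
   ```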
881 |
882 | 87. **Write a function to detect retrieval failures in RAG.**
883 | Checks for empty results.
884 | ```python
885 | def detect_retrieval_failure(vector_store, query):
886 | results = vector_store.similarity_search(query)
887 | if not results:
888 | print("Warning: No documents retrieved")
889 | return results
890 | ```
891 |
892 | 88. **How do you implement logging for distributed RAG pipelines?**
893 | Centralizes logs.
894 | ```python
895 | import logging.handlers
896 | def setup_distributed_logging():
897 | handler = logging.handlers.SocketHandler("log-server", 9090)
898 | logging.getLogger().addHandler(handler)
899 | logging.info("RAG pipeline started")
900 | ```
901 |
902 | 89. **Write a function to handle version compatibility in RAG.**
903 | Checks library versions.
904 | ```python
905 | from packaging.version import Version
906 | from langchain import __version__
907 | def check_langchain_version():
908 |     if Version(__version__) < Version("0.0.150"):  # plain string comparison orders versions incorrectly
908 | raise ValueError("Unsupported LangChain version")
909 | ```
910 |
911 | 90. **How do you debug RAG performance bottlenecks?**
912 | Profiles retrieval and generation.
913 | ```python
914 | import cProfile
915 | def debug_rag_bottlenecks(qa_chain, query):
916 | cProfile.runctx("qa_chain.run(query)", globals(), locals(), "rag_profile.prof")
917 | ```
918 |
919 | ## Visualization and Interpretation
920 |
921 | ### Basic
922 | 91. **How do you visualize LLM API response quality?**
923 | Plots BLEU scores.
924 | ```python
925 | import matplotlib.pyplot as plt
926 | def plot_bleu_scores(scores):
927 | plt.plot(scores)
928 | plt.savefig("bleu_scores.png")
929 | ```
930 |
931 | 92. **How do you create a word cloud for RAG outputs?**
932 | Visualizes word frequencies.
933 | ```python
934 | from wordcloud import WordCloud
935 | import matplotlib.pyplot as plt
936 | def plot_word_cloud(text):
937 | wc = WordCloud().generate(text)
938 | plt.imshow(wc, interpolation="bilinear")
939 | plt.savefig("word_cloud.png")
940 | ```
941 |
942 | 93. **How do you visualize retrieval scores in RAG?**
943 | Plots similarity scores.
944 | ```python
945 | import matplotlib.pyplot as plt
946 | def plot_retrieval_scores(results):
947 | scores = [score for _, score in results]
948 | plt.bar(range(len(scores)), scores)
949 | plt.savefig("retrieval_scores.png")
950 | ```
951 |
952 | 94. **How do you visualize RAG pipeline latency?**
953 | Plots execution times.
954 | ```python
955 | import matplotlib.pyplot as plt
956 | def plot_rag_latency(latencies):
957 | plt.plot(latencies)
958 | plt.savefig("rag_latency.png")
959 | ```
960 |
961 | 95. **How do you visualize document embeddings in RAG?**
962 | Projects embeddings to 2D.
963 | ```python
964 | from sklearn.manifold import TSNE
965 | import matplotlib.pyplot as plt
966 | def plot_doc_embeddings(embeddings):
967 | tsne = TSNE(n_components=2)
968 | reduced = tsne.fit_transform(embeddings)
969 | plt.scatter(reduced[:, 0], reduced[:, 1])
970 | plt.savefig("doc_embeddings.png")
971 | ```
972 |
973 | 96. **How do you visualize RAG response diversity?**
974 | Plots unique token counts.
975 | ```python
976 | import matplotlib.pyplot as plt
977 | def plot_response_diversity(responses):
978 | unique_tokens = [len(set(response.split())) for response in responses]
979 | plt.hist(unique_tokens, bins=20)
980 | plt.savefig("response_diversity.png")
981 | ```
982 |
983 | #### Intermediate
984 | 97. **Write a function to visualize RAG retrieval accuracy.**
985 | Plots precision over queries.
986 | ```python
987 | import matplotlib.pyplot as plt
988 | def plot_retrieval_accuracy(precisions):
989 | plt.plot(precisions)
990 | plt.savefig("retrieval_accuracy.png")
991 | ```
992 |
993 | 98. **How do you visualize LLM API usage patterns?**
994 | Plots API call frequency.
995 | ```python
996 | import matplotlib.pyplot as plt
997 | def plot_api_usage(calls):
998 | plt.hist(calls["timestamps"], bins=24)
999 | plt.savefig("api_usage.png")
1000 | ```
1001 |
1002 | 99. **Write a function to visualize RAG fairness metrics.**
1003 | Plots group-wise performance.
1004 | ```python
1005 | import matplotlib.pyplot as plt
1006 | def plot_fairness_metrics(metrics):
1007 | plt.bar(metrics.keys(), metrics.values())
1008 | plt.savefig("fairness_metrics.png")
1009 | ```
1010 |
1011 | 100. **How do you visualize RAG pipeline throughput?**
1012 | Plots queries per second.
1013 | ```python
1014 | import matplotlib.pyplot as plt
1015 | def plot_throughput(queries, times):
1016 | throughput = [1 / t for t in times]
1017 | plt.plot(throughput)
1018 | plt.savefig("throughput.png")
1019 | ```
1020 |
1021 | 101. **Write a function to visualize embedding clusters.**
1022 | Plots document clusters.
1023 | ```python
1024 | from sklearn.cluster import KMeans
1025 | from sklearn.manifold import TSNE
1025 | import matplotlib.pyplot as plt
1026 | def plot_embedding_clusters(embeddings, n_clusters=3):
1027 | kmeans = KMeans(n_clusters=n_clusters)
1028 | labels = kmeans.fit_predict(embeddings)
1029 | tsne = TSNE(n_components=2)
1030 | reduced = tsne.fit_transform(embeddings)
1031 | plt.scatter(reduced[:, 0], reduced[:, 1], c=labels)
1032 | plt.savefig("embedding_clusters.png")
1033 | ```
1034 |
1035 | 102. **How do you visualize RAG response consistency?**
1036 | Plots response similarity.
1037 | ```python
1038 | import matplotlib.pyplot as plt
1039 | from sklearn.metrics.pairwise import cosine_similarity
1040 | def plot_response_consistency(responses, embeddings):
1041 | vectors = embeddings.embed_documents(responses)
1042 | similarities = cosine_similarity(vectors)
1043 | plt.imshow(similarities, cmap="hot")
1044 | plt.savefig("response_consistency.png")
1045 | ```
1046 |
1047 | #### Advanced
1048 | 103. **Write a function to visualize RAG pipeline robustness.**
1049 | Plots performance under noise.
1050 | ```python
1051 | import matplotlib.pyplot as plt
1052 | def plot_robustness(metrics, noise_levels):
1053 | plt.plot(noise_levels, metrics)
1054 | plt.savefig("robustness.png")
1055 | ```
1056 |
1057 | 104. **How do you implement a dashboard for RAG metrics?**
1058 | Displays real-time stats.
1059 | ```python
1060 | from fastapi import FastAPI
1061 | app = FastAPI()
1062 | metrics = []
1063 | @app.get("/rag_metrics")
1064 | async def get_metrics():
1065 | return {"metrics": metrics}
1066 | ```
1067 |
1068 | 105. **Write a function to visualize data drift in RAG.**
1069 | Tracks document distribution changes.
1070 | ```python
1071 | from sklearn.manifold import TSNE
1072 | import matplotlib.pyplot as plt
1073 | def plot_data_drift(old_embeddings, new_embeddings):
1074 |     tsne = TSNE(n_components=2)
1075 |     reduced = tsne.fit_transform(list(old_embeddings) + list(new_embeddings))  # one joint fit so old and new share the same 2-D space
1076 |     old_reduced, new_reduced = reduced[:len(old_embeddings)], reduced[len(old_embeddings):]
1076 | plt.scatter(old_reduced[:, 0], old_reduced[:, 1], label="Old")
1077 | plt.scatter(new_reduced[:, 0], new_reduced[:, 1], label="New")
1078 | plt.legend()
1079 | plt.savefig("data_drift.png")
1080 | ```
1081 |
1082 | 106. **How do you visualize RAG retrieval latency distribution?**
1083 | Plots latency histogram.
1084 | ```python
1085 | import matplotlib.pyplot as plt
1086 | def plot_retrieval_latency(latencies):
1087 | plt.hist(latencies, bins=20)
1088 | plt.savefig("retrieval_latency.png")
1089 | ```
1090 |
1091 | 107. **Write a function to visualize LLM API cost trends.**
1092 | Plots API usage costs.
1093 | ```python
1094 | import matplotlib.pyplot as plt
1095 | def plot_api_costs(costs):
1096 | plt.plot(costs)
1097 | plt.savefig("api_costs.png")
1098 | ```
1099 |
1100 | 108. **How do you visualize RAG pipeline error rates?**
1101 | Plots error frequency.
1102 | ```python
1103 | import matplotlib.pyplot as plt
1104 | def plot_error_rates(errors):
1105 | plt.plot([1 if e else 0 for e in errors])
1106 | plt.savefig("error_rates.png")
1107 | ```
1108 |
1109 | ## Best Practices and Optimization
1110 |
1111 | ### Basic
1112 | 109. **What are best practices for LLM API integration?**
1113 | Includes secure key management and caching.
1114 | ```python
1115 | import os
1116 | os.environ["OPENAI_API_KEY"] = "your-api-key"
1117 | ```
1118 |
1119 | 110. **How do you ensure reproducibility in RAG pipelines?**
1120 | Sets random seeds and versions.
1121 | ```python
1122 | import random
1123 | random.seed(42)
1124 | ```
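     Seeding only covers Python-side randomness; LLM sampling is a separate source of nondeterminism, so pipelines typically also seed NumPy and decode with temperature 0. A sketch of that combination:
   ```python
   import numpy as np
   np.random.seed(42)
   llm = OpenAI(temperature=0)  # greedy-ish decoding removes most sampling variance
   ```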
1125 |
1126 | 111. **What is caching in the context of RAG pipelines?**
1127 | Stores query results for reuse.
1128 | ```python
1129 | import langchain
1130 | from langchain.cache import InMemoryCache
1130 | langchain.llm_cache = InMemoryCache()
1131 | ```
1132 |
1133 | 112. **How do you handle large-scale RAG pipelines?**
1134 | Uses efficient vector stores.
1135 | ```python
1136 | vector_store = FAISS.from_texts(["Doc"], embeddings)  # for very large corpora, swap in an ANN-backed or managed store (HNSW index, Weaviate, Pinecone)
1137 | ```
1138 |
1139 | 113. **What is the role of environment configuration in RAG?**
1140 | Manages API keys and settings.
1141 | ```python
1142 | import os
1143 | os.environ["PINECONE_API_KEY"] = "your-pinecone-key"
1144 | ```
1145 |
1146 | 114. **How do you document RAG pipeline code?**
1147 | Uses docstrings for clarity.
1148 | ```python
1149 | def setup_rag_pipeline(documents):
1150 | """Sets up a RAG pipeline with LangChain."""
1151 | embeddings = OpenAIEmbeddings()
1152 | vector_store = FAISS.from_texts(documents, embeddings)
1153 | return RetrievalQA.from_chain_type(llm=OpenAI(), retriever=vector_store.as_retriever())
1154 | ```
1155 |
1156 | #### Intermediate
1157 | 115. **Write a function to optimize RAG memory usage.**
1158 | Clears unused objects.
1159 | ```python
1160 | import gc
1161 | def optimize_rag_memory(qa_chain, query):
1162 | response = qa_chain.run(query)
1163 | gc.collect()
1164 | return response
1165 | ```
1166 |
1167 | 116. **How do you implement unit tests for RAG pipelines?**
1168 | Validates components.
1169 | ```python
1170 | import unittest
1171 | class TestRAG(unittest.TestCase):
1172 | def test_retrieval(self):
1173 | vector_store = FAISS.from_texts(["Test"], embeddings)
1174 | results = vector_store.similarity_search("Test")
1175 | self.assertGreater(len(results), 0)
1176 | ```
1177 |
1178 | 117. **Write a function to create reusable RAG templates.**
1179 | Standardizes pipeline setup.
1180 | ```python
1181 | def rag_template(documents, llm_model="gpt-3.5-turbo-instruct"):
1182 | embeddings = OpenAIEmbeddings()
1183 | vector_store = FAISS.from_texts(documents, embeddings)
1184 | llm = OpenAI(model=llm_model)
1185 | return RetrievalQA.from_chain_type(llm=llm, retriever=vector_store.as_retriever())
1186 | ```
1187 |
1188 | 118. **How do you optimize RAG for batch processing?**
1189 | Processes queries in batches.
1190 | ```python
1191 | def batch_rag_process(qa_chain, queries, batch_size=10):
1192 | results = []
1193 | for i in range(0, len(queries), batch_size):
1194 | batch = queries[i:i+batch_size]
1195 | results.extend([qa_chain.run(q) for q in batch])
1196 | return results
1197 | ```
1198 |
1199 | 119. **Write a function to handle RAG configuration.**
1200 | Centralizes settings.
1201 | ```python
1202 | def configure_rag():
1203 | return {
1204 | "llm_model": "text-davinci-003",
1205 | "embedding_model": "text-embedding-ada-002",
1206 | "vector_store": "faiss"
1207 | }
1208 | ```
1209 |
1210 | 120. **How do you ensure RAG pipeline consistency?**
1211 | Standardizes versions and settings.
1212 | ```python
1213 | from langchain import __version__
1214 | def check_rag_env():
1215 | print(f"LangChain version: {__version__}")
1216 | ```
1217 |
1218 | #### Advanced
1219 | 121. **Write a function to implement RAG pipeline caching.**
1220 | Reuses processed data.
1221 | ```python
1222 | import langchain
1223 | from langchain.cache import SQLiteCache
1223 | def enable_rag_pipeline_cache():
1224 | langchain.llm_cache = SQLiteCache(database_path="rag_cache.db")
1225 | ```
1226 |
1227 | 122. **How do you optimize RAG for high-throughput processing?**
1228 | Uses parallel execution.
1229 | ```python
1230 | from joblib import Parallel, delayed
1231 | def high_throughput_rag(qa_chain, queries):
1232 | return Parallel(n_jobs=-1)(delayed(qa_chain.run)(q) for q in queries)
1233 | ```
1234 |
1235 | 123. **Write a function to implement RAG pipeline versioning.**
1236 | Tracks changes in workflows.
1237 | ```python
1238 | import json
1239 | def version_rag_pipeline(config, version):
1240 | with open(f"rag_v{version}.json", "w") as f:
1241 | json.dump(config, f)
1242 | ```
1243 |
1244 | 124. **How do you implement RAG pipeline monitoring?**
1245 | Logs performance metrics.
1246 | ```python
1247 | import logging
1248 | import time
1248 | def monitored_rag(qa_chain, query):
1249 | logging.basicConfig(filename="rag.log", level=logging.INFO)
1250 | start = time.time()
1251 | response = qa_chain.run(query)
1252 | logging.info(f"Query: {query}, Latency: {time.time() - start}s")
1253 | return response
1254 | ```
1255 |
1256 | 125. **Write a function to handle RAG scalability.**
1257 | Processes large datasets efficiently.
1258 | ```python
1259 | def scalable_rag(qa_chain, queries, chunk_size=100):
1260 | results = []
1261 | for i in range(0, len(queries), chunk_size):
1262 | results.extend(batch_rag_process(qa_chain, queries[i:i+chunk_size]))
1263 | return results
1264 | ```
1265 |
1266 | 126. **How do you implement RAG pipeline automation?**
1267 | Scripts end-to-end workflows.
1268 | ```python
1269 | def automate_rag_pipeline(documents, queries):
1270 | qa_chain = setup_rag_pipeline(documents)
1271 | responses = batch_rag_process(qa_chain, queries)
1272 | with open("rag_outputs.json", "w") as f:
1273 | json.dump(responses, f)
1274 | return responses
1275 | ```
1276 |
1277 | ## Ethical Considerations in LLM and RAG
1278 |
1279 | ### Basic
1280 | 127. **What are ethical concerns in LLM API usage?**
1281 | Includes bias and privacy risks.
1282 | ```python
1283 | def check_response_bias(responses, groups):
1284 | return {g: len([r for r, g_ in zip(responses, groups) if g_ == g]) / len(responses) for g in set(groups)}
1285 | ```
1286 |
1287 | 128. **How do you detect bias in RAG outputs?**
1288 | Analyzes group disparities.
1289 | ```python
1290 | def detect_rag_bias(qa_chain, queries, groups):
1291 | responses = [qa_chain.run(q) for q in queries]
1292 | return {g: len([r for r, g_ in zip(responses, groups) if g_ == g]) / len(responses) for g in set(groups)}
1293 | ```
1294 |
1295 | 129. **What is data privacy in RAG pipelines?**
1296 | Protects sensitive documents.
1297 | ```python
1298 | def anonymize_documents(documents):
1299 | return [doc.replace("sensitive", "[REDACTED]") for doc in documents]
1300 | ```
1301 |
1302 | 130. **How do you ensure fairness in RAG pipelines?**
1303 | Balances retrieval across groups.
1304 | ```python
1305 | def fair_retrieval(vector_store, query, weights):
1306 | results = vector_store.similarity_search_with_score(query)
1307 | return [(doc, score * weights[doc.metadata["group"]]) for doc, score in results]
1308 | ```
1309 |
1310 | 131. **What is explainability in LLM and RAG applications?**
1311 | Clarifies model decisions.
1312 | ```python
1313 | def explain_rag_response(qa_chain, query):
1314 | response = qa_chain.run(query)
1315 | docs = qa_chain.retriever.get_relevant_documents(query)
1316 | return {"response": response, "retrieved_docs": [doc.page_content for doc in docs]}
1317 | ```
1318 |
1319 | 132. **How do you visualize bias in RAG outputs?**
1320 | Plots group-wise response distribution.
1321 | ```python
1322 | import matplotlib.pyplot as plt
1323 | def plot_rag_bias(bias_metrics):
1324 | plt.bar(bias_metrics.keys(), bias_metrics.values())
1325 | plt.savefig("rag_bias.png")
1326 | ```
1327 |
1328 | #### Intermediate
1329 | 133. **Write a function to mitigate bias in RAG pipelines.**
1330 | Reweights retrieved documents.
1331 | ```python
1332 | def mitigate_rag_bias(vector_store, query, group_weights):
1333 | results = vector_store.similarity_search_with_score(query)
1334 | return sorted([(doc, score * group_weights[doc.metadata["group"]]) for doc, score in results], key=lambda x: x[1], reverse=True)
1335 | ```
1336 |
1337 | 134. **How do you implement differential privacy in RAG?**
1338 | Adds noise to embeddings.
1339 | ```python
1340 | import numpy as np
1341 | def private_embeddings(embeddings, texts, noise_scale=0.1):  # Gaussian noise scale (not a formal DP epsilon, where larger epsilon means less noise)
1342 | vectors = embeddings.embed_documents(texts)
1343 |     return [v + np.random.normal(0, noise_scale, len(v)) for v in vectors]
1344 | ```
1345 |
1346 | 135. **Write a function to assess fairness in RAG pipelines.**
1347 | Computes group-wise metrics.
1348 | ```python
1349 | def fairness_metrics_rag(qa_chain, queries, groups, references):
1350 | responses = [qa_chain.run(q) for q in queries]
1351 | return {g: sum(1 for r, ref, g_ in zip(responses, references, groups) if r == ref and g_ == g) / sum(1 for g_ in groups if g_ == g) for g in set(groups)}
1352 | ```
1353 |
1354 | 136. **How do you ensure energy-efficient RAG pipelines?**
1355 | Optimizes resource usage.
1356 | ```python
1357 | def efficient_rag(qa_chain, query, max_docs=3):
1358 | qa_chain.retriever.search_kwargs["k"] = max_docs
1359 | return qa_chain.run(query)
1360 | ```
1361 |
1362 | 137. **Write a function to audit RAG pipeline decisions.**
1363 | Logs queries and responses.
1364 | ```python
1365 | import logging
1366 | def audit_rag(qa_chain, query):
1367 | logging.basicConfig(filename="rag_audit.log", level=logging.INFO)
1368 | response = qa_chain.run(query)
1369 | logging.info(f"Query: {query}, Response: {response}")
1370 | return response
1371 | ```
1372 |
1373 | 138. **How do you visualize fairness metrics in RAG?**
1374 | Plots group-wise performance.
1375 | ```python
1376 | import matplotlib.pyplot as plt
1377 | def plot_fairness_metrics_rag(metrics):
1378 | plt.bar(metrics.keys(), metrics.values())
1379 | plt.savefig("rag_fairness.png")
1380 | ```
1381 |
1382 | #### Advanced
1383 | 139. **Write a function to implement fairness-aware RAG.**
1384 | Uses balanced retrieval.
1385 | ```python
1386 | def fairness_aware_rag(qa_chain, query, group_weights):
1387 | results = qa_chain.retriever.get_relevant_documents(query)
1388 |     ranked = sorted(results, key=lambda doc: group_weights[doc.metadata["group"]], reverse=True)
1389 |     return qa_chain.combine_documents_chain.run(input_documents=ranked, question=query)  # RetrievalQA.run() does not accept a documents argument
1390 | ```
1391 |
1392 | 140. **How do you implement privacy-preserving RAG?**
1393 |      Perturbs the query embedding with random noise before retrieval (a privacy-preserving safeguard).
1394 | ```python
1395 | def private_rag(vector_store, query, noise_scale=0.1):
1396 | query_vector = embeddings.embed_query(query)
1397 |     noisy_vector = query_vector + np.random.normal(0, noise_scale, len(query_vector))
1398 | return vector_store.similarity_search_by_vector(noisy_vector)
1399 | ```
1400 |
1401 | 141. **Write a function to monitor ethical risks in RAG.**
1402 | Tracks bias and fairness metrics.
1403 | ```python
1404 | import logging
1405 | def monitor_rag_ethics(qa_chain, queries, groups, references):
1406 | logging.basicConfig(filename="rag_ethics.log", level=logging.INFO)
1407 | metrics = fairness_metrics_rag(qa_chain, queries, groups, references)
1408 | logging.info(f"Fairness metrics: {metrics}")
1409 | return metrics
1410 | ```
1411 |
1412 | 142. **How do you implement explainable RAG?**
1413 | Provides retrieval context.
1414 | ```python
1415 | def explainable_rag(qa_chain, query):
1416 | docs = qa_chain.retriever.get_relevant_documents(query)
1417 | response = qa_chain.run(query)
1418 | return {"response": response, "context": [doc.page_content for doc in docs]}
1419 | ```
1420 |
1421 | 143. **Write a function to ensure regulatory compliance in RAG.**
1422 | Logs pipeline metadata.
1423 | ```python
1424 | import json
1425 | def log_rag_compliance(qa_chain, metadata):
1426 | with open("rag_compliance.json", "w") as f:
1427 | json.dump({"pipeline": str(qa_chain), "metadata": metadata}, f)
1428 | ```
1429 |
1430 | 144. **How do you implement ethical evaluation in RAG?**
1431 | Assesses fairness and robustness.
1432 | ```python
1433 | def ethical_rag_evaluation(qa_chain, queries, groups, references):
1434 | fairness = fairness_metrics_rag(qa_chain, queries, groups, references)
1435 |     robustness = evaluate_robustness(qa_chain, queries, references)
1436 | return {"fairness": fairness, "robustness": robustness}
1437 | ```
1438 |
1439 | ## Integration with Other Libraries
1440 |
1441 | ### Basic
1442 | 145. **How do you integrate LLM APIs with LangChain?**
1443 | Uses LangChain for orchestration.
1444 | ```python
1445 | from langchain.llms import OpenAI
1446 | llm = OpenAI(model="text-davinci-003")
1447 | ```
1448 |
1449 | 146. **How do you integrate RAG with Hugging Face?**
1450 | Uses Hugging Face embeddings.
1451 | ```python
1452 | from langchain.embeddings import HuggingFaceEmbeddings
1453 | embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
1454 | ```
1455 |
1456 | 147. **How do you use RAG with Matplotlib?**
1457 | Visualizes pipeline metrics.
1458 | ```python
1459 | import matplotlib.pyplot as plt
1460 | def plot_rag_data(data):
1461 | plt.plot(data)
1462 | plt.savefig("rag_data.png")
1463 | ```
1464 |
1465 | 148. **How do you integrate LLM APIs with FastAPI?**
1466 | Serves responses via API.
1467 | ```python
1468 | from fastapi import FastAPI
1469 | app = FastAPI()
1470 | client = OpenAI()
1471 | @app.post("/llm")
1472 | async def llm_call(prompt: str):
1473 | return {"response": call_llm(client, prompt)}
1474 | ```
1475 |
1476 | 149. **How do you use RAG with Pandas?**
1477 | Preprocesses document data.
1478 | ```python
1479 | import pandas as pd
1480 | def preprocess_with_pandas(df, column="text"):
1481 | return FAISS.from_texts(df[column].tolist(), embeddings)
1482 | ```
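     Usage sketch with a toy DataFrame (assumes the `embeddings` object from the earlier snippets):
   ```python
   import pandas as pd
   df = pd.DataFrame({"text": ["RAG combines retrieval and generation.", "FAISS stores dense vectors."]})
   vector_store = preprocess_with_pandas(df, column="text")
   ```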
1483 |
1484 | 150. **How do you integrate RAG with SQLite?**
1485 | Stores document metadata.
1486 | ```python
1487 | import sqlite3
1488 | def store_metadata(documents, db_path="metadata.db"):
1489 | conn = sqlite3.connect(db_path)
1490 | c = conn.cursor()
1491 | c.execute("CREATE TABLE IF NOT EXISTS docs (id INTEGER PRIMARY KEY, text TEXT)")
1492 | c.executemany("INSERT INTO docs (text) VALUES (?)", [(doc,) for doc in documents])
1493 | conn.commit()
1494 | conn.close()
1495 | ```
1496 |
1497 | #### Intermediate
1498 | 151. **Write a function to integrate RAG with LlamaIndex.**
1499 | Uses LlamaIndex for indexing.
1500 | ```python
1501 | from llama_index import VectorStoreIndex, SimpleDirectoryReader
1502 | def setup_llama_rag(directory):
1503 | documents = SimpleDirectoryReader(directory).load_data()
1504 | index = VectorStoreIndex.from_documents(documents)
1505 | return index.as_query_engine()
1506 | ```
1507 |
1508 | 152. **How do you integrate LLM APIs with Streamlit?**
1509 | Builds interactive apps.
1510 | ```python
1511 | import streamlit as st
1512 | def llm_streamlit_app(client):
1513 | st.title("LLM Chat")
1514 | prompt = st.text_input("Enter prompt")
1515 | if prompt:
1516 | response = call_llm(client, prompt)
1517 | st.write(response)
1518 | ```
1519 |
1520 | 153. **Write a function to integrate RAG with Weaviate.**
1521 | Uses Weaviate for vector storage.
1522 | ```python
1523 | import weaviate
1524 | from langchain.vectorstores import Weaviate
1524 | def setup_weaviate_rag(documents, embeddings):
1525 | client = weaviate.Client("http://localhost:8080")
1526 | return Weaviate.from_documents(documents, embeddings, client=client)
1527 | ```
1528 |
1529 | 154. **How do you integrate RAG with SQL databases?**
1530 | Stores and queries metadata.
1531 | ```python
1532 | import sqlite3
1533 | def query_metadata(query, db_path="metadata.db"):
1534 | conn = sqlite3.connect(db_path)
1535 | c = conn.cursor()
1536 | c.execute("SELECT text FROM docs WHERE text LIKE ?", (f"%{query}%",))
1537 | results = c.fetchall()
1538 | conn.close()
1539 | return [r[0] for r in results]
1540 | ```
1541 |
1542 | 155. **Write a function to integrate LLM APIs with Celery.**
1543 | Runs asynchronous tasks.
1544 | ```python
1545 | from celery import Celery
1546 | app = Celery("llm_tasks", broker="redis://localhost:6379")
1547 | @app.task
1548 | def async_llm_task(prompt):
1549 | client = OpenAI()
1550 | return call_llm(client, prompt)
1551 | ```
1552 |
1553 | 156. **How do you integrate RAG with Elasticsearch?**
1554 | Uses Elasticsearch for retrieval.
1555 | ```python
1556 | from langchain.vectorstores import ElasticsearchStore
1557 | def setup_elasticsearch_rag(documents, embeddings):
1558 | return ElasticsearchStore.from_documents(documents, embeddings, es_url="http://localhost:9200")
1559 | ```
1560 |
1561 | #### Advanced
1562 | 157. **Write a function to integrate RAG with GraphQL.**
1563 | Exposes RAG via GraphQL API.
1564 | ```python
1565 | from ariadne import QueryType, gql, make_executable_schema
1566 | from ariadne.asgi import GraphQL
1567 | type_defs = gql("""
1568 | type Query {
1569 | rag(query: String!): String
1570 | }
1571 | """)
1572 | query = QueryType()
1573 | @query.field("rag")
1574 | def resolve_rag(_, info, query):
1575 | qa_chain = info.context["qa_chain"]
1576 | return qa_chain.run(query)
1577 | schema = make_executable_schema(type_defs, query)
1578 | app = GraphQL(schema, context_value={"qa_chain": qa_chain})
1579 | ```
1580 |
1581 | 158. **How do you integrate RAG with Kubernetes?**
1582 | Deploys scalable RAG services.
1583 | ```python
1584 | from kubernetes import client, config
1585 | def deploy_rag_service():
1586 | config.load_kube_config()
1587 | v1 = client.CoreV1Api()
1588 | service = client.V1Service(
1589 | metadata=client.V1ObjectMeta(name="rag-service"),
1590 | spec=client.V1ServiceSpec(
1591 | selector={"app": "rag"},
1592 | ports=[client.V1ServicePort(port=80)]
1593 | )
1594 | )
1595 | v1.create_namespaced_service(namespace="default", body=service)
1596 | ```
1597 |
1598 | 159. **Write a function to integrate RAG with Apache Kafka.**
1599 | Processes streaming data.
1600 | ```python
1601 | from kafka import KafkaConsumer
1602 | def stream_rag_data(qa_chain, topic="rag_queries"):
1603 | consumer = KafkaConsumer(topic, bootstrap_servers="localhost:9092")
1604 | for message in consumer:
1605 | query = message.value.decode("utf-8")
1606 | yield qa_chain.run(query)
1607 | ```
1608 |
1609 | 160. **How do you integrate LLM APIs with Airflow?**
1610 | Orchestrates LLM workflows.
1611 | ```python
1612 | from airflow import DAG
1613 | from airflow.operators.python import PythonOperator
1614 | from datetime import datetime
1615 | def llm_task():
1616 | client = OpenAI()
1617 | return call_llm(client, "Test prompt")
1618 | with DAG("llm_dag", start_date=datetime(2025, 1, 1)) as dag:
1619 | task = PythonOperator(task_id="llm_task", python_callable=llm_task)
1620 | ```
1621 |
1622 | 161. **Write a function to integrate RAG with Redis.**
1623 | Caches query results.
1624 | ```python
1625 | import redis
1626 | def cache_rag_results(qa_chain, query):
1627 | r = redis.Redis(host="localhost", port=6379)
1628 | cached = r.get(query)
1629 | if cached:
1630 | return cached.decode("utf-8")
1631 | response = qa_chain.run(query)
1632 | r.set(query, response)
1633 | return response
1634 | ```
1635 |
1636 | 162. **How do you integrate RAG with MLflow?**
1637 | Tracks pipeline experiments.
1638 | ```python
1639 | import mlflow
1640 | def log_rag_experiment(qa_chain, query, metrics):
1641 | with mlflow.start_run():
1642 | mlflow.log_param("query", query)
1643 | for metric, value in metrics.items():
1644 | mlflow.log_metric(metric, value)
1645 | ```
1646 |
1647 | ## Deployment and Scalability
1648 |
1649 | ### Basic
1650 | 163. **How do you deploy an LLM API service?**
1651 | Uses FastAPI for serving.
1652 | ```python
1653 | from fastapi import FastAPI
1654 | app = FastAPI()
1655 | client = OpenAI()
1656 | @app.post("/llm")
1657 | async def llm_endpoint(prompt: str):
1658 | return {"response": call_llm(client, prompt)}
1659 | ```
1660 |
1661 | 164. **How do you deploy a RAG pipeline?**
1662 | Serves RAG via API.
1663 | ```python
1664 | from fastapi import FastAPI
1665 | app = FastAPI()
1666 | qa_chain = setup_rag_pipeline(["Doc"])
1667 | @app.post("/rag")
1668 | async def rag_endpoint(query: str):
1669 | return {"response": qa_chain.run(query)}
1670 | ```
1671 |
1672 | 165. **What is model quantization in the context of LLM deployment?**
1673 | Reduces model size for efficiency.
1674 | ```python
1675 | from transformers import AutoModelForCausalLM
1676 | model = AutoModelForCausalLM.from_pretrained("distilgpt2", load_in_8bit=True, device_map="auto")  # 8-bit quantized load (requires bitsandbytes); torch_dtype does not accept "int8"
1677 | ```
1678 |
1679 | 166. **How do you save a RAG pipeline for deployment?**
1680 | Persists vector store and model.
1681 | ```python
1682 | def save_rag_pipeline(qa_chain, path="rag_pipeline"):
1683 | qa_chain.retriever.vectorstore.save_local(path)
1684 | ```
1685 |
1686 | 167. **How do you load a deployed RAG pipeline?**
1687 | Restores pipeline state.
1688 | ```python
1689 | from langchain.vectorstores import FAISS
1690 | def load_rag_pipeline(path="rag_pipeline"):
1691 | vector_store = FAISS.load_local(path, embeddings)
1692 | return RetrievalQA.from_chain_type(llm=OpenAI(), retriever=vector_store.as_retriever())
1693 | ```
1694 |
1695 | 168. **How do you visualize deployment metrics?**
1696 | Plots latency and throughput.
1697 | ```python
1698 | import matplotlib.pyplot as plt
1699 | def plot_deployment_metrics(latencies, throughputs):
1700 | plt.plot(latencies, label="Latency")
1701 | plt.plot(throughputs, label="Throughput")
1702 | plt.legend()
1703 | plt.savefig("deployment_metrics.png")
1704 | ```
1705 |
1706 | #### Intermediate
1707 | 169. **Write a function to deploy a RAG pipeline with Docker.**
1708 | Containerizes the service.
1709 | ```python
1710 | def create_dockerfile():
1711 | with open("Dockerfile", "w") as f:
1712 | f.write("""
1713 | FROM python:3.9
1714 | COPY . /app
1715 | WORKDIR /app
1716 | RUN pip install langchain openai faiss-cpu fastapi uvicorn
1717 | CMD ["uvicorn", "main:app", "--host", "0.0.0.0", "--port", "80"]
1718 | """)
1719 | ```
1720 |
1721 | 170. **How do you scale a RAG pipeline for production?**
1722 | Uses distributed vector stores and load balancing.
1723 | ```python
1724 | import weaviate
1725 | from langchain.vectorstores import Weaviate
1725 | def scale_rag_pipeline(documents, embeddings):
1726 | client = weaviate.Client("http://weaviate-cluster:8080")
1727 | vector_store = Weaviate.from_documents(documents, embeddings, client=client)
1728 | return RetrievalQA.from_chain_type(llm=OpenAI(), retriever=vector_store.as_retriever())
1729 | ```
--------------------------------------------------------------------------------