├── ch4 ├── eu_ai_act.pdf ├── evaluating_retrival.ipynb ├── embedding.ipynb ├── faiss_ann_search.ipynb ├── Untitled.ipynb ├── inverted_index.ipynb ├── hybrid_sparse_dense.ipynb └── domain_adaptation.ipynb ├── ch9 ├── forex_example │ ├── requirements.txt │ ├── app │ │ ├── chromadb_setup.py │ │ ├── __init__.py │ │ ├── services.py │ │ └── main.py │ └── README.md ├── meeting_stringent_business_regulatory_requirements.ipynb └── auditing_reporting.ipynb ├── ch13 ├── safety_api │ ├── requirements.txt │ ├── schemas.py │ ├── config.py │ ├── INSTALL.md │ ├── main.py │ ├── RUN.md │ └── models.py ├── Calibrated_Output_Filtering.ipynb ├── data_sanitization.ipynb ├── Transparency_Instrumentation.ipynb └── Real_Time_Content_Moderation_System.ipynb ├── LICENSE ├── ch11 ├── semantic_routing.ipynb └── Langchain&Llamaindex.ipynb ├── ch10 ├── Quantization_fp8.ipynb └── Quantization_fp16.ipynb ├── ch14 ├── Quantization_with_RL_Based_LLM.ipynb └── Cross_Modal_Attention_Mechanism.ipynb ├── ch3 └── introduction_to_llm.ipynb ├── ch5 └── Implementing_Hybrid_Search.ipynb ├── ch8 └── fine_tuning_Deepseek_for_a_classification_task.ipynb └── ch7 └── ch8 └── fine_tuning_Deepseek_for_a_classification_task.ipynb /ch4/eu_ai_act.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/LLMs-in-Enterprise/HEAD/ch4/eu_ai_act.pdf -------------------------------------------------------------------------------- /ch9/forex_example/requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | uvicorn 3 | chromadb 4 | sentence-transformers 5 | transformers 6 | requests -------------------------------------------------------------------------------- /ch13/safety_api/requirements.txt: -------------------------------------------------------------------------------- 1 | torch>=2.0.0 2 | transformers>=4.30.0 3 | fastapi>=0.95.0 4 | uvicorn>=0.21.0 5 | pydantic>=1.10.0 -------------------------------------------------------------------------------- /ch9/forex_example/app/chromadb_setup.py: -------------------------------------------------------------------------------- 1 | import chromadb 2 | 3 | def get_chroma_collection(collection_name="forex_data"): 4 | client = chromadb.Client() 5 | return client.create_collection(name=collection_name) 6 | -------------------------------------------------------------------------------- /ch9/forex_example/app/__init__.py: -------------------------------------------------------------------------------- 1 | """ 2 | Initialize the app package. 3 | This file ensures the directory is treated as a Python package. 
4 | """ 5 | 6 | from .chromadb_setup import get_chroma_collection 7 | from .services import fetch_forex_data_alpha_vantage, generate_embedding, perform_inference_with_phi_model 8 | from app import get_chroma_collection 9 | -------------------------------------------------------------------------------- /ch13/safety_api/schemas.py: -------------------------------------------------------------------------------- 1 | from pydantic import BaseModel 2 | 3 | class PromptRequest(BaseModel): 4 | prompt: str 5 | 6 | class SafetyAnalysisResult(BaseModel): 7 | toxicity_risk: float 8 | fact_accuracy: int 9 | block: bool 10 | 11 | class APIResponse(BaseModel): 12 | response: dict | None = None 13 | error: str | None = None 14 | reasons: list[str] | None = None 15 | details: dict | None = None -------------------------------------------------------------------------------- /ch13/safety_api/config.py: -------------------------------------------------------------------------------- 1 | # Model configurations 2 | MODEL_CONFIGS = { 3 | "toxicity": { 4 | "model_name": "facebook/roberta-hate-speech-dynabench-r4-target", 5 | "threshold": 0.9 6 | }, 7 | "factcheck": { 8 | "model_name": "digitalepidemiologylab/covid-twitter-bert-v2-finetuned-factcheck", 9 | "false_label": 0 10 | } 11 | } 12 | 13 | # API settings 14 | API_CONFIG = { 15 | "host": "0.0.0.0", 16 | "port": 8000, 17 | "debug": True 18 | } -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2024 Packt 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /ch13/safety_api/INSTALL.md: -------------------------------------------------------------------------------- 1 | # Installation Guide 2 | 3 | ## Prerequisites 4 | 5 | - Python 3.8+ 6 | - pip (Python package manager) 7 | - 8GB+ RAM (for model loading) 8 | - 5GB+ free disk space (for model downloads) 9 | 10 | ## Installation Steps 11 | 12 | 1. **Clone the repository**: 13 | ```bash 14 | git clone https://github.com/yourusername/safety-api.git 15 | cd safety-api 16 | ``` 17 | 18 | 2. **Create a virtual environment (recommended)**: 19 | ```bash 20 | python -m venv venv 21 | source venv/bin/activate # On Windows: venv\Scripts\activate 22 | ``` 23 | 24 | 3. 
**Install dependencies**: 25 |    ```bash 26 |    pip install -r requirements.txt 27 |    ``` 28 | 29 |    Or install manually if you don't have a requirements file: 30 |    ```bash 31 |    pip install torch transformers fastapi uvicorn pydantic 32 |    ``` 33 | 34 | 4. **Download models (automatically on first run)**: 35 |    ```bash 36 |    python -c "from transformers import AutoModel; AutoModel.from_pretrained('facebook/roberta-hate-speech-dynabench-r4-target')" 37 |    ``` 38 | 39 | ## Hardware Recommendations 40 | 41 | - For better performance, use a machine with: 42 |   - 16GB+ RAM 43 |   - NVIDIA GPU with CUDA support 44 |   - SSD storage 45 | 46 | ## Troubleshooting 47 | 48 | - **Out of memory errors**: 49 |   - Try reducing batch size in the code 50 |   - Use `pip install torch --extra-index-url https://download.pytorch.org/whl/cu117` for GPU support 51 | 52 | - **Slow performance**: 53 |   - Add `device_map="auto"` when loading models 54 |   - Enable GPU if available -------------------------------------------------------------------------------- /ch13/safety_api/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, HTTPException 2 | from .models import SafetyEnsemble, DummyLLM 3 | from .schemas import PromptRequest, APIResponse, SafetyAnalysisResult 4 | from .config import API_CONFIG, MODEL_CONFIGS 5 | import uvicorn 6 | 7 | app = FastAPI() 8 | llm_generator = DummyLLM() 9 | 10 | @app.post('/generate', response_model=APIResponse) 11 | async def generate_safe(request: PromptRequest): 12 |     try: 13 |         safety = SafetyEnsemble() 14 |         analysis_result = safety.analyze(request.prompt) 15 | 16 |         if analysis_result['block']: 17 |             reasons = _get_block_reasons(analysis_result) 18 |             return APIResponse( 19 |                 error="Content violation detected", 20 |                 reasons=reasons, 21 |                 details=analysis_result 22 |             ) 23 |         else: 24 |             return APIResponse( 25 |                 response=llm_generator.generate(request.prompt), 26 |                 details=analysis_result 27 |             ) 28 |     except Exception as e: 29 |         raise HTTPException(status_code=500, detail=str(e)) 30 | 31 | def _get_block_reasons(analysis: dict) -> list[str]: 32 |     reasons = [] 33 |     if analysis['toxicity_risk'] > MODEL_CONFIGS["toxicity"]["threshold"]: 34 |         reasons.append("High toxicity") 35 |     if analysis['fact_accuracy'] == MODEL_CONFIGS["factcheck"]["false_label"]: 36 |         reasons.append("Factual inaccuracy") 37 |     return reasons 38 | 39 | if __name__ == "__main__": 40 |     uvicorn.run( 41 |         app, 42 |         host=API_CONFIG["host"], 43 |         port=API_CONFIG["port"], 44 |         log_level="debug" if API_CONFIG["debug"] else "info" 45 |     ) -------------------------------------------------------------------------------- /ch9/forex_example/app/services.py: -------------------------------------------------------------------------------- 1 | import logging 2 | import requests 3 | from sentence_transformers import SentenceTransformer 4 | from transformers import AutoTokenizer, AutoModelForCausalLM 5 | 6 | # Fetch Forex Data 7 | def fetch_forex_data_alpha_vantage(from_currency, to_currency, api_key): 8 |     url = "https://www.alphavantage.co/query" 9 |     params = { 10 |         "function": "FX_DAILY", 11 |         "from_symbol": from_currency, 12 |         "to_symbol": to_currency, 13 |         "apikey": api_key, 14 |     } 15 | 16 |     try: 17 |         response = requests.get(url, params=params) 18 |         response.raise_for_status() 19 |         data = response.json() 20 | 21 |         if "Time Series FX (Daily)" in data: 22 |             latest_data = next(iter(data["Time Series FX (Daily)"].values())) 23 |             return [{ 24 |                 "currency_pair": f"{from_currency}/{to_currency}", 25 |                 "rate": latest_data["4. 
close"], 26 | }] 27 | else: 28 | logging.error("Forex data not found.") 29 | return [] 30 | except requests.exceptions.RequestException as e: 31 | logging.error(f"API error: {e}") 32 | return [] 33 | 34 | # Generate Embedding for Query 35 | def generate_embedding(query): 36 | model = SentenceTransformer('all-MiniLM-L6-v2') 37 | return model.encode(query).tolist() 38 | 39 | # Perform Inference 40 | def perform_inference_with_phi_model(query): 41 | model_name = "microsoft/phi-1_5" 42 | tokenizer = AutoTokenizer.from_pretrained(model_name) 43 | model = AutoModelForCausalLM.from_pretrained(model_name) 44 | 45 | inputs = tokenizer(query, return_tensors="pt") 46 | outputs = model.generate(inputs["input_ids"], max_length=100, num_return_sequences=1, do_sample=True) 47 | 48 | return tokenizer.decode(outputs[0], skip_special_tokens=True) 49 | -------------------------------------------------------------------------------- /ch4/evaluating_retrival.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "ff0f3a52-13d8-48b5-aa50-983e5ee6e9c3", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Relevant docs: {1, 3}\n", 14 | "Retrieved docs: [1, 2, 4]\n", 15 | "Precision: 0.33\n", 16 | "Recall: 0.50\n" 17 | ] 18 | } 19 | ], 20 | "source": [ 21 | "# Example: Calculating Precision and Recall\n", 22 | "\n", 23 | "# Suppose we have a set of truly relevant document IDs for a particular query:\n", 24 | "relevant_docs = {1, 3}\n", 25 | "\n", 26 | "# The system retrieves a ranked list of documents:\n", 27 | "retrieved_docs = [1, 2, 4] # for example\n", 28 | "\n", 29 | "precision = len(set(retrieved_docs) & relevant_docs) / len(retrieved_docs)\n", 30 | "recall = len(set(retrieved_docs) & relevant_docs) / len(relevant_docs)\n", 31 | "\n", 32 | "print(\"Relevant docs:\", relevant_docs)\n", 33 | "print(\"Retrieved docs:\", retrieved_docs)\n", 34 | "print(f\"Precision: {precision:.2f}\")\n", 35 | "print(f\"Recall: {recall:.2f}\")\n" 36 | ] 37 | }, 38 | { 39 | "cell_type": "code", 40 | "execution_count": null, 41 | "id": "01a0a146-4ece-4af9-b038-5fdec1e9273c", 42 | "metadata": {}, 43 | "outputs": [], 44 | "source": [] 45 | } 46 | ], 47 | "metadata": { 48 | "kernelspec": { 49 | "display_name": "Python 3 (ipykernel)", 50 | "language": "python", 51 | "name": "python3" 52 | }, 53 | "language_info": { 54 | "codemirror_mode": { 55 | "name": "ipython", 56 | "version": 3 57 | }, 58 | "file_extension": ".py", 59 | "mimetype": "text/x-python", 60 | "name": "python", 61 | "nbconvert_exporter": "python", 62 | "pygments_lexer": "ipython3", 63 | "version": "3.13.0" 64 | } 65 | }, 66 | "nbformat": 4, 67 | "nbformat_minor": 5 68 | } 69 | -------------------------------------------------------------------------------- /ch9/forex_example/app/main.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, HTTPException 2 | from app.chromadb_setup import get_chroma_collection 3 | from app.services import ( 4 | fetch_forex_data_alpha_vantage, 5 | generate_embedding, 6 | perform_inference_with_phi_model, 7 | ) 8 | 9 | app = FastAPI() 10 | 11 | # Initialize ChromaDB 12 | collection = get_chroma_collection() 13 | 14 | # --- Health Check Endpoint --- 15 | @app.get("/health") 16 | def health_check(): 17 | """ 18 | Health check to ensure the service is running. 
19 | """ 20 | return {"status": "ok"} 21 | 22 | # --- Main Endpoint --- 23 | @app.get("/process") 24 | def process( 25 | from_currency: str = "USD", 26 | to_currency: str = "EUR", 27 | api_key: str = "YOUR_API_KEY", # please create an api key from here https://alphavantage.co/support/#api-key 28 | query: str = "What is the current exchange rate for USD to EUR?", 29 | ): 30 | """ 31 | Combines fetching forex data, storing it in ChromaDB, 32 | querying ChromaDB, and performing inference. 33 | """ 34 | # Step 1: Fetch Forex Data 35 | forex_data = fetch_forex_data_alpha_vantage(from_currency, to_currency, api_key) 36 | if not forex_data: 37 | raise HTTPException(status_code=404, detail="Forex data not found.") 38 | 39 | # Step 2: Store Forex Data in ChromaDB 40 | for entry in forex_data: 41 | collection.add( 42 | documents=[f"Rate: {entry['rate']}"], 43 | metadatas=[{"currency_pair": entry["currency_pair"], "rate": entry["rate"]}], 44 | ids=[entry["currency_pair"]], 45 | ) 46 | 47 | # Step 3: Query ChromaDB 48 | query_embedding = generate_embedding(query) 49 | db_results = collection.query(query_embeddings=[query_embedding], n_results=3) 50 | 51 | # Step 4: Perform Inference with vLLM Model 52 | inference_result = perform_inference_with_phi_model(query) 53 | 54 | return { 55 | "forex_data": forex_data, 56 | "db_results": db_results, 57 | "inference_result": inference_result, 58 | } 59 | -------------------------------------------------------------------------------- /ch4/embedding.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "id": "3331547d-6c91-4dd5-9906-e19add61713e", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Similarity with sentence 2: 0.5113641619682312\n", 14 | "Similarity with sentence 3: 0.0436665304005146\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "from sentence_transformers import SentenceTransformer, util\n", 20 | "\n", 21 | "# Load a pre-trained embedding model\n", 22 | "model = SentenceTransformer('all-MiniLM-L6-v2')\n", 23 | "\n", 24 | "# Define some example sentences\n", 25 | "sentences = [\n", 26 | " \"The car is parked in the garage.\",\n", 27 | " \"A vehicle is inside the building where cars are kept.\",\n", 28 | " \"The weather is sunny and bright today.\"\n", 29 | "]\n", 30 | "\n", 31 | "# Encode the sentences into embeddings\n", 32 | "embeddings = model.encode(sentences, convert_to_tensor=True)\n", 33 | "\n", 34 | "# Compute cosine similarities between the first sentence and the others\n", 35 | "similarities = util.cos_sim(embeddings[0], embeddings[1:])\n", 36 | "\n", 37 | "sim_with_sentence2 = similarities[0,0].item()\n", 38 | "sim_with_sentence3 = similarities[0,1].item()\n", 39 | "\n", 40 | "print(\"Similarity with sentence 2:\", sim_with_sentence2)\n", 41 | "print(\"Similarity with sentence 3:\", sim_with_sentence3)\n" 42 | ] 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "id": "db89e6ca-bb59-4457-bfa4-f040aaf7131c", 48 | "metadata": {}, 49 | "outputs": [], 50 | "source": [] 51 | } 52 | ], 53 | "metadata": { 54 | "kernelspec": { 55 | "display_name": "Python 3 (ipykernel)", 56 | "language": "python", 57 | "name": "python3" 58 | }, 59 | "language_info": { 60 | "codemirror_mode": { 61 | "name": "ipython", 62 | "version": 3 63 | }, 64 | "file_extension": ".py", 65 | "mimetype": "text/x-python", 66 | "name": "python", 67 | "nbconvert_exporter": 
"python", 68 | "pygments_lexer": "ipython3", 69 | "version": "3.13.0" 70 | } 71 | }, 72 | "nbformat": 4, 73 | "nbformat_minor": 5 74 | } 75 | -------------------------------------------------------------------------------- /ch4/faiss_ann_search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "d2c6cd87-245e-420e-9a66-71f2320264df", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Query Vector:\n", 14 | " [[0.86310345 0.6232981 0.33089802 0.06355835 0.31098232 0.32518333\n", 15 | " 0.72960615 0.63755745]]\n", 16 | "Indices of nearest neighbors: [[2 3 7]]\n", 17 | "Distances: [[0.7740041 0.8021247 1.0303653]]\n" 18 | ] 19 | } 20 | ], 21 | "source": [ 22 | "# Example: Using FAISS for ANN search\n", 23 | "# Make sure to have FAISS installed: !pip install faiss-cpu\n", 24 | "\n", 25 | "import numpy as np\n", 26 | "import faiss\n", 27 | "\n", 28 | "# Create a random dataset of embeddings\n", 29 | "np.random.seed(42)\n", 30 | "dimension = 8\n", 31 | "num_docs = 10\n", 32 | "data = np.random.random((num_docs, dimension)).astype('float32')\n", 33 | "\n", 34 | "# Build a FAISS index (exact L2 index for demonstration)\n", 35 | "index = faiss.IndexFlatL2(dimension)\n", 36 | "index.add(data)\n", 37 | "\n", 38 | "# Query vector\n", 39 | "query_vec = np.random.random((1, dimension)).astype('float32')\n", 40 | "\n", 41 | "# Perform ANN search: find top 3 nearest neighbors\n", 42 | "k = 3\n", 43 | "distances, indices = index.search(query_vec, k)\n", 44 | "\n", 45 | "print(\"Query Vector:\\n\", query_vec)\n", 46 | "print(\"Indices of nearest neighbors:\", indices)\n", 47 | "print(\"Distances:\", distances)" 48 | ] 49 | }, 50 | { 51 | "cell_type": "code", 52 | "execution_count": null, 53 | "id": "b23efb23-507b-4ed4-844e-d3b5f7773f91", 54 | "metadata": {}, 55 | "outputs": [], 56 | "source": [] 57 | } 58 | ], 59 | "metadata": { 60 | "kernelspec": { 61 | "display_name": "Python 3 (ipykernel)", 62 | "language": "python", 63 | "name": "python3" 64 | }, 65 | "language_info": { 66 | "codemirror_mode": { 67 | "name": "ipython", 68 | "version": 3 69 | }, 70 | "file_extension": ".py", 71 | "mimetype": "text/x-python", 72 | "name": "python", 73 | "nbconvert_exporter": "python", 74 | "pygments_lexer": "ipython3", 75 | "version": "3.9.12" 76 | } 77 | }, 78 | "nbformat": 4, 79 | "nbformat_minor": 5 80 | } 81 | -------------------------------------------------------------------------------- /ch13/safety_api/RUN.md: -------------------------------------------------------------------------------- 1 | # Running the Safety API 2 | 3 | ## Starting the Server 4 | 5 | 1. **Navigate to project directory**: 6 | ```bash 7 | cd safety-api 8 | ``` 9 | 10 | 2. **Activate virtual environment**: 11 | ```bash 12 | source venv/bin/activate # On Windows: venv\Scripts\activate 13 | ``` 14 | 15 | 3. **Run the API server**: 16 | ```bash 17 | python -m safety_api.main 18 | ``` 19 | 20 | Expected output: 21 | ``` 22 | INFO: Started server process [12345] 23 | INFO: Waiting for application startup. 24 | INFO: Application startup complete. 25 | INFO: Uvicorn running on http://0.0.0.0:8000 (Press CTRL+C to quit) 26 | ``` 27 | 28 | ## Testing the API 29 | 30 | ### Using cURL 31 | 32 | 1. 
**Test with safe content**: 33 | ```bash 34 | curl -X POST "http://localhost:8000/generate" \ 35 | -H "Content-Type: application/json" \ 36 | -d '{"prompt":"Explain quantum computing"}' 37 | ``` 38 | 39 | 2. **Test with potentially toxic content**: 40 | ```bash 41 | curl -X POST "http://localhost:8000/generate" \ 42 | -H "Content-Type: application/json" \ 43 | -d '{"prompt":"I hate all people from this group"}' 44 | ``` 45 | 46 | ### Using Python 47 | 48 | ```python 49 | import requests 50 | 51 | response = requests.post( 52 | "http://localhost:8000/generate", 53 | json={"prompt": "The Earth is flat"} 54 | ) 55 | print(response.json()) 56 | ``` 57 | 58 | ## API Endpoints 59 | 60 | | Endpoint | Method | Description | 61 | |----------|--------|-------------| 62 | | `/generate` | POST | Analyze and generate safe responses | 63 | 64 | ## Request Format 65 | ```json 66 | { 67 | "prompt": "Your text to analyze" 68 | } 69 | ``` 70 | 71 | ## Response Format 72 | Successful response: 73 | ```json 74 | { 75 | "response": { 76 | "generated_text": "Safe response..." 77 | }, 78 | "details": { 79 | "toxicity_risk": 0.12, 80 | "fact_accuracy": 2, 81 | "block": false 82 | } 83 | } 84 | ``` 85 | 86 | Error response: 87 | ```json 88 | { 89 | "error": "Content violation detected", 90 | "reasons": ["High toxicity"], 91 | "details": { 92 | "toxicity_risk": 0.95, 93 | "fact_accuracy": 1, 94 | "block": true 95 | } 96 | } 97 | ``` 98 | 99 | ## Stopping the Server 100 | Press `CTRL+C` in the terminal where the server is running. -------------------------------------------------------------------------------- /ch4/Untitled.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "bfbed3cb-1fa4-49a9-9405-cc789762a790", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Query: european quarterly revenue\n", 14 | "Rank 1: annual revenue growth in europe (distance: 0.4208)\n", 15 | "Rank 2: quarterly report on growth and revenue (distance: 0.7865)\n" 16 | ] 17 | } 18 | ], 19 | "source": [ 20 | "from sentence_transformers import SentenceTransformer\n", 21 | "import faiss\n", 22 | "import numpy as np\n", 23 | "\n", 24 | "corpus = [\n", 25 | " \"annual revenue growth in europe\",\n", 26 | " \"europe sales stable last quarter\",\n", 27 | " \"quarterly report on growth and revenue\"\n", 28 | "]\n", 29 | "\n", 30 | "model = SentenceTransformer('all-MiniLM-L6-v2')\n", 31 | "doc_embeddings = model.encode(corpus, convert_to_tensor=False)\n", 32 | "\n", 33 | "# Convert to float32 for FAISS\n", 34 | "doc_embeddings = np.array(doc_embeddings, dtype='float32')\n", 35 | "\n", 36 | "dimension = doc_embeddings.shape[1]\n", 37 | "index = faiss.IndexFlatL2(dimension)\n", 38 | "index.add(doc_embeddings)\n", 39 | "\n", 40 | "query = \"european quarterly revenue\"\n", 41 | "query_embedding = model.encode([query], convert_to_tensor=False)\n", 42 | "query_embedding = np.array(query_embedding, dtype='float32')\n", 43 | "\n", 44 | "k = 2\n", 45 | "distances, indices = index.search(query_embedding, k)\n", 46 | "print(\"Query:\", query)\n", 47 | "for i, idx in enumerate(indices[0]):\n", 48 | " print(f\"Rank {i+1}: {corpus[idx]} (distance: {distances[0][i]:.4f})\")" 49 | ] 50 | }, 51 | { 52 | "cell_type": "code", 53 | "execution_count": null, 54 | "id": "d7491eb6-bcaa-4ba3-ac19-cde89d830ecc", 55 | "metadata": {}, 56 | "outputs": [], 57 | "source": [] 58 | } 59 | ], 
60 | "metadata": { 61 | "kernelspec": { 62 | "display_name": "Python 3 (ipykernel)", 63 | "language": "python", 64 | "name": "python3" 65 | }, 66 | "language_info": { 67 | "codemirror_mode": { 68 | "name": "ipython", 69 | "version": 3 70 | }, 71 | "file_extension": ".py", 72 | "mimetype": "text/x-python", 73 | "name": "python", 74 | "nbconvert_exporter": "python", 75 | "pygments_lexer": "ipython3", 76 | "version": "3.13.0" 77 | } 78 | }, 79 | "nbformat": 4, 80 | "nbformat_minor": 5 81 | } 82 | -------------------------------------------------------------------------------- /ch13/safety_api/models.py: -------------------------------------------------------------------------------- 1 | from transformers import AutoTokenizer, AutoModelForSequenceClassification 2 | import torch 3 | from .config import MODEL_CONFIGS 4 | 5 | class SafetyEnsemble: 6 | def __init__(self): 7 | try: 8 | # Initialize toxicity model 9 | self.toxicity_tokenizer = AutoTokenizer.from_pretrained( 10 | MODEL_CONFIGS["toxicity"]["model_name"]) 11 | self.toxicity_model = AutoModelForSequenceClassification.from_pretrained( 12 | MODEL_CONFIGS["toxicity"]["model_name"]) 13 | 14 | # Initialize fact-checking model 15 | self.factcheck_tokenizer = AutoTokenizer.from_pretrained( 16 | MODEL_CONFIGS["factcheck"]["model_name"]) 17 | self.factcheck_model = AutoModelForSequenceClassification.from_pretrained( 18 | MODEL_CONFIGS["factcheck"]["model_name"]) 19 | 20 | except Exception as e: 21 | raise RuntimeError(f"Model loading failed: {str(e)}") 22 | 23 | def analyze(self, text: str) -> dict: 24 | try: 25 | # Toxicity analysis 26 | tox_inputs = self.toxicity_tokenizer( 27 | text, return_tensors="pt", truncation=True, max_length=512) 28 | tox_score = self._get_toxicity_score(tox_inputs) 29 | 30 | # Fact-checking analysis 31 | fc_inputs = self.factcheck_tokenizer( 32 | text, return_tensors="pt", truncation=True, max_length=512) 33 | veracity = self._get_veracity_score(fc_inputs) 34 | 35 | return { 36 | "toxicity_risk": tox_score, 37 | "fact_accuracy": veracity, 38 | "block": (tox_score > MODEL_CONFIGS["toxicity"]["threshold"] or 39 | veracity == MODEL_CONFIGS["factcheck"]["false_label"]) 40 | } 41 | except Exception as e: 42 | raise RuntimeError(f"Analysis failed: {str(e)}") 43 | 44 | def _get_toxicity_score(self, inputs): 45 | self.toxicity_model.eval() 46 | with torch.no_grad(): 47 | outputs = self.toxicity_model(**inputs) 48 | return torch.sigmoid(outputs.logits).item() 49 | 50 | def _get_veracity_score(self, inputs): 51 | self.factcheck_model.eval() 52 | with torch.no_grad(): 53 | outputs = self.factcheck_model(**inputs) 54 | return torch.argmax(outputs.logits).item() 55 | 56 | 57 | class DummyLLM: 58 | def generate(self, prompt: str) -> dict: 59 | return {"generated_text": f"This is a safe response to: '{prompt}'."} -------------------------------------------------------------------------------- /ch4/inverted_index.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 1, 6 | "id": "66d81606-13a2-4167-93a2-52f9ce9d817c", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stdout", 11 | "output_type": "stream", 12 | "text": [ 13 | "Query: europe revenue\n", 14 | "Candidate documents: {1}\n" 15 | ] 16 | } 17 | ], 18 | "source": [ 19 | "# Example: Building a simple inverted index and querying it\n", 20 | "\n", 21 | "documents = {\n", 22 | " 1: \"annual revenue growth in europe\",\n", 23 | " 2: \"europe sales stable last 
quarter\",\n", 24 | " 3: \"quarterly report on growth and revenue\"\n", 25 | "}\n", 26 | "\n", 27 | "def tokenize(text):\n", 28 | " return text.lower().split()\n", 29 | "\n", 30 | "# Build inverted index: term -> sorted list of doc_ids\n", 31 | "inverted_index = {}\n", 32 | "for doc_id, text in documents.items():\n", 33 | " terms = tokenize(text)\n", 34 | " for term in terms:\n", 35 | " if term not in inverted_index:\n", 36 | " inverted_index[term] = []\n", 37 | " inverted_index[term].append(doc_id)\n", 38 | "\n", 39 | "# Querying the inverted index: a simple AND query\n", 40 | "query = \"europe revenue\"\n", 41 | "query_terms = tokenize(query)\n", 42 | "\n", 43 | "# Get postings lists for each term\n", 44 | "posting_lists = [set(inverted_index.get(term, [])) for term in query_terms if term in inverted_index]\n", 45 | "\n", 46 | "if posting_lists:\n", 47 | " candidate_docs = set.intersection(*posting_lists)\n", 48 | "else:\n", 49 | " candidate_docs = set()\n", 50 | "\n", 51 | "print(\"Query:\", query)\n", 52 | "print(\"Candidate documents:\", candidate_docs) # Expected: doc_id 1 is a strong candidate\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "code", 57 | "execution_count": null, 58 | "id": "07e54ee4-74c1-45d4-9f01-f784cde0f0fc", 59 | "metadata": {}, 60 | "outputs": [], 61 | "source": [] 62 | } 63 | ], 64 | "metadata": { 65 | "kernelspec": { 66 | "display_name": "Python 3 (ipykernel)", 67 | "language": "python", 68 | "name": "python3" 69 | }, 70 | "language_info": { 71 | "codemirror_mode": { 72 | "name": "ipython", 73 | "version": 3 74 | }, 75 | "file_extension": ".py", 76 | "mimetype": "text/x-python", 77 | "name": "python", 78 | "nbconvert_exporter": "python", 79 | "pygments_lexer": "ipython3", 80 | "version": "3.9.12" 81 | } 82 | }, 83 | "nbformat": 4, 84 | "nbformat_minor": 5 85 | } 86 | -------------------------------------------------------------------------------- /ch9/forex_example/README.md: -------------------------------------------------------------------------------- 1 | # Forex Data & Intelligent Query System 2 | 3 | ## Problem Statement 4 | The goal of this project is to create a system capable of fetching real-time foreign exchange (forex) rates, analyzing them, and answering user queries using natural language processing (NLP). Key challenges include: 5 | 6 | - Fetching accurate and up-to-date forex data from APIs. 7 | - Efficiently storing forex data for rapid querying and analysis. 8 | - Leveraging intelligent language models for insightful, query-based responses. 9 | 10 | ## Solution 11 | The solution integrates **FastAPI** for building web services, **ChromaDB** for storing forex data, and **LLMs model** with Hugging Face's pre-trained models for inference. This system fetches forex data, stores it, and provides NLP-powered insights based on user queries. 12 | 13 | - **FastAPI** is used for handling HTTP requests. 14 | - **ChromaDB** efficiently stores forex data and enables quick retrieval. 15 | 16 | 17 | ## Installation 18 | 19 | ### Requirements 20 | To run this project, ensure you have the following installed: 21 | 22 | - Python 3.x 23 | - FastAPI 24 | - ChromaDB 25 | - Sentence Transformers 26 | - Requests (for fetching forex data) 27 | 28 | ### Installation Steps 29 | 30 | 1. **Clone the repository:** 31 | 32 | 2. **Install dependencies:** 33 | You can install the required packages using `pip` or `pipx` (if you're using pipx to isolate environments). 34 | 35 | With `pip`: 36 | ```bash 37 | pip install -r requirements.txt 38 | ``` 39 | 40 | 3. 
**Run the application:** 41 |    Start the FastAPI server: 42 | 43 |    ```bash 44 |    uvicorn app.main:app --reload 45 |    ``` 46 | 47 | ## Usage 48 | 49 | ### Health Check Endpoint 50 | - **URL**: `/health` 51 | - **Method**: `GET` 52 | - **Description**: Use this endpoint to check if the API is running. 53 | - **Response**: A simple status message indicating the system is up and running. 54 | 55 | ### Process Endpoint 56 | - **URL**: `/process` 57 | - **Method**: `GET` 58 | - **Parameters**: 59 |   - `from_currency`: (e.g., "USD") 60 |   - `to_currency`: (e.g., "EUR") 61 |   - `api_key`: Your Alpha Vantage API key. 62 |   - `query`: The natural language query (e.g., "What is the current exchange rate for USD to EUR?"). 63 | - **Description**: This endpoint fetches the latest forex data, stores it in ChromaDB, and uses the Hugging Face model for intelligent inference based on the user's query. 64 | - **Response**: A JSON object containing the fetched forex data, database query results, and inference results. 65 | 66 | ## Testing with Postman 67 | 68 | ### Health Endpoint 69 | 1. Open Postman and create a new `GET` request. 70 | 2. Use the URL `http://localhost:8000/health`. 71 | 3. Click **Send** to check the health of the API. 72 | 73 | ### Process Endpoint 74 | 1. Open Postman and create a new `GET` request. 75 | 2. Use the URL `http://localhost:8000/process`. 76 | 3. Under **Params**, add the following keys: 77 |    - `from_currency`: e.g., `USD` 78 |    - `to_currency`: e.g., `EUR` 79 |    - `api_key`: Your API key. 80 |    - `query`: e.g., `"What is the current exchange rate for USD to EUR?"` 81 | 4. Click **Send** to get the forex data and intelligent response. 82 | 83 | ## License 84 | MIT License. See LICENSE file for details. 85 | 86 | -------------------------------------------------------------------------------- /ch4/hybrid_sparse_dense.ipynb: -------------------------------------------------------------------------------- 1 | { 2 |   "cells": [ 3 |     { 4 |       "cell_type": "code", 5 |       "execution_count": 1, 6 |       "id": "c4842b9a-8a38-4c67-a6cb-bb6118a13092", 7 |       "metadata": {}, 8 |       "outputs": [ 9 |         { 10 |           "name": "stdout", 11 |           "output_type": "stream", 12 |           "text": [ 13 |             "Query: european quarterly revenue\n", 14 |             "Sparse Scores: [0.07075497 0. 
0.54872484]\n", 15 | "Dense Scores: [0.7896244 0.6044986 0.6067592]\n", 16 | "Combined Scores: [0.86037936 0.60449862 1.15548403]\n", 17 | "\n", 18 | "Ranked Documents by combined score:\n", 19 | "Doc 2: quarterly report on growth and revenue\n", 20 | "Doc 0: annual revenue growth in europe\n", 21 | "Doc 1: europe sales stable last quarter\n" 22 | ] 23 | } 24 | ], 25 | "source": [ 26 | "# Example: Combining sparse and dense retrieval results\n", 27 | "# Install required libraries if needed\n", 28 | "# !pip install rank_bm25 sentence-transformers\n", 29 | "\n", 30 | "from rank_bm25 import BM25Okapi\n", 31 | "from sentence_transformers import SentenceTransformer, util\n", 32 | "import numpy as np\n", 33 | "\n", 34 | "corpus = [\n", 35 | " \"annual revenue growth in europe\",\n", 36 | " \"europe sales stable last quarter\",\n", 37 | " \"quarterly report on growth and revenue\"\n", 38 | "]\n", 39 | "\n", 40 | "# Sparse retrieval with BM25\n", 41 | "tokenized_corpus = [doc.lower().split() for doc in corpus]\n", 42 | "bm25 = BM25Okapi(tokenized_corpus)\n", 43 | "\n", 44 | "# Dense retrieval with sentence-transformers\n", 45 | "model = SentenceTransformer('all-MiniLM-L6-v2')\n", 46 | "doc_embeddings = model.encode(corpus, convert_to_tensor=True)\n", 47 | "\n", 48 | "query = \"european quarterly revenue\"\n", 49 | "query_embedding = model.encode(query, convert_to_tensor=True)\n", 50 | "\n", 51 | "# Dense retrieval scores\n", 52 | "dense_scores = util.cos_sim(query_embedding, doc_embeddings)[0].cpu().numpy()\n", 53 | "\n", 54 | "# Sparse retrieval scores\n", 55 | "sparse_scores = bm25.get_scores(query.lower().split())\n", 56 | "\n", 57 | "# Combine scores (simple linear combination)\n", 58 | "combined_scores = dense_scores + sparse_scores\n", 59 | "\n", 60 | "# Sort documents by combined score\n", 61 | "ranked_indices = np.argsort(-combined_scores) # descending order\n", 62 | "\n", 63 | "print(\"Query:\", query)\n", 64 | "print(\"Sparse Scores:\", sparse_scores)\n", 65 | "print(\"Dense Scores:\", dense_scores)\n", 66 | "print(\"Combined Scores:\", combined_scores)\n", 67 | "print(\"\\nRanked Documents by combined score:\")\n", 68 | "for idx in ranked_indices:\n", 69 | " print(f\"Doc {idx}: {corpus[idx]}\")\n" 70 | ] 71 | }, 72 | { 73 | "cell_type": "code", 74 | "execution_count": null, 75 | "id": "1d82b8f7-0aab-48ce-96a8-255ecf9ed932", 76 | "metadata": {}, 77 | "outputs": [], 78 | "source": [] 79 | } 80 | ], 81 | "metadata": { 82 | "kernelspec": { 83 | "display_name": "Python 3 (ipykernel)", 84 | "language": "python", 85 | "name": "python3" 86 | }, 87 | "language_info": { 88 | "codemirror_mode": { 89 | "name": "ipython", 90 | "version": 3 91 | }, 92 | "file_extension": ".py", 93 | "mimetype": "text/x-python", 94 | "name": "python", 95 | "nbconvert_exporter": "python", 96 | "pygments_lexer": "ipython3", 97 | "version": "3.9.12" 98 | } 99 | }, 100 | "nbformat": 4, 101 | "nbformat_minor": 5 102 | } 103 | -------------------------------------------------------------------------------- /ch11/semantic_routing.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Semantic Routing (Clustering Queries by Intent)\n", 21 | "\n", 22 | "**Semantic 
routing** represents a major step forward from traditional rule-based routing by analyzing the **meaning and intent** behind queries.\n", 23 | "\n", 24 | "Instead of relying on keyword matches or hardcoded rules, semantic routing uses **vector embeddings** to cluster queries by **semantic similarity**, allowing dynamic and accurate routing to the right model or service.\n", 25 | "\n", 26 | "---\n", 27 | "\n", 28 | "## Key Idea\n", 29 | "\n", 30 | "Semantic routing converts the routing problem into a **high-dimensional similarity search**, where:\n", 31 | "\n", 32 | "- Incoming queries are **encoded as embeddings**\n", 33 | "- These are matched to **predefined intent clusters** or model capabilities\n", 34 | "- Routing is based on **semantic proximity**, not keywords\n", 35 | "\n", 36 | "---\n", 37 | "\n", 38 | "## Example: Open-Source Semantic Routing with `semantic-router`" 39 | ], 40 | "metadata": { 41 | "id": "Hk4JIy7rkg74" 42 | } 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "id": "mZgkZZogkgdA" 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "pip install semantic-router==0.0.9 \\\n", 53 | " transformers sentence-transformers \\\n", 54 | " llama-index langchain huggingface-hub\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "source": [ 60 | "### Basic Usage" 61 | ], 62 | "metadata": { 63 | "id": "dcBj3wsKkmD_" 64 | } 65 | }, 66 | { 67 | "cell_type": "code", 68 | "source": [ 69 | "from semantic_router import Route, RouteLayer\n", 70 | "from semantic_router.encoders import HuggingFaceEncoder\n", 71 | "\n", 72 | "# Use free Sentence-BERT model for encoding\n", 73 | "encoder = HuggingFaceEncoder(\"sentence-transformers/all-MiniLM-L6-v2\")\n", 74 | "\n", 75 | "# Define routes with example intents\n", 76 | "routes = [\n", 77 | " Route(name=\"medical\", utterances=[\"What are COVID symptoms?\", \"How to treat migraine?\"], encoder=encoder),\n", 78 | " Route(name=\"technical\", utterances=[\"Python quicksort code\", \"SQL join optimization\"])\n", 79 | "]\n", 80 | "\n", 81 | "# Create router\n", 82 | "router = RouteLayer(encoder=encoder, routes=routes)\n", 83 | "\n", 84 | "# Test a new query\n", 85 | "query = \"Signs of diabetes in elderly patients\"\n", 86 | "print(router(query).name) # Output: \"medical\"" 87 | ], 88 | "metadata": { 89 | "id": "9l6unPmMktfi" 90 | }, 91 | "execution_count": null, 92 | "outputs": [] 93 | }, 94 | { 95 | "cell_type": "markdown", 96 | "source": [ 97 | "## 🔍 How It Works\n", 98 | "\n", 99 | "### Embeddings\n", 100 | "Both the **query** and **reference utterances** are transformed into **vector embeddings** using a shared encoder \n", 101 | "(e.g., `all-MiniLM-L6-v2`, `text-embedding-ada-002`, etc.).\n", 102 | "\n", 103 | "---\n", 104 | "\n", 105 | "### Similarity Matching\n", 106 | "The **incoming query** is compared to all defined routes using **cosine similarity** in the embedding space \n", 107 | "to find the **most semantically similar** intent cluster.\n", 108 | "\n", 109 | "---\n", 110 | "\n", 111 | "### RouteLayer\n", 112 | "`RouteLayer` automatically determines the **intent class** the query belongs to \n", 113 | "and **routes** it to the corresponding handler or model.\n" 114 | ], 115 | "metadata": { 116 | "id": "uW0Hqd-3kwDV" 117 | } 118 | }, 119 | { 120 | "cell_type": "code", 121 | "source": [], 122 | "metadata": { 123 | "id": "MY7Hs9Urk-hG" 124 | }, 125 | "execution_count": null, 126 | "outputs": [] 127 | } 128 | ] 129 | } 
-------------------------------------------------------------------------------- /ch11/Langchain&Llamaindex.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# LangChain & LlamaIndex for Multi-LLM Coordination\n", 21 | "\n", 22 | "**LangChain** and **LlamaIndex** are powerful frameworks enabling sophisticated orchestration and multi-LLM pipelines, especially for knowledge-intensive domains.\n", 23 | "\n", 24 | "---\n", 25 | "\n", 26 | "## LangChain: Agentic Workflow for Model Specialization\n", 27 | "\n", 28 | "LangChain enables **agentic workflows** where different LLMs specialize in parts of a task. \n", 29 | "For example, GPT-4 might do reasoning, while Claude or LLaMA handles summarization.\n", 30 | "\n", 31 | "### Key Concepts:\n", 32 | "- **Agent System**: Routes tasks to specialized models (e.g., summarizer, analyzer).\n", 33 | "- **Memory Management**: Maintains state across LLM calls.\n", 34 | "- **Tool Use**: Wrap models as `Tool` with function and description.\n", 35 | "\n", 36 | "---\n", 37 | "\n", 38 | "### Installation" 39 | ], 40 | "metadata": { 41 | "id": "Gjtn-G3Bd9f2" 42 | } 43 | }, 44 | { 45 | "cell_type": "code", 46 | "execution_count": null, 47 | "metadata": { 48 | "id": "aBg9zGvSd9Da" 49 | }, 50 | "outputs": [], 51 | "source": [ 52 | "! pip install langchain langchain-community llama-index\n", 53 | "! pip install ollama\n", 54 | "! ollama pull deepseek-llm\n", 55 | "! ollama pull llama3" 56 | ] 57 | }, 58 | { 59 | "cell_type": "code", 60 | "source": [ 61 | "! pip install huggingface_hub\n", 62 | "! 
huggingface-cli login\n" 63 | ], 64 | "metadata": { 65 | "id": "K6HTtJ1EeRZB" 66 | }, 67 | "execution_count": null, 68 | "outputs": [] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "source": [ 73 | "### Multi-LLM Workflow with LangChain" 74 | ], 75 | "metadata": { 76 | "id": "sA7y4gxreLdX" 77 | } 78 | }, 79 | { 80 | "cell_type": "code", 81 | "source": [ 82 | "from langchain_community.llms import Ollama\n", 83 | "from langchain.agents import initialize_agent, Tool\n", 84 | "\n", 85 | "# Initialize LLMs\n", 86 | "deepseek = Ollama(model=\"deepseek-llm\") # Technical reasoning\n", 87 | "llama3 = Ollama(model=\"llama3\") # Concise summarization\n", 88 | "\n", 89 | "# Define tools\n", 90 | "tools = [\n", 91 | " Tool(name=\"DeepSeek_Analysis\", func=deepseek, description=\"Technical analysis\"),\n", 92 | " Tool(name=\"Llama3_Summarization\", func=llama3, description=\"Summarization\")\n", 93 | "]\n", 94 | "\n", 95 | "# Coordinate using an agent\n", 96 | "agent = initialize_agent(tools, llama3, agent=\"conversational-react-description\")\n", 97 | "result = agent.run(\"Analyze the impact of rising interest rates on tech stocks, then summarize.\")\n", 98 | "print(result)" 99 | ], 100 | "metadata": { 101 | "id": "VD7-Ysf5eTie" 102 | }, 103 | "execution_count": null, 104 | "outputs": [] 105 | }, 106 | { 107 | "cell_type": "markdown", 108 | "source": [ 109 | "### LlamaIndex: Hierarchical Multi-LLM Synthesis\n", 110 | "LlamaIndex (formerly GPT Index) complements LangChain by enabling multi-layered pipelines, where different LLMs operate at different abstraction levels.\n", 111 | "\n", 112 | "#### Architecture:\n", 113 | "Base Layer: Processes raw chunks (e.g., extract metrics)\n", 114 | "\n", 115 | "Synthesis Layer: Aggregates results into higher-level insights\n", 116 | "\n" 117 | ], 118 | "metadata": { 119 | "id": "fjAjo4EnebKj" 120 | } 121 | }, 122 | { 123 | "cell_type": "code", 124 | "source": [ 125 | "from llama_index import VectorStoreIndex, SimpleDirectoryReader, ServiceContext\n", 126 | "from llama_index.llms import LangChainLLM\n", 127 | "\n", 128 | "# Load your documents\n", 129 | "documents = SimpleDirectoryReader(\"financial_reports\").load_data()\n", 130 | "\n", 131 | "# Wrap LLMs\n", 132 | "base_llm = LangChainLLM(llm=deepseek)\n", 133 | "synthesis_llm = LangChainLLM(llm=llama3)\n", 134 | "\n", 135 | "# Create context and index\n", 136 | "service_context = ServiceContext.from_defaults(llm=base_llm)\n", 137 | "index = VectorStoreIndex.from_documents(documents, service_context=service_context)\n", 138 | "\n", 139 | "# Run query through layered pipeline\n", 140 | "response = index.as_query_engine(llm=synthesis_llm).query(\"Compare Q3 performance across companies.\")\n", 141 | "print(response)\n" 142 | ], 143 | "metadata": { 144 | "id": "2negbxWrebwC" 145 | }, 146 | "execution_count": null, 147 | "outputs": [] 148 | } 149 | ] 150 | } -------------------------------------------------------------------------------- /ch13/Calibrated_Output_Filtering.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "### Post-hoc: Calibrated Output Filtering for Safer LLM Responses\n", 21 | "In this notebook, we implement a post-hoc mitigation 
strategy to filter or rephrase potentially toxic outputs generated by large language models (LLMs). This is an essential safety layer in responsible AI deployment." 22 | ], 23 | "metadata": { 24 | "id": "ZO7CJ6tvt_1u" 25 | } 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "source": [ 30 | "### Step 1: Install Required Packages\n", 31 | "Make sure you have the required libraries installed." 32 | ], 33 | "metadata": { 34 | "id": "-zn4povauDxi" 35 | } 36 | }, 37 | { 38 | "cell_type": "code", 39 | "source": [ 40 | "! pip install transformers detoxify torch" 41 | ], 42 | "metadata": { 43 | "id": "d0CAeYW9uE4h" 44 | }, 45 | "execution_count": null, 46 | "outputs": [] 47 | }, 48 | { 49 | "cell_type": "markdown", 50 | "source": [ 51 | "### Step 2: Import Libraries\n", 52 | "We use Hugging Face's transformers for text generation and Detoxify for toxicity detection." 53 | ], 54 | "metadata": { 55 | "id": "h8b8Zwp0uKLa" 56 | } 57 | }, 58 | { 59 | "cell_type": "code", 60 | "source": [ 61 | "from transformers import pipeline\n", 62 | "from detoxify import Detoxify" 63 | ], 64 | "metadata": { 65 | "id": "x3yW1pizuJ35" 66 | }, 67 | "execution_count": null, 68 | "outputs": [] 69 | }, 70 | { 71 | "cell_type": "markdown", 72 | "source": [ 73 | "### Step 3: Initialize Models\n", 74 | "We load a large language model (gpt2-xl) and the Detoxify model for toxicity scoring." 75 | ], 76 | "metadata": { 77 | "id": "x8tq2XvfuP5T" 78 | } 79 | }, 80 | { 81 | "cell_type": "code", 82 | "source": [ 83 | "# Load GPT-2 XL for text generation\n", 84 | "generator = pipeline('text-generation', model='gpt2-xl')\n", 85 | "\n", 86 | "# Load Detoxify toxicity detection model\n", 87 | "detox = Detoxify('original')" 88 | ], 89 | "metadata": { 90 | "id": "VeRONis8uPg0" 91 | }, 92 | "execution_count": null, 93 | "outputs": [] 94 | }, 95 | { 96 | "cell_type": "markdown", 97 | "source": [ 98 | "### Step 4: Define Bias Correction Logic (Placeholder)\n", 99 | "You can define any logic here to modify biased or inappropriate output, such as rephrasing or tone adjustment." 100 | ], 101 | "metadata": { 102 | "id": "Nw0tncK_ub0l" 103 | } 104 | }, 105 | { 106 | "cell_type": "code", 107 | "source": [ 108 | "# Placeholder for any custom bias correction (e.g., rephrasing toxic text)\n", 109 | "def apply_bias_correction(text):\n", 110 | " return text # Return original for now\n" 111 | ], 112 | "metadata": { 113 | "id": "82M_M6zuublu" 114 | }, 115 | "execution_count": null, 116 | "outputs": [] 117 | }, 118 | { 119 | "cell_type": "markdown", 120 | "source": [ 121 | "### Step 5: Define the Safe Text Generation Function\n", 122 | "This function wraps generation with toxicity detection and conditional filtering." 
123 | ], 124 | "metadata": { 125 | "id": "nxBDHpPduhEh" 126 | } 127 | }, 128 | { 129 | "cell_type": "code", 130 | "source": [ 131 | "def safe_generate(prompt):\n", 132 | " # Generate text from the LLM\n", 133 | " raw_output = generator(prompt, max_length=100)[0]['generated_text']\n", 134 | "\n", 135 | " # Check toxicity using Detoxify\n", 136 | " toxicity_score = detox.predict(raw_output)['toxicity']\n", 137 | "\n", 138 | " # If toxicity is above threshold, block response\n", 139 | " if toxicity_score > 0.7:\n", 140 | " return \"I cannot generate this content responsibly.\"\n", 141 | "\n", 142 | " # Optionally apply bias correction\n", 143 | " return apply_bias_correction(raw_output)" 144 | ], 145 | "metadata": { 146 | "id": "sS3d0jkNuhek" 147 | }, 148 | "execution_count": null, 149 | "outputs": [] 150 | }, 151 | { 152 | "cell_type": "markdown", 153 | "source": [ 154 | "####Step 6: Test the Safe Generation\n", 155 | "Let’s try with both safe and unsafe prompts." 156 | ], 157 | "metadata": { 158 | "id": "wUz67zhbunI2" 159 | } 160 | }, 161 | { 162 | "cell_type": "code", 163 | "source": [ 164 | "# Safe prompt\n", 165 | "print(safe_generate(\"Once upon a time in a peaceful village\"))" 166 | ], 167 | "metadata": { 168 | "id": "UjQGX4U-ubjI" 169 | }, 170 | "execution_count": null, 171 | "outputs": [] 172 | }, 173 | { 174 | "cell_type": "code", 175 | "source": [ 176 | "print(safe_generate(\"Write a hateful speech\"))" 177 | ], 178 | "metadata": { 179 | "id": "U2nfdsPCubgx" 180 | }, 181 | "execution_count": null, 182 | "outputs": [] 183 | } 184 | ] 185 | } -------------------------------------------------------------------------------- /ch10/Quantization_fp8.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "metadata": { 6 | "id": "mOvRu-rmWb3Q" 7 | }, 8 | "source": [ 9 | "# 8-bit Integer Quantization (INT8) for Transformer Models" 10 | ] 11 | }, 12 | { 13 | "cell_type": "markdown", 14 | "metadata": { 15 | "id": "kRuuNIWkWkpt" 16 | }, 17 | "source": [ 18 | "## What is 8-bit (INT8) Quantization?\n", 19 | "\n", 20 | "INT8 quantization compresses both model **weights** and **activations** to 8-bit integers.\n", 21 | "\n", 22 | "This results in:\n", 23 | "- Faster inference speeds (especially on CPUs and low-power GPUs)\n", 24 | "- Smaller memory footprint (4x smaller than FP32)\n", 25 | "- Better energy efficiency for edge or mobile devices\n", 26 | "\n", 27 | "### Trade-off\n", 28 | "Slight accuracy degradation may occur, particularly in sensitive layers. 
Techniques like **outlier thresholding** help reduce that.\n" 29 | ] 30 | }, 31 | { 32 | "cell_type": "markdown", 33 | "metadata": { 34 | "id": "0ZBtTGMlWpTZ" 35 | }, 36 | "source": [ 37 | "## Why Use INT8 Quantization?\n", 38 | "\n", 39 | "- **Edge/Mobile Optimization**: Perfect for devices with limited RAM/compute.\n", 40 | "- **Compression**: Reduces model size dramatically — ~4x smaller than FP32.\n", 41 | "- **Latency**: Speeds up inference, especially on INT8-supported hardware.\n", 42 | "- **Simple Integration**: Easily enabled via Hugging Face + bitsandbytes.\n" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "metadata": { 48 | "id": "1M99-y_qWuKH" 49 | }, 50 | "source": [ 51 | "### Install Required Packages" 52 | ] 53 | }, 54 | { 55 | "cell_type": "code", 56 | "execution_count": null, 57 | "metadata": { 58 | "id": "cmI9OnndWa4Y" 59 | }, 60 | "outputs": [], 61 | "source": [ 62 | "!pip install transformers bitsandbytes accelerate" 63 | ] 64 | }, 65 | { 66 | "cell_type": "markdown", 67 | "metadata": { 68 | "id": "02gtS88gXA2r" 69 | }, 70 | "source": [ 71 | "### Configure INT8 Quantization Parameters" 72 | ] 73 | }, 74 | { 75 | "cell_type": "code", 76 | "execution_count": null, 77 | "metadata": { 78 | "id": "KHo0nRMPXB4_" 79 | }, 80 | "outputs": [], 81 | "source": [ 82 | "from transformers import AutoModelForCausalLM, BitsAndBytesConfig\n", 83 | "\n", 84 | "# Set 8-bit quantization parameters\n", 85 | "bnb_config = BitsAndBytesConfig(\n", 86 | " load_in_8bit=True, # Enables 8-bit quantization\n", 87 | " llm_int8_threshold=6.0 # Handles outliers in sensitive layers (higher = more aggressive quantization)\n", 88 | ")\n" 89 | ] 90 | }, 91 | { 92 | "cell_type": "markdown", 93 | "metadata": { 94 | "id": "vHnxgdJTXJ4l" 95 | }, 96 | "source": [ 97 | "### Load Quantized Model" 98 | ] 99 | }, 100 | { 101 | "cell_type": "code", 102 | "execution_count": null, 103 | "metadata": { 104 | "id": "rxyBTKk8XOla" 105 | }, 106 | "outputs": [], 107 | "source": [ 108 | "model = AutoModelForCausalLM.from_pretrained(\n", 109 | " \"deepseek-ai/deepseek-coder-6.7b-instruct\",\n", 110 | " quantization_config=bnb_config, # Apply 8-bit config\n", 111 | " device_map=\"auto\" # Automatically allocate across available GPUs/CPUs\n", 112 | ")\n" 113 | ] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "metadata": { 118 | "id": "eutaFqZ9XQzy" 119 | }, 120 | "source": [ 121 | "### Memory Footprint Comparison" 122 | ] 123 | }, 124 | { 125 | "cell_type": "code", 126 | "execution_count": null, 127 | "metadata": { 128 | "id": "BCtypTttXToz" 129 | }, 130 | "outputs": [], 131 | "source": [ 132 | "# Check memory usage of the loaded model (in GB)\n", 133 | "model_size_gb = model.get_memory_footprint() / 1e9\n", 134 | "print(f\"Estimated 8-bit model size: {model_size_gb:.2f} GB\")" 135 | ] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "metadata": { 140 | "id": "OzhjP10sXWMS" 141 | }, 142 | "source": [ 143 | "## Summary: INT8 Quantization Results\n", 144 | "\n", 145 | "- **4x Smaller**: Dramatically reduces memory usage vs FP32\n", 146 | "- **Fast Inference**: Especially on modern CPUs and Tensor Cores\n", 147 | "- **Outlier Robust**: `llm_int8_threshold` maintains stability\n", 148 | "- **Simple to Enable**: Just use `BitsAndBytesConfig` with Hugging Face\n", 149 | "\n", 150 | "### Recommended Use Cases:\n", 151 | "- Deployment on edge/mobile devices\n", 152 | "- Latency-critical inference services\n", 153 | "- GPU-constrained production environments\n" 154 | ] 155 | }, 156 | { 157 | "cell_type": "code", 
158 | "execution_count": null, 159 | "metadata": { 160 | "id": "gipg0GZWXjF6" 161 | }, 162 | "outputs": [], 163 | "source": [] 164 | } 165 | ], 166 | "metadata": { 167 | "colab": { 168 | "provenance": [] 169 | }, 170 | "kernelspec": { 171 | "display_name": "Python 3", 172 | "name": "python3" 173 | }, 174 | "language_info": { 175 | "name": "python" 176 | } 177 | }, 178 | "nbformat": 4, 179 | "nbformat_minor": 0 180 | } 181 | -------------------------------------------------------------------------------- /ch14/Quantization_with_RL_Based_LLM.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Quantization with RL-Based LLM Routing (via Ollama + Stable-Baselines3)\n", 21 | "This notebook demonstrates a lightweight reinforcement learning (RL) environment that routes queries to the most suitable local LLM using Stable-Baselines3 and Ollama." 22 | ], 23 | "metadata": { 24 | "id": "UveAAayZmUF1" 25 | } 26 | }, 27 | { 28 | "cell_type": "markdown", 29 | "source": [ 30 | "### Install Required Libraries" 31 | ], 32 | "metadata": { 33 | "id": "TwrN0aZRmbMb" 34 | } 35 | }, 36 | { 37 | "cell_type": "code", 38 | "execution_count": null, 39 | "metadata": { 40 | "id": "29f2n-O1mTgT" 41 | }, 42 | "outputs": [], 43 | "source": [ 44 | "!pip install gymnasium stable-baselines3 langchain_community" 45 | ] 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "source": [ 50 | "#### Environment and Model Setup" 51 | ], 52 | "metadata": { 53 | "id": "V1_FtcZdmers" 54 | } 55 | }, 56 | { 57 | "cell_type": "code", 58 | "source": [ 59 | "import gymnasium as gym\n", 60 | "from stable_baselines3 import PPO\n", 61 | "from langchain_community.llms import Ollama\n", 62 | "import random\n", 63 | "\n", 64 | "# Define local LLMs via Ollama\n", 65 | "llms = {\n", 66 | " \"llama3\": Ollama(model=\"llama3\"),\n", 67 | " \"medllama\": Ollama(model=\"medllama2\")\n", 68 | "}" 69 | ], 70 | "metadata": { 71 | "id": "1opJOybJmhzD" 72 | }, 73 | "execution_count": null, 74 | "outputs": [] 75 | }, 76 | { 77 | "cell_type": "markdown", 78 | "source": [ 79 | "### Define the Routing Environment" 80 | ], 81 | "metadata": { 82 | "id": "b2Yn1Et2mkBX" 83 | } 84 | }, 85 | { 86 | "cell_type": "code", 87 | "source": [ 88 | "class LLMRoutingEnv(gym.Env):\n", 89 | " def __init__(self):\n", 90 | " self.action_space = gym.spaces.Discrete(2) # Two LLMs\n", 91 | " self.observation_space = gym.spaces.Box(-1, 1, (384,)) # Mock embedding size\n", 92 | "\n", 93 | " def step(self, action):\n", 94 | " selected_model = list(llms.keys())[action]\n", 95 | " reward = 1.0 if (action == 1 and \"diabetes\" in self.current_query) else 0.2\n", 96 | " return self._get_obs(), reward, False, {}\n", 97 | "\n", 98 | " def reset(self):\n", 99 | " self.current_query = random.choice([\n", 100 | " \"Diabetes management guidelines\",\n", 101 | " \"Python web scraping tutorial\"\n", 102 | " ])\n", 103 | " return self._get_obs()\n", 104 | "\n", 105 | " def _get_obs(self):\n", 106 | " # Mock embedding (normally would be from SentenceTransformer or other encoder)\n", 107 | " return [random.uniform(-1, 1) for _ in range(384)]\n" 108 | ], 109 | "metadata": { 110 | "id": "GY9NSijzmmgy" 111 | }, 112 | "execution_count": null, 
113 | "outputs": [] 114 | }, 115 | { 116 | "cell_type": "markdown", 117 | "source": [ 118 | "### Train the RL Agent" 119 | ], 120 | "metadata": { 121 | "id": "h7ObS7esmrTO" 122 | } 123 | }, 124 | { 125 | "cell_type": "code", 126 | "source": [ 127 | "env = LLMRoutingEnv()\n", 128 | "model = PPO(\"MlpPolicy\", env, verbose=1)\n", 129 | "model.learn(total_timesteps=1000)" 130 | ], 131 | "metadata": { 132 | "id": "ncxt1s7OmuI2" 133 | }, 134 | "execution_count": null, 135 | "outputs": [] 136 | }, 137 | { 138 | "cell_type": "markdown", 139 | "source": [ 140 | "### Test Deployment" 141 | ], 142 | "metadata": { 143 | "id": "42xYvSiDmwYR" 144 | } 145 | }, 146 | { 147 | "cell_type": "code", 148 | "source": [ 149 | "obs = env.reset()\n", 150 | "action, _ = model.predict(obs)\n", 151 | "print(f\"Optimal model: {list(llms.keys())[action]}\")" 152 | ], 153 | "metadata": { 154 | "id": "9L4tShWFmygJ" 155 | }, 156 | "execution_count": null, 157 | "outputs": [] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "source": [ 162 | "## Expected Behavior\n", 163 | "\n", 164 | "- **Reward signal**: `+1.0` if **\"diabetes\"**-related query is routed to **medllama2**\n", 165 | "\n", 166 | "- **Learned policy**: Prioritizes **medllama2** for medical queries\n", 167 | "\n", 168 | "### Factors Considered:\n", 169 | "\n", 170 | "- **Query context** (keyword: `\"diabetes\"`)\n", 171 | "\n", 172 | "- **Historical performance**:\n", 173 | " - **medllama2**: 92% accuracy (medical tasks)\n", 174 | " - **llama3**: 68% accuracy (medical tasks)\n" 175 | ], 176 | "metadata": { 177 | "id": "xr8YezNzm1PK" 178 | } 179 | }, 180 | { 181 | "cell_type": "code", 182 | "source": [], 183 | "metadata": { 184 | "id": "vwqcqUypnDuo" 185 | }, 186 | "execution_count": null, 187 | "outputs": [] 188 | } 189 | ] 190 | } -------------------------------------------------------------------------------- /ch10/Quantization_fp16.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Quantization in Transformer Models: FP16 Example" 21 | ], 22 | "metadata": { 23 | "id": "qVQwoyWqvjcn" 24 | } 25 | }, 26 | { 27 | "cell_type": "markdown", 28 | "source": [ 29 | "## What is Quantization?\n", 30 | "\n", 31 | "Quantization is a model compression technique that reduces the numerical precision of neural network weights and activations. Rather than using the default 32-bit floating point (FP32), we can use formats like:\n", 32 | "\n", 33 | "- FP16 (16-bit floating point)\n", 34 | "- INT8 (8-bit integer)\n", 35 | "- 4-bit formats\n", 36 | "\n", 37 | "### Why Use Quantization?\n", 38 | "\n", 39 | "- **Faster inference**: Lower-precision math is faster on modern hardware.\n", 40 | "- **Less memory usage**: Models take up less space.\n", 41 | "- **Lower power consumption**: Efficient for edge and mobile devices.\n" 42 | ], 43 | "metadata": { 44 | "id": "ii7PHT3mUpX9" 45 | } 46 | }, 47 | { 48 | "cell_type": "markdown", 49 | "source": [ 50 | "## FP16 Quantization\n", 51 | "\n", 52 | "**Half-Precision Floating Point (FP16)** is a commonly used quantization format. 
It represents floating-point numbers using only 16 bits.\n", 53 | "\n", 54 | "### Benefits of FP16:\n", 55 | "- Keeps a wide dynamic range of values.\n", 56 | "- Accelerates matrix operations on GPUs (especially NVIDIA Tensor Cores).\n", 57 | "- Minimal to no accuracy degradation compared to FP32.\n", 58 | "- Easily supported in libraries like Hugging Face Transformers and PyTorch.\n" 59 | ], 60 | "metadata": { 61 | "id": "B-p65hXlUymM" 62 | } 63 | }, 64 | { 65 | "cell_type": "markdown", 66 | "source": [ 67 | "## Setting Up the Environment\n", 68 | "\n", 69 | "Before running the code:\n", 70 | "- Make sure `transformers` and `torch` are installed.\n", 71 | "- A GPU with FP16 support (e.g., NVIDIA Turing or Ampere GPUs) is recommended.\n" 72 | ], 73 | "metadata": { 74 | "id": "UA__jQr-U2uS" 75 | } 76 | }, 77 | { 78 | "cell_type": "code", 79 | "execution_count": 1, 80 | "metadata": { 81 | "id": "IJuPFhgiZx-X" 82 | }, 83 | "outputs": [], 84 | "source": [ 85 | "# Install Hugging Face Transformers and PyTorch if not already installed\n", 86 | "# Uncomment the next line if needed\n", 87 | "# !pip install transformers torch" 88 | ] 89 | }, 90 | { 91 | "cell_type": "markdown", 92 | "source": [ 93 | "#### Load Model in FP16" 94 | ], 95 | "metadata": { 96 | "id": "GQ_LEZeRU5pS" 97 | } 98 | }, 99 | { 100 | "cell_type": "code", 101 | "source": [ 102 | "from transformers import AutoModelForCausalLM, AutoTokenizer\n", 103 | "import torch\n", 104 | "\n", 105 | "# Configuration\n", 106 | "model_name = \"deepseek-ai/deepseek-coder-6.7b-instruct\"\n", 107 | "token = \"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\" # WARNING: Use environment variables in production\n", 108 | "\n", 109 | "# Load the model with FP16 precision and automatic GPU/CPU allocation\n", 110 | "model = AutoModelForCausalLM.from_pretrained(\n", 111 | " model_name,\n", 112 | " torch_dtype=torch.float16, # Use FP16 quantization\n", 113 | " device_map=\"auto\", # Automatically use GPU if available\n", 114 | " token=token # Hugging Face Hub access token\n", 115 | ")\n", 116 | "\n", 117 | "# Load the tokenizer (converts text to tokens and back)\n", 118 | "tokenizer = AutoTokenizer.from_pretrained(\n", 119 | " model_name,\n", 120 | " token=token\n", 121 | ")\n" 122 | ], 123 | "metadata": { 124 | "id": "YjHwAKhJU5K1" 125 | }, 126 | "execution_count": null, 127 | "outputs": [] 128 | }, 129 | { 130 | "cell_type": "markdown", 131 | "source": [ 132 | "#### Inference with FP16 Model" 133 | ], 134 | "metadata": { 135 | "id": "ep0q67niVmj-" 136 | } 137 | }, 138 | { 139 | "cell_type": "code", 140 | "source": [ 141 | "# Prepare the input prompt\n", 142 | "input_text = \"Explain the transformer architecture\"\n", 143 | "\n", 144 | "# Tokenize and move tensors to GPU\n", 145 | "inputs = tokenizer(input_text, return_tensors=\"pt\").to(\"cuda\")\n", 146 | "\n", 147 | "# Generate model response (text generation)\n", 148 | "outputs = model.generate(**inputs, max_new_tokens=200)\n", 149 | "\n", 150 | "# Decode token IDs into readable text\n", 151 | "response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n", 152 | "\n", 153 | "# Print the generated text\n", 154 | "print(response)" 155 | ], 156 | "metadata": { 157 | "id": "qznWF5tgVm7l" 158 | }, 159 | "execution_count": null, 160 | "outputs": [] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "source": [ 165 | "## Summary\n", 166 | "\n", 167 | "Using FP16 precision provides a solid balance between performance and accuracy:\n", 168 | "\n", 169 | "- Reduces GPU memory usage, allowing for larger models 
or batch sizes.\n", 170 | "- Inference is faster thanks to optimized GPU operations.\n", 171 | "- Accuracy remains very close to that of FP32 models.\n", 172 | "- Integration is simple using the Hugging Face Transformers library with `torch_dtype=torch.float16`.\n", 173 | "\n", 174 | "This makes FP16 a go-to solution for production inference on powerful GPUs.\n", 175 | "\n", 176 | "In the next section, we'll explore more aggressive quantization using **INT8** for deployments on mobile or edge devices.\n" 177 | ], 178 | "metadata": { 179 | "id": "5FEuc7FQWGe1" 180 | } 181 | }, 182 | { 183 | "cell_type": "code", 184 | "source": [], 185 | "metadata": { 186 | "id": "bWL03yUXWKks" 187 | }, 188 | "execution_count": null, 189 | "outputs": [] 190 | } 191 | ] 192 | } -------------------------------------------------------------------------------- /ch13/data_sanitization.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "### Fairness-Aware Classification with Fairlearn\n", 21 | "This notebook shows how to apply fairness constraints to a machine learning model using Fairlearn, focusing on Demographic Parity and the Exponentiated Gradient reduction algorithm." 22 | ], 23 | "metadata": { 24 | "id": "YRR3GTWjnwb8" 25 | } 26 | }, 27 | { 28 | "cell_type": "code", 29 | "execution_count": null, 30 | "metadata": { 31 | "id": "YCu3HkzWnmWT" 32 | }, 33 | "outputs": [], 34 | "source": [ 35 | "! pip install fairlearn scikit-learn numpy" 36 | ] 37 | }, 38 | { 39 | "cell_type": "markdown", 40 | "source": [ 41 | "#### Step 1: Import Required Libraries" 42 | ], 43 | "metadata": { 44 | "id": "iBHc5OrQnyfi" 45 | } 46 | }, 47 | { 48 | "cell_type": "code", 49 | "source": [ 50 | "from fairlearn.reductions import ExponentiatedGradient, DemographicParity\n", 51 | "from sklearn.linear_model import LogisticRegression\n", 52 | "from sklearn.datasets import make_classification\n", 53 | "import numpy as np" 54 | ], 55 | "metadata": { 56 | "id": "NudpANNxno4G" 57 | }, 58 | "execution_count": null, 59 | "outputs": [] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "source": [ 64 | "### Step 2: Generate Synthetic Data\n", 65 | "We generate a binary classification dataset and add a synthetic sensitive feature (e.g., gender)." 66 | ], 67 | "metadata": { 68 | "id": "k28oTz_pn3X8" 69 | } 70 | }, 71 | { 72 | "cell_type": "code", 73 | "source": [ 74 | "# Generate a toy classification dataset\n", 75 | "X, y = make_classification(n_samples=500, n_features=5, random_state=42)\n", 76 | "\n", 77 | "# Create a synthetic sensitive feature (e.g., gender: 0 = Male, 1 = Female)\n", 78 | "sf = np.random.randint(0, 2, size=500)\n" 79 | ], 80 | "metadata": { 81 | "id": "ZG72wWIKn3K6" 82 | }, 83 | "execution_count": null, 84 | "outputs": [] 85 | }, 86 | { 87 | "cell_type": "markdown", 88 | "source": [ 89 | "### Step 3: Train-Test Split\n", 90 | "We'll use 400 samples for training and the rest for testing." 
91 | ], 92 | "metadata": { 93 | "id": "ETcIcj9in83f" 94 | } 95 | }, 96 | { 97 | "cell_type": "code", 98 | "source": [ 99 | "# Split data into training and test sets\n", 100 | "X_train, y_train, sf_train = X[:400], y[:400], sf[:400]\n", 101 | "X_test, y_test, sf_test = X[400:], y[400:], sf[400:]\n" 102 | ], 103 | "metadata": { 104 | "id": "Ygk7CxARn8pK" 105 | }, 106 | "execution_count": null, 107 | "outputs": [] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "source": [ 112 | "### Step 4: Define the Base Estimator\n", 113 | "We'll use a simple logistic regression model as the base learner." 114 | ], 115 | "metadata": { 116 | "id": "1J-dbLVroCj0" 117 | } 118 | }, 119 | { 120 | "cell_type": "code", 121 | "source": [ 122 | "# Base classifier\n", 123 | "estimator = LogisticRegression(solver=\"liblinear\")" 124 | ], 125 | "metadata": { 126 | "id": "9TYdyVw9oCYn" 127 | }, 128 | "execution_count": null, 129 | "outputs": [] 130 | }, 131 | { 132 | "cell_type": "markdown", 133 | "source": [ 134 | "### Step 5: Define the Fairness Constraint\n", 135 | "We'll apply the Demographic Parity constraint, with a small allowed difference (0.01) between groups." 136 | ], 137 | "metadata": { 138 | "id": "0liTsVV_oIK7" 139 | } 140 | }, 141 | { 142 | "cell_type": "code", 143 | "source": [ 144 | "# Define fairness constraint: Demographic Parity\n", 145 | "constraint = DemographicParity(difference_bound=0.01)" 146 | ], 147 | "metadata": { 148 | "id": "CzMjL3XzoCR2" 149 | }, 150 | "execution_count": null, 151 | "outputs": [] 152 | }, 153 | { 154 | "cell_type": "markdown", 155 | "source": [ 156 | "### Step 6: Initialize the Fairness Mitigator\n", 157 | "Wrap the base estimator with the ExponentiatedGradient algorithm to enforce fairness." 158 | ], 159 | "metadata": { 160 | "id": "8wU3-vRZoNEN" 161 | } 162 | }, 163 | { 164 | "cell_type": "code", 165 | "source": [ 166 | "# Create a fairness-aware model wrapper\n", 167 | "mitigator = ExponentiatedGradient(estimator, constraint)\n" 168 | ], 169 | "metadata": { 170 | "id": "YUNbMagmoCK0" 171 | }, 172 | "execution_count": null, 173 | "outputs": [] 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "source": [ 178 | "### Step 7: Train the Fair Model\n", 179 | "Now we train the fairness-constrained model, providing the sensitive feature alongside the training data." 
180 | ], 181 | "metadata": { 182 | "id": "nMhd3-APoSQP" 183 | } 184 | }, 185 | { 186 | "cell_type": "code", 187 | "source": [ 188 | "# Train the fairness-constrained model\n", 189 | "mitigator.fit(X_train, y_train, sensitive_features=sf_train)" 190 | ], 191 | "metadata": { 192 | "id": "LsXj9t5koSFk" 193 | }, 194 | "execution_count": null, 195 | "outputs": [] 196 | }, 197 | { 198 | "cell_type": "markdown", 199 | "source": [ 200 | "While the code does not print output by default, you can evaluate the fairness of predictions like this:\n" 201 | ], 202 | "metadata": { 203 | "id": "hrFOkHsFp1Ee" 204 | } 205 | }, 206 | { 207 | "cell_type": "code", 208 | "source": [ 209 | "# Evaluate predicted outcomes on training data\n", 210 | "y_pred = mitigator.predict(X_train)\n", 211 | "\n", 212 | "\n", 213 | "# Check proportion of positive predictions by group\n", 214 | "group_0 = y_pred[sf_train == 0]\n", 215 | "group_1 = y_pred[sf_train == 1]\n", 216 | "\n", 217 | "\n", 218 | "print(\"Positive outcome rate for Group 0:\", group_0.mean())\n", 219 | "print(\"Positive outcome rate for Group 1:\", group_1.mean())" 220 | ], 221 | "metadata": { 222 | "id": "vorvhe4np0ho" 223 | }, 224 | "execution_count": null, 225 | "outputs": [] 226 | } 227 | ] 228 | } -------------------------------------------------------------------------------- /ch13/Transparency_Instrumentation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "Interpret BERT with LayerIntegratedGradients (Captum)\n", 21 | "This notebook demonstrates how to use Captum's LayerIntegratedGradients to compute input feature attributions for a BERT model performing sentiment classification.\n", 22 | "\n" 23 | ], 24 | "metadata": { 25 | "id": "nlw40wi9mA0c" 26 | } 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": null, 31 | "metadata": { 32 | "id": "mcmC94-olvfc" 33 | }, 34 | "outputs": [], 35 | "source": [ 36 | "! pip install transformers captum torch" 37 | ] 38 | }, 39 | { 40 | "cell_type": "markdown", 41 | "source": [ 42 | "### Step 1: Import Required Libraries" 43 | ], 44 | "metadata": { 45 | "id": "seAAZHGSmFTb" 46 | } 47 | }, 48 | { 49 | "cell_type": "code", 50 | "source": [ 51 | "from transformers import BertTokenizer, BertForSequenceClassification\n", 52 | "from captum.attr import LayerIntegratedGradients\n", 53 | "import torch" 54 | ], 55 | "metadata": { 56 | "id": "8-qeoiwAmJPh" 57 | }, 58 | "execution_count": null, 59 | "outputs": [] 60 | }, 61 | { 62 | "cell_type": "markdown", 63 | "source": [ 64 | "### Step 2: Load Pretrained BERT Model and Tokenizer\n", 65 | "We'll use the bert-base-uncased model and tokenizer from Hugging Face Transformers." 
66 | ], 67 | "metadata": { 68 | "id": "H7JxU2YBmNOe" 69 | } 70 | }, 71 | { 72 | "cell_type": "code", 73 | "source": [ 74 | "# Input text for sentiment analysis\n", 75 | "text = \"This is a great movie!\"\n", 76 | "\n", 77 | "# Tokenize the text into input IDs and attention masks\n", 78 | "inputs = tokenizer(text, return_tensors='pt')\n", 79 | "input_ids = inputs['input_ids']\n", 80 | "attention_mask = inputs['attention_mask']\n" 81 | ], 82 | "metadata": { 83 | "id": "b1Nm8leumQAl" 84 | }, 85 | "execution_count": null, 86 | "outputs": [] 87 | }, 88 | { 89 | "cell_type": "markdown", 90 | "source": [ 91 | "### Step 4: Extract Input Embeddings\n", 92 | "We get the embeddings from the model's embedding layer and enable gradient computation." 93 | ], 94 | "metadata": { 95 | "id": "zn1M5Dw1mSuC" 96 | } 97 | }, 98 | { 99 | "cell_type": "code", 100 | "source": [ 101 | "# Get input embeddings from the BERT embedding layer\n", 102 | "embedding_layer = model.bert.embeddings\n", 103 | "input_embeddings = embedding_layer(input_ids)\n", 104 | "\n", 105 | "# Enable gradients for input embeddings\n", 106 | "input_embeddings.requires_grad_()\n" 107 | ], 108 | "metadata": { 109 | "id": "fJqB-GJvmYCr" 110 | }, 111 | "execution_count": null, 112 | "outputs": [] 113 | }, 114 | { 115 | "cell_type": "markdown", 116 | "source": [ 117 | "### Step 5: Define a Custom Forward Function\n", 118 | "Captum needs a function that maps embeddings to outputs. We'll define that here." 119 | ], 120 | "metadata": { 121 | "id": "cx61eVezmYvv" 122 | } 123 | }, 124 | { 125 | "cell_type": "code", 126 | "source": [ 127 | "# Custom forward function that accepts input embeddings\n", 128 | "def custom_forward(embeds):\n", 129 | " outputs = model(inputs_embeds=embeds, attention_mask=attention_mask)\n", 130 | " logits = outputs.logits\n", 131 | " return logits\n" 132 | ], 133 | "metadata": { 134 | "id": "jzbjSegYmZR1" 135 | }, 136 | "execution_count": null, 137 | "outputs": [] 138 | }, 139 | { 140 | "cell_type": "markdown", 141 | "source": [ 142 | "### Step 6: Select Target Prediction Class\n", 143 | "We choose the class index for which we want to compute attributions.\n", 144 | "For binary classification, 1 might represent positive sentiment." 145 | ], 146 | "metadata": { 147 | "id": "42xMw57rme97" 148 | } 149 | }, 150 | { 151 | "cell_type": "code", 152 | "source": [ 153 | "# Choose the target class (e.g., 1 for positive sentiment)\n", 154 | "target_prediction = 1" 155 | ], 156 | "metadata": { 157 | "id": "aeBxh1SdmfR9" 158 | }, 159 | "execution_count": null, 160 | "outputs": [] 161 | }, 162 | { 163 | "cell_type": "markdown", 164 | "source": [ 165 | "### Step 7: Compute Attributions with LayerIntegratedGradients\n", 166 | "We now use Captum to compute feature attributions for the input embeddings." 
167 | ], 168 | "metadata": { 169 | "id": "3RGMaWFrmmla" 170 | } 171 | }, 172 | { 173 | "cell_type": "code", 174 | "source": [ 175 | "# Initialize LayerIntegratedGradients with the embedding layer\n", 176 | "lig = LayerIntegratedGradients(custom_forward, model.bert.embeddings)\n", 177 | "\n", 178 | "# Compute attributions for the target prediction\n", 179 | "attributions = lig.attribute(inputs=input_embeddings, target=target_prediction)\n" 180 | ], 181 | "metadata": { 182 | "id": "b34bHwKSmoOW" 183 | }, 184 | "execution_count": null, 185 | "outputs": [] 186 | }, 187 | { 188 | "cell_type": "code", 189 | "source": [ 190 | "\n", 191 | "\n", 192 | "# Load tokenizer and model\n", 193 | "tokenizer = BertTokenizer.from_pretrained('bert-base-uncased')\n", 194 | "model = BertForSequenceClassification.from_pretrained('bert-base-uncased')\n", 195 | "model.eval()\n", 196 | "\n", 197 | "# Tokenize input\n", 198 | "text = \"This is a great movie!\"\n", 199 | "inputs = tokenizer(text, return_tensors='pt')\n", 200 | "input_ids = inputs['input_ids']\n", 201 | "attention_mask = inputs['attention_mask']\n", 202 | "\n", 203 | "# Get embeddings from model\n", 204 | "embedding_layer = model.bert.embeddings\n", 205 | "input_embeddings = embedding_layer(input_ids)\n", 206 | "input_embeddings.requires_grad_()\n", 207 | "\n", 208 | "# Define a custom forward function to pass embeddings and get prediction\n", 209 | "def custom_forward(embeds):\n", 210 | " outputs = model(inputs_embeds=embeds, attention_mask=attention_mask)\n", 211 | " logits = outputs.logits\n", 212 | " return logits\n", 213 | "\n", 214 | "# Target index (e.g., class index 1 for positive sentiment)\n", 215 | "target_prediction = 1\n", 216 | "\n", 217 | "# Initialize LayerIntegratedGradients\n", 218 | "lig = LayerIntegratedGradients(custom_forward, model.bert.embeddings)\n", 219 | "\n", 220 | "# Compute attributions\n", 221 | "attributions = lig.attribute(inputs=input_embeddings, target=target_prediction)\n" 222 | ], 223 | "metadata": { 224 | "id": "8qxx11tolzkw" 225 | }, 226 | "execution_count": null, 227 | "outputs": [] 228 | } 229 | ] 230 | } -------------------------------------------------------------------------------- /ch14/Cross_Modal_Attention_Mechanism.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "# Cross-Modal Attention Mechanism\n", 21 | "\n", 22 | "In this notebook, we implement a Cross-Modal Attention mechanism where text features attend to image features, inspired by architectures like Flamingo.\n", 23 | "\n", 24 | "This type of attention allows the model to dynamically route relevant visual information based on textual context — similar to how humans align specific words in a question to parts of an image.\n" 25 | ], 26 | "metadata": { 27 | "id": "5HFG0YsfphoN" 28 | } 29 | }, 30 | { 31 | "cell_type": "markdown", 32 | "source": [ 33 | "## Imports\n", 34 | "\n", 35 | "We import PyTorch libraries for building the model and numpy for numerical operations.\n" 36 | ], 37 | "metadata": { 38 | "id": "J4RliV5QpkCX" 39 | } 40 | }, 41 | { 42 | "cell_type": "code", 43 | "execution_count": null, 44 | "metadata": { 45 | "id": "t4TRxl_npg-q" 46 | }, 47 | "outputs": [], 48 | "source": 
[ 49 | "import torch\n", 50 | "import torch.nn as nn\n", 51 | "import torch.nn.functional as F\n", 52 | "import numpy as np\n" 53 | ] 54 | }, 55 | { 56 | "cell_type": "markdown", 57 | "source": [ 58 | "## CrossModalAttention Class\n", 59 | "\n", 60 | "This class implements the cross-modal attention mechanism.\n", 61 | "\n", 62 | "- It uses multi-head attention to let text features (queries) attend to image features (keys and values).\n", 63 | "- The linear layers project inputs into query, key, and value spaces.\n", 64 | "- Scaled dot-product attention computes similarity scores.\n", 65 | "- The output is a fused representation combining text and image information.\n" 66 | ], 67 | "metadata": { 68 | "id": "jmB4gwUXpmf2" 69 | } 70 | }, 71 | { 72 | "cell_type": "code", 73 | "source": [ 74 | "class CrossModalAttention(nn.Module):\n", 75 | " def __init__(self, embed_dim=256, num_heads=8):\n", 76 | " super().__init__()\n", 77 | " self.embed_dim = embed_dim\n", 78 | " self.num_heads = num_heads\n", 79 | " self.head_dim = embed_dim // num_heads\n", 80 | "\n", 81 | " # Linear layers to project text features to queries,\n", 82 | " # and image features to keys and values\n", 83 | " self.text_query = nn.Linear(embed_dim, embed_dim)\n", 84 | " self.image_key = nn.Linear(embed_dim, embed_dim)\n", 85 | " self.image_value = nn.Linear(embed_dim, embed_dim)\n", 86 | "\n", 87 | " self.output_proj = nn.Linear(embed_dim, embed_dim)\n", 88 | " self.dropout = nn.Dropout(0.1)\n" 89 | ], 90 | "metadata": { 91 | "id": "egxiHRk_pqKS" 92 | }, 93 | "execution_count": null, 94 | "outputs": [] 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "source": [ 99 | "## Forward Pass\n", 100 | "\n", 101 | "The `forward` method computes the attention output:\n", 102 | "\n", 103 | "- Input shapes:\n", 104 | " - `text_features`: (batch_size, text_len, embed_dim)\n", 105 | " - `image_features`: (batch_size, image_len, embed_dim)\n", 106 | "\n", 107 | "- Steps:\n", 108 | " 1. Project inputs into query (Q), key (K), and value (V) vectors.\n", 109 | " 2. Reshape and transpose for multi-head attention.\n", 110 | " 3. Calculate scaled dot-product attention scores.\n", 111 | " 4. Apply an optional attention mask.\n", 112 | " 5. Use softmax to get attention weights and apply dropout.\n", 113 | " 6. Compute weighted sum of the values.\n", 114 | " 7. 
Project back to output dimension.\n" 115 | ], 116 | "metadata": { 117 | "id": "Vl8OXKxwpoYu" 118 | } 119 | }, 120 | { 121 | "cell_type": "code", 122 | "source": [ 123 | " def forward(self, text_features, image_features, attention_mask=None):\n", 124 | " batch_size, text_len, _ = text_features.shape\n", 125 | " _, image_len, _ = image_features.shape\n", 126 | "\n", 127 | " # Linear projections\n", 128 | " Q = self.text_query(text_features) # Queries from text\n", 129 | " K = self.image_key(image_features) # Keys from image\n", 130 | " V = self.image_value(image_features) # Values from image\n", 131 | "\n", 132 | " # Reshape for multi-head attention\n", 133 | " Q = Q.view(batch_size, text_len, self.num_heads, self.head_dim).transpose(1, 2)\n", 134 | " K = K.view(batch_size, image_len, self.num_heads, self.head_dim).transpose(1, 2)\n", 135 | " V = V.view(batch_size, image_len, self.num_heads, self.head_dim).transpose(1, 2)\n", 136 | "\n", 137 | " # Scaled dot-product attention\n", 138 | " scores = torch.matmul(Q, K.transpose(-2, -1)) / np.sqrt(self.head_dim)\n", 139 | " if attention_mask is not None:\n", 140 | " scores = scores.masked_fill(attention_mask == 0, -1e9)\n", 141 | "\n", 142 | " attention_weights = F.softmax(scores, dim=-1)\n", 143 | " attention_weights = self.dropout(attention_weights)\n", 144 | "\n", 145 | " # Weighted sum of values\n", 146 | " context = torch.matmul(attention_weights, V)\n", 147 | " context = context.transpose(1, 2).contiguous().view(batch_size, text_len, self.embed_dim)\n", 148 | "\n", 149 | " output = self.output_proj(context)\n", 150 | " return output, attention_weights\n" 151 | ], 152 | "metadata": { 153 | "id": "wb1p8A8pptZl" 154 | }, 155 | "execution_count": null, 156 | "outputs": [] 157 | }, 158 | { 159 | "cell_type": "markdown", 160 | "source": [ 161 | "## Example Usage\n", 162 | "\n", 163 | "We create an instance of the model and pass simulated text and image features.\n", 164 | "\n", 165 | "- Text features: batch of 4 samples, 20 tokens each, embedding size 256.\n", 166 | "- Image features: batch of 4 samples, 196 image patches (14x14 grid), embedding size 256.\n", 167 | "\n", 168 | "The output includes attended features and attention weights.\n" 169 | ], 170 | "metadata": { 171 | "id": "mvKJ5dorpwGi" 172 | } 173 | }, 174 | { 175 | "cell_type": "code", 176 | "source": [ 177 | "# Create the model\n", 178 | "cross_attention = CrossModalAttention(embed_dim=256, num_heads=8)\n", 179 | "\n", 180 | "# Simulated input data\n", 181 | "text_features = torch.randn(4, 20, 256) # 4 samples, 20 text tokens\n", 182 | "image_features = torch.randn(4, 196, 256) # 4 samples, 196 image patches (14x14)\n", 183 | "\n", 184 | "# Forward pass\n", 185 | "attended_features, attention_weights = cross_attention(text_features, image_features)\n", 186 | "\n", 187 | "# Inspect outputs\n", 188 | "print(f\"Cross-modal attention output shape: {attended_features.shape}\")\n", 189 | "print(f\"Attention weights shape: {attention_weights.shape}\")\n" 190 | ], 191 | "metadata": { 192 | "id": "rUp_QmkVpyUI" 193 | }, 194 | "execution_count": null, 195 | "outputs": [] 196 | } 197 | ] 198 | } -------------------------------------------------------------------------------- /ch9/meeting_stringent_business_regulatory_requirements.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 
3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "### Compliance and Governance\n", 21 | "\n", 22 | "In the context of customer support, LLMs can use a RAG system to retrieve relevant data, ensuring that only anonymized, minimal customer information is used. This keeps the system compliant with data privacy regulations like GDPR and CCPA. Below is a simple compliance check that ensures that the data retrieved complies with these regulations." 23 | ], 24 | "metadata": { 25 | "id": "s9sMK7seZBkv" 26 | } 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "colab": { 33 | "base_uri": "https://localhost:8080/" 34 | }, 35 | "id": "UU9O7vhnYxpm", 36 | "outputId": "b8577510-98a3-4664-f645-107c6733f859" 37 | }, 38 | "outputs": [ 39 | { 40 | "output_type": "stream", 41 | "name": "stdout", 42 | "text": [ 43 | "Compliance Check Passed: Data is GDPR-compliant.\n" 44 | ] 45 | }, 46 | { 47 | "output_type": "execute_result", 48 | "data": { 49 | "text/plain": [ 50 | "True" 51 | ] 52 | }, 53 | "metadata": {}, 54 | "execution_count": 1 55 | } 56 | ], 57 | "source": [ 58 | "import json\n", 59 | "\n", 60 | "\n", 61 | "def check_compliance_with_gdpr(data):\n", 62 | " \"\"\"\n", 63 | " Checks if the retrieved data complies with GDPR standards.\n", 64 | " \"\"\"\n", 65 | " required_fields = [\"customer_id\", \"customer_name\", \"interaction_data\"]\n", 66 | " # Ensure that only minimal personal data is exposed\n", 67 | " if all(field in data for field in required_fields):\n", 68 | " print(\"Compliance Check Passed: Data is GDPR-compliant.\")\n", 69 | " return True\n", 70 | " else:\n", 71 | " print(\"Compliance Check Failed: Missing or excessive data.\")\n", 72 | " return False\n", 73 | "\n", 74 | "\n", 75 | "# Example of data fetched by the RAG system\n", 76 | "customer_data = {\n", 77 | " \"customer_id\": \"12345\",\n", 78 | " \"customer_name\": \"John Doe\",\n", 79 | " \"interaction_data\": \"Product inquiry details\",\n", 80 | "}\n", 81 | "\n", 82 | "\n", 83 | "# Check if data complies with GDPR\n", 84 | "check_compliance_with_gdpr(customer_data)\n" 85 | ] 86 | }, 87 | { 88 | "cell_type": "markdown", 89 | "source": [ 90 | "The code ensures that only necessary fields (e.g., customer ID, name, and interaction data) are fetched, aligning with data minimization practices under GDPR. It verifies that no excessive personal data is retrieved or exposed." 91 | ], 92 | "metadata": { 93 | "id": "bLfPaNFGZYK-" 94 | } 95 | }, 96 | { 97 | "cell_type": "markdown", 98 | "source": [ 99 | "### Industry-Specific Regulations\n", 100 | "\n", 101 | "In healthcare, compliance with HIPAA (Health Insurance Portability and Accountability Act) is essential when using LLMs to handle patient data. 
Below is an example where a RAG system ensures that only authorized, compliant patient data is accessed.\n" 102 | ], 103 | "metadata": { 104 | "id": "Gb81NocCZfzO" 105 | } 106 | }, 107 | { 108 | "cell_type": "code", 109 | "source": [ 110 | "import json\n", 111 | "\n", 112 | "\n", 113 | "def check_compliance_with_gdpr(data):\n", 114 | " \"\"\"\n", 115 | " Checks if the retrieved data complies with GDPR standards.\n", 116 | " \"\"\"\n", 117 | " required_fields = [\"customer_id\", \"customer_name\", \"interaction_data\"]\n", 118 | " # Ensure that only minimal personal data is exposed\n", 119 | " if all(field in data for field in required_fields):\n", 120 | " print(\"Compliance Check Passed: Data is GDPR-compliant.\")\n", 121 | " return True\n", 122 | " else:\n", 123 | " print(\"Compliance Check Failed: Missing or excessive data.\")\n", 124 | " return False\n", 125 | "\n", 126 | "\n", 127 | "# Example of data fetched by the RAG system\n", 128 | "customer_data = {\n", 129 | " \"customer_id\": \"12345\",\n", 130 | " \"customer_name\": \"John Doe\",\n", 131 | " \"interaction_data\": \"Product inquiry details\",\n", 132 | "}\n", 133 | "\n", 134 | "\n", 135 | "# Check if data complies with GDPR\n", 136 | "check_compliance_with_gdpr(customer_data)" 137 | ], 138 | "metadata": { 139 | "colab": { 140 | "base_uri": "https://localhost:8080/" 141 | }, 142 | "id": "JcqFiEdGZIW7", 143 | "outputId": "b25a9453-629f-4cbb-ae3b-13b572f7c6df" 144 | }, 145 | "execution_count": 2, 146 | "outputs": [ 147 | { 148 | "output_type": "stream", 149 | "name": "stdout", 150 | "text": [ 151 | "Compliance Check Passed: Data is GDPR-compliant.\n" 152 | ] 153 | }, 154 | { 155 | "output_type": "execute_result", 156 | "data": { 157 | "text/plain": [ 158 | "True" 159 | ] 160 | }, 161 | "metadata": {}, 162 | "execution_count": 2 163 | } 164 | ] 165 | }, 166 | { 167 | "cell_type": "markdown", 168 | "source": [ 169 | "In healthcare settings, this code checks that only the necessary patient data (like patient ID and medical history) is accessed by the LLM. This approach ensures that private health information (PHI) is handled responsibly under HIPAA standards.\n" 170 | ], 171 | "metadata": { 172 | "id": "1bcBKMhLZr3R" 173 | } 174 | }, 175 | { 176 | "cell_type": "markdown", 177 | "source": [ 178 | "### Governance Structures and Ethical Guidelines\n", 179 | "\n", 180 | "Ethical governance in LLM deployment focuses on transparency, non-discrimination, and fairness. By using a RAG system, organizations can provide ethically sound responses, making sure the data used to generate these responses is curated and non-biased. 
Below is a simple implementation of an ethical check to ensure LLM outputs align with fairness guidelines.\n" 181 | ], 182 | "metadata": { 183 | "id": "jQNt3NLxZstP" 184 | } 185 | }, 186 | { 187 | "cell_type": "code", 188 | "source": [ 189 | "def ethical_check_for_fairness(response):\n", 190 | " \"\"\"\n", 191 | " Checks the LLM response for ethical considerations, ensuring fairness and non-discrimination.\n", 192 | " \"\"\"\n", 193 | " biased_terms = [\"unfit\", \"inferior\", \"discriminated\"]\n", 194 | " if any(biased_term in response.lower() for biased_term in biased_terms):\n", 195 | " print(\"Ethical Check Failed: Bias or discriminatory terms detected.\")\n", 196 | " return False\n", 197 | " else:\n", 198 | " print(\"Ethical Check Passed: Response is fair and non-discriminatory.\")\n", 199 | " return True\n", 200 | "\n", 201 | "\n", 202 | "# Example of a response generated by the LLM\n", 203 | "llm_response = \"This candidate is unfit for the role based on their experience.\"\n", 204 | "\n", 205 | "\n", 206 | "# Check if the response is ethically sound\n", 207 | "ethical_check_for_fairness(llm_response)" 208 | ], 209 | "metadata": { 210 | "colab": { 211 | "base_uri": "https://localhost:8080/" 212 | }, 213 | "id": "KAxRjQrDZpSG", 214 | "outputId": "055805ab-67d0-4dde-cc22-59ce8bcb2f22" 215 | }, 216 | "execution_count": 3, 217 | "outputs": [ 218 | { 219 | "output_type": "stream", 220 | "name": "stdout", 221 | "text": [ 222 | "Ethical Check Failed: Bias or discriminatory terms detected.\n" 223 | ] 224 | }, 225 | { 226 | "output_type": "execute_result", 227 | "data": { 228 | "text/plain": [ 229 | "False" 230 | ] 231 | }, 232 | "metadata": {}, 233 | "execution_count": 3 234 | } 235 | ] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "source": [ 240 | "The code snippet checks the LLM response for biased terms, ensuring the response adheres to ethical guidelines promoting fairness. If the response contains discriminatory language, the check will fail." 241 | ], 242 | "metadata": { 243 | "id": "gwW1qQ71Z2-6" 244 | } 245 | }, 246 | { 247 | "cell_type": "code", 248 | "source": [], 249 | "metadata": { 250 | "id": "L0x4CQkxZ0C_" 251 | }, 252 | "execution_count": null, 253 | "outputs": [] 254 | } 255 | ] 256 | } -------------------------------------------------------------------------------- /ch9/auditing_reporting.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "### Performance Audits\n", 21 | "\n", 22 | "In deploying large language models (LLMs) that utilize Retrieval-Augmented Generation (RAG) techniques, it’s essential to establish auditing mechanisms to ensure the consistency and accuracy of responses. The following function, audit_llm_response, serves as a basic auditing tool that compares the LLM’s output with trusted source data. By logging the success or failure of each audit check with a timestamp, this function provides a simple yet effective method to monitor data consistency, helping to detect discrepancies that could impact decision-making and model reliability." 
23 | ], 24 | "metadata": { 25 | "id": "0Dv7IDbWaAu6" 26 | } 27 | }, 28 | { 29 | "cell_type": "markdown", 30 | "source": [ 31 | "##### Define the audit_llm_response Function" 32 | ], 33 | "metadata": { 34 | "id": "x--Z8bruaI72" 35 | } 36 | }, 37 | { 38 | "cell_type": "code", 39 | "execution_count": 2, 40 | "metadata": { 41 | "id": "seB579xoZ9OC" 42 | }, 43 | "outputs": [], 44 | "source": [ 45 | "import datetime\n", 46 | "\n", 47 | "\n", 48 | "def audit_llm_response(response, source_data):\n", 49 | " \"\"\"\n", 50 | " Compares LLM response with source data for audit purposes.\n", 51 | " \"\"\"\n", 52 | " current_time = datetime.datetime.now()\n", 53 | " # Audit check: Is the response data consistent with source data?\n", 54 | " if response == source_data:\n", 55 | " print(f\"Audit successful at {current_time}: Data is up-to-date and accurate.\")\n", 56 | " else:\n", 57 | " print(f\"Audit failed at {current_time}: Mismatch found in response data.\")" 58 | ] 59 | }, 60 | { 61 | "cell_type": "markdown", 62 | "source": [ 63 | "Let’s try the audit_llm_response function. Imagine we have an LLM that retrieves the latest stock prices using a RAG database, and we want to verify that the output matches the expected data from a trusted API or data source. Here’s how this function could be used in such a context:" 64 | ], 65 | "metadata": { 66 | "id": "Xnotwf-HaM2n" 67 | } 68 | }, 69 | { 70 | "cell_type": "code", 71 | "source": [ 72 | "# Sample source data from trusted RAG source\n", 73 | "source_data = \"Stock price of XYZ is $200\"\n", 74 | "\n", 75 | "\n", 76 | "# Sample LLM response\n", 77 | "response = \"Stock price of XYZ is $200\"\n", 78 | "\n", 79 | "\n", 80 | "wrong_response = \"Stock price of XYZ is $198\"\n", 81 | "\n", 82 | "\n", 83 | "# Run the audit function on matching data to verify consistency\n", 84 | "audit_llm_response(response, source_data)\n", 85 | "\n", 86 | "\n", 87 | "# Run the audit function on mismatched data to confirm the failure case\n", 88 | "audit_llm_response(response, wrong_response)" 89 | ], 90 | "metadata": { 91 | "colab": { 92 | "base_uri": "https://localhost:8080/" 93 | }, 94 | "id": "eW0foUreaO5p", 95 | "outputId": "f8c99989-73fc-40d1-d710-a761bb3e1d77" 96 | }, 97 | "execution_count": 3, 98 | "outputs": [ 99 | { 100 | "output_type": "stream", 101 | "name": "stdout", 102 | "text": [ 103 | "Audit successful at 2024-11-18 13:14:49.291193: Data is up-to-date and accurate.\n", 104 | "Audit failed at 2024-11-18 13:14:49.291835: Mismatch found in response data.\n" 105 | ] 106 | } 107 | ] 108 | }, 109 | { 110 | "cell_type": "markdown", 111 | "source": [ 112 | "### Compliance Auditing\n", 113 | "\n", 114 | "Compliance with regulatory standards is crucial. The compliance_audit function logs each interaction between the model and its RAG data sources, creating a traceable record of source and response pairs. By logging these interactions in a dedicated compliance file, organizations can monitor data usage and verify adherence to legal and regulatory requirements, thus maintaining transparency and accountability in LLM deployments."
115 | ], 116 | "metadata": { 117 | "id": "hPTHpvzAaYi0" 118 | } 119 | }, 120 | { 121 | "cell_type": "code", 122 | "source": [ 123 | "import logging\n", 124 | "def compliance_audit(source, response):\n", 125 | " \"\"\"\n", 126 | " Logs each interaction with the RAG source for compliance tracking.\n", 127 | " \"\"\"\n", 128 | " print(f\"Source: {source}, Response: {response}, Compliance Check: Passed\")" 129 | ], 130 | "metadata": { 131 | "id": "Ci5R7aD0aRH1" 132 | }, 133 | "execution_count": 4, 134 | "outputs": [] 135 | }, 136 | { 137 | "cell_type": "markdown", 138 | "source": [ 139 | "### Let's try the compliance_audit function" 140 | ], 141 | "metadata": { 142 | "id": "bNASV005asIv" 143 | } 144 | }, 145 | { 146 | "cell_type": "code", 147 | "source": [ 148 | "# Example 1: Log interaction with a knowledge database\n", 149 | "source_data_1 = \"https://knowledgebase.example.com/article/123\"\n", 150 | "response_1 = \"According to the knowledge database, AI techniques are advancing rapidly.\"\n", 151 | "compliance_audit(source_data_1, response_1)\n", 152 | "\n", 153 | "\n", 154 | "# Example 2: Log interaction with a financial report API\n", 155 | "source_data_2 = \"https://financialdata.example.com/reports/Q3_2024\"\n", 156 | "response_2 = \"The Q3 financial report shows a 12% increase in revenue.\"\n", 157 | "compliance_audit(source_data_2, response_2)\n", 158 | "\n", 159 | "\n", 160 | "# Example 3: Log interaction with a healthcare dataset\n", 161 | "source_data_3 = \"https://healthdata.example.com/patient/456\"\n", 162 | "response_3 = \"Patient 456 has a recorded history of hypertension.\"\n", 163 | "compliance_audit(source_data_3, response_3)\n" 164 | ], 165 | "metadata": { 166 | "colab": { 167 | "base_uri": "https://localhost:8080/" 168 | }, 169 | "id": "aF9HtOw0ao8F", 170 | "outputId": "d52f825b-1389-4a4d-eef3-c59d93c2e3a2" 171 | }, 172 | "execution_count": 5, 173 | "outputs": [ 174 | { 175 | "output_type": "stream", 176 | "name": "stdout", 177 | "text": [ 178 | "Source: https://knowledgebase.example.com/article/123, Response: According to the knowledge database, AI techniques are advancing rapidly., Compliance Check: Passed\n", 179 | "Source: https://financialdata.example.com/reports/Q3_2024, Response: The Q3 financial report shows a 12% increase in revenue., Compliance Check: Passed\n", 180 | "Source: https://healthdata.example.com/patient/456, Response: Patient 456 has a recorded history of hypertension., Compliance Check: Passed\n" 181 | ] 182 | } 183 | ] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "source": [ 188 | "### Feedback Mechanisms\n", 189 | "\n", 190 | "Gathering user feedback on model responses is essential for continuous improvement. Feedback mechanisms enable teams to refine model responses, enhance relevance, and reduce any misunderstandings that might arise in sensitive contexts. The following code snippet defines a simple feedback logging system, which stores feedback entries in a list for review and analysis.
This structure allows developers to track user responses and identify patterns in the feedback, guiding targeted adjustments to the LLM and its response-generation strategies.\n" 191 | ], 192 | "metadata": { 193 | "id": "yC-eqjHSa1g7" 194 | } 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "feedback_log = []\n", 200 | "\n", 201 | "\n", 202 | "def gather_feedback(response, feedback):\n", 203 | " \"\"\"\n", 204 | " Collects user feedback on LLM responses for continuous improvement.\n", 205 | " \"\"\"\n", 206 | " feedback_log.append({\"response\": response, \"feedback\": feedback})\n", 207 | " print(\"Feedback collected and stored.\")\n", 208 | "\n" 209 | ], 210 | "metadata": { 211 | "id": "AqIc-M_Tax_C" 212 | }, 213 | "execution_count": 6, 214 | "outputs": [] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "source": [ 219 | "Each example call to gather_feedback adds an entry to the feedback_log list, which stores feedback on the given response for later analysis. This collected feedback provides insights for model refinement and enables the improvement of response quality based on user input. The feedback_log helps maintain a feedback loop, essential in systems using LLMs with RAG by identifying areas where responses need clarity, specificity, or additional information.\n" 220 | ], 221 | "metadata": { 222 | "id": "dwtDWFbQa-rK" 223 | } 224 | }, 225 | { 226 | "cell_type": "code", 227 | "source": [ 228 | "# Example 1: Collect feedback on a response related to general AI information\n", 229 | "response_1 = \"AI technologies are evolving and improving across industries.\"\n", 230 | "feedback_1 = \"This response is accurate but could include examples of specific industries.\"\n", 231 | "gather_feedback(response_1, feedback_1)\n", 232 | "\n", 233 | "\n", 234 | "# Example 2: Collect feedback on a response from a medical knowledge base\n", 235 | "response_2 = \"Patient treatment protocols are standardized to improve outcomes.\"\n", 236 | "feedback_2 = \"Please clarify which protocols are being referenced.\"\n", 237 | "gather_feedback(response_2, feedback_2)\n", 238 | "\n", 239 | "\n", 240 | "# Example 3: Collect feedback on a response regarding financial trends\n", 241 | "response_3 = \"The stock market saw significant growth in Q3.\"\n", 242 | "feedback_3 = \"Response is too general. 
Needs more specific data points.\"\n", 243 | "gather_feedback(response_3, feedback_3)\n" 244 | ], 245 | "metadata": { 246 | "colab": { 247 | "base_uri": "https://localhost:8080/" 248 | }, 249 | "id": "ncGuQAq0a-T0", 250 | "outputId": "816fca30-4c3d-4853-e5f9-c0c08521a826" 251 | }, 252 | "execution_count": 7, 253 | "outputs": [ 254 | { 255 | "output_type": "stream", 256 | "name": "stdout", 257 | "text": [ 258 | "Feedback collected and stored.\n", 259 | "Feedback collected and stored.\n", 260 | "Feedback collected and stored.\n" 261 | ] 262 | } 263 | ] 264 | }, 265 | { 266 | "cell_type": "code", 267 | "source": [], 268 | "metadata": { 269 | "id": "c6NAa5XSbBdC" 270 | }, 271 | "execution_count": null, 272 | "outputs": [] 273 | } 274 | ] 275 | } -------------------------------------------------------------------------------- /ch3/introduction_to_llm.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "markdown", 5 | "id": "5bd02725-6d98-4c2f-8a3a-e5ac7da3bedb", 6 | "metadata": {}, 7 | "source": [ 8 | "# A brief introduction to the OpenAI API\n", 9 | "\n", 10 | "Before starting to create the chatbot, I think it is interesting to explain a couple of points:\n", 11 | "\n", 12 | "- The roles within a conversation with OpenAI.\n", 13 | "- How is the conversations’ memory preserved?\n", 14 | "\n", 15 | "If you prefer to start creating the chatbot, just move to the section: Creating the Chatbot with OpenAI and GPT.\n", 16 | "\n", 17 | "## The roles in OpenAI messages\n", 18 | "\n", 19 | "One of the lesser-known features of language models such as GPT 3.5 is that the conversation occurs between several roles. We can identify the user and the assistant, but there is a third role called system, which allows us to better configure how the model should behave.\n", 20 | "\n", 21 | "When we use tools like ChatGPT, we always assume the role of the user, but the API lets us choose which Role we want to send to the model, for each sentence.\n", 22 | "\n", 23 | "To send text containing our part of the dialog to the model, we must use the `ChatCompletion.create` function, indicating, at least, the model to use and a list of messages.\n", 24 | "\n", 25 | "Each message in the list contains a role and the text we want to send to the model.\n", 26 | "\n", 27 | "Here is an example of the list of messages that can be sent using the three available roles.\n" 28 | ] 29 | }, 30 | { 31 | "cell_type": "code", 32 | "execution_count": 5, 33 | "id": "72d71c22-42d9-42aa-b8bb-7fc55e2246c7", 34 | "metadata": {}, 35 | "outputs": [], 36 | "source": [ 37 | " messages=[\n", 38 | " {\"role\": \"system\", \"content\": \"You are an OrderBot in a fastfood restaurant.\"},\n", 39 | " {\"role\": \"user\", \"content\": \"I have only 10 dollars, what can I order?\"},\n", 40 | " {\"role\": \"assistant\", \"content\": \"We have the fast menu for 7 dollars.\"},\n", 41 | " {\"role\": \"user\", \"content\": \"Perfect! Give me one! \"}\n", 42 | " ]" 43 | ] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "id": "55a18d2a-039d-42fe-aac0-d309f4f4849d", 48 | "metadata": {}, 49 | "source": [ 50 | "Let’s take a closer look at the three existing roles:\n", 51 | "\n", 52 | "- **System:** We can tell the model how we want it to behave and tell it how its personality and type of response should be. Somehow, it allows us to configure the basic operation of the model. 
OpenAI says that this role will become more important in the next models, even though now its importance is relatively small in GPT 3.5.\n", 53 | "- **User:** These are the phrases that come from the user.\n", 54 | "- **Assistant:** These are the responses returned by the model. With the API, we can send responses that say they came from the model, even if they came from somewhere else.\n" 55 | ] 56 | }, 57 | { 58 | "cell_type": "markdown", 59 | "id": "46178bd6-fa47-4a70-bdb4-668ee6ebfac3", 60 | "metadata": {}, 61 | "source": [ 62 | "# Vertical Chat\n", 63 | "A sample of how to build a chat for a small business using:\n", 64 | "\n", 65 | "* GPT-3.5\n", 66 | "* Panel\n", 67 | "* OpenAI\n", 68 | "\n", 69 | "\n", 70 | "This is just a simple sample to start to understand how the OpenAI API works, and how to create prompts. It is really far from being a complete solution.\n", 71 | "We are going to introduce some interesting points:\n", 72 | "\n", 73 | "* The roles in a conversation.\n", 74 | "* How is the conversations’ memory preserved?" 75 | ] 76 | }, 77 | { 78 | "cell_type": "markdown", 79 | "id": "92c71f2f-b937-473b-b355-b290f5d5558d", 80 | "metadata": {}, 81 | "source": [ 82 | "In this notebook, we'll explore some simple prompt engineering techniques and recommendations that will help us elicit responses from the models that are better suited to our needs.\n" 83 | ] 84 | }, 85 | { 86 | "cell_type": "markdown", 87 | "id": "10928b67-82c9-4032-9217-290e3f8a96bb", 88 | "metadata": {}, 89 | "source": [ 90 | "# Creating the Chatbot with OpenAI and GPT\n", 91 | "\n", 92 | "The first thing we have to consider is that we are going to need an OpenAI payment account to use their service and that we will have to register a valid credit card. But let's not worry, I've been using it a lot for development and testing, and I can assure you that the cost is negligible.\n", 93 | "\n", 94 | "Doing all the tests for this article, I think they cost me €0.07. We could only be surprised if we upload something to production that becomes a hit. Even so, we can establish the monthly consumption limit that we want.\n", 95 | "\n", 96 | "The first thing, as always, is to know if we have the necessary libraries installed. In case we work on Google Colab, I think we only have to install two: OpenAI and Panel.\n" 97 | ] 98 | }, 99 | { 100 | "cell_type": "code", 101 | "execution_count": 2, 102 | "id": "7b619062-96a7-48f0-a4ad-7da84308f70a", 103 | "metadata": {}, 104 | "outputs": [], 105 | "source": [ 106 | "!pip install openai==1.1.1\n", 107 | "!pip install panel" 108 | ] 109 | }, 110 | { 111 | "cell_type": "code", 112 | "execution_count": 2, 113 | "id": "39e3e313-1dd7-4527-8fb3-06714bda65de", 114 | "metadata": {}, 115 | "outputs": [], 116 | "source": [ 117 | "import openai\n", 118 | "\n", 119 | "openai.api_key=\"your-api-key\"" 120 | ] 121 | }, 122 | { 123 | "cell_type": "markdown", 124 | "id": "e1035a4d-44f2-4ced-a88a-7f673aed9826", 125 | "metadata": {}, 126 | "source": [ 127 | "## Formatting the answer with Few-Shot Samples\n", 128 | "To obtain the model's response in a specific format, we have various options, but one of the most convenient is to use Few-Shot Samples.
This involves presenting the model with pairs of user queries and example responses.\n", 129 | "\n", 130 | "Large models like GPT-3.5 respond well to the examples provided, adapting their response to the specified format.\n", 131 | "\n", 132 | "Depending on the number of examples given, this technique can be referred to as:\n", 133 | "\n", 134 | "Zero-Shot.\n", 135 | "One-Shot.\n", 136 | "Few-Shots.\n", 137 | "With One Shot should be enough, and it is recommended to use a maximum of six shots. It's important to remember that this information is passed in each query and occupies space in the input prompt.\n", 138 | "\n" 139 | ] 140 | }, 141 | { 142 | "cell_type": "markdown", 143 | "id": "92717756-3e20-401e-b497-065056a86a30", 144 | "metadata": {}, 145 | "source": [ 146 | "My key is stored in a file where I keep the keys. But if you like, you can inform it directly in the notebook, or save the key in a file, with a .py extension.\n", 147 | "\n", 148 | "In any case, make sure that nobody can ever know the value of the Key; otherwise, they could make calls to the OpenAI API that you would end up paying for.\n", 149 | "\n", 150 | "Now we are going to define two functions, which will be the ones that will contain the logic of maintaining the memory of the conversation.\n" 151 | ] 152 | }, 153 | { 154 | "cell_type": "code", 155 | "execution_count": 6, 156 | "id": "925a746e-e8de-4a44-a9bd-bfe7b4d73951", 157 | "metadata": {}, 158 | "outputs": [], 159 | "source": [ 160 | "def continue_conversation(messages, temperature=0):\n", 161 | " response = openai.chat.completions.create(\n", 162 | " model=\"gpt-3.5-turbo\",\n", 163 | " messages=messages,\n", 164 | " temperature=temperature,\n", 165 | " )\n", 166 | " #print(str(response.choices[0].message[\"content\"]))\n", 167 | " return response.choices[0].message.content" 168 | ] 169 | }, 170 | { 171 | "cell_type": "markdown", 172 | "id": "0b398809-62a2-441e-bed5-9d8f1c2310b4", 173 | "metadata": {}, 174 | "source": [ 175 | "This function is very simple, it just makes a call to the OpenAI API that allows you to have a conversation.\n", 176 | "\n", 177 | "Inside the function, we are calling OpenAI with the following parameters:\n", 178 | "\n", 179 | "- `model`: the model we want to use.\n", 180 | "- `messages`: the messages part of the conversation.\n", 181 | "- `temperature`: It is a numerical value between 0 and 1, which indicates how imaginative the model can be when generating the response. The smaller the value, the less original the model’s response will be.\n", 182 | "\n", 183 | "As you know, a language generation model does not always give the same answers to the same inputs. 
The lower the value of temperature, the more similar the result will be for the same inputs, even repeating itself in many cases.\n" 184 | ] 185 | }, 186 | { 187 | "cell_type": "code", 188 | "execution_count": null, 189 | "id": "1c7a441b-103a-44d5-bce6-56271f9afb28", 190 | "metadata": {}, 191 | "outputs": [], 192 | "source": [ 193 | "def add_prompts_conversation(_):\n", 194 | " #Get the value introduced by the user\n", 195 | " prompt = client_prompt.value_input\n", 196 | " client_prompt.value = ''\n", 197 | "\n", 198 | " #Append to the context the User prompt.\n", 199 | " context.append({'role':'user', 'content':f\"{prompt}\"})\n", 200 | "\n", 201 | " #Get the response.\n", 202 | " response = continue_conversation(context)\n", 203 | "\n", 204 | " #Add the response to the context.\n", 205 | " context.append({'role':'assistant', 'content':f\"{response}\"})\n", 206 | "\n", 207 | " #Update the panels to show the conversation.\n", 208 | " panels.append(\n", 209 | " pn.Row('User:', pn.pane.Markdown(prompt, width=600)))\n", 210 | " panels.append(\n", 211 | " pn.Row('Assistant:', pn.pane.Markdown(response, width=600)))\n", 212 | "\n", 213 | " return pn.Column(*panels)" 214 | ] 215 | }, 216 | { 217 | "cell_type": "markdown", 218 | "id": "99829f22-c099-4f93-9972-934dcf0328a3", 219 | "metadata": {}, 220 | "source": [ 221 | "This function is responsible for collecting user input, incorporating it into the context or conversation, calling the model, and incorporating its response into the conversation. That is, it is responsible for managing the memory! It is as simple as adding phrases with the correct format to a list, where each sentence is formed by the role and the phrase.\n", 222 | "\n", 223 | "Now is the time for the prompt!\n", 224 | "\n", 225 | "This is an LLM model. We are not going to program, we are going to try to make it behave as we want by giving it some instructions. At the same time, we must also provide it with enough information so that it can do its job properly informed.\n" 226 | ] 227 | }, 228 | { 229 | "cell_type": "code", 230 | "execution_count": 3, 231 | "id": "2dd9063d-80cf-41c0-b357-39610f015aad", 232 | "metadata": {}, 233 | "outputs": [], 234 | "source": [ 235 | "context = [ {'role':'system', 'content':\"\"\"\n", 236 | "Act as an OrderBot, you work collecting orders in a delivery only fast food restaurant called\n", 237 | "My Dear Frankfurt. \\\n", 238 | "First welcome the customer, in a very friendly way, then collects the order. \\\n", 239 | "You wait to collect the entire order, beverages included \\\n", 240 | "then summarize it and check for a final \\\n", 241 | "time if everything is ok or the customer wants to add anything else. \\\n", 242 | "Finally you collect the payment.\\\n", 243 | "Make sure to clarify all options, extras and sizes to uniquely \\\n", 244 | "identify the item from the menu.\\\n", 245 | "You respond in a short, very friendly style. 
\\\n", 246 | "The menu includes \\\n", 247 | "burger 12.95, 10.00, 7.00 \\\n", 248 | "frankfurt 10.95, 9.25, 6.50 \\\n", 249 | "sandwich 11.95, 9.75, 6.75 \\\n", 250 | "fries 4.50, 3.50 \\\n", 251 | "salad 7.25 \\\n", 252 | "Toppings: \\\n", 253 | "extra cheese 2.00, \\\n", 254 | "mushrooms 1.50 \\\n", 255 | "martra sausage 3.00 \\\n", 256 | "canadian bacon 3.50 \\\n", 257 | "romesco sauce 1.50 \\\n", 258 | "peppers 1.00 \\\n", 259 | "Drinks: \\\n", 260 | "coke 3.00, 2.00, 1.00 \\\n", 261 | "sprite 3.00, 2.00, 1.00 \\\n", 262 | "vichy catalan 5.00 \\\n", 263 | "\"\"\"} ]\n", 264 | "\n", 265 | "#Creating the panel.\n", 266 | "pn.extension()\n", 267 | "\n", 268 | "panels = []\n", 269 | "\n", 270 | "client_prompt = pn.widgets.TextInput(value=\"Hi\", placeholder='Enter text here…')\n", 271 | "button_conversation = pn.widgets.Button(name=\"talk\")\n", 272 | "\n", 273 | "interactive_conversation = pn.bind(add_prompts_conversation, button_conversation)\n", 274 | "\n", 275 | "dashboard = pn.Column(\n", 276 | " client_prompt,\n", 277 | " pn.Row(button_conversation),\n", 278 | " pn.panel(interactive_conversation, loading_indicator=True),\n", 279 | ")\n", 280 | "\n", 281 | "dashboard" 282 | ] 283 | }, 284 | { 285 | "cell_type": "code", 286 | "execution_count": null, 287 | "id": "749aaf1a-5b20-488d-a64d-459253b834d6", 288 | "metadata": {}, 289 | "outputs": [], 290 | "source": [] 291 | } 292 | ], 293 | "metadata": { 294 | "kernelspec": { 295 | "display_name": "Python 3 (ipykernel)", 296 | "language": "python", 297 | "name": "python3" 298 | }, 299 | "language_info": { 300 | "codemirror_mode": { 301 | "name": "ipython", 302 | "version": 3 303 | }, 304 | "file_extension": ".py", 305 | "mimetype": "text/x-python", 306 | "name": "python", 307 | "nbconvert_exporter": "python", 308 | "pygments_lexer": "ipython3", 309 | "version": "3.10.12" 310 | } 311 | }, 312 | "nbformat": 4, 313 | "nbformat_minor": 5 314 | } 315 | -------------------------------------------------------------------------------- /ch5/Implementing_Hybrid_Search.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "gpuType": "T4" 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | }, 16 | "accelerator": "GPU" 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "source": [ 22 | "## Install Required Packages\n", 23 | "First, install the necessary packages. OpenAI's Python client library and any specific embedding-related library (like langchain) should be installed." 24 | ], 25 | "metadata": { 26 | "id": "T0zgnwxTQ137" 27 | } 28 | }, 29 | { 30 | "cell_type": "code", 31 | "source": [ 32 | "# ! pip install langchain lancedb openai\n", 33 | "# ! pip install langchain-community\n", 34 | "# !pip install requests pypdf\n", 35 | "# ! pip install PyPDF2\n", 36 | "# ! pip install rank_bm25\n", 37 | "# ! pip install tiktoken" 38 | ], 39 | "metadata": { 40 | "id": "0_VZikdncSua" 41 | }, 42 | "execution_count": 24, 43 | "outputs": [] 44 | }, 45 | { 46 | "cell_type": "markdown", 47 | "source": [ 48 | " ## Set Up Your API Key\n", 49 | "In Google Colab, you can set your API key by directly assigning it in the notebook or using environment variables. 
For security, it's best practice to avoid hardcoding sensitive information in your code\n", 50 | "Set the API Key Using Environment Variables in Cola\n" 51 | ], 52 | "metadata": { 53 | "id": "ZTrniR7xQ7AS" 54 | } 55 | }, 56 | { 57 | "cell_type": "code", 58 | "source": [ 59 | "import openai\n", 60 | "from langchain.embeddings import OpenAIEmbeddings\n", 61 | "\n", 62 | "# Directly set your API key here\n", 63 | "openai_api_key = 'your_key'" 64 | ], 65 | "metadata": { 66 | "id": "xH1MyrT1QgxK" 67 | }, 68 | "execution_count": 7, 69 | "outputs": [] 70 | }, 71 | { 72 | "cell_type": "markdown", 73 | "source": [ 74 | "## Access the Environment Variable in Your Code:\n" 75 | ], 76 | "metadata": { 77 | "id": "YXyRES0dRJBM" 78 | } 79 | }, 80 | { 81 | "cell_type": "code", 82 | "source": [ 83 | "from langchain.vectorstores import LanceDB\n", 84 | "import lancedb\n", 85 | "from langchain.retrievers import BM25Retriever, EnsembleRetriever\n", 86 | "from langchain.schema import Document\n", 87 | "from langchain.embeddings.openai import OpenAIEmbeddings\n", 88 | "from langchain.document_loaders import PyPDFLoader\n", 89 | "\n", 90 | "\n", 91 | "# Initialize embeddings for semantic search\n", 92 | "embedding = OpenAIEmbeddings(openai_api_key=openai_api_key)\n" 93 | ], 94 | "metadata": { 95 | "id": "8sZmXcm4fQYa" 96 | }, 97 | "execution_count": 8, 98 | "outputs": [] 99 | }, 100 | { 101 | "cell_type": "markdown", 102 | "source": [ 103 | "## Download the PDF\n", 104 | "Before we start let's download required pdfs." 105 | ], 106 | "metadata": { 107 | "id": "buwyoMhlSgRs" 108 | } 109 | }, 110 | { 111 | "cell_type": "code", 112 | "source": [ 113 | "import requests\n", 114 | "import time\n", 115 | "\n", 116 | "def download_pdf(url, save_path, retries=3):\n", 117 | " attempt = 0\n", 118 | " while attempt < retries:\n", 119 | " try:\n", 120 | " response = requests.get(url, stream=True)\n", 121 | " response.raise_for_status() # Check if the download was successful\n", 122 | " with open(save_path, 'wb') as file:\n", 123 | " for chunk in response.iter_content(chunk_size=8192):\n", 124 | " file.write(chunk)\n", 125 | " print(f\"Downloaded PDF from {url} to {save_path}\")\n", 126 | " return True\n", 127 | " except requests.exceptions.RequestException as e:\n", 128 | " attempt += 1\n", 129 | " print(f\"Error downloading PDF (attempt {attempt} of {retries}): {e}\")\n", 130 | " if attempt < retries:\n", 131 | " time.sleep(5) # Wait before retrying\n", 132 | " return False\n", 133 | "\n", 134 | "# Example URL and file path\n", 135 | "pdf_url = \"https://pdf.usaid.gov/pdf_docs/PA00TBCT.pdf\"\n", 136 | "pdf_path = \"/content/Food_and_Nutrition.pdf\"\n", 137 | "\n", 138 | "# Download the PDF\n", 139 | "if not download_pdf(pdf_url, pdf_path):\n", 140 | " raise Exception(\"Failed to download PDF after multiple attempts\")\n" 141 | ], 142 | "metadata": { 143 | "colab": { 144 | "base_uri": "https://localhost:8080/" 145 | }, 146 | "id": "j3Fwp59eQb4U", 147 | "outputId": "13fad0d5-8302-44e4-dd27-73c19a053a59" 148 | }, 149 | "execution_count": 9, 150 | "outputs": [ 151 | { 152 | "output_type": "stream", 153 | "name": "stdout", 154 | "text": [ 155 | "Downloaded PDF from https://pdf.usaid.gov/pdf_docs/PA00TBCT.pdf to /content/Food_and_Nutrition.pdf\n" 156 | ] 157 | } 158 | ] 159 | }, 160 | { 161 | "cell_type": "markdown", 162 | "source": [ 163 | "## Load and Split the PDF\n", 164 | "Use PyPDFLoader to load and split the PDF into pages." 
165 | ], 166 | "metadata": { 167 | "id": "sKqgpHNKSzIB" 168 | } 169 | }, 170 | { 171 | "cell_type": "code", 172 | "source": [ 173 | "from langchain.document_loaders import PyPDFLoader\n", 174 | "\n", 175 | "# Load documents\n", 176 | "loader = PyPDFLoader(\"Food_and_Nutrition.pdf\")\n", 177 | "pages = loader.load_and_split()\n" 178 | ], 179 | "metadata": { 180 | "id": "AAGetOkJSx2Y" 181 | }, 182 | "execution_count": 10, 183 | "outputs": [] 184 | }, 185 | { 186 | "cell_type": "markdown", 187 | "source": [ 188 | "## Initialize the BM25 Retriever\n", 189 | "Set up the BM25 retriever to fetch top results." 190 | ], 191 | "metadata": { 192 | "id": "Lx-OBvmEbFEH" 193 | } 194 | }, 195 | { 196 | "cell_type": "code", 197 | "source": [ 198 | "from langchain.retrievers import BM25Retriever\n", 199 | "\n", 200 | "# Initialize the BM25 retriever\n", 201 | "bm25_retriever = BM25Retriever.from_documents(pages)\n", 202 | "bm25_retriever.k = 2 # Retrieve top 2 results using BM25\n" 203 | ], 204 | "metadata": { 205 | "id": "ALbSOdbvS32V" 206 | }, 207 | "execution_count": 11, 208 | "outputs": [] 209 | }, 210 | { 211 | "cell_type": "markdown", 212 | "source": [ 213 | "## Create LanceDB Vector Store for Semantic Search\n", 214 | "Connect to LanceDB and create a table for storing embeddings." 215 | ], 216 | "metadata": { 217 | "id": "yJsw2kBTbVys" 218 | } 219 | }, 220 | { 221 | "cell_type": "code", 222 | "source": [ 223 | "import lancedb\n", 224 | "\n", 225 | "# Create lancedb vector store for semantic search\n", 226 | "db = lancedb.connect('lancedb')\n", 227 | "table = db.create_table(\"pandas_docs\", data=[\n", 228 | " {\"vector\": embedding.embed_query(\"Hello World\"), \"text\": \"Hello World\", \"id\": \"1\"}\n", 229 | "], mode=\"overwrite\")\n" 230 | ], 231 | "metadata": { 232 | "id": "SaaWRWr7bL7U" 233 | }, 234 | "execution_count": 14, 235 | "outputs": [] 236 | }, 237 | { 238 | "cell_type": "markdown", 239 | "source": [ 240 | "## Initialize LanceDB Retriever\n", 241 | "Set up the LanceDB retriever for semantic search." 242 | ], 243 | "metadata": { 244 | "id": "CAfUn3Qhc6Az" 245 | } 246 | }, 247 | { 248 | "cell_type": "code", 249 | "source": [ 250 | "from langchain.vectorstores import LanceDB\n", 251 | "from lancedb.db import LanceDBConnection\n", 252 | "from langchain.embeddings.openai import OpenAIEmbeddings\n", 253 | "from langchain.document_loaders import PyPDFLoader\n", 254 | "\n", 255 | "# Initialize embeddings for semantic search\n", 256 | "\n", 257 | "# Establish connection to the LanceDB database\n", 258 | "# Replace 'your_database_path' with the actual path to your LanceDB database\n", 259 | "connection = LanceDBConnection('lancedb')\n", 260 | "\n", 261 | "# Assume `pages` is a list of Document objects loaded previously\n", 262 | "# Initialize LanceDB retriever\n", 263 | "docsearch = LanceDB.from_documents(pages, embedding, connection=connection)\n", 264 | "\n", 265 | "# Create a retriever using the LanceDB vector store\n", 266 | "retriever_lancedb = docsearch.as_retriever(search_kwargs={\"k\": 2})\n" 267 | ], 268 | "metadata": { 269 | "id": "--QcN4X-bsex" 270 | }, 271 | "execution_count": 20, 272 | "outputs": [] 273 | }, 274 | { 275 | "cell_type": "markdown", 276 | "source": [ 277 | "## Initialize the Ensemble Retriever\n", 278 | "Combine the BM25 and LanceDB retrievers with specified weights." 
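Under the hood, the ensemble merges the two ranked lists with a weighted reciprocal-rank-fusion style score, so the `weights` decide how much each retriever's ranking counts. The snippet below is a simplified sketch of that idea with made-up document ids, not the library's actual implementation:

```python
# Simplified sketch of weighted reciprocal rank fusion (RRF): each retriever
# contributes weight / (k + rank) for every document it returns, and documents
# are re-ordered by the summed score. Document ids here are hypothetical.
def fuse(rankings, weights, k=60):
    scores = {}
    for ranked_docs, weight in zip(rankings, weights):
        for rank, doc_id in enumerate(ranked_docs, start=1):
            scores[doc_id] = scores.get(doc_id, 0.0) + weight / (k + rank)
    return sorted(scores, key=scores.get, reverse=True)

bm25_ranking = ["doc_3", "doc_1", "doc_7"]   # sparse (keyword) ranking, best first
dense_ranking = ["doc_1", "doc_5", "doc_3"]  # dense (vector) ranking, best first

print(fuse([bm25_ranking, dense_ranking], weights=[0.4, 0.6]))
# doc_1 comes out on top: it appears near the top of both lists and the dense
# ranking carries the larger weight, mirroring the weights used below.
```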
279 | ], 280 | "metadata": { 281 | "id": "O_fOvcarc-XZ" 282 | } 283 | }, 284 | { 285 | "cell_type": "code", 286 | "source": [ 287 | "from langchain.retrievers import EnsembleRetriever\n", 288 | "\n", 289 | "# Initialize the ensemble retriever with weights\n", 290 | "ensemble_retriever = EnsembleRetriever(retrievers=[bm25_retriever, retriever_lancedb], weights=[0.4, 0.6])\n" 291 | ], 292 | "metadata": { 293 | "id": "pRM8w--Zc-4Q" 294 | }, 295 | "execution_count": 21, 296 | "outputs": [] 297 | }, 298 | { 299 | "cell_type": "markdown", 300 | "source": [ 301 | "## Retrieve Relevant Documents\n", 302 | "Perform a query and retrieve relevant documents using the ensemble retriever." 303 | ], 304 | "metadata": { 305 | "id": "qxRL7lIZdGug" 306 | } 307 | }, 308 | { 309 | "cell_type": "code", 310 | "source": [ 311 | "query = \"Lorem ipsum dolor sit amet\"\n", 312 | "\n", 313 | "# Retrieve relevant documents\n", 314 | "docs = ensemble_retriever.get_relevant_documents(query)\n", 315 | "\n", 316 | "# Print retrieved documents\n", 317 | "for doc in docs:\n", 318 | " print(doc.page_content)" 319 | ], 320 | "metadata": { 321 | "id": "Lrw-RWdIdHFQ", 322 | "colab": { 323 | "base_uri": "https://localhost:8080/" 324 | }, 325 | "outputId": "b7cd54b6-3bf9-4419-fb5a-0ba0c26dda43" 326 | }, 327 | "execution_count": 23, 328 | "outputs": [ 329 | { 330 | "output_type": "stream", 331 | "name": "stdout", 332 | "text": [ 333 | "MINISTRY OF AGRICULTURE,\n", 334 | "ANIMAL INDUSTRY AND FISHERIES\n", 335 | "P.O. Box 102 ENTEBBE - UGANDA\n", 336 | "www.agriculture.go.ug\n", 337 | "Food and Nutrition Handbook for Extension Workers72\n", 338 | "Picture 18: A back yard garden and small animals and local chickens\n", 339 | "Food and Nutrition Handbook for Extension Workers25• Shortage of iodine decreases IQ and causes a productivity loss.\n", 340 | "• Farmers with low literacy levels are less likely to adopt improved \n", 341 | "agricultural practices hence leading to poor agricultural production \n", 342 | "and productivity.\n", 343 | "• People with low literacy levels are bound to have poor health seek -\n", 344 | "ing behaviours and access to quality health services.\n", 345 | "• Mothers with low education level are likely to follow poor feeding \n", 346 | "practices hence affecting the nutritional and health status of family \n", 347 | "members.\n", 348 | "• Contributes to poverty.\n", 349 | "• Cost of treating illnesses attributable to malnutrition.\n", 350 | "• Cost of caring for sick.\n", 351 | "• Lost care for other (not sick) household members.\n", 352 | "b)\tConsequences \tof\tovernutrition\n", 353 | "Malnutrition can lead to multiple medical conditions including:\n", 354 | "• Coronary heart disease (heart attack) \n", 355 | "• Diabetes (high blood sugar)\n", 356 | "• Gout (swollen painful joints)\n", 357 | "• Hypertension (high blood pressure) \n", 358 | "• Overweight\n", 359 | "• Obesity \n", 360 | "• Death\n", 361 | "Malnutrition increases the risk of death and illnesses\n", 362 | "Malnutrition weakens immunity and predisposes individuals to different \n", 363 | "infections.\n", 364 | "• More than half of infant deaths are associated with malnutrition.\n", 365 | "• Marasmus and kwashiorkor and finally death are caused by severe \n", 366 | "malnutrition.\n", 367 | "• Goitre due to iodine deficiency.\n", 368 | "• Night blindness to complete blindness from vitamin A deficiency.\n", 369 | "• Anaemia from iron deficiency.\n" 370 | ] 371 | } 372 | ] 373 | }, 374 | { 375 | "cell_type": "markdown", 376 | "source": [], 377 | 
"metadata": { 378 | "id": "uIveAFyNdVwE" 379 | } 380 | } 381 | ] 382 | } -------------------------------------------------------------------------------- /ch4/domain_adaptation.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "cells": [ 3 | { 4 | "cell_type": "code", 5 | "execution_count": 4, 6 | "id": "8a321b6f-57f9-4b19-9293-c81a06479204", 7 | "metadata": {}, 8 | "outputs": [ 9 | { 10 | "name": "stderr", 11 | "output_type": "stream", 12 | "text": [ 13 | "huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...\n", 14 | "To disable this warning, you can either:\n", 15 | "\t- Avoid using `tokenizers` before the fork if possible\n", 16 | "\t- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)\n" 17 | ] 18 | }, 19 | { 20 | "name": "stdout", 21 | "output_type": "stream", 22 | "text": [ 23 | "Requirement already satisfied: sentence-transformers in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (3.3.1)\n", 24 | "Requirement already satisfied: datasets in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (3.2.0)\n", 25 | "Requirement already satisfied: transformers<5.0.0,>=4.41.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from sentence-transformers) (4.47.0)\n", 26 | "Requirement already satisfied: tqdm in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from sentence-transformers) (4.67.1)\n", 27 | "Requirement already satisfied: torch>=1.11.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from sentence-transformers) (2.6.0.dev20241213)\n", 28 | "Requirement already satisfied: scikit-learn in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from sentence-transformers) (1.5.2)\n", 29 | "Requirement already satisfied: scipy in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from sentence-transformers) (1.14.1)\n", 30 | "Requirement already satisfied: huggingface-hub>=0.20.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from sentence-transformers) (0.26.5)\n", 31 | "Requirement already satisfied: Pillow in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from sentence-transformers) (11.0.0)\n", 32 | "Requirement already satisfied: filelock in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (3.16.1)\n", 33 | "Requirement already satisfied: numpy>=1.17 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (2.1.3)\n", 34 | "Requirement already satisfied: pyarrow>=15.0.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (18.0.0)\n", 35 | "Requirement already satisfied: dill<0.3.9,>=0.3.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (0.3.8)\n", 36 | "Requirement already satisfied: pandas in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (2.2.3)\n", 37 | "Requirement already 
satisfied: requests>=2.32.2 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (2.32.3)\n", 38 | "Requirement already satisfied: xxhash in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (3.5.0)\n", 39 | "Requirement already satisfied: multiprocess<0.70.17 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (0.70.16)\n", 40 | "Requirement already satisfied: fsspec<=2024.9.0,>=2023.1.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from fsspec[http]<=2024.9.0,>=2023.1.0->datasets) (2024.9.0)\n", 41 | "Requirement already satisfied: aiohttp in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (3.11.10)\n", 42 | "Requirement already satisfied: packaging in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (24.2)\n", 43 | "Requirement already satisfied: pyyaml>=5.1 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from datasets) (6.0.2)\n", 44 | "Requirement already satisfied: aiohappyeyeballs>=2.3.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from aiohttp->datasets) (2.4.4)\n", 45 | "Requirement already satisfied: aiosignal>=1.1.2 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from aiohttp->datasets) (1.3.2)\n", 46 | "Requirement already satisfied: attrs>=17.3.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from aiohttp->datasets) (24.2.0)\n", 47 | "Requirement already satisfied: frozenlist>=1.1.1 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from aiohttp->datasets) (1.5.0)\n", 48 | "Requirement already satisfied: multidict<7.0,>=4.5 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from aiohttp->datasets) (6.1.0)\n", 49 | "Requirement already satisfied: propcache>=0.2.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from aiohttp->datasets) (0.2.1)\n", 50 | "Requirement already satisfied: yarl<2.0,>=1.17.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from aiohttp->datasets) (1.18.3)\n", 51 | "Requirement already satisfied: typing-extensions>=3.7.4.3 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from huggingface-hub>=0.20.0->sentence-transformers) (4.12.2)\n", 52 | "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from requests>=2.32.2->datasets) (3.4.0)\n", 53 | "Requirement already satisfied: idna<4,>=2.5 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from requests>=2.32.2->datasets) (3.10)\n", 54 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from requests>=2.32.2->datasets) (2.2.3)\n", 55 | "Requirement already satisfied: certifi>=2017.4.17 in 
/Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from requests>=2.32.2->datasets) (2024.8.30)\n", 56 | "Requirement already satisfied: networkx in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from torch>=1.11.0->sentence-transformers) (3.4.2)\n", 57 | "Requirement already satisfied: jinja2 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from torch>=1.11.0->sentence-transformers) (3.1.4)\n", 58 | "Requirement already satisfied: setuptools in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from torch>=1.11.0->sentence-transformers) (75.6.0)\n", 59 | "Requirement already satisfied: sympy==1.13.1 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from torch>=1.11.0->sentence-transformers) (1.13.1)\n", 60 | "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from sympy==1.13.1->torch>=1.11.0->sentence-transformers) (1.3.0)\n", 61 | "Requirement already satisfied: regex!=2019.12.17 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (2024.11.6)\n", 62 | "Requirement already satisfied: tokenizers<0.22,>=0.21 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (0.21.0)\n", 63 | "Requirement already satisfied: safetensors>=0.4.1 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from transformers<5.0.0,>=4.41.0->sentence-transformers) (0.4.5)\n", 64 | "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from pandas->datasets) (2.9.0.post0)\n", 65 | "Requirement already satisfied: pytz>=2020.1 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from pandas->datasets) (2024.2)\n", 66 | "Requirement already satisfied: tzdata>=2022.7 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from pandas->datasets) (2024.2)\n", 67 | "Requirement already satisfied: joblib>=1.2.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from scikit-learn->sentence-transformers) (1.4.2)\n", 68 | "Requirement already satisfied: threadpoolctl>=3.1.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from scikit-learn->sentence-transformers) (3.5.0)\n", 69 | "Requirement already satisfied: six>=1.5 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from python-dateutil>=2.8.2->pandas->datasets) (1.16.0)\n", 70 | "Requirement already satisfied: MarkupSafe>=2.0 in /Users/amenshawy/Documents/teaching/arabsera/AIEng-Labs/mlflow_env/lib/python3.13/site-packages (from jinja2->torch>=1.11.0->sentence-transformers) (3.0.2)\n", 71 | "Before Fine-Tuning - Spearman Correlation on STS-B Test: 0.8203246731235654\n" 72 | ] 73 | }, 74 | { 75 | "data": { 76 | "text/html": [ 77 | "\n", 78 | "
\n", 79 | " \n", 80 | " \n", 81 | " [180/180 00:46, Epoch 1/1]\n", 82 | "
\n", 83 | " \n", 84 | " \n", 85 | " \n", 86 | " \n", 87 | " \n", 88 | " \n", 89 | " \n", 90 | " \n", 91 | " \n", 92 | "
StepTraining Loss

" 93 | ], 94 | "text/plain": [ 95 | "" 96 | ] 97 | }, 98 | "metadata": {}, 99 | "output_type": "display_data" 100 | }, 101 | { 102 | "name": "stdout", 103 | "output_type": "stream", 104 | "text": [ 105 | "After Fine-Tuning - Spearman Correlation on STS-B Test: 0.8489561516175831\n" 106 | ] 107 | } 108 | ], 109 | "source": [ 110 | "import math\n", 111 | "import numpy as np\n", 112 | "from scipy.stats import spearmanr\n", 113 | "from sentence_transformers import SentenceTransformer, InputExample, losses, util\n", 114 | "from torch.utils.data import DataLoader\n", 115 | "from datasets import load_dataset\n", 116 | "\n", 117 | "# 1. Load the STS-B dataset (English subset)\n", 118 | "# stsb_multi_mt includes STS-B data in multiple languages; we choose 'en'\n", 119 | "sts = load_dataset('stsb_multi_mt', 'en')\n", 120 | "train_data = sts['train']\n", 121 | "test_data = sts['test']\n", 122 | "\n", 123 | "# STS-B similarity scores range from 0 to 5, we must scale them to [0,1] for CosineSimilarityLoss\n", 124 | "def scale_score(score, min_val=0.0, max_val=5.0):\n", 125 | " return (score - min_val) / (max_val - min_val)\n", 126 | "\n", 127 | "def to_input_examples(dataset_split):\n", 128 | " examples = []\n", 129 | " for item in dataset_split:\n", 130 | " # similarity_score in [0,5]\n", 131 | " score = float(item['similarity_score'])\n", 132 | " scaled_score = scale_score(score) # scale to [0,1]\n", 133 | " examples.append(InputExample(texts=[item['sentence1'], item['sentence2']], label=scaled_score))\n", 134 | " return examples\n", 135 | "\n", 136 | "train_examples = to_input_examples(train_data)\n", 137 | "test_examples = to_input_examples(test_data)\n", 138 | "\n", 139 | "# 2. Load a pre-trained model\n", 140 | "model_name = \"sentence-transformers/all-MiniLM-L6-v2\"\n", 141 | "model = SentenceTransformer(model_name)\n", 142 | "\n", 143 | "def evaluate_model(model, examples):\n", 144 | " # Evaluate Spearman correlation between model cos_sim and gold scores\n", 145 | " s1 = [ex.texts[0] for ex in examples]\n", 146 | " s2 = [ex.texts[1] for ex in examples]\n", 147 | " gold_scores = [ex.label for ex in examples] # these are in [0,1]\n", 148 | "\n", 149 | " emb1 = model.encode(s1, convert_to_tensor=True)\n", 150 | " emb2 = model.encode(s2, convert_to_tensor=True)\n", 151 | " cos_scores = util.cos_sim(emb1, emb2).cpu().numpy()\n", 152 | "\n", 153 | " # Extract the diagonal since we compared each pair (i,i)\n", 154 | " cos_scores = np.array([cos_scores[i][i] for i in range(len(gold_scores))])\n", 155 | "\n", 156 | " # Compute Spearman correlation\n", 157 | " spearman_corr = spearmanr(gold_scores, cos_scores).correlation\n", 158 | " return spearman_corr\n", 159 | "\n", 160 | "# Evaluate before fine-tuning\n", 161 | "before_corr = evaluate_model(model, test_examples)\n", 162 | "print(\"Before Fine-Tuning - Spearman Correlation on STS-B Test:\", before_corr)\n", 163 | "\n", 164 | "# 3. Fine-tune the model\n", 165 | "train_dataloader = DataLoader(train_examples, shuffle=True, batch_size=32)\n", 166 | "train_loss = losses.CosineSimilarityLoss(model=model) # Align cos_sim with labels in [0,1]\n", 167 | "\n", 168 | "epochs = 1 \n", 169 | "warmup_steps = math.ceil(len(train_dataloader)*epochs*0.1)\n", 170 | "\n", 171 | "model.fit(\n", 172 | " train_objectives=[(train_dataloader, train_loss)],\n", 173 | " epochs=epochs,\n", 174 | " warmup_steps=warmup_steps,\n", 175 | " show_progress_bar=True\n", 176 | ")\n", 177 | "\n", 178 | "# 4. 
Evaluate after fine-tuning\n", 179 | "after_corr = evaluate_model(model, test_examples)\n", 180 | "print(\"After Fine-Tuning - Spearman Correlation on STS-B Test:\", after_corr)\n" 181 | ] 182 | }, 183 | { 184 | "cell_type": "code", 185 | "execution_count": null, 186 | "id": "0967b184-cd5b-4232-978d-9efb8b5d077b", 187 | "metadata": {}, 188 | "outputs": [], 189 | "source": [] 190 | } 191 | ], 192 | "metadata": { 193 | "kernelspec": { 194 | "display_name": "Python 3 (ipykernel)", 195 | "language": "python", 196 | "name": "python3" 197 | }, 198 | "language_info": { 199 | "codemirror_mode": { 200 | "name": "ipython", 201 | "version": 3 202 | }, 203 | "file_extension": ".py", 204 | "mimetype": "text/x-python", 205 | "name": "python", 206 | "nbconvert_exporter": "python", 207 | "pygments_lexer": "ipython3", 208 | "version": "3.9.12" 209 | } 210 | }, 211 | "nbformat": 4, 212 | "nbformat_minor": 5 213 | } 214 | -------------------------------------------------------------------------------- /ch13/Real_Time_Content_Moderation_System.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [], 7 | "gpuType": "T4" 8 | }, 9 | "kernelspec": { 10 | "name": "python3", 11 | "display_name": "Python 3" 12 | }, 13 | "language_info": { 14 | "name": "python" 15 | }, 16 | "accelerator": "GPU" 17 | }, 18 | "cells": [ 19 | { 20 | "cell_type": "markdown", 21 | "source": [ 22 | "### Real-Time Content Moderation System\n", 23 | "This notebook presents a real-time content moderation pipeline that integrates toxicity detection and fact verification models. It acts as a safeguard before LLM-generated content is shown to users." 24 | ], 25 | "metadata": { 26 | "id": "Fj7MparnvuPR" 27 | } 28 | }, 29 | { 30 | "cell_type": "markdown", 31 | "source": [ 32 | "### Step 1: Install Required Libraries\n", 33 | "Install the required libraries before proceeding:" 34 | ], 35 | "metadata": { 36 | "id": "BVJsKLd0v0QJ" 37 | } 38 | }, 39 | { 40 | "cell_type": "code", 41 | "execution_count": null, 42 | "metadata": { 43 | "colab": { 44 | "base_uri": "https://localhost:8080/" 45 | }, 46 | "id": "vvje0lVWvs24", 47 | "outputId": "905c2050-58f2-431b-e4ac-0954cd2cac0f" 48 | }, 49 | "outputs": [ 50 | { 51 | "output_type": "stream", 52 | "name": "stdout", 53 | "text": [ 54 | "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.53.1)\n", 55 | "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", 56 | "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.18.0)\n", 57 | "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.33.2)\n", 58 | "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2.0.2)\n", 59 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n", 60 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n", 61 | "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", 62 | "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", 63 | "Requirement already satisfied: tokenizers<0.22,>=0.21 in 
/usr/local/lib/python3.11/dist-packages (from transformers) (0.21.2)\n", 64 | "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", 65 | "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n", 66 | "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.14.1)\n", 67 | "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.5)\n", 68 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.6)\n", 69 | "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2025.3.2)\n", 70 | "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n", 71 | " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 72 | "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n", 73 | " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 74 | "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n", 75 | " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 76 | "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", 77 | " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 78 | "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", 79 | " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 80 | "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", 81 | " Downloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 82 | "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", 83 | " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 84 | "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", 85 | " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 86 | "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", 87 | " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 88 | "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch) (0.6.2)\n", 89 | "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", 90 | "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", 91 | "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", 92 | " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 93 | "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.2.0)\n", 94 | "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", 95 | "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", 96 | "Requirement already satisfied: hf-xet<2.0.0,>=1.1.2 in /usr/local/lib/python3.11/dist-packages (from huggingface-hub<1.0,>=0.30.0->transformers) (1.1.5)\n", 97 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from 
jinja2->torch) (3.0.2)\n", 98 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.2)\n", 99 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n", 100 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.4.0)\n", 101 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.7.9)\n", 102 | "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", 103 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m4.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 104 | "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", 105 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m120.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 106 | "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", 107 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m92.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 108 | "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", 109 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m62.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 110 | "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", 111 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 112 | "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", 113 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 114 | "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", 115 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m15.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 116 | "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", 117 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m7.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 118 | "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", 119 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m6.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 120 | "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", 121 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 122 | "\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, 
nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n", 123 | " Attempting uninstall: nvidia-nvjitlink-cu12\n", 124 | " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", 125 | " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", 126 | " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", 127 | " Attempting uninstall: nvidia-curand-cu12\n", 128 | " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", 129 | " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", 130 | " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", 131 | " Attempting uninstall: nvidia-cufft-cu12\n", 132 | " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", 133 | " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", 134 | " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", 135 | " Attempting uninstall: nvidia-cuda-runtime-cu12\n", 136 | " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", 137 | " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", 138 | " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", 139 | " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", 140 | " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", 141 | " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", 142 | " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", 143 | " Attempting uninstall: nvidia-cuda-cupti-cu12\n", 144 | " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", 145 | " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", 146 | " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", 147 | " Attempting uninstall: nvidia-cublas-cu12\n", 148 | " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", 149 | " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", 150 | " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", 151 | " Attempting uninstall: nvidia-cusparse-cu12\n", 152 | " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", 153 | " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", 154 | " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", 155 | " Attempting uninstall: nvidia-cudnn-cu12\n", 156 | " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", 157 | " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n" 158 | ] 159 | } 160 | ], 161 | "source": [ 162 | "! pip install transformers torch" 163 | ] 164 | }, 165 | { 166 | "cell_type": "markdown", 167 | "source": [ 168 | "### Step 2: Import Libraries" 169 | ], 170 | "metadata": { 171 | "id": "d8B-U19Vv4a0" 172 | } 173 | }, 174 | { 175 | "cell_type": "code", 176 | "source": [ 177 | "from transformers import AutoTokenizer, AutoModelForSequenceClassification\n", 178 | "import torch" 179 | ], 180 | "metadata": { 181 | "id": "if2sEw-lv4Q6" 182 | }, 183 | "execution_count": null, 184 | "outputs": [] 185 | }, 186 | { 187 | "cell_type": "markdown", 188 | "source": [ 189 | "### Step 3: Define the Safety Ensemble Class\n", 190 | "This class loads both toxicity detection and fact-checking models and provides a method to analyze text." 
191 | ], 192 | "metadata": { 193 | "id": "3XlPjxGcwAjZ" 194 | } 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "class SafetyEnsemble:\n", 200 | " def __init__(self, hf_token=None):\n", 201 | " \"\"\"\n", 202 | " Initialize safety models with optional Hugging Face token\n", 203 | " \"\"\"\n", 204 | " # Load toxicity detection model\n", 205 | " self.toxicity_model = AutoModelForSequenceClassification.from_pretrained(\n", 206 | " \"facebook/roberta-hate-speech-dynabench-r4-target\",\n", 207 | " use_auth_token=hf_token\n", 208 | " )\n", 209 | "\n", 210 | " # Load fact verification model (natural language inference)\n", 211 | " self.factcheck_model = AutoModelForSequenceClassification.from_pretrained(\n", 212 | " \"ynie/roberta-large-snli_mnli_fever_anli_R1_R2_R3-nli\",\n", 213 | " use_auth_token=hf_token\n", 214 | " )\n", 215 | "\n", 216 | " # Load a shared tokenizer\n", 217 | " self.tokenizer = AutoTokenizer.from_pretrained(\n", 218 | " \"roberta-base\",\n", 219 | " use_auth_token=hf_token\n", 220 | " )\n", 221 | "\n", 222 | " def analyze(self, text):\n", 223 | " \"\"\"\n", 224 | " Analyze text for toxicity and factual accuracy\n", 225 | " \"\"\"\n", 226 | " inputs = self.tokenizer(text, return_tensors=\"pt\", truncation=True, max_length=512)\n", 227 | "\n", 228 | " tox_score = self._get_toxicity_score(inputs)\n", 229 | " veracity, veracity_probs = self._get_veracity(inputs)\n", 230 | "\n", 231 | " return {\n", 232 | " \"toxicity_risk\": tox_score,\n", 233 | " \"fact_accuracy\": {\n", 234 | " \"label\": veracity,\n", 235 | " \"probabilities\": veracity_probs\n", 236 | " },\n", 237 | " \"block\": tox_score > 0.9 or veracity == \"contradiction\"\n", 238 | " }\n", 239 | "\n", 240 | " def _get_toxicity_score(self, inputs):\n", 241 | " \"\"\"Return probability of toxicity (0 to 1)\"\"\"\n", 242 | " self.toxicity_model.eval()\n", 243 | " with torch.no_grad():\n", 244 | " outputs = self.toxicity_model(**inputs)\n", 245 | " # Handle binary or multi-class logits\n", 246 | " return torch.sigmoid(outputs.logits).item() if outputs.logits.shape[1] == 1 else torch.softmax(outputs.logits, dim=1)[0][1].item()\n", 247 | "\n", 248 | " def _get_veracity(self, inputs):\n", 249 | " \"\"\"Use an NLI model to assess veracity\"\"\"\n", 250 | " self.factcheck_model.eval()\n", 251 | " with torch.no_grad():\n", 252 | " outputs = self.factcheck_model(**inputs)\n", 253 | "\n", 254 | " probs = torch.softmax(outputs.logits, dim=1)[0]\n", 255 | " label_idx = torch.argmax(probs).item()\n", 256 | " label_map = {0: \"entailment\", 1: \"neutral\", 2: \"contradiction\"}\n", 257 | "\n", 258 | " return label_map[label_idx], [round(p.item(), 4) for p in probs]" 259 | ], 260 | "metadata": { 261 | "id": "goPx2ehZv4Ok" 262 | }, 263 | "execution_count": null, 264 | "outputs": [] 265 | }, 266 | { 267 | "cell_type": "markdown", 268 | "source": [ 269 | "### Step 4: Define a Dummy LLM for Testing" 270 | ], 271 | "metadata": { 272 | "id": "BeSpuPWBwJ_z" 273 | } 274 | }, 275 | { 276 | "cell_type": "code", 277 | "source": [ 278 | "class DummyLLM:\n", 279 | " \"\"\"Mock LLM for demonstration purposes\"\"\"\n", 280 | " def generate(self, prompt: str):\n", 281 | " return {\"generated_text\": f\"This is a safe response to: '{prompt}'.\"}\n" 282 | ], 283 | "metadata": { 284 | "id": "pz4MquEZv4MO" 285 | }, 286 | "execution_count": null, 287 | "outputs": [] 288 | }, 289 | { 290 | "cell_type": "markdown", 291 | "source": [ 292 | "### Step 5: Run the Moderation Pipeline" 293 | ], 294 | "metadata": { 295 | "id": "rnH2R9oVwPeE" 296 
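Before the interactive loop, a single non-interactive call makes the moderation decision easier to follow. The sentence below is an arbitrary example, and the first run will download both model checkpoints:

```python
# One-off sketch: analyze a single string and only call the (dummy) LLM if it passes.
safety = SafetyEnsemble()
llm = DummyLLM()

sample = "The Eiffel Tower is located in Berlin."
analysis = safety.analyze(sample)

print(f"Toxicity risk: {analysis['toxicity_risk']:.4f}")
print(f"Fact accuracy: {analysis['fact_accuracy']['label']}")

if analysis["block"]:
    print("Blocked before reaching the LLM.")
else:
    print(llm.generate(sample)["generated_text"])
```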
| } 297 | }, 298 | { 299 | "cell_type": "code", 300 | "source": [ 301 | "def main():\n", 302 | " safety = SafetyEnsemble()\n", 303 | " llm = DummyLLM()\n", 304 | "\n", 305 | " print(\"Safety Analysis Demo (type 'quit' to exit)\")\n", 306 | "\n", 307 | " while True:\n", 308 | " prompt = input(\"\\nEnter text to analyze: \").strip()\n", 309 | " if prompt.lower() in ('quit', 'exit'):\n", 310 | " break\n", 311 | "\n", 312 | " analysis = safety.analyze(prompt)\n", 313 | "\n", 314 | " print(\"\\nAnalysis Results:\")\n", 315 | " print(f\"Toxicity Risk: {analysis['toxicity_risk']:.4f}\")\n", 316 | " print(f\"Fact Accuracy: {analysis['fact_accuracy']['label']}\")\n", 317 | " print(f\" Probabilities: [E: {analysis['fact_accuracy']['probabilities'][0]:.2f}, \"\n", 318 | " f\"N: {analysis['fact_accuracy']['probabilities'][1]:.2f}, \"\n", 319 | " f\"C: {analysis['fact_accuracy']['probabilities'][2]:.2f}]\")\n", 320 | "\n", 321 | " if analysis['block']:\n", 322 | " print(\"\\n❌ Blocked - Reason:\", end=\" \")\n", 323 | " if analysis['toxicity_risk'] > 0.9:\n", 324 | " print(\"High toxicity risk\", end=\"\")\n", 325 | " if analysis['fact_accuracy']['label'] == \"contradiction\":\n", 326 | " print(\" and factual contradiction\")\n", 327 | " else:\n", 328 | " print()\n", 329 | " else:\n", 330 | " print(\"Factual contradiction\")\n", 331 | " else:\n", 332 | " response = llm.generate(prompt)\n", 333 | " print(f\"\\n✅ Allowed - Generated response: {response['generated_text']}\")\n" 334 | ], 335 | "metadata": { 336 | "id": "_eGeRq7qv4J5" 337 | }, 338 | "execution_count": null, 339 | "outputs": [] 340 | }, 341 | { 342 | "cell_type": "markdown", 343 | "source": [ 344 | "### Step 6: Run the Main Function" 345 | ], 346 | "metadata": { 347 | "id": "AP2qd3xXwW_a" 348 | } 349 | }, 350 | { 351 | "cell_type": "code", 352 | "source": [ 353 | "if __name__ == \"__main__\":\n", 354 | " main()" 355 | ], 356 | "metadata": { 357 | "id": "sxiG4eVzv4HS" 358 | }, 359 | "execution_count": null, 360 | "outputs": [] 361 | }, 362 | { 363 | "cell_type": "code", 364 | "source": [], 365 | "metadata": { 366 | "id": "en3XLUbRwiDW" 367 | }, 368 | "execution_count": null, 369 | "outputs": [] 370 | } 371 | ] 372 | } -------------------------------------------------------------------------------- /ch8/fine_tuning_Deepseek_for_a_classification_task.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "Here's a simplified example of how Deepseek might be fine-tuned for a classification task without any sensitive data. Before we begin, let's install the necessary Python packages to ensure the code runs smoothly. 
These include transformers for model handling and torch for tensor processing.\n", 21 | "\n", 22 | "### Install the required libraries: Run the following command to install the required libraries:" 23 | ], 24 | "metadata": { 25 | "id": "rtwLFUdSXw6l" 26 | } 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "colab": { 33 | "base_uri": "https://localhost:8080/" 34 | }, 35 | "id": "s-61iwDLTgUi", 36 | "outputId": "d897077e-7356-4a1a-f690-3733ffac3a31" 37 | }, 38 | "outputs": [ 39 | { 40 | "output_type": "stream", 41 | "name": "stdout", 42 | "text": [ 43 | "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.1)\n", 44 | "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", 45 | "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.18.0)\n", 46 | "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.30.2)\n", 47 | "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2.0.2)\n", 48 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n", 49 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n", 50 | "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", 51 | "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", 52 | "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.1)\n", 53 | "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", 54 | "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n", 55 | "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.13.1)\n", 56 | "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", 57 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.6)\n", 58 | "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2025.3.2)\n", 59 | "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n", 60 | " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 61 | "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n", 62 | " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 63 | "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n", 64 | " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 65 | "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", 66 | " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 67 | "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", 68 | " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 69 | "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", 70 | " Downloading 
nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 71 | "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", 72 | " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 73 | "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", 74 | " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 75 | "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", 76 | " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 77 | "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch) (0.6.2)\n", 78 | "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", 79 | "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", 80 | "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", 81 | " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 82 | "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.2.0)\n", 83 | "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", 84 | "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", 85 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", 86 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.1)\n", 87 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n", 88 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.3.0)\n", 89 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.1.31)\n", 90 | "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", 91 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 92 | "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", 93 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 94 | "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", 95 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m49.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 96 | "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", 97 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 98 | "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", 99 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 100 | "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", 101 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 102 | "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", 103 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 104 | "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", 105 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 106 | "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", 107 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 108 | "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", 109 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m77.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 110 | "\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n", 111 | " Attempting uninstall: nvidia-nvjitlink-cu12\n", 112 | " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", 113 | " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", 114 | " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", 115 | " Attempting uninstall: nvidia-curand-cu12\n", 116 | " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", 117 | " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", 118 | " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", 119 | " Attempting uninstall: nvidia-cufft-cu12\n", 120 | " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", 121 | " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", 122 | " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", 123 | " Attempting uninstall: nvidia-cuda-runtime-cu12\n", 124 | " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", 125 | " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", 126 | " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", 127 | " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", 128 | " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", 129 | " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", 130 | " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", 131 | " Attempting uninstall: nvidia-cuda-cupti-cu12\n", 132 | " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", 133 | " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", 134 | " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", 135 | " Attempting uninstall: nvidia-cublas-cu12\n", 136 | " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", 137 | " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", 138 | " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", 139 | " Attempting uninstall: 
nvidia-cusparse-cu12\n", 140 | " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", 141 | " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", 142 | " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", 143 | " Attempting uninstall: nvidia-cudnn-cu12\n", 144 | " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", 145 | " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", 146 | " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", 147 | " Attempting uninstall: nvidia-cusolver-cu12\n", 148 | " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", 149 | " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", 150 | " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", 151 | "Successfully installed nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "! pip install transformers torch" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "source": [ 162 | "#### Now, you can import the necessary libraries for using the Deepseek model:" 163 | ], 164 | "metadata": { 165 | "id": "n4nZe6DuZTp7" 166 | } 167 | }, 168 | { 169 | "cell_type": "code", 170 | "source": [ 171 | "import torch\n", 172 | "\n", 173 | "from torch.utils.data import Dataset\n", 174 | "\n", 175 | "from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments" 176 | ], 177 | "metadata": { 178 | "id": "_0zM-MZCZcLb" 179 | }, 180 | "execution_count": null, 181 | "outputs": [] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "source": [ 186 | "Loading pre-trained Deepseek model and tokenizer: We load the pre-trained Deepseek tokenizer and model to use for sentiment classification:\n", 187 | "\n", 188 | "AutoTokenizer.from_pretrained: Loads the pre-trained Deepseek tokenizer\n", 189 | "\n", 190 | "AutoModelForSequenceClassification.from_pretrained: Loads the pre-trained Deepseek model for sequence classification tasks" 191 | ], 192 | "metadata": { 193 | "id": "UBhl29vgZctB" 194 | } 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "tokenizer = AutoTokenizer.from_pretrained(\"deepseek-ai/deepseek-llm-7b-base\", trust_remote_code=True)\n", 200 | "\n", 201 | "model = AutoModelForSequenceClassification.from_pretrained(\n", 202 | "\n", 203 | " \"deepseek-ai/deepseek-llm-7b-base\",\n", 204 | "\n", 205 | " num_labels=2, # Binary classification\n", 206 | "\n", 207 | " trust_remote_code=True\n", 208 | "\n", 209 | ")\n", 210 | "\n", 211 | "\n", 212 | "\n", 213 | "# Set padding token if not already set\n", 214 | "\n", 215 | "if tokenizer.pad_token is None:\n", 216 | "\n", 217 | " tokenizer.pad_token = tokenizer.eos_token\n", 218 | "\n", 219 | " model.config.pad_token_id = tokenizer.pad_token_id" 220 | ], 221 | "metadata": { 222 | "id": "P_PhhI39UDDD" 223 | }, 224 | "execution_count": null, 225 | "outputs": [] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "source": [ 230 | "Example text and data tokenization: Here we define two example text samples and their corresponding sentiment labels, and tokenize the input text to convert it into a format suitable for Deepseek.\n", 231 | "\n", 232 | "tokenizer: Converts the text into token IDs that Deepseek can process\n", 233 | "\n", 234 | "padding=True: Ensures the text is padded to the same 
length\n", 235 | "\n", 236 | "truncation=True: Truncates longer texts to fit Deepseek's maximum input length\n", 237 | "\n", 238 | "return_tensors=\"pt\": Returns PyTorch tensors for compatibility with the model" 239 | ], 240 | "metadata": { 241 | "id": "rVbopILeZjA-" 242 | } 243 | }, 244 | { 245 | "cell_type": "code", 246 | "source": [ 247 | "texts = [\"I love machine learning.\", \"This tutorial is confusing and poorly explained.\"]\n", 248 | "\n", 249 | "labels = [1, 0] # Labels for sentiment (1 = positive, 0 = negative)\n", 250 | "\n", 251 | "inputs = tokenizer(texts, padding=True, truncation=True, return_tensors=\"pt\")\n", 252 | "\n", 253 | "\n", 254 | "\n", 255 | "# Custom Dataset class\n", 256 | "\n", 257 | "class CustomDataset(Dataset):\n", 258 | "\n", 259 | " def __init__(self, inputs, labels):\n", 260 | "\n", 261 | " self.input_ids = inputs['input_ids']\n", 262 | "\n", 263 | " self.attention_mask = inputs['attention_mask']\n", 264 | "\n", 265 | " self.labels = torch.tensor(labels)\n", 266 | "\n", 267 | "\n", 268 | "\n", 269 | " def __len__(self):\n", 270 | "\n", 271 | " return len(self.labels)\n", 272 | "\n", 273 | "\n", 274 | "\n", 275 | " def __getitem__(self, idx):\n", 276 | "\n", 277 | " return {\n", 278 | "\n", 279 | " 'input_ids': self.input_ids[idx],\n", 280 | "\n", 281 | " 'attention_mask': self.attention_mask[idx],\n", 282 | "\n", 283 | " 'labels': self.labels[idx]\n", 284 | "\n", 285 | " }" 286 | ], 287 | "metadata": { 288 | "id": "Ka20E4mNUZgw" 289 | }, 290 | "execution_count": null, 291 | "outputs": [] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "source": [ 296 | "#### Prepare the dataset for training: We wrap the tokenized inputs and the labels in the CustomDataset class defined above, so that each training example exposes input_ids, attention_mask, and labels:" 297 | ], 298 | "metadata": { 299 | "id": "-fy7UPGcZoeb" 300 | } 301 | }, 302 | { 303 | "cell_type": "code", 304 | "source": [ 305 | "train_data = CustomDataset(inputs, labels) # use the CustomDataset defined above; the Trainer expects dict-style items, not a bare TensorDataset" 306 | ], 307 | "metadata": { 308 | "id": "9QgeUFX3aDF2" 309 | }, 310 | "execution_count": null, 311 | "outputs": [] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "source": [ 316 | "Next, define training arguments. 
We specify the settings for the training process, such as the number of epochs, batch size, and logging:" 317 | ], 318 | "metadata": { 319 | "id": "CD8dnJvVaGhZ" 320 | } 321 | }, 322 | { 323 | "cell_type": "code", 324 | "source": [ 325 | "training_args = TrainingArguments(\n", 326 | "\n", 327 | " output_dir=\"./results\",\n", 328 | "\n", 329 | " num_train_epochs=3,\n", 330 | "\n", 331 | " per_device_train_batch_size=1, # Reduced batch size due to model size\n", 332 | "\n", 333 | " logging_dir=\"./logs\",\n", 334 | "\n", 335 | " logging_steps=10,\n", 336 | "\n", 337 | " save_steps=50,\n", 338 | "\n", 339 | " learning_rate=1e-5,\n", 340 | "\n", 341 | " gradient_accumulation_steps=16, # Add gradient accumulation for large model\n", 342 | "\n", 343 | " fp16=True, # Enable mixed precision training\n", 344 | "\n", 345 | " gradient_checkpointing=True # Enable gradient checkpointing to save memory\n", 346 | "\n", 347 | ")" 348 | ], 349 | "metadata": { 350 | "id": "n_EbQCdlUeis" 351 | }, 352 | "execution_count": null, 353 | "outputs": [] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "source": [ 358 | "#### Next, we fine-tune the Deepseek model with the prepared training data using the Trainer API:" 359 | ], 360 | "metadata": { 361 | "id": "o_d9gGAWZuKK" 362 | } 363 | }, 364 | { 365 | "cell_type": "code", 366 | "source": [ 367 | "trainer = Trainer(\n", 368 | "\n", 369 | " model=model,\n", 370 | "\n", 371 | " args=training_args,\n", 372 | "\n", 373 | " train_dataset=train_data\n", 374 | "\n", 375 | ")\n", 376 | "\n", 377 | "trainer.train()" 378 | ], 379 | "metadata": { 380 | "id": "uPoA9dA7Uodi" 381 | }, 382 | "execution_count": null, 383 | "outputs": [] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "source": [], 388 | "metadata": { 389 | "id": "ogn_IbxpUr7M" 390 | }, 391 | "execution_count": null, 392 | "outputs": [] 393 | } 394 | ] 395 | } -------------------------------------------------------------------------------- /ch7/ch8/fine_tuning_Deepseek_for_a_classification_task.ipynb: -------------------------------------------------------------------------------- 1 | { 2 | "nbformat": 4, 3 | "nbformat_minor": 0, 4 | "metadata": { 5 | "colab": { 6 | "provenance": [] 7 | }, 8 | "kernelspec": { 9 | "name": "python3", 10 | "display_name": "Python 3" 11 | }, 12 | "language_info": { 13 | "name": "python" 14 | } 15 | }, 16 | "cells": [ 17 | { 18 | "cell_type": "markdown", 19 | "source": [ 20 | "Here's a simplified example of how Deepseek might be fine-tuned for a classification task without any sensitive data. Before we begin, let's install the necessary Python packages to ensure the code runs smoothly. 
These include transformers for model handling and torch for tensor processing.\n", 21 | "\n", 22 | "### Install the required libraries: Run the following command to install the required libraries:" 23 | ], 24 | "metadata": { 25 | "id": "rtwLFUdSXw6l" 26 | } 27 | }, 28 | { 29 | "cell_type": "code", 30 | "execution_count": 1, 31 | "metadata": { 32 | "colab": { 33 | "base_uri": "https://localhost:8080/" 34 | }, 35 | "id": "s-61iwDLTgUi", 36 | "outputId": "d897077e-7356-4a1a-f690-3733ffac3a31" 37 | }, 38 | "outputs": [ 39 | { 40 | "output_type": "stream", 41 | "name": "stdout", 42 | "text": [ 43 | "Requirement already satisfied: transformers in /usr/local/lib/python3.11/dist-packages (4.51.1)\n", 44 | "Requirement already satisfied: torch in /usr/local/lib/python3.11/dist-packages (2.6.0+cu124)\n", 45 | "Requirement already satisfied: filelock in /usr/local/lib/python3.11/dist-packages (from transformers) (3.18.0)\n", 46 | "Requirement already satisfied: huggingface-hub<1.0,>=0.30.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.30.2)\n", 47 | "Requirement already satisfied: numpy>=1.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2.0.2)\n", 48 | "Requirement already satisfied: packaging>=20.0 in /usr/local/lib/python3.11/dist-packages (from transformers) (24.2)\n", 49 | "Requirement already satisfied: pyyaml>=5.1 in /usr/local/lib/python3.11/dist-packages (from transformers) (6.0.2)\n", 50 | "Requirement already satisfied: regex!=2019.12.17 in /usr/local/lib/python3.11/dist-packages (from transformers) (2024.11.6)\n", 51 | "Requirement already satisfied: requests in /usr/local/lib/python3.11/dist-packages (from transformers) (2.32.3)\n", 52 | "Requirement already satisfied: tokenizers<0.22,>=0.21 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.21.1)\n", 53 | "Requirement already satisfied: safetensors>=0.4.3 in /usr/local/lib/python3.11/dist-packages (from transformers) (0.5.3)\n", 54 | "Requirement already satisfied: tqdm>=4.27 in /usr/local/lib/python3.11/dist-packages (from transformers) (4.67.1)\n", 55 | "Requirement already satisfied: typing-extensions>=4.10.0 in /usr/local/lib/python3.11/dist-packages (from torch) (4.13.1)\n", 56 | "Requirement already satisfied: networkx in /usr/local/lib/python3.11/dist-packages (from torch) (3.4.2)\n", 57 | "Requirement already satisfied: jinja2 in /usr/local/lib/python3.11/dist-packages (from torch) (3.1.6)\n", 58 | "Requirement already satisfied: fsspec in /usr/local/lib/python3.11/dist-packages (from torch) (2025.3.2)\n", 59 | "Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch)\n", 60 | " Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 61 | "Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch)\n", 62 | " Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 63 | "Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch)\n", 64 | " Downloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 65 | "Collecting nvidia-cudnn-cu12==9.1.0.70 (from torch)\n", 66 | " Downloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 67 | "Collecting nvidia-cublas-cu12==12.4.5.8 (from torch)\n", 68 | " Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 69 | "Collecting nvidia-cufft-cu12==11.2.1.3 (from torch)\n", 70 | " Downloading 
nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 71 | "Collecting nvidia-curand-cu12==10.3.5.147 (from torch)\n", 72 | " Downloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 73 | "Collecting nvidia-cusolver-cu12==11.6.1.9 (from torch)\n", 74 | " Downloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 75 | "Collecting nvidia-cusparse-cu12==12.3.1.170 (from torch)\n", 76 | " Downloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl.metadata (1.6 kB)\n", 77 | "Requirement already satisfied: nvidia-cusparselt-cu12==0.6.2 in /usr/local/lib/python3.11/dist-packages (from torch) (0.6.2)\n", 78 | "Requirement already satisfied: nvidia-nccl-cu12==2.21.5 in /usr/local/lib/python3.11/dist-packages (from torch) (2.21.5)\n", 79 | "Requirement already satisfied: nvidia-nvtx-cu12==12.4.127 in /usr/local/lib/python3.11/dist-packages (from torch) (12.4.127)\n", 80 | "Collecting nvidia-nvjitlink-cu12==12.4.127 (from torch)\n", 81 | " Downloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)\n", 82 | "Requirement already satisfied: triton==3.2.0 in /usr/local/lib/python3.11/dist-packages (from torch) (3.2.0)\n", 83 | "Requirement already satisfied: sympy==1.13.1 in /usr/local/lib/python3.11/dist-packages (from torch) (1.13.1)\n", 84 | "Requirement already satisfied: mpmath<1.4,>=1.1.0 in /usr/local/lib/python3.11/dist-packages (from sympy==1.13.1->torch) (1.3.0)\n", 85 | "Requirement already satisfied: MarkupSafe>=2.0 in /usr/local/lib/python3.11/dist-packages (from jinja2->torch) (3.0.2)\n", 86 | "Requirement already satisfied: charset-normalizer<4,>=2 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.4.1)\n", 87 | "Requirement already satisfied: idna<4,>=2.5 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (3.10)\n", 88 | "Requirement already satisfied: urllib3<3,>=1.21.1 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2.3.0)\n", 89 | "Requirement already satisfied: certifi>=2017.4.17 in /usr/local/lib/python3.11/dist-packages (from requests->transformers) (2025.1.31)\n", 90 | "Downloading nvidia_cublas_cu12-12.4.5.8-py3-none-manylinux2014_x86_64.whl (363.4 MB)\n", 91 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m363.4/363.4 MB\u001b[0m \u001b[31m2.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 92 | "\u001b[?25hDownloading nvidia_cuda_cupti_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (13.8 MB)\n", 93 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m13.8/13.8 MB\u001b[0m \u001b[31m44.6 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 94 | "\u001b[?25hDownloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (24.6 MB)\n", 95 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m24.6/24.6 MB\u001b[0m \u001b[31m49.4 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 96 | "\u001b[?25hDownloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (883 kB)\n", 97 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m883.7/883.7 kB\u001b[0m \u001b[31m36.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 98 | "\u001b[?25hDownloading nvidia_cudnn_cu12-9.1.0.70-py3-none-manylinux2014_x86_64.whl (664.8 MB)\n", 99 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m 
\u001b[32m664.8/664.8 MB\u001b[0m \u001b[31m2.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 100 | "\u001b[?25hDownloading nvidia_cufft_cu12-11.2.1.3-py3-none-manylinux2014_x86_64.whl (211.5 MB)\n", 101 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m211.5/211.5 MB\u001b[0m \u001b[31m5.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 102 | "\u001b[?25hDownloading nvidia_curand_cu12-10.3.5.147-py3-none-manylinux2014_x86_64.whl (56.3 MB)\n", 103 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m56.3/56.3 MB\u001b[0m \u001b[31m9.8 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 104 | "\u001b[?25hDownloading nvidia_cusolver_cu12-11.6.1.9-py3-none-manylinux2014_x86_64.whl (127.9 MB)\n", 105 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m127.9/127.9 MB\u001b[0m \u001b[31m6.1 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 106 | "\u001b[?25hDownloading nvidia_cusparse_cu12-12.3.1.170-py3-none-manylinux2014_x86_64.whl (207.5 MB)\n", 107 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m207.5/207.5 MB\u001b[0m \u001b[31m4.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 108 | "\u001b[?25hDownloading nvidia_nvjitlink_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl (21.1 MB)\n", 109 | "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m21.1/21.1 MB\u001b[0m \u001b[31m77.9 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", 110 | "\u001b[?25hInstalling collected packages: nvidia-nvjitlink-cu12, nvidia-curand-cu12, nvidia-cufft-cu12, nvidia-cuda-runtime-cu12, nvidia-cuda-nvrtc-cu12, nvidia-cuda-cupti-cu12, nvidia-cublas-cu12, nvidia-cusparse-cu12, nvidia-cudnn-cu12, nvidia-cusolver-cu12\n", 111 | " Attempting uninstall: nvidia-nvjitlink-cu12\n", 112 | " Found existing installation: nvidia-nvjitlink-cu12 12.5.82\n", 113 | " Uninstalling nvidia-nvjitlink-cu12-12.5.82:\n", 114 | " Successfully uninstalled nvidia-nvjitlink-cu12-12.5.82\n", 115 | " Attempting uninstall: nvidia-curand-cu12\n", 116 | " Found existing installation: nvidia-curand-cu12 10.3.6.82\n", 117 | " Uninstalling nvidia-curand-cu12-10.3.6.82:\n", 118 | " Successfully uninstalled nvidia-curand-cu12-10.3.6.82\n", 119 | " Attempting uninstall: nvidia-cufft-cu12\n", 120 | " Found existing installation: nvidia-cufft-cu12 11.2.3.61\n", 121 | " Uninstalling nvidia-cufft-cu12-11.2.3.61:\n", 122 | " Successfully uninstalled nvidia-cufft-cu12-11.2.3.61\n", 123 | " Attempting uninstall: nvidia-cuda-runtime-cu12\n", 124 | " Found existing installation: nvidia-cuda-runtime-cu12 12.5.82\n", 125 | " Uninstalling nvidia-cuda-runtime-cu12-12.5.82:\n", 126 | " Successfully uninstalled nvidia-cuda-runtime-cu12-12.5.82\n", 127 | " Attempting uninstall: nvidia-cuda-nvrtc-cu12\n", 128 | " Found existing installation: nvidia-cuda-nvrtc-cu12 12.5.82\n", 129 | " Uninstalling nvidia-cuda-nvrtc-cu12-12.5.82:\n", 130 | " Successfully uninstalled nvidia-cuda-nvrtc-cu12-12.5.82\n", 131 | " Attempting uninstall: nvidia-cuda-cupti-cu12\n", 132 | " Found existing installation: nvidia-cuda-cupti-cu12 12.5.82\n", 133 | " Uninstalling nvidia-cuda-cupti-cu12-12.5.82:\n", 134 | " Successfully uninstalled nvidia-cuda-cupti-cu12-12.5.82\n", 135 | " Attempting uninstall: nvidia-cublas-cu12\n", 136 | " Found existing installation: nvidia-cublas-cu12 12.5.3.2\n", 137 | " Uninstalling nvidia-cublas-cu12-12.5.3.2:\n", 138 | " Successfully uninstalled nvidia-cublas-cu12-12.5.3.2\n", 139 | " Attempting uninstall: 
nvidia-cusparse-cu12\n", 140 | " Found existing installation: nvidia-cusparse-cu12 12.5.1.3\n", 141 | " Uninstalling nvidia-cusparse-cu12-12.5.1.3:\n", 142 | " Successfully uninstalled nvidia-cusparse-cu12-12.5.1.3\n", 143 | " Attempting uninstall: nvidia-cudnn-cu12\n", 144 | " Found existing installation: nvidia-cudnn-cu12 9.3.0.75\n", 145 | " Uninstalling nvidia-cudnn-cu12-9.3.0.75:\n", 146 | " Successfully uninstalled nvidia-cudnn-cu12-9.3.0.75\n", 147 | " Attempting uninstall: nvidia-cusolver-cu12\n", 148 | " Found existing installation: nvidia-cusolver-cu12 11.6.3.83\n", 149 | " Uninstalling nvidia-cusolver-cu12-11.6.3.83:\n", 150 | " Successfully uninstalled nvidia-cusolver-cu12-11.6.3.83\n", 151 | "Successfully installed nvidia-cublas-cu12-12.4.5.8 nvidia-cuda-cupti-cu12-12.4.127 nvidia-cuda-nvrtc-cu12-12.4.127 nvidia-cuda-runtime-cu12-12.4.127 nvidia-cudnn-cu12-9.1.0.70 nvidia-cufft-cu12-11.2.1.3 nvidia-curand-cu12-10.3.5.147 nvidia-cusolver-cu12-11.6.1.9 nvidia-cusparse-cu12-12.3.1.170 nvidia-nvjitlink-cu12-12.4.127\n" 152 | ] 153 | } 154 | ], 155 | "source": [ 156 | "! pip install transformers torch" 157 | ] 158 | }, 159 | { 160 | "cell_type": "markdown", 161 | "source": [ 162 | "#### Now, you can import the necessary libraries for using the Deepseek model:" 163 | ], 164 | "metadata": { 165 | "id": "n4nZe6DuZTp7" 166 | } 167 | }, 168 | { 169 | "cell_type": "code", 170 | "source": [ 171 | "import torch\n", 172 | "\n", 173 | "from torch.utils.data import Dataset\n", 174 | "\n", 175 | "from transformers import AutoTokenizer, AutoModelForSequenceClassification, Trainer, TrainingArguments" 176 | ], 177 | "metadata": { 178 | "id": "_0zM-MZCZcLb" 179 | }, 180 | "execution_count": null, 181 | "outputs": [] 182 | }, 183 | { 184 | "cell_type": "markdown", 185 | "source": [ 186 | "Loading pre-trained Deepseek model and tokenizer: We load the pre-trained Deepseek tokenizer and model to use for sentiment classification:\n", 187 | "\n", 188 | "AutoTokenizer.from_pretrained: Loads the pre-trained Deepseek tokenizer\n", 189 | "\n", 190 | "AutoModelForSequenceClassification.from_pretrained: Loads the pre-trained Deepseek model for sequence classification tasks" 191 | ], 192 | "metadata": { 193 | "id": "UBhl29vgZctB" 194 | } 195 | }, 196 | { 197 | "cell_type": "code", 198 | "source": [ 199 | "tokenizer = AutoTokenizer.from_pretrained(\"deepseek-ai/deepseek-llm-7b-base\", trust_remote_code=True)\n", 200 | "\n", 201 | "model = AutoModelForSequenceClassification.from_pretrained(\n", 202 | "\n", 203 | " \"deepseek-ai/deepseek-llm-7b-base\",\n", 204 | "\n", 205 | " num_labels=2, # Binary classification\n", 206 | "\n", 207 | " trust_remote_code=True\n", 208 | "\n", 209 | ")\n", 210 | "\n", 211 | "\n", 212 | "\n", 213 | "# Set padding token if not already set\n", 214 | "\n", 215 | "if tokenizer.pad_token is None:\n", 216 | "\n", 217 | " tokenizer.pad_token = tokenizer.eos_token\n", 218 | "\n", 219 | " model.config.pad_token_id = tokenizer.pad_token_id" 220 | ], 221 | "metadata": { 222 | "id": "P_PhhI39UDDD" 223 | }, 224 | "execution_count": null, 225 | "outputs": [] 226 | }, 227 | { 228 | "cell_type": "markdown", 229 | "source": [ 230 | "Example text and data tokenization: Here we define two example text samples and their corresponding sentiment labels, and tokenize the input text to convert it into a format suitable for Deepseek.\n", 231 | "\n", 232 | "tokenizer: Converts the text into token IDs that Deepseek can process\n", 233 | "\n", 234 | "padding=True: Ensures the text is padded to the same 
length\n", 235 | "\n", 236 | "truncation=True: Truncates longer texts to fit Deepseek's maximum input length\n", 237 | "\n", 238 | "return_tensors=\"pt\": Returns PyTorch tensors for compatibility with the model" 239 | ], 240 | "metadata": { 241 | "id": "rVbopILeZjA-" 242 | } 243 | }, 244 | { 245 | "cell_type": "code", 246 | "source": [ 247 | "texts = [\"I love machine learning.\", \"This tutorial is confusing and poorly explained.\"]\n", 248 | "\n", 249 | "labels = [1, 0] # Labels for sentiment (1 = positive, 0 = negative)\n", 250 | "\n", 251 | "inputs = tokenizer(texts, padding=True, truncation=True, return_tensors=\"pt\")\n", 252 | "\n", 253 | "\n", 254 | "\n", 255 | "# Custom Dataset class\n", 256 | "\n", 257 | "class CustomDataset(Dataset):\n", 258 | "\n", 259 | " def __init__(self, inputs, labels):\n", 260 | "\n", 261 | " self.input_ids = inputs['input_ids']\n", 262 | "\n", 263 | " self.attention_mask = inputs['attention_mask']\n", 264 | "\n", 265 | " self.labels = torch.tensor(labels)\n", 266 | "\n", 267 | "\n", 268 | "\n", 269 | " def __len__(self):\n", 270 | "\n", 271 | " return len(self.labels)\n", 272 | "\n", 273 | "\n", 274 | "\n", 275 | " def __getitem__(self, idx):\n", 276 | "\n", 277 | " return {\n", 278 | "\n", 279 | " 'input_ids': self.input_ids[idx],\n", 280 | "\n", 281 | " 'attention_mask': self.attention_mask[idx],\n", 282 | "\n", 283 | " 'labels': self.labels[idx]\n", 284 | "\n", 285 | " }" 286 | ], 287 | "metadata": { 288 | "id": "Ka20E4mNUZgw" 289 | }, 290 | "execution_count": null, 291 | "outputs": [] 292 | }, 293 | { 294 | "cell_type": "markdown", 295 | "source": [ 296 | "#### Prepare the dataset for training: We wrap the tokenized inputs and the labels in the CustomDataset class defined above, so that each training example exposes input_ids, attention_mask, and labels:" 297 | ], 298 | "metadata": { 299 | "id": "-fy7UPGcZoeb" 300 | } 301 | }, 302 | { 303 | "cell_type": "code", 304 | "source": [ 305 | "train_data = CustomDataset(inputs, labels) # use the CustomDataset defined above; the Trainer expects dict-style items, not a bare TensorDataset" 306 | ], 307 | "metadata": { 308 | "id": "9QgeUFX3aDF2" 309 | }, 310 | "execution_count": null, 311 | "outputs": [] 312 | }, 313 | { 314 | "cell_type": "markdown", 315 | "source": [ 316 | "Next, define training arguments. 
We specify the settings for the training process, such as the number of epochs, batch size, and logging:" 317 | ], 318 | "metadata": { 319 | "id": "CD8dnJvVaGhZ" 320 | } 321 | }, 322 | { 323 | "cell_type": "code", 324 | "source": [ 325 | "training_args = TrainingArguments(\n", 326 | "\n", 327 | " output_dir=\"./results\",\n", 328 | "\n", 329 | " num_train_epochs=3,\n", 330 | "\n", 331 | " per_device_train_batch_size=1, # Reduced batch size due to model size\n", 332 | "\n", 333 | " logging_dir=\"./logs\",\n", 334 | "\n", 335 | " logging_steps=10,\n", 336 | "\n", 337 | " save_steps=50,\n", 338 | "\n", 339 | " learning_rate=1e-5,\n", 340 | "\n", 341 | " gradient_accumulation_steps=16, # Add gradient accumulation for large model\n", 342 | "\n", 343 | " fp16=True, # Enable mixed precision training\n", 344 | "\n", 345 | " gradient_checkpointing=True # Enable gradient checkpointing to save memory\n", 346 | "\n", 347 | ")" 348 | ], 349 | "metadata": { 350 | "id": "n_EbQCdlUeis" 351 | }, 352 | "execution_count": null, 353 | "outputs": [] 354 | }, 355 | { 356 | "cell_type": "markdown", 357 | "source": [ 358 | "#### Next, we fine-tune the Deepseek model with the prepared training data using the Trainer API:" 359 | ], 360 | "metadata": { 361 | "id": "o_d9gGAWZuKK" 362 | } 363 | }, 364 | { 365 | "cell_type": "code", 366 | "source": [ 367 | "trainer = Trainer(\n", 368 | "\n", 369 | " model=model,\n", 370 | "\n", 371 | " args=training_args,\n", 372 | "\n", 373 | " train_dataset=train_data\n", 374 | "\n", 375 | ")\n", 376 | "\n", 377 | "trainer.train()" 378 | ], 379 | "metadata": { 380 | "id": "uPoA9dA7Uodi" 381 | }, 382 | "execution_count": null, 383 | "outputs": [] 384 | }, 385 | { 386 | "cell_type": "code", 387 | "source": [], 388 | "metadata": { 389 | "id": "ogn_IbxpUr7M" 390 | }, 391 | "execution_count": null, 392 | "outputs": [] 393 | } 394 | ] 395 | } --------------------------------------------------------------------------------
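The fine-tuning notebooks above stop at `trainer.train()` and leave a final empty cell. As a rough illustration of the next step, here is a minimal inference sketch showing how the fine-tuned classifier might be called on new text. It assumes the `model` and `tokenizer` objects from the notebook are still in memory; the example sentence and the label mapping (1 = positive, 0 = negative, taken from the toy training data) are illustrative assumptions, not part of the original notebooks.

```python
import torch

# Put the model in evaluation mode before scoring new text
model.eval()

# Hypothetical new input; replace with your own text
new_texts = ["The documentation was clear and helpful."]
encoded = tokenizer(new_texts, padding=True, truncation=True, return_tensors="pt")

# Forward pass without gradient tracking
with torch.no_grad():
    logits = model(**encoded).logits

# argmax over the two classes; 1 = positive, 0 = negative in this toy setup
predictions = logits.argmax(dim=-1).tolist()
print(predictions)
```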