├── .gitignore
├── Chapter2
│   ├── clip_score.py
│   ├── moderator.py
│   ├── stable_diff.py
│   └── util.py
├── Chapter3
│   ├── data.csv
│   └── original_transformer.py
├── Chapter4
│   ├── DOCKERFILE
│   ├── app.py
│   ├── benchmark.py
│   ├── ci-cd.yml
│   ├── data
│   │   ├── gpt3_descriptions.csv
│   │   ├── gptneo_descriptions.csv
│   │   ├── img
│   │   │   ├── image_1.jpg
│   │   │   ├── image_10.jpg
│   │   │   ├── image_11.jpg
│   │   │   ├── image_12.jpg
│   │   │   ├── image_13.jpg
│   │   │   ├── image_14.jpg
│   │   │   ├── image_15.jpg
│   │   │   ├── image_16.jpg
│   │   │   ├── image_17.jpg
│   │   │   ├── image_18.jpg
│   │   │   ├── image_19.jpeg
│   │   │   ├── image_2.jpg
│   │   │   ├── image_3.jpg
│   │   │   ├── image_4.jpg
│   │   │   ├── image_5.jpg
│   │   │   ├── image_6.jpg
│   │   │   ├── image_7.jpg
│   │   │   ├── image_8.jpg
│   │   │   └── image_9.jpg
│   │   ├── product_data.csv
│   │   ├── reference_data.csv
│   │   └── test_data.csv
│   ├── dev_requirements.txt
│   ├── eval_clip.py
│   ├── eval_rouge.py
│   ├── eval_semantic.py
│   ├── fastapi.py
│   ├── minimal_gpt_neo.py
│   ├── project_init.bat
│   ├── project_init.sh
│   └── requirements.txt
├── Chapter5
│   ├── qa_demo.json
│   └── task_specific.py
├── Chapter6
│   ├── domain_adapt.py
│   ├── score.py
│   ├── test.txt
│   └── train.txt
├── Chapter7
│   ├── eval_rag.py
│   ├── products
│   │   └── products.csv
│   └── rag.py
├── Chapter8
│   ├── constrained_rag.py
│   └── products
│       └── products.csv
├── LICENSE
├── README.md
└── utils
    └── img_prepro.py

/.gitignore:
--------------------------------------------------------------------------------
1 | __pycache__/
2 |
3 | .DS_Store
4 |
5 | base/
6 |
7 | # do not upload the following files
8 | stylesprint_qa_model/
9 | results/
10 | model_output/
11 | logs/
12 | img/
--------------------------------------------------------------------------------
/Chapter2/clip_score.py:
--------------------------------------------------------------------------------
1 | from typing import List, Tuple
2 | from PIL import Image
3 | from transformers import CLIPProcessor, CLIPModel
4 | import torch
5 | from util import render_images, render_image, load_pil_images, DEMO_PROMPTS
6 |
7 |
8 | def load_model_and_processor(model_name: str) -> Tuple[CLIPModel, CLIPProcessor]:
9 |     """
10 |     Loads the CLIP model and processor.
11 |     """
12 |     model = CLIPModel.from_pretrained(model_name)
13 |     processor = CLIPProcessor.from_pretrained(model_name)
14 |     return model, processor
15 |
16 |
17 | def process_inputs(
18 |     processor: CLIPProcessor, prompts: List[str], images: List[Image.Image]
19 | ) -> dict:
20 |     """
21 |     Processes the inputs using the CLIP processor.
22 |     """
23 |     return processor(text=prompts, images=images, return_tensors="pt", padding=True)
24 |
25 |
26 | def get_probabilities(model: CLIPModel, inputs: dict) -> torch.Tensor:
27 |     """
28 |     Computes the probabilities using the CLIP model.
29 |     """
30 |     outputs = model(**inputs)
31 |     logits = outputs.logits_per_image
32 |
33 |     # Define temperature - higher temperature will make the distribution more uniform.
34 |     T = 10
35 |
36 |     # Apply temperature to the logits
37 |     temp_adjusted_logits = logits / T
38 |
39 |     probs = torch.nn.functional.softmax(temp_adjusted_logits, dim=1)
40 |
41 |     return probs
42 |
43 |
44 | def display_images_with_scores(
45 |     images: List[Image.Image], scores: torch.Tensor, notebook: bool = False, names: list = None
46 | ) -> None:
47 |     """
48 |     Displays the images alongside their scores.
49 | """ 50 | # Set print options for readability 51 | torch.set_printoptions(precision=2, sci_mode=False) 52 | 53 | # Display the images and scores 54 | for i, image in enumerate(images): 55 | name = "Image" if not names else names[i] 56 | print(f"{name} {i + 1}:") 57 | if notebook: 58 | render_image(image) 59 | print(f"Scores: {scores[i, :].detach().numpy()}") 60 | print() 61 | 62 | 63 | if __name__ == "__main__": 64 | # Instructions: 65 | # Run the script using the command: python clip_score.py 66 | # Images are loaded from the img directory. 67 | # The prompts are predefined in the DEMO_PROMPTS list in util.py. 68 | 69 | # Define prompts 70 | prompts = DEMO_PROMPTS 71 | 72 | # Load images 73 | images = load_pil_images("./img/", prefix="pil_image") 74 | 75 | # Load CLIP model 76 | model, processor = load_model_and_processor("openai/clip-vit-base-patch32") 77 | 78 | # Process image and text inputs together 79 | inputs = process_inputs(processor, prompts, images) 80 | 81 | # Extract the probabilities 82 | probs = get_probabilities(model, inputs) 83 | 84 | # Display each image with corresponding scores 85 | display_images_with_scores(images, probs) 86 | -------------------------------------------------------------------------------- /Chapter2/moderator.py: -------------------------------------------------------------------------------- 1 | from clip_score import * 2 | 3 | 4 | if __name__ == "__main__": 5 | # Instructions: 6 | # Run the script using the command: python moderator.py 7 | 8 | # Define acceptable images 9 | acceptable_uploads = [ 10 | "a detailed photo of a car", 11 | "an image of automotive parts", 12 | "cars racing on a track" 13 | ] 14 | 15 | # Load images 16 | images, paths = load_pil_images("./img/", prefix="car_example_image_car", format="jpeg", return_paths=True) 17 | # Load CLIP model 18 | model, processor = load_model_and_processor("openai/clip-vit-large-patch14") 19 | 20 | # Process image and text inputs together 21 | inputs = process_inputs(processor, acceptable_uploads, images) 22 | 23 | # Extract the probabilities 24 | probs = get_probabilities(model, inputs) 25 | 26 | # Display each image with corresponding scores 27 | display_images_with_scores(images, probs, names=paths) 28 | -------------------------------------------------------------------------------- /Chapter2/stable_diff.py: -------------------------------------------------------------------------------- 1 | # !pip install pytorch-fid torch diffusers clip transformers accelerate matplotlib 2 | 3 | from typing import List 4 | import torch 5 | from diffusers import StableDiffusionPipeline 6 | from util import render_images, save_pil_images, DEMO_PROMPTS 7 | import PIL 8 | 9 | 10 | def load_model(model_id: str, device="cpu") -> StableDiffusionPipeline: 11 | """Load model with provided model_id.""" 12 | return StableDiffusionPipeline.from_pretrained( 13 | model_id, torch_dtype=torch.float16, revision="fp16", use_auth_token=False 14 | ).to(device) 15 | 16 | 17 | def generate_images( 18 | pipe: StableDiffusionPipeline, prompts: List[str], device="cuda" 19 | ) -> List[PIL.Image.Image]: 20 | """Generate images based on provided prompts.""" 21 | with torch.autocast(device): 22 | images = pipe(prompts).images 23 | return images 24 | 25 | 26 | if __name__ == "__main__": 27 | # Instructions: 28 | # Replace the model_id with your choice. 29 | # Add the desired prompts to the prompts list. 
30 |     # Run the script using the command: python stable_diff.py
31 |
32 |     model_id = "CompVis/stable-diffusion-v1-4"
33 |     prompts = DEMO_PROMPTS
34 |
35 |     device = "mps"  # "cuda", "cpu", "mps" is for M1 Macs
36 |     pipe = load_model(model_id, device=device)  # use the device selected above
37 |     images = generate_images(
38 |         pipe, prompts, device="cpu"
39 |     )  # autocast does not support mps
40 |
41 |     # Save the images
42 |     save_pil_images(images, "./img", prefix="pil_image")
43 |
44 |     # Render the images
45 |     render_images(images)
46 |
--------------------------------------------------------------------------------
/Chapter2/util.py:
--------------------------------------------------------------------------------
1 | import matplotlib.pyplot as plt
2 | from typing import List
3 | import fnmatch
4 | from PIL import Image
5 | import os
6 |
7 | DEMO_PROMPTS = [
8 |     "a hyper-realistic photo of a modern sneaker",
9 |     "a stylized t-shirt with a sports-inspired design",
10 | ]
11 |
12 | def render_image(image: Image.Image):
13 |     """Plot the generated image."""
14 |     plt.figure(figsize=(5, 5))
15 |     plt.imshow(image)
16 |     plt.axis("off")
17 |     plt.show()
18 |
19 | def render_images(images: List[Image.Image]):
20 |     """Plot the generated images."""
21 |     plt.figure(figsize=(10, 5))
22 |     for i, img in enumerate(images):
23 |         plt.subplot(1, len(images), i + 1)  # one column per image, not hardcoded to two
24 |         plt.imshow(img)
25 |         plt.axis("off")
26 |     plt.show()
27 |
28 |
29 | def save_pil_images(images, directory, prefix="image", format="png"):
30 |     """
31 |     Saves a list of PIL images to the specified directory.
32 |
33 |     Parameters:
34 |     - images: a list of PIL Image objects.
35 |     - directory: path to the directory where images will be saved.
36 |     - prefix: prefix for the saved image filenames.
37 |     - format: format of the saved images ('png', 'jpg', etc.).
38 |     """
39 |
40 |     # Ensure the directory exists
41 |     if not os.path.exists(directory):
42 |         os.makedirs(directory)
43 |
44 |     # Iterate through the list of images and save each one
45 |     for i, img in enumerate(images):
46 |         img.save(os.path.join(directory, f"{prefix}_{i}.{format}"))
47 |
48 |
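# Usage sketch for the save/load helpers (hypothetical demo prefix and directory):
#   imgs = [Image.new("RGB", (64, 64), "white")]
#   save_pil_images(imgs, "./img", prefix="demo")
#   demo_imgs = load_pil_images("./img", prefix="demo")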
60 | """ 61 | 62 | images = [] 63 | img_paths = [] 64 | # Construct the full path pattern to filter images by prefix and format 65 | file_pattern = f"{prefix}_*.{format}" 66 | 67 | # Iterate through each file in the directory 68 | for filename in os.listdir(directory): 69 | if fnmatch.fnmatch(filename, file_pattern): 70 | img_path = os.path.join(directory, filename) 71 | img_paths.append(img_path) 72 | try: 73 | with Image.open(img_path) as img: 74 | images.append(img.copy()) # Copy image to avoid closing 75 | except IOError: 76 | print(f"Error loading image: {img_path}") 77 | 78 | if return_paths: 79 | return images, img_paths 80 | 81 | return images 82 | -------------------------------------------------------------------------------- /Chapter3/original_transformer.py: -------------------------------------------------------------------------------- 1 | # !pip install transformers tokenizers 2 | 3 | import math 4 | import torch 5 | import torch.nn as nn 6 | import torch.optim as optim 7 | from torch.nn import Transformer 8 | from torch.utils.data import Dataset, DataLoader 9 | from tokenizers import Tokenizer 10 | from tokenizers.models import WordPiece 11 | from tokenizers.trainers import WordPieceTrainer 12 | from tokenizers.pre_tokenizers import Whitespace 13 | from torch.utils.data import Dataset, DataLoader 14 | from torch.nn.utils.rnn import pad_sequence 15 | import pandas as pd 16 | import numpy as np 17 | 18 | 19 | # Train the tokenizer to learn the vocabulary 20 | def train_tokenizer(texts): 21 | # Instantiate the tokenizer 22 | tokenizer = Tokenizer(WordPiece(unk_token="[UNK]")) 23 | # set the pre-tokenizer to whitespace 24 | tokenizer.pre_tokenizer = Whitespace() 25 | # train the tokenizer including special tokens 26 | trainer = WordPieceTrainer( 27 | vocab_size=5000, 28 | special_tokens=["[PAD]", "[UNK]", "[CLS]", "[SEP]", "[MASK]", "", ""], 29 | ) 30 | tokenizer.train_from_iterator(texts, trainer) 31 | 32 | return tokenizer 33 | 34 | 35 | # Tensorize the data to prepare for training 36 | def tensorize_data(text_data, tokenizer): 37 | # token index the data (i.e., numericalize) 38 | numericalized_data = [ 39 | torch.tensor(tokenizer.encode(text).ids) for text in text_data 40 | ] 41 | # pad the sequences so they are all the same length (default is 0) 42 | padded_data = pad_sequence(numericalized_data, batch_first=True) 43 | 44 | # return shape (batch_size, max_len) 45 | return padded_data 46 | 47 | 48 | # Create the dataset domain model 49 | class TextDataset(Dataset): 50 | def __init__(self, src_data, tgt_data): 51 | self.src_data = src_data 52 | self.tgt_data = tgt_data 53 | 54 | def __len__(self): 55 | return len(self.src_data) 56 | 57 | def __getitem__(self, idx): 58 | return self.src_data[idx], self.tgt_data[idx] 59 | 60 | 61 | # Embeddings 62 | class Embeddings(nn.Module): 63 | def __init__(self, d_model, vocab_size): 64 | super(Embeddings, self).__init__() 65 | self.embed = nn.Embedding(vocab_size, d_model) 66 | 67 | def forward(self, x): 68 | return self.embed(x) 69 | 70 | 71 | # Positional Encoding 72 | class PositionalEncoding(nn.Module): 73 | def __init__(self, d_model, dropout=0.1, max_len=None): 74 | super(PositionalEncoding, self).__init__() 75 | self.dropout = nn.Dropout(p=dropout) 76 | 77 | # Compute the positional encodings 78 | pe = torch.zeros(max_len, d_model) 79 | position = torch.arange(0.0, max_len).unsqueeze(1) 80 | div_term = torch.exp( 81 | torch.arange(0.0, d_model, 2) * -(math.log(10000.0) / d_model) 82 | ) 83 | pe[:, 0::2] = torch.sin(position * 
84 |         pe[:, 1::2] = torch.cos(position * div_term)
85 |         pe = pe.unsqueeze(0)
86 |         self.register_buffer("pe", pe)
87 |
88 |     def forward(self, x):
89 |         x = x + self.pe[:, : x.size(1)]
90 |         return self.dropout(x)
91 |
92 |
93 | # Multi-Head Attention
94 | class MultiHeadSelfAttention(nn.Module):
95 |     def __init__(self, d_model, nhead):
96 |         super(MultiHeadSelfAttention, self).__init__()
97 |         # Instantiate the linear transformation layers for Q, K, and V
98 |         self.attention = nn.MultiheadAttention(d_model, nhead)
99 |
100 |     def forward(self, x):
101 |         # Return both the attention output and the attention weights
102 |         return self.attention(x, x, x)
103 |
104 |
105 | # FFN
106 | class FeedForward(nn.Module):
107 |     def __init__(self, d_model, d_ff):
108 |         super(FeedForward, self).__init__()
109 |         # Instantiate FFN layers and dropout
110 |         self.linear1 = nn.Linear(d_model, d_ff)
111 |         self.dropout = nn.Dropout(0.1)
112 |         self.linear2 = nn.Linear(d_ff, d_model)
113 |
114 |     def forward(self, x):
115 |         # Apply linear transformation and ReLU non-linearity with dropout
116 |         return self.linear2(self.dropout(torch.relu(self.linear1(x))))
117 |
118 |
119 | # Encoder Stack
120 | class EncoderLayer(nn.Module):
121 |     def __init__(self, d_model, nhead, d_ff):
122 |         super(EncoderLayer, self).__init__()
123 |         # Instantiate the Multi-Head Attention and FFN layers
124 |         self.self_attn = MultiHeadSelfAttention(d_model, nhead)
125 |         self.feed_forward = FeedForward(d_model, d_ff)
126 |         # Instantiate layer normalization and dropout
127 |         self.norm1 = nn.LayerNorm(d_model)
128 |         self.norm2 = nn.LayerNorm(d_model)
129 |         self.dropout = nn.Dropout(0.1)
130 |
131 |     def forward(self, x):
132 |         # transpose x to match the shape expected by the self-attention layer
133 |         x = x.transpose(0, 1)
134 |         # Apply the self-attention layer
135 |         attn_output, _ = self.self_attn(x)
136 |         # Apply dropout and layer normalization
137 |         x = x + self.dropout(attn_output)
138 |         x = self.norm1(x)
139 |         # Apply the FFN layer
140 |         ff_output = self.feed_forward(x)
141 |         # Apply dropout and layer normalization
142 |         x = x + self.dropout(ff_output)
143 |         # Transpose x back to its original shape
144 |         return self.norm2(x).transpose(0, 1)
145 |
146 |
147 | class Encoder(nn.Module):
148 |     def __init__(self, d_model, nhead, d_ff, num_layers, vocab_size, max_len):
149 |         super(Encoder, self).__init__()
150 |         # Instantiate the Embeddings and Positional Encoding layers
151 |         self.embedding = Embeddings(d_model, vocab_size)
152 |         self.pos_encoding = PositionalEncoding(d_model, max_len=max_len)
153 |         self.encoder_layers = nn.ModuleList(
154 |             [EncoderLayer(d_model, nhead, d_ff) for _ in range(num_layers)]
155 |         )
156 |         # Define the model hyperparameters
157 |         self.d_model = d_model  # Embedding dimension
158 |         self.nhead = nhead  # Number of attention heads
159 |         # Define the FFN hyperparameters and Instantiate the FFN layer
160 |         self.feed_forward = FeedForward(d_model, d_ff)
161 |
162 |     def forward(self, x):
163 |         # Apply the Embeddings and Positional Encoding layers
164 |         x = self.embedding(x)
165 |         x = self.pos_encoding(x)
166 |         for layer in self.encoder_layers:
167 |             x = layer(x)
168 |         return x
169 |
170 |
171 | # Decoder Stack
172 | class DecoderLayer(nn.Module):
173 |     def __init__(self, d_model, nhead, d_ff):
174 |         super(DecoderLayer, self).__init__()
175 |         # Instantiate the Multi-Head Attention and FFN layers
176 |         self.self_attn = MultiHeadSelfAttention(d_model, nhead)
177 |         self.cross_attn = nn.MultiheadAttention(d_model, nhead)
178 |         self.feed_forward = FeedForward(d_model, d_ff)
179 |         # Instantiate layer normalization and dropout
180 |         self.norm1 = nn.LayerNorm(d_model)
181 |         self.norm2 = nn.LayerNorm(d_model)
182 |         self.norm3 = nn.LayerNorm(d_model)
183 |         self.dropout = nn.Dropout(0.1)
184 |
185 |     def forward(self, x, memory):
186 |         # Transpose x and memory to match the shape expected by the self-attention layer
187 |         x = x.transpose(0, 1)
188 |         memory = memory.transpose(0, 1)
189 |         # Apply the self-attention layer
190 |         attn_output, _ = self.self_attn(x)
191 |         # Apply dropout and layer normalization
192 |         x = x + self.dropout(attn_output)
193 |         x = self.norm1(x)
194 |         attn_output, _ = self.cross_attn(x, memory, memory)
195 |         x = x + self.dropout(attn_output)
196 |         x = self.norm2(x)
197 |         # Apply the FFN layer
198 |         ff_output = self.feed_forward(x)
199 |         x = x + self.dropout(ff_output)
200 |         # Transpose x back to its original shape
201 |         return self.norm3(x).transpose(0, 1)
202 |
203 |
204 | class Decoder(nn.Module):
205 |     def __init__(self, d_model, nhead, d_ff, num_layers, vocab_size, max_len):
206 |         super(Decoder, self).__init__()
207 |         # Instantiate the Embeddings and Positional Encoding layers
208 |         self.embedding = Embeddings(d_model, vocab_size)
209 |         self.pos_encoding = PositionalEncoding(d_model, max_len=max_len)
210 |         self.decoder_layers = nn.ModuleList(
211 |             [DecoderLayer(d_model, nhead, d_ff) for _ in range(num_layers)]
212 |         )
213 |         # Instantiate the final linear projection to vocabulary logits
214 |         self.linear = nn.Linear(d_model, vocab_size)
215 |
216 |
217 |     def forward(self, x, memory):
218 |         # Apply the Embeddings and Positional Encoding layers
219 |         x = self.embedding(x)
220 |         x = self.pos_encoding(x)
221 |         for layer in self.decoder_layers:
222 |             x = layer(x, memory)
223 |         # Return raw logits; nn.CrossEntropyLoss in train() applies log-softmax internally
224 |         x = self.linear(x)
225 |         return x
226 |
227 |
228 | # Complete Transformer
229 | class Transformer(nn.Module):
230 |     def __init__(
231 |         self,
232 |         d_model,
233 |         nhead,
234 |         d_ff,
235 |         num_encoder_layers,
236 |         num_decoder_layers,
237 |         src_vocab_size,
238 |         tgt_vocab_size,
239 |         max_len,
240 |     ):
241 |         super(Transformer, self).__init__()
242 |         # Instantiate the Encoder and Decoder
243 |         self.encoder = Encoder(
244 |             d_model, nhead, d_ff, num_encoder_layers, src_vocab_size, max_len=max_len
245 |         )
246 |         self.decoder = Decoder(
247 |             d_model, nhead, d_ff, num_decoder_layers, tgt_vocab_size, max_len=max_len
248 |         )
249 |
250 |     def forward(self, src, tgt):
251 |         # Apply the Encoder and Decoder
252 |         memory = self.encoder(src)
253 |         output = self.decoder(tgt, memory)
254 |         return output
255 |
256 |
257 | def train(model, loss_fn, optimizer, NUM_EPOCHS=10):
258 |     # Iterate through epochs
259 |     for epoch in range(NUM_EPOCHS):
260 |         # Set model to training mode
261 |         model.train()
262 |         total_loss = 0
263 |         for (
264 |             batch
265 |         ) in (
266 |             batch_iterator
267 |         ):  # Assume batch_iterator yields batches of tokenized and numericalized text
268 |             src, tgt = batch
269 |             # Forward pass
270 |             optimizer.zero_grad()
271 |             # Call the model
272 |             output = model(src, tgt)
273 |             # Compute the loss
274 |             loss = loss_fn(output.view(-1, TGT_VOCAB_SIZE), tgt.view(-1))
275 |             # Backward pass
276 |             loss.backward()
277 |             # Update parameters
278 |             optimizer.step()
279 |             # Update total loss
280 |             total_loss += loss.item()
281 |
282 |         # Print the loss every epoch
283 |         print(f"Epoch {epoch}, Loss {total_loss / len(batch_iterator)}")
284 |
285 |
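# Note: DecoderLayer above applies unmasked self-attention, so during training the
# decoder can attend to future target tokens. A minimal sketch of the standard
# additive causal mask (not wired into the layers above):
def generate_causal_mask(size: int) -> torch.Tensor:
    # -inf above the diagonal blocks attention to future positions; pass the result
    # to nn.MultiheadAttention via its attn_mask argument
    return torch.triu(torch.full((size, size), float("-inf")), diagonal=1)

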
286 | def translate(model, src_text, src_tokenizer, tgt_tokenizer, max_target_length=50):
287 |     # Set model to evaluation mode
288 |     model.eval()
289 |
290 |     # Tokenize and numericalize the source text
291 |     src_tokens = src_tokenizer.encode(src_text).ids
292 |     src_tensor = torch.LongTensor(src_tokens).unsqueeze(0)  # Add batch dimension
293 |
294 |     # Define the SOS and EOS token indices for the target vocabulary
295 |     tgt_sos_idx = tgt_tokenizer.token_to_id("<sos>")
296 |     tgt_eos_idx = tgt_tokenizer.token_to_id("<eos>")
297 |
298 |     # Initialize the target tensor with the SOS token index
299 |     tgt_tensor = torch.LongTensor([tgt_sos_idx]).unsqueeze(0)  # Add batch dimension
300 |
301 |     # Loop until the maximum target length is reached or the EOS token is generated
302 |     for i in range(max_target_length):
303 |         # Call the model to generate the output
304 |         with torch.no_grad():  # Disable gradient calculation to save memory during inference
305 |             output = model(src_tensor, tgt_tensor)
306 |
307 |         # Retrieve the predicted token
308 |         predicted_token_idx = output.argmax(dim=2)[0, -1].item()
309 |         # Check if the predicted token is the EOS token
310 |         if predicted_token_idx == tgt_eos_idx:
311 |             break
312 |         # Concatenate the predicted token to the target tensor
313 |         tgt_tensor = torch.cat(
314 |             (tgt_tensor, torch.LongTensor([[predicted_token_idx]])), dim=1
315 |         )
316 |
317 |     # Convert the target tensor to a list of token indices, decode to tokens, and join to form the translated text
318 |     translated_token_ids = tgt_tensor[0, 1:].tolist()  # Exclude the SOS token
319 |     translated_text = tgt_tokenizer.decode(
320 |         translated_token_ids
321 |     )  # Convert token ids to text
322 |
323 |     return translated_text
324 |
325 |
326 | if __name__ == "__main__":
327 |     from dataclasses import dataclass
328 |
329 |     # Instructions:
330 |     # Run the script with the following command: python original_transformer.py
331 |     # Ensure the data.csv file is in the same directory as this script
332 |
333 |     # DEFINE HYPERPARAMETERS
334 |     @dataclass
335 |     class ConfigHyperparams:
336 |         # Number of layers in the encoder and decoder
337 |         NUM_ENCODER_LAYERS = 2
338 |         NUM_DECODER_LAYERS = 2
339 |
340 |         # Dropout rate
341 |         DROPOUT_RATE = 0.1
342 |
343 |         # Model dimensionality
344 |         EMBEDDING_DIM = 512
345 |
346 |         # Number of attention heads
347 |         NHEAD = 8
348 |
349 |         # Feed-forward network hidden dimensionality
350 |         FFN_HID_DIM = 2048
351 |
352 |         # Batch size
353 |         BATCH_SIZE = 31
354 |
355 |         # Learning rate
356 |         LEARNING_RATE = 0.001
357 |
358 |         # Maximum length of the sequence
359 |         MAX_LEN = 100
360 |
361 |         # Number of epochs
362 |         NUM_EPOCHS = 10
363 |
364 |         def set_vocab_sizes(self, src_vocab_size, tgt_vocab_size):
365 |             self.SRC_VOCAB_SIZE = src_vocab_size
366 |             self.TGT_VOCAB_SIZE = tgt_vocab_size
367 |
368 |     # Instantiate the hyperparameters
369 |     hp = ConfigHyperparams()
370 |
371 |     # Load demo data
372 |     data = pd.read_csv("data.csv")
373 |
374 |     # Arbitrarily cap at 100 characters for demonstration to avoid long training times
375 |     def demo_limit(vocab, limit=hp.MAX_LEN):
376 |         return [i[:limit] for i in vocab]
377 |
378 |     # Separate English and French lexicons
379 |     EN_TEXT = demo_limit(data.en.to_numpy().tolist())
380 |     FR_TEXT = demo_limit(data.fr.to_numpy().tolist())
381 |
382 |     # Instantiate the tokenizer
383 |     en_tokenizer = train_tokenizer(EN_TEXT)
384 |     fr_tokenizer = train_tokenizer(FR_TEXT)
385 |
386 |     # Establish the vocabulary size
387 |     SRC_VOCAB_SIZE = len(en_tokenizer.get_vocab())
388 |     TGT_VOCAB_SIZE = len(fr_tokenizer.get_vocab())
389 |
390 |     hp.set_vocab_sizes(SRC_VOCAB_SIZE, TGT_VOCAB_SIZE)
391 |
392 |     # Numericalize and tensorize the data
393 |     # Source tensor with dimensions (batch_size, max_len)
394 |     src_tensor = tensorize_data(EN_TEXT, en_tokenizer)
395 |     # Target tensor with dimensions (batch_size, max_len)
396 |     tgt_tensor = tensorize_data(FR_TEXT, fr_tokenizer)
397 |
398 |     # Instantiate the dataset
399 |     dataset = TextDataset(src_tensor, tgt_tensor)
400 |
401 |     # Instantiate the model
402 |     model = Transformer(
403 |         hp.EMBEDDING_DIM,
404 |         hp.NHEAD,
405 |         hp.FFN_HID_DIM,
406 |         hp.NUM_ENCODER_LAYERS,
407 |         hp.NUM_DECODER_LAYERS,
408 |         hp.SRC_VOCAB_SIZE,
409 |         hp.TGT_VOCAB_SIZE,
410 |         hp.MAX_LEN,
411 |     )
412 |     # Define the loss function and optimizer
413 |     loss_fn = nn.CrossEntropyLoss()
414 |     optimizer = optim.Adam(model.parameters(), lr=hp.LEARNING_RATE)
415 |
416 |     # Instantiate the batch iterator, dropping the last batch to ensure all batches are the same size
417 |     batch_iterator = DataLoader(
418 |         dataset, batch_size=hp.BATCH_SIZE, shuffle=True, drop_last=True
419 |     )
420 |
421 |     # Train the model
422 |     train(model, loss_fn, optimizer, NUM_EPOCHS=hp.NUM_EPOCHS)
423 |
424 |     # Translate a sample sentence
425 |     src_text = "hello, how are you?"
426 |     translated_text = translate(model, src_text, en_tokenizer, fr_tokenizer)
427 |     print("Source text:", src_text)
428 |     print("Translated text:", translated_text)
429 |
--------------------------------------------------------------------------------
/Chapter4/DOCKERFILE:
--------------------------------------------------------------------------------
1 | # Use an official NVIDIA CUDA runtime as a base image
2 | FROM nvidia/cuda:12.3.2-base-ubuntu22.04
3 | # Set the working directory in the container to /app
4 | WORKDIR /app
5 | # Copy the current directory contents into the container at /app
6 | COPY . /app
7 | # The CUDA base image ships without Python, so install pip before the requirements
8 | RUN apt-get update && apt-get install -y --no-install-recommends python3-pip && rm -rf /var/lib/apt/lists/*
9 | # Install any needed packages specified in requirements.txt
10 | RUN pip3 install --no-cache-dir -r requirements.txt
11 | # Make port 80 available to the world outside this container
12 | EXPOSE 80
13 | # Run app.py when the container launches
14 | CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "80"]
--------------------------------------------------------------------------------
/Chapter4/app.py:
--------------------------------------------------------------------------------
1 | from fastapi import FastAPI, HTTPException
2 | from pydantic import BaseModel
3 | from transformers import pipeline
4 |
5 | # Load the pre-trained model
6 | generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B")
7 | # Create the FastAPI app
8 | app = FastAPI()
9 |
10 |
11 | # Define the request body
12 | class GenerationInput(BaseModel):
13 |     prompt: str
14 |
15 |
16 | # Define the endpoint
17 | @app.post("/generate")
18 | def generate_text(input: GenerationInput):
19 |     try:
20 |         # Generate text based on the input prompt
21 |         generated_text = generator(input.prompt, max_length=150)
22 |         return {"generated_text": generated_text}
23 |     except Exception as exc:  # avoid a bare except so real errors are not silently swallowed
24 |         raise HTTPException(status_code=500, detail="Model failed to generate text") from exc
--------------------------------------------------------------------------------
/Chapter4/benchmark.py:
--------------------------------------------------------------------------------
1 | #!pip install openai langchain[llms] huggingface_hub pandas torch
2 |
3 | import os
4 | import pandas as pd
5 | from langchain.llms import OpenAI, HuggingFaceHub
6 | from langchain import LLMChain, PromptTemplate  # written for pre-0.1 LangChain; newer releases relocate these imports
7 | from tqdm.auto import tqdm
8 | import torch
9 |
10 |
11 | def verify_gpu():
12 |     return torch.cuda.is_available()
13 |
14 |
15 | def load_data(file_path):
16 |     return pd.read_csv(file_path)
17 |
18 |
19 | def save_data(data, file_path):
20 |     data.to_csv(file_path, index=False)
21 |
22 |
23 | def generate_descriptions(llm, model_input_data, template):
24 |     prompt = PromptTemplate(template=template, input_variables=["product_metadata"])
25 |     llm_chain = LLMChain(prompt=prompt, llm=llm)
26 |     descriptions = []
27 |     for data in tqdm(model_input_data):
28 |         description = llm_chain.run(data)
29 |         descriptions.append(description)
30 |     return descriptions
31 |
32 |
33 | if __name__ == "__main__":
34 |     # Configuration (.get() so a missing key reaches the asserts below instead of raising KeyError)
35 |     config = {
36 |         "openai_api_key": os.environ.get("OPENAI_API_KEY"),
37 |         "huggingface_token": os.environ.get("HUGGINGFACEHUB_API_TOKEN"),
38 |         "product_data_path": "data/product_data.csv",
39 |         "test_data_path": "data/test_data.csv",
40 |         "reference_data_path": "data/reference_data.csv",
41 |         "openai_model_name": "gpt-3.5-turbo-instruct",
42 |         "huggingface_repo_id": "EleutherAI/gpt-neo-2.7B",
43 |         "prompt_template": """
44 |     Write a creative product description for the following product: {product_metadata}
45 |     """,
46 |     }
47 |
48 |     assert (
49 |         config["openai_api_key"] is not None
50 |     ), "OpenAI API Key is required, set the OPENAI_API_KEY environment variable."
51 |     assert (
52 |         config["huggingface_token"] is not None
53 |     ), "HuggingFace API Token is required, set the HUGGINGFACEHUB_API_TOKEN environment variable."
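    # Optional hardening sketch (assumes the tenacity package is installed): wrap
    # llm_chain.run with exponential-backoff retries when benchmarking rate-limited APIs, e.g.
    #   from tenacity import retry, wait_exponential
    #   @retry(wait=wait_exponential(min=1, max=30))
    #   def safe_run(chain, data): return chain.run(data)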
54 |
55 |     # Verify if GPU is available
56 |     print("GPU Available:", verify_gpu())
57 |
58 |     # Load data
59 |     product_data = load_data(config["product_data_path"])
60 |
61 |     # Data preparation
62 |     test_data = product_data.sample(frac=0.2, random_state=42)
63 |     reference_data = product_data.drop(test_data.index)
64 |
65 |     save_data(test_data, config["test_data_path"])
66 |     save_data(reference_data, config["reference_data_path"])
67 |
68 |     reference_data = load_data(config["reference_data_path"])
69 |     reference_descriptions = reference_data["product_description"].tolist()
70 |     product_images = reference_data["product_image"].tolist()
71 |
72 |     # Initialize models
73 |     llm_gpt3 = OpenAI(
74 |         model_name=config["openai_model_name"], temperature=0.9, max_tokens=256
75 |     )
76 |     llm_neo = HuggingFaceHub(
77 |         repo_id=config["huggingface_repo_id"], model_kwargs={"temperature": 0.9}
78 |     )
79 |
80 |     # Generate descriptions
81 |     gpt3_descriptions = generate_descriptions(
82 |         llm_gpt3, reference_descriptions, config["prompt_template"]
83 |     )
84 |     gptneo_descriptions = generate_descriptions(
85 |         llm_neo, reference_descriptions, config["prompt_template"]
86 |     )
87 |
88 |     # Save generated descriptions
89 |     gpt3_descriptions_df = pd.DataFrame(
90 |         {"product_description": gpt3_descriptions, "product_image": product_images}
91 |     )
92 |     gpt3_descriptions_df.to_csv("data/gpt3_descriptions.csv", index=False)
93 |
94 |     gptneo_descriptions_df = pd.DataFrame(
95 |         {"product_description": gptneo_descriptions, "product_image": product_images}
96 |     )
97 |     gptneo_descriptions_df.to_csv("data/gptneo_descriptions.csv", index=False)
--------------------------------------------------------------------------------
/Chapter4/ci-cd.yml:
--------------------------------------------------------------------------------
1 | name: CI/CD Pipeline
2 | on:
3 |   push:
4 |     branches:
5 |       - main
6 | jobs:
7 |   build-and-test:
8 |     runs-on: ubuntu-latest
9 |     steps:
10 |       - name: Checkout code
11 |         uses: actions/checkout@v4
12 |       - name: Build Docker image
13 |         # the repo's Dockerfile is named DOCKERFILE, so pass -f explicitly
14 |         run: docker build -f DOCKERFILE -t stylesprint .
15 |       - name: Run tests
16 |         # assumes a set of unit tests were defined
17 |         run: docker run stylesprint python -m unittest discover
18 |   deploy:
19 |     needs: build-and-test
20 |     runs-on: ubuntu-latest
21 |     steps:
22 |       - name: Checkout code
23 |         uses: actions/checkout@v4
24 |       - name: Login to DockerHub
25 |         run: echo ${{ secrets.DOCKER_PASSWORD }} | docker login -u ${{ secrets.DOCKER_USERNAME }} --password-stdin
26 |       - name: Push Docker image
27 |         run: |
28 |           docker tag stylesprint:latest ${{ secrets.DOCKER_USERNAME }}/stylesprint:latest
29 |           docker push ${{ secrets.DOCKER_USERNAME }}/stylesprint:latest
30 |
--------------------------------------------------------------------------------
/Chapter4/data/gpt3_descriptions.csv:
--------------------------------------------------------------------------------
1 | product_description,product_image
2 | "
3 | Transform into a show-stopping vision in our Elegant Red Evening Gown. Crafted with exquisite attention to detail, this stunning dress is designed to make a lasting impression. The rich red hue exudes confidence and elegance, while the sleek silhouette accentuates your curves in all the right places. Perfectly tailored to flatter every body shape, this gown is truly a timeless piece.
4 |
5 | From the moment you slip into this dress, you'll feel like a Hollywood starlet walking the red carpet.
The smooth material drapes effortlessly, creating a luxurious flow with every step. The elegant off-the-shoulder neckline adds a touch of sophistication, while the delicate ruching at the waistline adds a subtle touch of allure. 6 | 7 | Ideal for any glamorous occasion, our Elegant Red Evening Gown will have heads turning and jaws dropping. Whether you're attending a black-tie event, a charity gala, or a romantic dinner, this dress is sure to make you feel like the belle of the ball. So go ahead and indulge in a little luxury, because you deserve to look and feel your best. 8 | 9 | Made with the highest quality materials, this gown is not only stunning, but also comfortable to wear all night long. The attention to detail and expert craftsmanship ensure that it will remain a staple",data/img/image_1.jpg 10 | " 11 | Introducing our newest addition to your wardrobe - the ultimate casual white t-shirt! Made from the softest cotton material, it's guaranteed to keep you comfortable all day long. But what truly sets this t-shirt apart is the stunning graphic design adorning the front. 12 | 13 | With a touch of artistic flair, this design adds a pop of personality to your everyday look. Whether you're running errands, grabbing coffee with friends, or just lounging at home, this t-shirt is the perfect choice. Its versatile style makes it a breeze to dress up or down, and its classic white color complements any outfit effortlessly. 14 | 15 | But the uniqueness of this t-shirt doesn't stop at its design. It also features a relaxed fit and a flattering neckline, making it suitable for all body types. Plus, the high-quality fabric ensures that it will last through countless wears and washes without losing its shape or softness. 16 | 17 | This t-shirt is a must-have for every fashion-forward individual. It's the perfect combination of comfort, style, and individuality. So why settle for a plain white tee when you can have one adorned with a one-of-a-kind design? Add this casual white t-shirt to your wardrobe today and experience the perfect blend of comfort and creativity. ",data/img/image_2.jpg 18 | " 19 | Introducing the ultimate must-have for any fashion-forward individual: the stylish leather jacket with edgy detailing. This timeless piece is the perfect addition to any wardrobe, adding a touch of edginess and sophistication to any outfit. 20 | 21 | Crafted from high-quality leather, this jacket is designed to provide both style and durability. The smooth and supple texture of the leather not only looks luxurious but also provides comfort and protection against the elements. You'll feel confident and ready to take on the world in this jacket. 22 | 23 | But what sets this leather jacket apart is its edgy detailing. The bold metal zippers and studs give it a rebellious and cool vibe, making a statement wherever you go. The detailed stitching and asymmetrical design add a touch of uniqueness and elevate the overall look of the jacket. 24 | 25 | This leather jacket is a timeless piece that will never go out of style, making it a wise investment for your wardrobe. Whether you're dressing up for a night out or keeping it casual for a day of running errands, this jacket will effortlessly elevate your outfit. 26 | 27 | With its versatile design, this jacket can be dressed up or down, making it suitable for any occasion. 
Pair it with a little black dress for a chic and edgy look, or throw it over a t-shirt and",data/img/image_3.jpg 28 | " 29 | Introducing our newest addition to your summer wardrobe - the perfect pair of cotton shorts. Made from the softest, most breathable material, these shorts will keep you cool and comfortable all day long. Whether you're lounging by the pool, running errands, or meeting friends for lunch, these shorts are your go-to for a relaxed yet chic look. 30 | 31 | Available in a range of stunning colors, you can mix and match with your favorite tops and accessories for endless outfit possibilities. The elastic waistband ensures a comfortable fit for all body types, while the loose, flowy design provides freedom of movement and a flattering silhouette. 32 | 33 | But these shorts aren't just stylish and comfortable, they're also durable. Made from high-quality cotton, they can withstand the summer heat and frequent wear without losing their shape or color. Plus, they're easy to care for - simply toss them in the washing machine and they're good to go! 34 | 35 | Whether you're planning a day at the beach, a backyard BBQ, or a casual stroll around town, our cotton shorts are the perfect choice. So why settle for uncomfortable and restrictive shorts when you can have the best of both style and comfort? Upgrade your summer wardrobe today with our comfortable cotton shorts. Order now and experience the ultimate blend of fashion and practical",data/img/image_4.jpg 36 | " 37 | Step out in style and command attention with our stunning formal black blazer. With a contemporary cut and sleek design, this blazer is the perfect addition to any office wardrobe or formal event. 38 | 39 | Crafted from high-quality, lightweight fabric, our black blazer is both comfortable and stylish. The tailored fit accentuates your silhouette, giving you a refined and polished look. The classic black color adds a touch of sophistication, making it a versatile piece that can be dressed up or down for any occasion. 40 | 41 | Featuring a single button closure and subtle shoulder padding, this blazer exudes elegance and professionalism. The notched lapel and flap pockets add a touch of charm, while the fully lined interior ensures comfort and durability. 42 | 43 | Whether you're heading to a business meeting or a special event, our formal black blazer is the perfect choice. Pair it with a crisp white shirt and tailored pants for a polished office look, or with a flowy dress for a modern and chic evening ensemble. 44 | 45 | Elevate your style game and make a lasting impression with our formal black blazer. Order now and add a touch of class to your wardrobe. ",data/img/image_5.jpg 46 | " 47 | Introducing our newest addition to the active lifestyle collection - the Sporty Track Pants with Iconic Side Stripes. These pants combine fashion and function for the ultimate athletic look. 48 | 49 | Whether you're hitting the gym, going for a run, or simply running errands, these track pants will keep you looking stylish and feeling comfortable. Made with a high-quality fabric blend, these pants provide the perfect balance of breathability and durability. 50 | 51 | But what sets these track pants apart are the iconic side stripes that add a pop of color and elevate the design. The stripes not only make a fashion statement but also have a functional purpose. Designed to enhance your athletic performance, the stripes provide additional support and stability during movement. 
52 | 53 | The sporty track pants also feature an elastic waistband with an adjustable drawstring, ensuring a snug and customizable fit. Say goodbye to constantly adjusting your pants mid-workout. Plus, the slim fit design flatters your figure and gives you a streamlined look. 54 | 55 | Don't let your workout attire be boring and bland. Elevate your activewear game with our Sporty Track Pants with Iconic Side Stripes. Order yours now and experience the perfect blend of style and functionality.",data/img/image_6.jpg 56 | " 57 | Introducing our newest addition to your wardrobe - the Chic Polka Dot Skirt! Made with the modern fashionista in mind, this stylish skirt will add a playful touch to your casual collection. The classic design is brought to life with playful polka dots, adding a fun and whimsical element to your outfit. Perfect for any occasion, whether it's a day out with friends or a weekend brunch, this skirt will have you looking chic and stylish no matter where you go. The lightweight and flowy fabric will keep you cool and comfortable, while the flattering fit will accentuate your curves in all the right places. Pair it with a tucked-in blouse and your favorite heels for a sophisticated look, or dress it down with a tucked-in t-shirt and sneakers for a more relaxed vibe. Whatever your style, the Chic Polka Dot Skirt is the must-have piece for every fashion-forward woman. So add a pop of playfulness to your wardrobe and make a statement wherever you go with our Chic Polka Dot Skirt. ",data/img/image_7.jpg 58 | " 59 | Experience the beauty of bohemian style with our stunning maxi dress, designed to elevate your fashion game and radiate elegance from every angle. Crafted with intricate patterns and delicate details, this dress exudes a free-spirited charm that will captivate all those around you. Flowing effortlessly with every step you take, it's the perfect combination of comfort and style. Whether you're attending a formal event or simply strolling through a picturesque garden, this dress will make you feel like a true bohemian goddess. Embrace your inner free-spirit and embrace the beauty of our bohemian style maxi dress.",data/img/image_8.jpg 60 | " 61 | Introducing our newest must-have for any modern wardrobe: the slim-fit chinos in a soft beige shade. Crafted with impeccable attention to detail, these chinos are the perfect blend of comfort and style. 62 | 63 | Made with a soft and durable fabric, these chinos hug your body in all the right places, giving you a sleek and streamlined silhouette. The slim-fit design adds a touch of sophistication, making them suitable for both casual and formal occasions. 64 | 65 | But what sets these chinos apart is their versatile beige color. It's a neutral shade that effortlessly pairs with any outfit, making it a true wardrobe staple. Dress them up with a crisp white shirt and blazer for a business meeting or dress them down with a relaxed t-shirt for a weekend brunch with friends. 66 | 67 | Not only are these chinos stylish, but they are also practical. The fabric is breathable and stretchy, ensuring maximum comfort all day long. Plus, the slim-fit design means no bulky fabric getting in your way, perfect for those on-the-go days. 68 | 69 | Say goodbye to boring and ill-fitting pants and hello to these slim-fit chinos. Upgrade your wardrobe with this timeless piece that will take you from day to night with ease. 
Don't wait any longer, add these beige chinos to your cart",data/img/image_9.jpg 70 | " 71 | Introducing our newest addition to your shoe collection - the Vintage Style Leather Boots! These boots are the perfect fusion of rugged durability and classic aesthetics, making them a must-have for any fashion-forward individual. Crafted from high-quality leather, these boots are built to withstand the test of time while still exuding a timeless charm. 72 | 73 | Inspired by the iconic vintage style, these boots feature a sleek silhouette and intricate stitching for a touch of old-school charm. The smooth leather exterior is complemented by a sturdy sole, offering both style and functionality. Whether you're stomping through the city streets or exploring the great outdoors, these boots will provide the ultimate protection and support for your feet. 74 | 75 | But it's not just about the durability - these boots are also designed to elevate your fashion game. The vintage style adds a touch of sophistication to any outfit, making them a versatile addition to your wardrobe. Pair them with a flowy dress for a bohemian vibe, or throw them on with your favorite jeans for a rugged and edgy look. 76 | 77 | No matter where you go, these boots will be your trusted companion. So why settle for ordinary footwear when you can own a pair of Vintage Style Leather Boots? Upgrade your shoe game and stand out from the crowd with these timeless and stylish boots",data/img/image_10.jpg 78 | " 79 | Indulge in ultimate luxury and elevate your style with our designer handbag crafted in premium leather. This exquisite accessory is a must-have for the fashion-conscious, adding a touch of sophistication and elegance to any outfit. 80 | 81 | Meticulously crafted from high-quality leather, this handbag boasts a sleek and timeless design that will never go out of style. The smooth finish and rich texture of the leather will make you the envy of every fashionista. 82 | 83 | Not only does this handbag exude style, but it is also highly functional. With enough space to store all your essentials and more, it is perfect for both everyday use and special occasions. The interior is lined with soft fabric to protect your belongings, and the sturdy handles make it easy to carry around. 84 | 85 | Whether you're heading to a business meeting, a fancy dinner party, or a weekend getaway, our designer handbag is the perfect companion. Its subtle yet striking design will effortlessly elevate any ensemble, making you stand out in a crowd. 86 | 87 | Invest in the ultimate accessory that combines luxury, style, and practicality. Treat yourself or a loved one to our designer handbag crafted in premium leather and experience the ultimate symbol of sophistication and class. Order now and make a statement wherever you go.",data/img/image_11.jpg 88 | " 89 | Introducing our new Funky Printed Scarf – the ultimate statement piece to elevate any outfit! This scarf is the perfect combination of style and functionality, making it a must-have in any fashionista's wardrobe. 90 | 91 | The vibrant and eye-catching print of this scarf is what sets it apart from all others. Designed to add a pop of color to your ensemble, this accessory is perfect for those looking to make a bold fashion statement. Whether you're wearing it with a simple t-shirt and jeans or a little black dress, this scarf is sure to make heads turn and have all eyes on you. 
92 | 93 | But it's not just about its striking design – this scarf is also made with high-quality material, ensuring both comfort and durability. Made from a soft and lightweight fabric, it's gentle on the skin and can be worn all year round. Plus, the generous size allows for versatile styling options, whether you prefer to wrap it around your neck, drape it over your shoulders, or tie it in a knot. 94 | 95 | This funky printed scarf is not just a fashion accessory, but a reflection of your unique personality. It's perfect for those who dare to be bold and stand out from the crowd. So why settle for a boring, plain scarf when you can add a touch of fun and",data/img/image_12.jpg 96 | " 97 | Looking for a pair of running shoes that will take your workouts and your style to the next level? Look no further than our Athletic Running Shoes with Cushion Support! 98 | 99 | Designed for the active and fashion-forward individual, these shoes offer the perfect combination of performance and style. The cushion support feature ensures maximum comfort and protection for your feet, allowing you to push your limits and achieve your fitness goals without any discomfort. 100 | 101 | But don't let the functional design fool you, these shoes are also a fashion statement. The sleek and modern design will elevate any athletic outfit and have heads turning wherever you go. Available in a variety of bold and vibrant colors, these shoes will make you stand out from the crowd. 102 | 103 | Crafted with the highest quality materials, these running shoes are built to last. The breathable mesh upper keeps your feet cool and dry, while the durable sole provides superior traction on any terrain. 104 | 105 | Whether you're hitting the track, the gym, or simply running errands, our Athletic Running Shoes with Cushion Support will provide the support and style you need to conquer any task. So why settle for plain and ordinary when you can have both performance and fashion in one shoe? Upgrade your footwear game and take your workouts to the next level with our Athletic Running Shoes with Cushion Support. ",data/img/image_13.jpg 106 | " 107 | Introducing the ultimate solution for all your rainy day woes - our lightweight rain jacket with a functional hood! Perfectly designed to combat even the most unpredictable weather, this rain jacket is an essential addition to any wardrobe. 108 | 109 | Crafted with the utmost attention to detail, this jacket boasts a sleek and stylish design that will have you looking and feeling your best, no matter the weather. The functional hood not only adds a touch of practicality to the jacket but also adds a dash of fashion-forward flair. 110 | 111 | Not only is this rain jacket lightweight, making it comfortable and easy to wear, but it is also made from high-quality, water-resistant materials. So, even if you find yourself caught in a sudden downpour, you can rest assured that you will stay dry and protected. 112 | 113 | But what truly sets this rain jacket apart is its versatility. It is the perfect companion for any outdoor activity, from a morning jog to a weekend hike. And when the sun comes out, you can simply pack it away without taking up too much space, making it a hassle-free and convenient option for anyone on the go. 114 | 115 | Don't let unpredictable weather ruin your plans. Invest in our lightweight rain jacket with a functional hood and be prepared for whatever mother nature throws your way. 
Get yours now and never let",data/img/image_14.jpg 116 | " 117 | Introducing the ultimate statement piece for any fashion-forward individual - our fashionable denim jacket adorned with creative patches. This trendy twist on the classic denim jacket is the perfect way to showcase your unique personality and elevate your style to the next level. 118 | 119 | Crafted with high-quality denim, this jacket not only offers durability but also ensures a comfortable and flattering fit. The patches, carefully curated and placed, add an edgy and playful element to the jacket, making it a true head-turner. 120 | 121 | Whether you're a music lover, a traveler, or a lover of all things vintage, there is a patch for every personality. From retro band logos to intricate floral designs, these patches add a touch of individuality to your outfit, effortlessly making you stand out from the crowd. 122 | 123 | But it's not just about the patches - the jacket itself is a trendsetter. The classic denim silhouette, featuring a button-up front and double chest pockets, is given a modern update with a slightly cropped and fitted design. This makes it the perfect layering piece for any outfit, adding a touch of coolness to even the simplest of looks. 124 | 125 | So why settle for a plain and ordinary denim jacket when you can have one that is a true expression of your style? Upgrade your wardrobe with our fashionable denim jacket enhanced",data/img/image_15.jpg 126 | " 127 | ""Wrap yourself in warmth and elegance with our soft cashmere scarf. Crafted from the finest materials, this luxurious accessory will add a touch of sophistication to any winter outfit. Available in a range of pastel hues, it is the perfect addition to your wardrobe for the chilly season. The delicate cashmere fibers will gently caress your skin, providing both comfort and style. Whether you're off to a fancy dinner or a casual day out, this scarf will elevate your look with its delicate texture and subtle colors. Don't let the cold weather hold you back from looking and feeling your best. Choose our soft cashmere scarf and exude effortless charm and coziness wherever you go.""",data/img/image_16.jpg 128 | -------------------------------------------------------------------------------- /Chapter4/data/gptneo_descriptions.csv: -------------------------------------------------------------------------------- 1 | product_description,product_image 2 | " 3 | Write a creative product description for the following product: Elegant red evening gown with a sleek silhouette, ideal for glamorous nights. 4 | + Describe why",data/img/image_1.jpg 5 | " 6 | Write a creative product description for the following product: Casual white t-shirt adorned with a unique graphic design, a must-have for your everyday wardrobe. 7 | 8 | 9 | ",data/img/image_2.jpg 10 | " 11 | Write a creative product description for the following product: Stylish leather jacket with edgy detailing, a timeless piece for fashion-forward individuals. 12 | 13 | ",data/img/image_3.jpg 14 | " 15 | Write a creative product description for the following product: Comfortable cotton shorts in a range of colors, perfect for a relaxed yet chic look. 16 | 17 | ",data/img/image_4.jpg 18 | " 19 | Write a creative product description for the following product: Formal black blazer with a contemporary cut, ideal for office wear or formal events. 
20 | 21 | ",data/img/image_5.jpg 22 | " 23 | Write a creative product description for the following product: Sporty track pants with iconic side stripes, blending style with athletic functionality. 24 | 25 | ",data/img/image_6.jpg 26 | " 27 | Write a creative product description for the following product: Chic skirt adorned with playful polka dots, a fun addition to your casual collection. 28 | 29 | ",data/img/image_7.jpg 30 | " 31 | Write a creative product description for the following product: Bohemian style maxi dress with intricate patterns, exuding elegance and free-spirited charm. 32 | 33 | ",data/img/image_8.jpg 34 | " 35 | Write a creative product description for the following product: Slim-fit chinos in a soft beige, offering a versatile option for both casual and formal settings. 36 | ",data/img/image_9.jpg 37 | " 38 | Write a creative product description for the following product: Vintage style leather boots, combining rugged durability with classic aesthetics. 39 | ",data/img/image_10.jpg 40 | " 41 | Write a creative product description for the following product: Designer handbag crafted in premium leather, a luxurious accessory for the fashion-conscious. 42 | 43 | ",data/img/image_11.jpg 44 | " 45 | Write a creative product description for the following product: Funky printed scarf, a statement piece to add a pop of color to any outfit. 46 | 47 | ",data/img/image_12.jpg 48 | " 49 | Write a creative product description for the following product: Athletic running shoes with cushion support, designed for both performance and style. 50 | 51 | <",data/img/image_13.jpg 52 | " 53 | Write a creative product description for the following product: Lightweight rain jacket with a functional hood, an essential for unpredictable weather. 54 | * Use the image in your",data/img/image_14.jpg 55 | " 56 | Write a creative product description for the following product: Fashionable denim jacket enhanced with creative patches, a trendy twist on a classic. 57 | 58 | ",data/img/image_15.jpg 59 | " 60 | Write a creative product description for the following product: Soft cashmere scarf in pastel hues, adding a touch of luxury to your winter wardrobe. 
61 | 62 | ",data/img/image_16.jpg 63 | -------------------------------------------------------------------------------- /Chapter4/data/img/image_1.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_1.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_10.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_10.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_11.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_11.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_12.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_12.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_13.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_13.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_14.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_14.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_15.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_15.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_16.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_16.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_17.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_17.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_18.jpg: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_18.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_19.jpeg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_19.jpeg -------------------------------------------------------------------------------- /Chapter4/data/img/image_2.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_2.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_3.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_3.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_4.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_4.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_5.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_5.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_6.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_6.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_7.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_7.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_8.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_8.jpg -------------------------------------------------------------------------------- /Chapter4/data/img/image_9.jpg: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/PacktPublishing/Generative-AI-Foundations-in-Python/7701d08c7ad486e98a842b0075139668db15de95/Chapter4/data/img/image_9.jpg -------------------------------------------------------------------------------- /Chapter4/data/product_data.csv: -------------------------------------------------------------------------------- 1 | 
product_image,product_description,product_title,product_metadata 2 | "data/img/image_18.jpg","Trendy summer dress featuring a vibrant floral print, perfect for sunny days.",Vibrant Floral Summer Dress,"summer, floral print, casual, trendy" 3 | "data/img/image_19.jpg","Classic blue denim jeans with a modern twist, offering both comfort and style.",Modern Blue Denim Jeans,"denim, classic, blue, stylish" 4 | "data/img/image_1.jpg","Elegant red evening gown with a sleek silhouette, ideal for glamorous nights.",Elegant Red Evening Gown,"elegant, gown, red, sleek silhouette" 5 | "data/img/image_2.jpg","Casual white t-shirt adorned with a unique graphic design, a must-have for your everyday wardrobe.",Unique Graphic Design T-Shirt,"casual, graphic design, white, t-shirt" 6 | "data/img/image_3.jpg","Stylish leather jacket with edgy detailing, a timeless piece for fashion-forward individuals.",Edgy Stylish Leather Jacket,"stylish, leather, edgy, fashion-forward" 7 | "data/img/image_4.jpg","Comfortable cotton shorts in a range of colors, perfect for a relaxed yet chic look.",Comfortable Cotton Shorts,"comfortable, cotton, colorful, relaxed" 8 | "data/img/image_5.jpg","Formal black blazer with a contemporary cut, ideal for office wear or formal events.",Contemporary Black Blazer,"formal, black, blazer, contemporary cut" 9 | "data/img/image_6.jpg","Sporty track pants with iconic side stripes, blending style with athletic functionality.",Iconic Side Stripe Track Pants,"sporty, track pants, side stripes, functional" 10 | "data/img/image_7.jpg","Chic skirt adorned with playful polka dots, a fun addition to your casual collection.",Playful Polka Dot Skirt,"chic, skirt, polka dots, playful" 11 | "data/img/image_8.jpg","Bohemian style maxi dress with intricate patterns, exuding elegance and free-spirited charm.",Bohemian Maxi Dress,"bohemian, maxi dress, intricate patterns, elegant" 12 | "data/img/image_9.jpg","Slim-fit chinos in a soft beige, offering a versatile option for both casual and formal settings.",Versatile Beige Slim-Fit Chinos,"slim-fit, chinos, beige, versatile" 13 | "data/img/image_10.jpg","Vintage style leather boots, combining rugged durability with classic aesthetics.",Vintage Leather Boots,"vintage, leather boots, rugged, classic" 14 | "data/img/image_11.jpg","Designer handbag crafted in premium leather, a luxurious accessory for the fashion-conscious.",Premium Leather Handbag,"designer, handbag, premium leather, luxurious" 15 | "data/img/image_12.jpg","Funky printed scarf, a statement piece to add a pop of color to any outfit.",Colorful Printed Scarf,"funky, printed scarf, colorful, statement piece" 16 | "data/img/image_13.jpg","Athletic running shoes with cushion support, designed for both performance and style.",Cushioned Athletic Shoes,"athletic, running shoes, cushion support, performance" 17 | "data/img/image_17.jpg","Winter woolen sweater in a deep navy blue, combining warmth with sophisticated elegance.",Cozy Navy Woolen Sweater,"woolen, sweater, navy blue, winter" 18 | "data/img/image_14.jpg","Lightweight rain jacket with a functional hood, an essential for unpredictable weather.",Lightweight Rain Jacket,"lightweight, rain jacket, hood, essential" 19 | "data/img/image_16.jpg","Versatile unisex sunglasses, blending sleek design with optimal sun protection.",Stylish Unisex Sunglasses,"versatile, sunglasses, unisex, sleek design" 20 | "data/img/image_15.jpg","Fashionable denim jacket enhanced with creative patches, a trendy twist on a classic.",Trendy Denim Jacket with 
Patches,"fashionable, denim jacket, patches, trendy" 21 | "data/img/image_16.jpg","Soft cashmere scarf in pastel hues, adding a touch of luxury to your winter wardrobe.",Luxurious Soft Cashmere Scarf,"soft, cashmere scarf, pastel hues, luxury" 22 | -------------------------------------------------------------------------------- /Chapter4/data/reference_data.csv: -------------------------------------------------------------------------------- 1 | product_image,product_description,product_title,product_metadata 2 | data/img/image_1.jpg,"Elegant red evening gown with a sleek silhouette, ideal for glamorous nights.",Elegant Red Evening Gown,"elegant, gown, red, sleek silhouette" 3 | data/img/image_2.jpg,"Casual white t-shirt adorned with a unique graphic design, a must-have for your everyday wardrobe.",Unique Graphic Design T-Shirt,"casual, graphic design, white, t-shirt" 4 | data/img/image_3.jpg,"Stylish leather jacket with edgy detailing, a timeless piece for fashion-forward individuals.",Edgy Stylish Leather Jacket,"stylish, leather, edgy, fashion-forward" 5 | data/img/image_4.jpg,"Comfortable cotton shorts in a range of colors, perfect for a relaxed yet chic look.",Comfortable Cotton Shorts,"comfortable, cotton, colorful, relaxed" 6 | data/img/image_5.jpg,"Formal black blazer with a contemporary cut, ideal for office wear or formal events.",Contemporary Black Blazer,"formal, black, blazer, contemporary cut" 7 | data/img/image_6.jpg,"Sporty track pants with iconic side stripes, blending style with athletic functionality.",Iconic Side Stripe Track Pants,"sporty, track pants, side stripes, functional" 8 | data/img/image_7.jpg,"Chic skirt adorned with playful polka dots, a fun addition to your casual collection.",Playful Polka Dot Skirt,"chic, skirt, polka dots, playful" 9 | data/img/image_8.jpg,"Bohemian style maxi dress with intricate patterns, exuding elegance and free-spirited charm.",Bohemian Maxi Dress,"bohemian, maxi dress, intricate patterns, elegant" 10 | data/img/image_9.jpg,"Slim-fit chinos in a soft beige, offering a versatile option for both casual and formal settings.",Versatile Beige Slim-Fit Chinos,"slim-fit, chinos, beige, versatile" 11 | data/img/image_10.jpg,"Vintage style leather boots, combining rugged durability with classic aesthetics.",Vintage Leather Boots,"vintage, leather boots, rugged, classic" 12 | data/img/image_11.jpg,"Designer handbag crafted in premium leather, a luxurious accessory for the fashion-conscious.",Premium Leather Handbag,"designer, handbag, premium leather, luxurious" 13 | data/img/image_12.jpg,"Funky printed scarf, a statement piece to add a pop of color to any outfit.",Colorful Printed Scarf,"funky, printed scarf, colorful, statement piece" 14 | data/img/image_13.jpg,"Athletic running shoes with cushion support, designed for both performance and style.",Cushioned Athletic Shoes,"athletic, running shoes, cushion support, performance" 15 | data/img/image_14.jpg,"Lightweight rain jacket with a functional hood, an essential for unpredictable weather.",Lightweight Rain Jacket,"lightweight, rain jacket, hood, essential" 16 | data/img/image_15.jpg,"Fashionable denim jacket enhanced with creative patches, a trendy twist on a classic.",Trendy Denim Jacket with Patches,"fashionable, denim jacket, patches, trendy" 17 | data/img/image_16.jpg,"Soft cashmere scarf in pastel hues, adding a touch of luxury to your winter wardrobe.",Luxurious Soft Cashmere Scarf,"soft, cashmere scarf, pastel hues, luxury" 18 | 
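A note on how these Chapter 4 data files fit together (the helper sketched below is hypothetical and not part of the repository): product_data.csv drives description generation, reference_data.csv holds the human-written ground truth consumed by eval_rouge.py, eval_semantic.py, and eval_clip.py, and test_data.csv holds held-out products. One detail worth checking: data/img/image_16.jpg is assigned to two different products in product_data.csv (sunglasses in row 19, a cashmere scarf in row 21). A small sketch like the following could verify that every referenced image exists on disk and flag paths assigned more than once:

# check_data.py -- illustrative sketch only; not part of the repository
import os
from collections import Counter

import pandas as pd


def validate_csv(csv_path: str, image_column: str = "product_image") -> None:
    """Report missing image files and duplicated image assignments for one CSV."""
    df = pd.read_csv(csv_path)
    missing = [p for p in df[image_column] if not os.path.exists(p)]
    duplicated = [p for p, n in Counter(df[image_column]).items() if n > 1]
    print(f"{csv_path}: {len(missing)} missing file(s), {len(duplicated)} duplicated path(s)")
    for path in missing + duplicated:
        print(f"  {path}")


if __name__ == "__main__":
    for csv_file in ("data/product_data.csv", "data/reference_data.csv", "data/test_data.csv"):
        validate_csv(csv_file)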
-------------------------------------------------------------------------------- /Chapter4/data/test_data.csv: -------------------------------------------------------------------------------- 1 | product_image,product_description,product_title,product_metadata 2 | data/img/image_18.jpg,"Trendy summer dress featuring a vibrant floral print, perfect for sunny days.",Vibrant Floral Summer Dress,"summer, floral print, casual, trendy" 3 | data/img/image_16.jpg,"Versatile unisex sunglasses, blending sleek design with optimal sun protection.",Stylish Unisex Sunglasses,"versatile, sunglasses, unisex, sleek design" 4 | data/img/image_17.jpg,"Winter woolen sweater in a deep navy blue, combining warmth with sophisticated elegance.",Cozy Navy Woolen Sweater,"woolen, sweater, navy blue, winter" 5 | data/img/image_19.jpg,"Classic blue denim jeans with a modern twist, offering both comfort and style.",Modern Blue Denim Jeans,"denim, classic, blue, stylish" 6 | -------------------------------------------------------------------------------- /Chapter4/dev_requirements.txt: -------------------------------------------------------------------------------- 1 | fastapi 2 | langchain[llms] 3 | openai 4 | pandas 5 | Pillow 6 | torch 7 | rouge 8 | sentence-transformers 9 | sumeval 10 | transformers 11 | accelerate 12 | nltk 13 | huggingface_hub 14 | -------------------------------------------------------------------------------- /Chapter4/eval_clip.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from PIL import Image 3 | import pandas as pd 4 | from transformers import CLIPProcessor, CLIPModel 5 | from typing import List, Tuple 6 | 7 | # Constants and Configuration 8 | config = { 9 | "clip_repo": "openai/clip-vit-base-patch32", 10 | "reference_data_path": "data/reference_data.csv", 11 | "generated_descriptions_gpt3_path": "data/gpt3_descriptions.csv", 12 | "generated_descriptions_neo_path": "data/gptneo_descriptions.csv", 13 | "image_column_name": "product_image", # Update as necessary 14 | "description_column_name": "product_description", # Update as necessary 15 | } 16 | 17 | 18 | def load_image_from_path(image_path: str, crop_size=(300, 300)) -> Image.Image: 19 | try: 20 | with Image.open(image_path) as img: 21 | img.load() 22 | width, height = img.size 23 | left = (width - crop_size[0]) / 2 24 | top = (height - crop_size[1]) / 2 25 | right = (width + crop_size[0]) / 2 26 | bottom = (height + crop_size[1]) / 2 27 | img_cropped = img.crop((left, top, right, bottom)) 28 | return img_cropped 29 | except IOError as error: 30 | print(f"Error opening or loading the image file: {error}") 31 | return None 32 | 33 | 34 | def load_model_and_processor(model_name: str) -> Tuple[CLIPModel, CLIPProcessor]: 35 | model = CLIPModel.from_pretrained(model_name) 36 | processor = CLIPProcessor.from_pretrained(model_name) 37 | return model, processor 38 | 39 | 40 | def process_inputs( 41 | processor: CLIPProcessor, prompts: List[str], images: List[Image.Image] 42 | ) -> dict: 43 | processed_texts = processor( 44 | text=prompts, padding=True, truncation=True, max_length=77, return_tensors="pt" 45 | ) 46 | processed_images = processor(images=images, return_tensors="pt") 47 | return { 48 | "input_ids": processed_texts["input_ids"], 49 | "attention_mask": processed_texts["attention_mask"], 50 | "pixel_values": processed_images["pixel_values"], 51 | } 52 | 53 |
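# Note (annotation, not part of the original source): clip_scores below relies on
# CLIP returning an N x N matrix of image-text logits when given N images and N
# descriptions in matching order. The diagonal entry logits_per_image[i, i] is the
# similarity between image i and its own description, which is the score we keep;
# off-diagonal entries (image i vs. description j) are ignored:
#     score_i = outputs.logits_per_image[i, i].item()
# Higher raw logits indicate closer image-text alignment in CLIP's embedding space.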
54 | def clip_scores(images, descriptions, model, processor) -> List[float]: 55 | scores = [] 56 | inputs = process_inputs(processor, descriptions, images) 57 | outputs = model(**inputs) 58 | logits_per_image = outputs.logits_per_image 59 | for i in range(logits_per_image.size(0)): 60 | score = logits_per_image[i, i].item() 61 | scores.append(score) 62 | return scores 63 | 64 | 65 | if __name__ == "__main__": 66 | clip_model, clip_processor = load_model_and_processor(config["clip_repo"]) 67 | reference_data = pd.read_csv(config["reference_data_path"]) 68 | 69 | # Load the images, then drop any that failed to load so the processor never sees None 70 | reference_images = [ 71 | load_image_from_path(row[config["image_column_name"]]) 72 | for _, row in reference_data.iterrows() 73 | ] 74 | reference_images = [img for img in reference_images if img is not None] 75 | gpt3_descriptions = pd.read_csv(config["generated_descriptions_gpt3_path"])[ 76 | config["description_column_name"] 77 | ].tolist() 78 | gptneo_descriptions = pd.read_csv(config["generated_descriptions_neo_path"])[ 79 | config["description_column_name"] 80 | ].tolist() 81 | reference_descriptions = reference_data[config["description_column_name"]].tolist() 82 | 83 | gpt3_generated_scores = clip_scores( 84 | reference_images, gpt3_descriptions, clip_model, clip_processor 85 | ) 86 | gptneo_generated_scores = clip_scores( 87 | reference_images, gptneo_descriptions, clip_model, clip_processor 88 | ) 89 | reference_scores = clip_scores( 90 | reference_images, reference_descriptions, clip_model, clip_processor 91 | ) 92 | 93 | # Print the scores for comparison 94 | print(f"GPT-3 Generated Scores: {gpt3_generated_scores}") 95 | print(f"GPT-Neo Generated Scores: {gptneo_generated_scores}") 96 | print(f"Reference Scores: {reference_scores}") 97 | -------------------------------------------------------------------------------- /Chapter4/eval_rouge.py: -------------------------------------------------------------------------------- 1 | # !pip install rouge sumeval nltk 2 | 3 | import nltk 4 | import pandas as pd 5 | from nltk.translate.bleu_score import sentence_bleu 6 | from sumeval.metrics.rouge import RougeCalculator 7 | from nltk.translate.meteor_score import meteor_score 8 | 9 | 10 | def evaluate(reference_descriptions: list, generated_descriptions: list) -> tuple: 11 | nltk.download("wordnet", quiet=True) 12 | 13 | # Calculating BLEU score on word tokens (raw strings would be scored as character n-grams) 14 | bleu_scores = [ 15 | sentence_bleu([ref.split()], gen.split()) 16 | for ref, gen in zip(reference_descriptions, generated_descriptions) 17 | ] 18 | average_bleu = sum(bleu_scores) / len(bleu_scores) 19 | 20 | # Calculating ROUGE score 21 | rouge = RougeCalculator() 22 | rouge_scores = [ 23 | rouge.rouge_n(gen, ref, 2) 24 | for ref, gen in zip(reference_descriptions, generated_descriptions) 25 | ] 26 | average_rouge = sum(rouge_scores) / len(rouge_scores) 27 | 28 | # Calculating METEOR score 29 | meteor_scores = [ 30 | meteor_score([ref.split()], gen.split()) 31 | for ref, gen in zip(reference_descriptions, generated_descriptions) 32 | ] 33 | average_meteor = sum(meteor_scores) / len(meteor_scores) 34 | 35 | return average_bleu, average_rouge, average_meteor 36 | 37 | 38 | if __name__ == "__main__": 39 | # Configuration 40 | config = { 41 | "reference_data_path": "data/reference_data.csv", # Update paths if needed 42 | "generated_descriptions_gpt3_path": "data/gpt3_descriptions.csv", 43 | "generated_descriptions_neo_path": "data/gptneo_descriptions.csv", 44 | } 45 | 46 | # Load reference descriptions from CSV 47 | reference_data = pd.read_csv(config["reference_data_path"]) 48 | reference_descriptions = reference_data[ 49 | "product_description" 50 | ].tolist() # Update the column name here if your CSV differs 51 | 52 | # Load generated
descriptions from CSV 53 | generated_descriptions_gpt3 = pd.read_csv( 54 | config["generated_descriptions_gpt3_path"] 55 | )["product_description"].tolist() 56 | generated_descriptions_neo = pd.read_csv(config["generated_descriptions_neo_path"])[ 57 | "product_description" 58 | ].tolist() 59 | 60 | # Evaluate for GPT-3 descriptions 61 | avg_bleu_gpt3, avg_rouge_gpt3, avg_meteor_gpt3 = evaluate( 62 | reference_descriptions, generated_descriptions_gpt3 63 | ) 64 | print( 65 | f"GPT-3: BLEU={avg_bleu_gpt3}, ROUGE={avg_rouge_gpt3}, METEOR={avg_meteor_gpt3}" 66 | ) 67 | 68 | # Evaluate for GPT-Neo descriptions 69 | avg_bleu_neo, avg_rouge_neo, avg_meteor_neo = evaluate( 70 | reference_descriptions, generated_descriptions_neo 71 | ) 72 | print( 73 | f"GPT-Neo: BLEU={avg_bleu_neo}, ROUGE={avg_rouge_neo}, METEOR={avg_meteor_neo}" 74 | ) 75 | -------------------------------------------------------------------------------- /Chapter4/eval_semantic.py: -------------------------------------------------------------------------------- 1 | # Install required packages 2 | # !pip install sentence-transformers 3 | 4 | import pandas as pd 5 | from sentence_transformers import SentenceTransformer, util 6 | 7 | 8 | def cosine_similarity(model, reference_descriptions, generated_descriptions): 9 | # Calculating cosine similarity for generated descriptions 10 | cosine_scores = [ 11 | util.pytorch_cos_sim(model.encode(ref), model.encode(gen))[0][0] 12 | for ref, gen in zip(reference_descriptions, generated_descriptions) 13 | ] 14 | average_cosine = sum(cosine_scores) / len(cosine_scores) 15 | return average_cosine 16 | 17 | 18 | if __name__ == "__main__": 19 | model = SentenceTransformer("paraphrase-MiniLM-L6-v2") 20 | 21 | # Configuration 22 | config = { 23 | "reference_data_path": "data/reference_data.csv", 24 | "generated_descriptions_gpt3_path": "data/gpt3_descriptions.csv", 25 | "generated_descriptions_neo_path": "data/gptneo_descriptions.csv", 26 | } 27 | 28 | # Load reference descriptions from CSV 29 | reference_data = pd.read_csv(config["reference_data_path"]) 30 | reference_descriptions = reference_data["product_description"].tolist() 31 | 32 | # Load generated descriptions from CSV 33 | generated_descriptions_gpt3 = pd.read_csv( 34 | config["generated_descriptions_gpt3_path"] 35 | )["product_description"].tolist() 36 | generated_descriptions_neo = pd.read_csv(config["generated_descriptions_neo_path"])[ 37 | "product_description" 38 | ].tolist() 39 | 40 | # Evaluate cosine similarity 41 | average_cosine_gpt3 = cosine_similarity( 42 | model, reference_descriptions, generated_descriptions_gpt3 43 | ) 44 | print(f"Average Cosine Similarity GPT-3: {average_cosine_gpt3}") 45 | 46 | average_cosine_neo = cosine_similarity( 47 | model, reference_descriptions, generated_descriptions_neo 48 | ) 49 | print(f"Average Cosine Similarity GPT-Neo: {average_cosine_neo}") 50 | -------------------------------------------------------------------------------- /Chapter4/fastapi.py: -------------------------------------------------------------------------------- 1 | from fastapi import FastAPI, HTTPException, Request 2 | from langchain.llms import OpenAI 3 | import os 4 | 5 | # Initialize FastAPI app 6 | app = FastAPI() 7 | 8 | # Setup Langchain with GPT-3.5, alter the temperature and max_tokens for different results 9 | llm = OpenAI( 10 | model_name="gpt-3.5-turbo-instruct", 11 | temperature=0.7, 12 | max_tokens=256, 13 | api_key=os.environ["OPENAI_API_KEY"], 14 | ) 15 | 16 | 17 | @app.post("/generate/") 18 | async def 
generate_text(request: Request): 19 | data = await request.json() 20 | prompt = data.get("prompt") 21 | if not prompt: 22 | raise HTTPException(status_code=400, detail="Prompt is required") 23 | response = llm(prompt) 24 | return {"generated_text": response} 25 | -------------------------------------------------------------------------------- /Chapter4/minimal_gpt_neo.py: -------------------------------------------------------------------------------- 1 | # In a Colab or Jupyter notebook 2 | # !pip install transformers 3 | 4 | from transformers import pipeline 5 | 6 | # Initialize a text generation pipeline with a generative model (GPT-Neo) 7 | text_generator = pipeline("text-generation", model="EleutherAI/gpt-neo-2.7B") 8 | 9 | # Example prompt for product description generation 10 | prompt = "This high-tech running shoe with advanced cushioning and support" 11 | 12 | # Generating the product description 13 | generated_text = text_generator(prompt, max_length=100, do_sample=True) 14 | 15 | # Printing the generated product description 16 | print(generated_text[0]["generated_text"]) 17 | -------------------------------------------------------------------------------- /Chapter4/project_init.bat: -------------------------------------------------------------------------------- 1 | @echo off 2 | mkdir StyleSprint 3 | cd StyleSprint 4 | git init 5 | pause -------------------------------------------------------------------------------- /Chapter4/project_init.sh: -------------------------------------------------------------------------------- 1 | mkdir StyleSprint 2 | cd StyleSprint 3 | git init -------------------------------------------------------------------------------- /Chapter4/requirements.txt: -------------------------------------------------------------------------------- 1 | # minimal production requirements 2 | fastapi==0.65.2 3 | torch==1.13.1 4 | transformers==4.9.2 5 | uvicorn==0.14.0 -------------------------------------------------------------------------------- /Chapter5/qa_demo.json: -------------------------------------------------------------------------------- 1 | [ 2 | { 3 | "question": "What are the latest streetwear trends available at Stylesprint?", 4 | "answer": "Stylesprint's latest streetwear collection includes oversized hoodies, graphic tees, and cargo pants, all inspired by the latest hip-hop fashion trends." 5 | }, 6 | { 7 | "question": "Can I find the most up-to-date streetwear at Stylesprint?", 8 | "answer": "Indeed, our recent streetwear collection at Stylesprint features trendy oversized hoodies, bold graphic tees, and functional cargo pants, all influenced by current hip-hop style trends." 9 | }, 10 | { 11 | "question": "What new streetwear styles has Stylesprint introduced?", 12 | "answer": "Our most recent streetwear collection at Stylesprint incorporates oversized hoodies, graphic t-shirts, and cargo trousers, all mirroring the latest trends in hip-hop fashion." 13 | }, 14 | { 15 | "question": "Do you offer sustainable fashion options?", 16 | "answer": "Yes, Stylesprint has a range of eco-friendly fashion choices, including organic cotton t-shirts and recycled polyester jackets." 17 | }, 18 | { 19 | "question": "Does Stylesprint sell environmentally conscious clothing?", 20 | "answer": "Absolutely, Stylesprint offers a selection of eco-friendly fashion alternatives such as t-shirts made from organic cotton and jackets manufactured from recycled polyester."
21 | }, 22 | { 23 | "question": "Can I find styles that are environmentally-friendly at Stylesprint?", 24 | "answer": "Yes, at Stylesprint we offer a range of sustainable fashion with items like organic cotton tees and jackets made from recycled polyester." 25 | }, 26 | { 27 | "question": "Are there any exclusive sneaker releases coming up at Stylesprint?", 28 | "answer": "Stylesprint regularly collaborates with top brands for exclusive sneaker drops. Keep an eye on our website for upcoming releases." 29 | }, 30 | { 31 | "question": "Do you know of any upcoming exclusive sneaker collaboration at Stylesprint?", 32 | "answer": "Indeed, Stylesprint often joins forces with top-tier brands for exclusive sneaker drops, do make sure to keep tabs on our website to stay updated on upcoming releases." 33 | }, 34 | { 35 | "question": "Is Stylesprint expecting any exclusive sneaker releases soon?", 36 | "answer": "Yes, Stylesprint frequently partners with leading footwear brands for exclusive sneaker releases, do keep checking our website for all the latest information." 37 | }, 38 | { 39 | "question": "What accessories can I find at Stylesprint to complement my outfit?", 40 | "answer": "We have a variety of accessories including caps, sunglasses, and hip-hop inspired jewelry to complement your outfit." 41 | }, 42 | { 43 | "question": "What sort of supplementary items can I find at Stylesprint to complete my attire?", 44 | "answer": "Stylesprint offers a vast array of accessories like trendy caps, fashionable sunglasses, and hip-hop influenced jewellery that can perfectly round off your look." 45 | }, 46 | { 47 | "question": "What kind of accessories are available at Stylesprint to enhance my look?", 48 | "answer": "We have an extensive range of accessories at Stylesprint - from trendy caps and sunglasses, to jewellery inspired by hip-hop culture, we've got you covered." 49 | }, 50 | { 51 | "question": "Can I find plus-size options in your collection?", 52 | "answer": "Yes, Stylesprint offers a variety of plus-size options in our modern fashion and streetwear collections." 53 | }, 54 | { 55 | "question": "Do you have outfits suitable for plus-size customers in your collection?", 56 | "answer": "Definitely, Stylesprint has a plethora of plus-size options available across our modern fashion and streetwear lines." 57 | }, 58 | { 59 | "question": "Are there options for larger sizes available in your styles?", 60 | "answer": "Indeed, at Stylesprint we offer a wide variety of options for plus-size individuals in our contemporary fashion and streetwear collections." 61 | }, 62 | { 63 | "question": "What is the return policy for online purchases?", 64 | "answer": "Stylesprint accepts returns within 30 days of purchase, provided the items are unworn and in their original condition." 65 | }, 66 | { 67 | "question": "Could you brief me on your return policies for online purchases?", 68 | "answer": "Sure, Stylesprint allows returns for items purchased online within 30 days, provided that the items are not worn and maintained in their original state." 69 | }, 70 | { 71 | "question": "How does your return policy work for things bought online?", 72 | "answer": "Our policy at Stylesprint is to accept returns on online purchases within 30 days, with the condition that the items are unused and remain in their original condition." 
73 | } 74 | ] -------------------------------------------------------------------------------- /Chapter5/task_specific.py: -------------------------------------------------------------------------------- 1 | # !pip install sentence-transformers transformers peft torch 2 | from transformers import AutoModelForQuestionAnswering, AutoTokenizer 3 | from peft import AdaLoraConfig, get_peft_model 4 | from torch.utils.data import Dataset 5 | from transformers import Trainer, TrainingArguments 6 | import json 7 | import torch 8 | 9 | 10 | class TaskSpecificFineTuning: 11 | def __init__(self, model_path="google/flan-t5-small"): 12 | self.tokenizer = AutoTokenizer.from_pretrained(model_path) 13 | self.model = AutoModelForQuestionAnswering.from_pretrained(model_path) 14 | self.configure_peft_adapter() 15 | 16 | def configure_peft_adapter(self, verbose=True) -> None: 17 | """Configure the PEFT adapter.""" 18 | self.adapter_config = AdaLoraConfig(target_r=16) 19 | self.model.add_adapter(self.adapter_config) 20 | self.model = get_peft_model(self.model, self.adapter_config) 21 | if verbose: 22 | self.model.print_trainable_parameters() 23 | 24 | def ask_question(self, question, context, device="mps"): 25 | """tokenize the input and predict the answer.""" 26 | inputs = self.tokenizer.encode_plus( 27 | question, context, add_special_tokens=True, return_tensors="pt" 28 | ) 29 | 30 | # Adjustments for device placement 31 | device = torch.device(device) 32 | self.model.to(device) 33 | inputs = {k: v.to(device) for k, v in inputs.items()} 34 | 35 | # Ensure to move your inputs to the same device as the model 36 | input_ids = inputs["input_ids"].to(device) 37 | attention_mask = inputs["attention_mask"].to(device) 38 | 39 | # Get model predictions 40 | with torch.no_grad(): 41 | # Note: Depending on how PEFT is integrated, you might need to adjust this part 42 | outputs = self.model(input_ids=input_ids, attention_mask=attention_mask) 43 | 44 | # Get the start and end positions 45 | answer_start_scores = outputs.start_logits 46 | answer_end_scores = outputs.end_logits 47 | 48 | # Find the tokens with the highest `start` and `end` scores 49 | answer_start = torch.argmax(answer_start_scores) 50 | answer_end = torch.argmax(answer_end_scores) + 1 51 | 52 | # Convert the tokens to the answer string 53 | answer = self.tokenizer.convert_tokens_to_string( 54 | self.tokenizer.convert_ids_to_tokens(input_ids[0][answer_start:answer_end]) 55 | ) 56 | return answer 57 | 58 | 59 | class StylesprintDataset(Dataset): 60 | def __init__(self, tokenizer, data): 61 | tokenizer.pad_token = tokenizer.eos_token 62 | self.tokenizer = tokenizer 63 | self.data = data 64 | 65 | def __len__(self): 66 | return len(self.data) 67 | 68 | def __getitem__(self, idx): 69 | question, answer = self.data[idx]["question"], self.data[idx]["answer"] 70 | 71 | # Tokenize the pair 72 | encoding = self.tokenizer.encode_plus( 73 | question, 74 | answer, 75 | add_special_tokens=True, 76 | max_length=512, 77 | padding="max_length", 78 | truncation=True, 79 | return_offsets_mapping=True, 80 | ) 81 | input_ids = encoding["input_ids"] 82 | attention_mask = encoding["attention_mask"] 83 | offset_mapping = encoding["offset_mapping"] 84 | 85 | # Initialize start and end positions to None 86 | start_positions = None 87 | end_positions = None 88 | 89 | # Find the start and end of the answer in the tokenized sequence 90 | for i, offset in enumerate(offset_mapping): 91 | if ( 92 | start_positions is None 93 | and offset[0] == 0 94 | and 
self.tokenizer.decode([input_ids[i]]).strip() == answer.split()[0] 95 | ): 96 | start_positions = i 97 | if ( 98 | offset[1] == len(answer) 99 | and self.tokenizer.decode([input_ids[i]]).strip() == answer.split()[-1] 100 | ): 101 | end_positions = i 102 | 103 | # Ensure that start and end positions are set 104 | if start_positions is None or end_positions is None: 105 | start_positions = 0 106 | end_positions = 0 107 | 108 | # Return the inputs and positions 109 | return { 110 | "input_ids": input_ids, 111 | "attention_mask": attention_mask, 112 | "start_positions": start_positions, 113 | "end_positions": end_positions, 114 | } 115 | 116 | 117 | if __name__ == "__main__": 118 | import sys 119 | 120 | # Instruction: 121 | # Run the script with the following command: python task_specific.py 122 | # To load the model from a checkpoint, run: python task_specific.py True 123 | # Ensure to have the HF_TOKEN environment variable set if using models that require authentication 124 | # Models must be compatible with AutoModelForQuestionAnswering (e.g., t5, flan-t5-small, flan-t5-base, etc.) 125 | 126 | ts = TaskSpecificFineTuning("google/flan-t5-base") 127 | load_from_checkpoint = sys.argv[1] if len(sys.argv) > 1 else False 128 | 129 | if load_from_checkpoint: 130 | model_path = "./stylesprint_qa_model/" 131 | ts.model = ts.model.from_pretrained(ts.model.get_base_model(), model_path) 132 | else: 133 | demo_data = [] 134 | with open("qa_demo.json", "r") as f: 135 | demo_data = json.load(f) 136 | 137 | # Split the mock dataset into training and evaluation sets (50/50) 138 | train_data = StylesprintDataset(ts.tokenizer, demo_data[: len(demo_data) // 2]) 139 | eval_data = StylesprintDataset(ts.tokenizer, demo_data[len(demo_data) // 2 :]) 140 | 141 | # Training arguments 142 | training_args = TrainingArguments( 143 | output_dir="./results", 144 | num_train_epochs=10, 145 | per_device_train_batch_size=16, 146 | per_device_eval_batch_size=64, 147 | warmup_steps=500, 148 | weight_decay=0.01, 149 | logging_dir="./logs", 150 | logging_steps=10, 151 | ) 152 | 153 | # Initialize the Trainer 154 | trainer = Trainer( 155 | model=ts.model, 156 | args=training_args, 157 | train_dataset=train_data, 158 | eval_dataset=eval_data, 159 | ) 160 | 161 | # Start training 162 | trainer.train() 163 | 164 | # Save the model 165 | ts.model.save_pretrained("./stylesprint_qa_model") 166 | 167 | # Evaluate the model 168 | question = "Can I exchange an online purchase?" 169 | 170 | # Imagine: Top result returned from search integration 171 | context = """ 172 | At Stylesprint, we strive to ensure the utmost satisfaction for all our customers. Our return and exchange policy is crafted to provide you with a seamless and convenient shopping experience. If you're not completely satisfied with your purchase, you can return or exchange your items within 30 days from the date of purchase. To be eligible for a return or exchange, items must be in their original, unworn condition with all tags attached. Footwear returns must include the original shoebox in its original condition. We request that you provide a valid proof of purchase with any return. Refunds will be processed to the original method of payment and may take up to two billing cycles to appear on your credit card statement. 173 | In the case of exchanges, the availability of your desired item will be confirmed upon processing. If the item is not available, we will issue a refund instead. Please note that sale items are only eligible for exchange and not for refunds. Our aim is to make your shopping experience as enjoyable as possible, and our dedicated customer service team is always here to assist you with any concerns or questions you may have regarding our return policy. 174 | """ 175 |
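# Note (annotation, not part of the original source): ask_question performs
# extractive QA -- the model does not generate free text, it selects a span of
# the supplied context. start_logits and end_logits score every token position,
# and the answer is decoded from argmax(start) through argmax(end), roughly:
#     start = torch.argmax(outputs.start_logits)
#     end = torch.argmax(outputs.end_logits) + 1
#     answer = tokenizer.decode(input_ids[0][start:end])
# With a lightly trained adapter these argmaxes can bracket a long span, which
# is why the sample output below is verbose rather than a crisp one-liner.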
176 | answer = ts.ask_question( 177 | question, context, device="mps" 178 | ) # "mps" on Apple Silicon, "cpu" on machines without a GPU, "cuda" for NVIDIA GPUs 179 | print("Question:", question) 180 | print("Answer:", answer) 181 | 182 | """ 183 | Output from a successful run: 184 | Question: Can I exchange an online purchase? 185 | Answer: exchange, items must be in their original, unworn condition with all tags attached. Footwear returns must include the original shoebox in its original condition. We request that you provide 186 | """ 187 | -------------------------------------------------------------------------------- /Chapter6/domain_adapt.py: -------------------------------------------------------------------------------- 1 | # !pip install sentence-transformers transformers peft datasets 2 | 3 | from transformers import AutoTokenizer, AutoModelForCausalLM 4 | from peft import AdaLoraConfig, get_peft_model 5 | from transformers import Trainer, TrainingArguments 6 | from datasets import load_dataset, load_metric 7 | import numpy as np 8 | 9 | 10 | class DomainAdaptation: 11 | def __init__(self, model_path="bigscience/bloom-1b1"): 12 | self.model_path = model_path 13 | self.model = AutoModelForCausalLM.from_pretrained(self.model_path) 14 | self.tokenizer = AutoTokenizer.from_pretrained(self.model_path) 15 | self.configure_peft_adapter() 16 | self.metric = load_metric("accuracy") 17 | 18 | def configure_peft_adapter(self, verbose=True) -> None: 19 | """Configure the PEFT adapter.""" 20 | adapter_config = AdaLoraConfig(target_r=16) 21 | self.model.add_adapter(adapter_config) 22 | self.model = get_peft_model(self.model, adapter_config) 23 | if verbose: 24 | self.model.print_trainable_parameters() 25 | 26 | def compute_metrics(self, eval_pred) -> dict: 27 | """Compute the accuracy of the model on the test set.""" 28 | logits, labels = eval_pred 29 | predictions = np.argmax(logits, axis=-1) 30 | return self.metric.compute(predictions=predictions, references=labels) 31 | 32 | def preprocess_function(self, examples) -> dict: 33 | """Preprocess the input data.""" 34 | inputs = self.tokenizer( 35 | examples["text"], truncation=True, padding="max_length", max_length=512 36 | ) 37 | inputs["labels"] = inputs["input_ids"].copy() 38 | return inputs 39 | 40 | def predict(self, prompt) -> str: 41 | # Encode the prompt and generate text 42 | inputs = self.tokenizer(prompt, return_tensors="pt") 43 | output = self.model.generate( 44 | **inputs, max_length=50 45 | ) # Adjust max_length as needed 46 | 47 | # Decode and return the generated text 48 | generated_text = self.tokenizer.decode(output[0], skip_special_tokens=True) 49 | return generated_text 50 | 51 | 52 | if __name__ == "__main__": 53 | # Instruction: 54 | # Run the script with the following command: python domain_adapt.py 55 | # Ensure to have the train.txt and test.txt files in the same directory as this script 56 | 57 | da = DomainAdaptation() 58 | 59 | # Load and preprocess your domain-specific dataset 60 | dataset = load_dataset( 61 | "text", data_files={"train": "./train.txt", "test": "./test.txt"} 62 | ) 63 | 64 | tokenized_datasets = dataset.map(da.preprocess_function, batched=True) 65 | print("Training set: ", len(tokenized_datasets["train"])) 66 | 67 | # Define training arguments
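# Note (annotation, not part of the original source): this script does causal-LM
# domain adaptation -- preprocess_function above sets labels = input_ids, and the
# model shifts the labels internally so each token is predicted from its prefix.
# In the arguments below, load_best_model_at_end=True requires that
# evaluation_strategy and save_strategy match (both "epoch" here), and
# metric_for_best_model="eval_loss" keeps the checkpoint with the lowest
# validation loss on test.txt.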
68 | training_args = TrainingArguments( 69 | output_dir="./model_output", 70 | per_device_train_batch_size=2, # Adjust batch size according to your GPU 71 | num_train_epochs=2, 72 | save_steps=1000, 73 | save_total_limit=2, 74 | prediction_loss_only=True, 75 | evaluation_strategy="epoch", 76 | save_strategy="epoch", 77 | load_best_model_at_end=True, 78 | metric_for_best_model="eval_loss", 79 | logging_dir="./logs", 80 | logging_steps=10, 81 | remove_unused_columns=False, 82 | ) 83 | 84 | # Initialize the Trainer 85 | trainer = Trainer( 86 | model=da.model, 87 | args=training_args, 88 | train_dataset=tokenized_datasets["train"], 89 | eval_dataset=tokenized_datasets["test"], 90 | compute_metrics=da.compute_metrics, 91 | ) 92 | 93 | # Start training 94 | trainer.train() 95 | 96 | # Save the trained model 97 | da.model.save_pretrained("./proxima_da_model") 98 | 99 | # Generate text using the trained model (predict is defined on the DomainAdaptation wrapper) 100 | result = da.predict("The Proxima Passkey is") 101 | print(result) 102 | -------------------------------------------------------------------------------- /Chapter6/score.py: -------------------------------------------------------------------------------- 1 | # pip install rouge 2 | from rouge import Rouge 3 | 4 | 5 | if __name__ == "__main__": 6 | # Example reference text (what we expect the model to generate after training on a complete dataset) 7 | reference = "Proxima's Passkey enables seamless integration of diverse financial portfolios, offering unparalleled access to global investment opportunities and streamlined asset management." 8 | 9 | # Example predicted model output 10 | predicted = "The Proxima Passkey provides a unified platform for managing various investment portfolios, granting access to worldwide investment options and efficient asset control."
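# Note (annotation, not part of the original source): rouge.get_scores(predicted, reference)
# returns one dict per sentence pair, shaped like:
#     [{"rouge-1": {"r": ..., "p": ..., "f": ...},
#       "rouge-2": {"r": ..., "p": ..., "f": ...},
#       "rouge-l": {"r": ..., "p": ..., "f": ...}}]
# where r/p/f are recall, precision, and F1 over unigram overlap (rouge-1), bigram
# overlap (rouge-2), and longest common subsequence (rouge-l). For paraphrases like
# the pair above, expect moderate rouge-1 but low rouge-2, since exact wording differs.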
11 | 12 | # Initialize the Rouge metric 13 | rouge = Rouge() 14 | 15 | # Compute the Rouge scores 16 | scores = rouge.get_scores(predicted, reference) 17 | 18 | print(scores) -------------------------------------------------------------------------------- /Chapter6/test.txt: -------------------------------------------------------------------------------- 1 | Proxima Passkey introduces an innovative approach to managing diverse asset classes for individual investors 2 | With Proxima Passkey, navigate the complex world of commodities trading with ease and precision 3 | Proxima Passkey's regular market updates keep investors informed of the latest trends and opportunities 4 | Unlock the potential of international real estate investments with Proxima Passkey's exclusive access 5 | Proxima Passkey's dedicated investment concierge service offers personalized guidance for your portfolio 6 | Experience the flexibility of changing investment strategies with Proxima Passkey's adaptive portfolio models 7 | Proxima Passkey's collaboration with leading financial experts brings top-tier advice to your fingertips 8 | Enhance your portfolio's growth potential with Proxima Passkey's cutting-edge market analysis tools 9 | Proxima Passkey's robust risk assessment features help safeguard your investments against market volatility 10 | Diversify your investment portfolio with Proxima Passkey's access to global stock markets and currencies 11 | Proxima Passkey streamlines the investment process for busy professionals seeking efficient wealth management 12 | Plan for your child's future with Proxima Passkey's education fund investment options 13 | Proxima Passkey's intuitive platform democratizes investing, making it accessible to new investors 14 | Stay ahead in the fast-paced world of tech investments with insights from Proxima Passkey 15 | Proxima Passkey's seamless integration with banking services streamlines your financial management 16 | With Proxima Passkey, enjoy the confidence of investing in vetted, high-yield opportunities 17 | Proxima Passkey's AI-driven predictions offer a competitive edge in stock market investments 18 | Tap into the potential of emerging markets with Proxima Passkey's research-backed investment strategies 19 | Proxima Passkey's ethical investment options align with your values for responsible investing 20 | Create a legacy of wealth with Proxima Passkey's long-term investment planning tools 21 | Proxima Passkey's real-time alerts keep you updated on crucial investment changes and news 22 | Make informed decisions with Proxima Passkey's comprehensive database of market research and analysis 23 | Optimize your tax benefits with Proxima Passkey's smart investment structuring 24 | Balance your investment portfolio with Proxima Passkey's blend of aggressive and conservative assets 25 | Proxima Passkey's AI algorithms continuously optimize your portfolio for maximum returns 26 | With Proxima Passkey, access exclusive investment clubs and networks for high-net-worth individuals 27 | Proxima Passkey's dynamic asset allocation adapts to changing market conditions for optimal performance 28 | Invest in your passions with Proxima Passkey's unique opportunities in art, wine, and collectibles 29 | Proxima Passkey ensures transparent, ethical practices in all your investment dealings 30 | Get real-time portfolio performance analytics at your fingertips with Proxima Passkey 31 | Proxima Passkey simplifies retirement planning with its comprehensive suite of tools and resources 32 | Harness the 
potential of Proxima Passkey's global economic insights for your investment strategy 33 | Proxima Passkey's user-friendly mobile app keeps your investment portfolio within reach at all times 34 | With Proxima Passkey, align your investments with the latest sustainability and green initiatives 35 | Proxima Passkey's bespoke portfolio design caters to the unique needs of each individual investor 36 | Proxima Passkey's fractional share investing feature allows you to own pieces of high-priced stocks. 37 | Avoid management hassles by using Proxima Passkey's prebuilt portfolios aligned to your investing style. 38 | Invest in economic growth trends early through Proxima Passkey's exposure to frontier markets. 39 | Tap into commercial real estate opportunities like self storage and medical office space using Proxima Passkey. 40 | Proxima Passkey facilitates angel investing and access to private funding rounds for startups. 41 | Get priority access to oversubscribed VC funding opportunities through Proxima Passkey partnerships. 42 | Mitigate concentration risk via Proxima Passkey's automated rebalancing and diversification tools. 43 | Design an ESG-focused portfolio matching your values utilizing Proxima Passkey model portfolios. 44 | Construct a margin-optimized portfolio to maximize returns relative to risks via Proxima Passkey. 45 | Dollar cost average into stocks and funds effortlessly with Proxima Passkey automated recurring trades. 46 | Personalize your trading strategy by customizing Proxima Passkey model portfolios to your style. 47 | Access carefully evaluated fund managers and alternative investment opportunities with Proxima Passkey. 48 | Invest globally by allocating across both developed and emerging markets through Proxima Passkey. 49 | Automate your long-term wealth creation strategy with Proxima Passkey retirement planning functionality. 50 | Customize your exposure across sectors, industries, geographies and factors using Proxima Passkey tools. 51 | Tap into commercial real estate opportunities like self storage and medical office space using Proxima Passkey. 52 | Get priority access to oversubscribed VC funding opportunities through Proxima Passkey partnerships. 53 | Mitigate concentration risk via Proxima Passkey's automated rebalancing and diversification tools. 54 | Design an ESG-focused portfolio matching your values utilizing Proxima Passkey model portfolios. 55 | Construct a margin-optimized portfolio to maximize returns relative to risks via Proxima Passkey. -------------------------------------------------------------------------------- /Chapter6/train.txt: -------------------------------------------------------------------------------- 1 | Dollar cost average into stocks and funds effortlessly with Proxima Passkey automated recurring trades. 2 | Personalize your trading strategy by customizing Proxima Passkey model portfolios to your style. 3 | Access carefully evaluated fund managers and alternative investment opportunities with Proxima Passkey. 4 | Invest globally by allocating across both developed and emerging markets through Proxima Passkey. 5 | Automate your long-term wealth creation strategy with Proxima Passkey retirement planning functionality. 6 | Customize your exposure across sectors, industries, geographies and factors using Proxima Passkey tools. 7 | Utilize Proxima Passkey's proprietary algorithms for scientifically constructed, optimized portfolios. 8 | Get customized guidance on tax-minimization strategies from Proxima Passkey's financial planning team. 
9 | Invest in athletics franchises and sports teams through Proxima Passkey's exclusive entertainment fund offerings. 10 | Fund your next venture with early stage capital sourced by Proxima Passkey's startup investment unit. 11 | Diversify into art, wines, collectibles, and other hard assets by leveraging Proxima Passkey access. 12 | Tailor commodity exposure to your outlooks with Proxima Passkey's adaptable materials investments. 13 | Pilot cutting-edge robotic and AI portfolios powered by Proxima Passkey quantitative insights. 14 | Finance global property development ventures through Proxima Passkey real estate investment trusts. 15 | Prepare for market shifts with Proxima Passkey's automated rebalancing tools and crash protection features. 16 | Get priority share allocations in oversubscribed stock and fund offerings via Proxima Passkey. 17 | Mirror the holdings of star fund managers by investing in Proxima Passkey's actively managed portfolios. 18 | Engineer a high dividend strategy optimized by Proxima Passkey's quantitative dividend optimization tools. 19 | Monetize web traffic, social media assets and creators equity with Proxima Passkey Creator Economy funds. 20 | Gain an analytical edge from daily briefings by Proxima Passkey's award-winning research team. 21 | Fund your next life sciences venture with specialized biotech investment opportunities from Proxima Passkey. 22 | Diversify into blockchain markets and crypto assets through regulated Proxima Passkey investment products. 23 | Access institutional-grade analytics and reporting to track portfolio performance with Proxima Passkey. 24 | Customize sector allocations to play offense or defense across market cycles with Proxima Passkey. 25 | Construct an automated trend-following portfolio using Proxima Passkey's quantitative momentum indicators. 26 | Invest in media properties and entertainment royalties through Proxima Passkey's hybrid public/private offerings. 27 | Get priority share allocations in oversubscribed stock and fund offerings via Proxima Passkey. 28 | Mirror the holdings of star fund managers by investing in Proxima Passkey's actively managed portfolios. 29 | Engineer a high dividend strategy optimized by Proxima Passkey's quantitative dividend optimization tools. 30 | Monetize web traffic, social media assets and creators equity with Proxima Passkey Creator Economy funds. 31 | Gain an analytical edge from daily briefings by Proxima Passkey's award-winning research team. 
32 | Proxima Passkey's advanced portfolio tracking tools keep investors on top of their financial health 33 | Explore exclusive hedge fund investments with the strategic insights offered by Proxima Passkey 34 | Proxima Passkey's blockchain technology ensures secure and transparent transaction recording 35 | Benefit from Proxima Passkey's partnerships with top-tier financial institutions for premier investment opportunities 36 | Proxima Passkey enables quick portfolio adjustments in response to market news and events 37 | Harness the power of collective investment wisdom with Proxima Passkey's community-driven fund choices 38 | Proxima Passkey's tax optimization tools help in efficiently managing capital gains and losses 39 | With Proxima Passkey, track and manage your investments in precious metals and commodities 40 | Proxima Passkey's algorithmic trading options offer a high-tech approach to portfolio management 41 | Experience bespoke customer service with Proxima Passkey's dedicated account managers for each investor 42 | Proxima Passkey's regular investment webinars and seminars provide valuable market insights 43 | Maximize returns on foreign investments with Proxima Passkey's currency exchange features 44 | Proxima Passkey's retirement calculators aid in planning long-term investment strategies for post-retirement life 45 | Stay informed about global economic shifts with Proxima Passkey's international market analysis 46 | Proxima Passkey's beginner-friendly interface makes it easy for newcomers to start investing confidently 47 | Benefit from Proxima Passkey's automatic adjustment to portfolios based on market volatility indicators 48 | Proxima Passkey provides exclusive access to pre-IPO investing, allowing early investment in rising companies 49 | Leverage Proxima Passkey's thematic investment options to invest in trends like AI and renewable energy 50 | Proxima Passkey's portfolio stress-testing feature helps in understanding potential investment risks 51 | Invest in socially responsible funds focusing on corporate ethics and sustainability with Proxima Passkey 52 | Proxima Passkey simplifies complex investment decisions with its intuitive AI-driven advice 53 | Embrace Proxima Passkey's global investing approach for access to emerging and established markets 54 | With Proxima Passkey, experience the convenience of seamless cross-platform investment management 55 | Proxima Passkey's continuous performance analysis helps in identifying the most lucrative investment opportunities 56 | Leverage Proxima Passkey's vast network of industry experts for insider knowledge and insights 57 | Proxima Passkey's educational resources are perfect for those new to investing or looking to expand their knowledge 58 | With Proxima Passkey, get personalized investment suggestions based on your individual risk tolerance and goals 59 | Proxima Passkey's dynamic risk management system continuously protects your investments 60 | Invest in top-tier tech startups through Proxima Passkey's exclusive venture capital partnerships 61 | Proxima Passkey's regular financial health checkups ensure your investment strategy stays on track 62 | Proxima Passkey provides a holistic view of your financial portfolio, including assets, liabilities, and net worth 63 | Benefit from Proxima Passkey's strategic collaborations with leading global financial analysts 64 | Proxima Passkey's user-centric design makes managing complex investment portfolios straightforward and efficient 65 | With Proxima Passkey, diversify your 
investments in international bond markets for balanced portfolio growth 66 | Stay updated on the latest financial laws and regulations with Proxima Passkey's compliance tracking features 67 | Proxima Passkey's predictive analytics forecast potential market shifts, helping you stay ahead 68 | Use Proxima Passkey's scenario analysis tools to understand the impact of different economic conditions on your portfolio 69 | With Proxima Passkey, access real-time data on global market trends to inform your investment decisions 70 | Experience the power of Proxima Passkey's integrated financial planning and advisory services 71 | Proxima Passkey's customizable dashboards provide quick insights into your investment performance 72 | Allocate across emerging food technologies like alternative proteins and vertical farming through Proxima Passkey. 73 | Gain offshore investment expertise with Proxima Passkey's international wealth management partners. 74 | Finance your next real estate project, from fix-and-flip to multifamily properties, using Proxima Passkey capital. 75 | Hedge against inflation by investing in Proxima Passkey portfolios tilted towards hard assets. 76 | Fund space technologies like asteroid mining, satellites, and off-planet infrastructure using Proxima Passkey. 77 | Diversify into digital fashion, metaverse real estate, NFTs and tokenomics using Proxima Passkey. 78 | Profit from advancements in longevity science with investment products tailored by Proxima Passkey. 79 | Get early financing for scientific breakthrough R&D leveraging Proxima Passkey relationships. 80 | Invest in digital supply chain innovations through Proxima Passkey's custom logistics offerings. 81 | Tap growth in Africa's technology ecosystem with funds managed by Proxima Passkey partners. 82 | Allocate efficiently across markets using Proxima Passkey's multi-asset class portfolio construction tools. 83 | Gain an inside edge from Proxima Passkey's exclusive industry research panels and expert interviews. 84 | Profit from the transition to renewable energy with green hydrogen and battery investments via Proxima Passkey. 85 | Fund your next documentary film project or indie studio through media funds offered by Proxima Passkey. 86 | Diversify across economic cycles and regimes with Proxima Passkey's scenario modelling capabilities. 87 | Construct a venture capital portfolio from Proxima Passkey's hand-picked and pre-vetted startup deals. 88 | Customize investment horizons from ultra short-term algorithmic strategies to intergenerational trusts on Proxima Passkey. 89 | Automate compliance and reporting across complex asset holdings with Proxima Passkey unified wealth management. 90 | Get priority access to exclusive investments in categories like aircraft leasing only open to elite Proxima Passkey clients. 91 | Participate in impact initiatives globally with funds directly financing projects to better communities curated by Proxima Passkey. 92 | Finance scientific research and development partnerships through Proxima Passkey's expert science investment team. 93 | Construct customized sector baskets across technology, industrial, consumer, and healthcare industries using Proxima Passkey. 94 | Invest in digital governance innovations like e-citizenship, decentralized identity, and regulatory technology via Proxima Passkey. 95 | Get streaming portfolio analytics from Proxima Passkey’s collaborative data platform integrating multiple custodians. 
96 | Engineer portfolio resilience across climate scenarios leveraging Proxima Passkey environmental analytics dashboards.
97 | Finance your next deep tech breakthrough leveraging Proxima Passkey's scientific research capital partners.
98 | Gain an analytical edge from daily briefings by Proxima Passkey’s award-winning in-house investment research team.
99 | Invest in the growing digital education market through Proxima Passkey ed-tech private equity offerings.
100 | Democratize deal flow from top-tier venture capital and private equity funds via Proxima Passkey feeder funds.
101 | Construct a globally diversified portfolio across developed, emerging, and frontier markets using Proxima Passkey tools.
102 | Get equity-level returns and cash flow from music catalogs, film rights, and other royalties facilitated by Proxima Passkey.
103 | Monetize your social media following with Proxima Passkey influencer stock grants and fan token offerings tailored to content creators.
104 | Finance your next creative passion project from indie films to video games with Proxima Passkey alternative funding sources.
105 | Invest in digital fashion brands pioneering virtual clothing for metaverse avatars via Proxima Passkey.
106 | Diversify into social commerce, the passion economy, and creator monetization platforms with Proxima Passkey private investments.
107 | Participate in celebrity venture capital deals co-investing alongside famous artists, athletes and entertainers with Proxima Passkey.
108 | Construct a globally diversified portfolio across developed, emerging, and frontier markets using Proxima Passkey tools.
109 | Get equity-level returns and cash flow from music catalogs, film rights, and other royalties facilitated by Proxima Passkey.
110 | Monetize your social media following with Proxima Passkey influencer stock grants and fan token offerings tailored to content creators.
111 | Finance your next creative passion project from indie films to video games with Proxima Passkey alternative funding sources.
112 | Achieve investment diversification by allocating across Proxima Passkey’s range of actively managed fund strategies.
113 | Engineer portfolio resilience across uncertainties leveraging Proxima Passkey’s scenario analysis capabilities.
114 | Construct an automated trend-following portfolio using Proxima Passkey’s quantitative timing indicators for momentum-based trading.
115 | Insulate against inflation with Proxima Passkey model portfolios oriented towards hard assets and pricing power.
116 | Analyze holdings across dimensions like profitability, valuation, quality, and growth using Proxima Passkey proprietary analytics dashboards.
117 | Stress test portfolios across interest rate environments using Proxima Passkey’s quantitative risk management tools.
118 | Engineer portfolio resilience across climate scenarios leveraging Proxima Passkey environmental analytics dashboards.
119 | Neutralize currency risk for international investments with Proxima Passkey's multi-currency support.
120 | Get streaming portfolio analytics from Proxima Passkey’s collaborative data platform integrating multiple custodians.
121 | Fund your next startup or small business venture with Proxima Passkey competitive seed and working capital loans.
122 | Construct an investment portfolio aligned with United Nations Sustainable Development goals facilitated by Proxima Passkey.
123 | Manage philanthropy and coordinate giving across causes and grantees with Proxima Passkey's donor-advised fund.
124 | Invest in digital public infrastructure innovations like smart city technology enabled by Proxima Passkey partnerships.
125 | Get ahead of demographic shifts with Proxima Passkey model portfolios tailored to emerging global middle class growth trends.
126 | Hedge against rising interest rates using Proxima Passkey portfolios optimized for changing rate environments.
127 | Participate in flying taxi, hyperloop, and next-gen transport projects with investment syndicates organized by Proxima Passkey.
128 | Get favorable rates on loans, insurance and other financial products through Proxima Passkey aggregated buying programs.
129 | Customize healthcare investment exposure across pharma, payors, genomics, devices, and services with Proxima Passkey.
--------------------------------------------------------------------------------
/Chapter7/eval_rag.py:
--------------------------------------------------------------------------------
1 | # !pip install ragas tqdm llama-index faiss-cpu llama-index-vector-stores-faiss
2 | 
3 | import os
4 | import faiss
5 | from datasets import Dataset
6 | from llama_index.core import (
7 |     SimpleDirectoryReader,
8 |     StorageContext,
9 |     VectorStoreIndex,
10 |     Response,
11 | )
12 | from llama_index.vector_stores.faiss import FaissVectorStore
13 | from ragas import evaluate
14 | from ragas.metrics import (
15 |     answer_relevancy,
16 |     context_precision,
17 |     context_recall,
18 |     context_relevancy,
19 |     faithfulness,
20 | )
21 | from ragas.metrics.critique import harmfulness
22 | from tqdm.auto import tqdm
23 | from typing import List, Dict, Any, Callable
24 | 
25 | # ensure our API key is set
26 | assert os.getenv("OPENAI_API_KEY") is not None, "Please set OPENAI_API_KEY"
27 | 
28 | 
29 | def load_index(dir_path: str = "products/", dim: int = 1536) -> VectorStoreIndex:
30 |     """Load the index from the given directory path"""
31 |     documents = SimpleDirectoryReader(dir_path).load_data()
32 |     faiss_index = faiss.IndexFlatL2(dim)
33 |     vector_store = FaissVectorStore(faiss_index=faiss_index)
34 |     storage_context = StorageContext.from_defaults(vector_store=vector_store)
35 |     index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
36 |     return index
37 | 
38 | 
39 | def main() -> None:
40 |     # Load the index and create a query engine
41 |     index: VectorStoreIndex = load_index()
42 |     query_engine = index.as_query_engine()
43 | 
44 |     # Define the questions to ask the model
45 |     questions: List[str] = [
46 |         "What features does the Chic Summer Dress offer?",
47 |         "How much does the Urban Streetwear Hoodie cost?",
48 |         "What material is the Sleek Leather Jacket made of?",
49 |         "What are the key characteristics of the Vintage High-Waisted Jeans?",
50 |     ]
51 | 
52 |     # Query the model and get the responses
53 |     response_objects: List[Response] = []
54 |     for q in tqdm(questions):
55 |         response: Response = query_engine.query(q)
56 |         response_objects.append(response)
57 | 
58 |     # Extract the responses from the response objects
59 |     engine_responses: List[str] = [r.response for r in response_objects]
60 | 
61 |     # Define the evaluation data
62 |     eval_data: Dict[str, Any] = {
63 |         "question": questions,
64 |         "answer": engine_responses,
65 |         "contexts": [
66 |             [
67 |                 "A lightweight summer dress with a vibrant floral print, perfect for sunny days."
68 |             ],
69 |             [
70 |                 "An edgy hoodie featuring a bold graphic design, complete with a cozy kangaroo pocket."
71 |             ],
72 |             [
73 |                 "A sleek leather jacket that offers a slim fit and stylish zippered pockets for the modern urban look."
74 |             ],
75 |             [
76 |                 "High-waisted jeans with just the right amount of stretch and distressed details for a vintage vibe."
77 |             ],
78 |         ],
79 |         "ground_truth": [
80 |             "A Chic Summer Dress that features a lightweight fabric with a vibrant floral print and a knee-length cut, perfect for sunny days.",
81 |             "The price of the Urban Streetwear Hoodie, which has an adjustable hood and a kangaroo pocket with a bold graphic design, is $79.99.",
82 |             "A Sleek Leather Jacket made of genuine leather, featuring zippered pockets and a slim fit for the modern urban look.",
83 |             "Vintage High-Waisted Jeans which are high-waisted with distressed details and made of stretch denim, embodying a vintage vibe.",
84 |         ],
85 |     }
86 |     # Create a dataset from the evaluation data
87 |     dataset: Dataset = Dataset.from_dict(eval_data)
88 | 
89 |     # Define the evaluation metrics
90 |     metrics: List[Callable] = [
91 |         faithfulness,
92 |         answer_relevancy,
93 |         context_precision,
94 |         context_recall,
95 |         context_relevancy,
96 |         harmfulness,
97 |     ]
98 | 
99 |     # Evaluate the model using the defined metrics
100 |     result: Dict[str, float] = evaluate(dataset, metrics=metrics)
101 |     print(result)
102 | 
103 | 
104 | if __name__ == "__main__":
105 |     main()
106 | 
--------------------------------------------------------------------------------
/Chapter7/products/products.csv:
--------------------------------------------------------------------------------
1 | ID,Name,Category,Features,Price,ImageURL,Description
2 | 1,Chic Summer Dress,Dresses,lightweight fabric; floral print; knee-length,59.99,http://example.com/images/dress1.jpg,A lightweight summer dress with a vibrant floral print, perfect for sunny days.
3 | 2,Urban Streetwear Hoodie,Hoodies,adjustable hood; kangaroo pocket; graphic design,79.99,http://example.com/images/hoodie1.jpg,An edgy hoodie featuring a bold graphic design, complete with a cozy kangaroo pocket.
4 | 3,Sleek Leather Jacket,Jackets,genuine leather; zippered pockets; slim fit,199.99,http://example.com/images/jacket1.jpg,A sleek leather jacket that offers a slim fit and stylish zippered pockets for the modern urban look.
5 | 4,Vintage High-Waisted Jeans,Jeans,high-waisted; distressed details; stretch denim,89.99,http://example.com/images/jeans1.jpg,High-waisted jeans with just the right amount of stretch and distressed details for a vintage vibe.
6 | 
--------------------------------------------------------------------------------
/Chapter7/rag.py:
--------------------------------------------------------------------------------
1 | # !pip install llama-index faiss-cpu llama-index-vector-stores-faiss
2 | 
3 | import faiss
4 | from llama_index.core import SimpleDirectoryReader, StorageContext, VectorStoreIndex
5 | from llama_index.vector_stores.faiss import FaissVectorStore
6 | 
7 | # from IPython.display import Markdown, display
8 | 
9 | if __name__ == "__main__":
10 |     import os
11 | 
12 |     # Instructions:
13 |     # Run the script with the following command: python rag.py
14 |     # Ensure to have the products directory in the same directory as this script
15 |     # Ensure to have the OPENAI_API_KEY environment variable set
16 | 
17 |     assert os.getenv("OPENAI_API_KEY") is not None, "Please set OPENAI_API_KEY"
18 | 
19 |     # load document vectors
20 |     documents = SimpleDirectoryReader("products/").load_data()
21 | 
22 |     # load faiss index
23 |     d = 1536  # dimension of the vectors
24 |     faiss_index = faiss.IndexFlatL2(d)
25 | 
26 |     # create vector store
27 |     vector_store = FaissVectorStore(faiss_index=faiss_index)
28 |     # initialize storage context
29 |     storage_context = StorageContext.from_defaults(vector_store=vector_store)
30 |     # create index
31 |     index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
32 | 
33 |     # query the index
34 |     query_engine = index.as_query_engine()
35 |     response = query_engine.query("describe summer dress with price")
36 | 
37 |     print(response)
38 | 
--------------------------------------------------------------------------------
/Chapter8/constrained_rag.py:
--------------------------------------------------------------------------------
1 | # !pip install llama-index faiss-cpu llama-index-vector-stores-faiss
2 | 
3 | import faiss
4 | 
5 | from llama_index.core import (
6 |     SimpleDirectoryReader,
7 |     VectorStoreIndex,
8 |     StorageContext,
9 | )
10 | 
11 | from llama_index.vector_stores.faiss import FaissVectorStore
12 | from llama_index.core import get_response_synthesizer
13 | from llama_index.core.retrievers import VectorIndexRetriever
14 | from llama_index.core.query_engine import RetrieverQueryEngine
15 | from llama_index.core.prompts.base import PromptTemplate
16 | from llama_index.core.prompts.prompt_type import PromptType
17 | 
18 | if __name__ == "__main__":
19 |     import os
20 | 
21 |     # Instructions:
22 |     # Run the script with the following command: python constrained_rag.py
23 |     # Ensure to have the products directory in the same directory as this script
24 |     # Ensure to have the OPENAI_API_KEY environment variable set
25 | 
26 |     assert os.getenv("OPENAI_API_KEY") is not None, "Please set OPENAI_API_KEY"
27 | 
28 |     # load document vectors
29 |     documents = SimpleDirectoryReader("products/").load_data()
30 | 
31 |     # load faiss index
32 |     d = 1536  # dimension of the vectors
33 |     faiss_index = faiss.IndexFlatL2(d)
34 | 
35 |     # create vector store
36 |     vector_store = FaissVectorStore(faiss_index=faiss_index)
37 |     # initialize storage context
38 |     storage_context = StorageContext.from_defaults(vector_store=vector_store)
39 |     # create index
40 |     index = VectorStoreIndex.from_documents(documents, storage_context=storage_context)
41 | 
42 |     # Configure retriever
43 |     retriever = VectorIndexRetriever(index=index, similarity_top_k=1)
44 | 
45 |     QA_PROMPT_TMPL = (
46 |         "Context information is below.\n"
47 |         "---------------------\n"
48 |         "{context_str}\n"
49 |         "---------------------\n"
50 |         "Given only the context information and no prior knowledge, "
51 |         "answer the query. If the context does not contain the answer, "
52 |         "state: I cannot answer.\n"
53 |         "Query: {query_str}\n"
54 |         "Answer: "
55 |     )
56 |     STRICT_QA_PROMPT = PromptTemplate(
57 |         QA_PROMPT_TMPL, prompt_type=PromptType.QUESTION_ANSWER
58 |     )
59 | 
60 |     # Configure response synthesizer
61 |     response_synthesizer = get_response_synthesizer(
62 |         structured_answer_filtering=True,
63 |         response_mode="refine",
64 |         text_qa_template=STRICT_QA_PROMPT,
65 |     )
66 | 
67 |     # Assemble query engine
68 |     safe_query_engine = RetrieverQueryEngine(
69 |         retriever=retriever, response_synthesizer=response_synthesizer
70 |     )
71 | 
72 |     # Execute query and evaluate response
73 |     print(safe_query_engine.query("describe a summer dress with price"))
74 |     print(safe_query_engine.query("describe a horse"))
--------------------------------------------------------------------------------
/Chapter8/products/products.csv:
--------------------------------------------------------------------------------
1 | ID,Name,Category,Features,Price,ImageURL,Description
2 | 1,Chic Summer Dress,Dresses,lightweight fabric; floral print; knee-length,59.99,http://example.com/images/dress1.jpg,A lightweight summer dress with a vibrant floral print, perfect for sunny days.
3 | 2,Urban Streetwear Hoodie,Hoodies,adjustable hood; kangaroo pocket; graphic design,79.99,http://example.com/images/hoodie1.jpg,An edgy hoodie featuring a bold graphic design, complete with a cozy kangaroo pocket.
4 | 3,Sleek Leather Jacket,Jackets,genuine leather; zippered pockets; slim fit,199.99,http://example.com/images/jacket1.jpg,A sleek leather jacket that offers a slim fit and stylish zippered pockets for the modern urban look.
5 | 4,Vintage High-Waisted Jeans,Jeans,high-waisted; distressed details; stretch denim,89.99,http://example.com/images/jeans1.jpg,High-waisted jeans with just the right amount of stretch and distressed details for a vintage vibe.
6 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2024 Packt
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # Generative AI Foundations in Python
2 | 
3 | 
4 | 
5 | This is the code repository for [Generative AI Foundations in Python](https://www.packtpub.com/en-us/product/generative-ai-foundations-in-python-9781835460825?type=print), published by Packt.
6 | 
7 | **Discover key techniques and navigate modern challenges in LLMs**
8 | 
9 | ## What is this book about?
10 | 
11 | This guide equips you with the skills needed to implement generative AI in your applications. It covers the foundational elements of transformer-based LLMs and diffusion models by combining theoretical knowledge with practical application.
12 | 
13 | This book covers the following exciting features:
14 | * Discover the fundamentals of GenAI and its foundations in NLP
15 | * Dissect foundational generative architectures including GANs, transformers, and diffusion models
16 | * Find out how to fine-tune LLMs for specific NLP tasks
17 | * Understand transfer learning and fine-tuning to facilitate domain adaptation, including fields such as finance
18 | * Explore prompt engineering, including in-context learning, templatization, and rationalization through chain-of-thought and RAG
19 | * Implement responsible practices with generative LLMs to minimize bias, toxicity, and other harmful outputs
20 | 
21 | If you feel this book is for you, get your [copy](https://www.amazon.com/Generative-Foundations-Python-techniques-challenges/dp/1835460828/ref=sr_1_1?sr=8-1) today!
22 | 
23 | 
24 | ## Instructions and Navigations
25 | All of the code is organized into folders.
26 | 
27 | The code will look like the following:
28 | ```python
29 | # Get the start and end positions
30 | answer_start_scores = outputs.start_logits
31 | answer_end_scores = outputs.end_logits
32 | ```
33 | 
34 | **The following is what you need for this book:**
35 | This book is for developers, data scientists, and machine learning engineers embarking on projects driven by generative AI. A general understanding of machine learning and deep learning, as well as some proficiency with Python, is expected.
36 | 
37 | With the following software and hardware list you can run all code files present in the book (Chapters 1-8).
38 | 
39 | ### Software and Hardware List
40 | 
41 | | Chapter | Software required | OS required |
42 | | ------- | ----------------- | ------------------------------------- |
43 | | 1-8     | Python 3          | GPU-enabled Windows, macOS, or Linux  |
44 | 
45 | ### Related products
46 | * Mastering NLP from Foundations to LLMs [[Packt]](https://www.packtpub.com/en-us/product/mastering-nlp-from-foundations-to-llms-9781804619186) [[Amazon]](https://www.amazon.com/Mastering-NLP-Foundations-LLMs-Techniques/dp/1804619183/ref=sr_1_1?sr=8-1)
47 | 
48 | * OpenAI API Cookbook [[Packt]](https://www.packtpub.com/en-us/product/openai-api-cookbook-9781805121350) [[Amazon]](https://www.amazon.com/OpenAI-API-Cookbook-intelligent-applications/dp/1805121359/ref=tmm_pap_swatch_0?_encoding=UTF8&sr=8-1)
49 | 
50 | ## Get to Know the Author
51 | **Carlos Rodriguez** is the Director of AI Risk for a major financial institution, with an extensive career spanning over 20 years, focused on emerging technologies. Carlos led the development of a first-generation, finance-specific natural language platform and pioneered enterprise machine-learning workflows. He later transitioned to enterprise risk, guiding the AI risk discipline. Carlos holds degrees in data science and machine learning, and is a tireless proponent of responsible and ethical AI.
--------------------------------------------------------------------------------
/utils/img_prepro.py:
--------------------------------------------------------------------------------
1 | # pip install pillow
2 | from PIL import Image
3 | import os
4 | 
5 | 
6 | def resize_images(folder_path: str, new_size: tuple = (800, 600)) -> None:
7 |     for filename in os.listdir(folder_path):
8 |         if filename.lower().endswith((".png", ".jpg", ".jpeg")):
9 |             img_path = os.path.join(folder_path, filename)
10 |             with Image.open(img_path) as img:
11 |                 resized_img = img.resize(new_size)
12 |                 # Save the resized image to the same location
13 |                 resized_img.save(img_path)
14 |             print(f"Resized {filename}")
15 | 
16 | 
17 | if __name__ == "__main__":
18 |     # Replace 'path_to_images' with the path to the folder containing the images
19 |     path_to_images = os.getcwd() + "/Chapter4/data/img/"
20 |     resize_images(path_to_images)
21 | 
--------------------------------------------------------------------------------
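
Note on `utils/img_prepro.py`: `resize(new_size)` stretches every image to exactly 800x600, which can distort the aspect ratio. Where proportions matter, a minimal alternative sketch (a hypothetical `resize_images_keep_aspect` helper, not part of the repository) uses Pillow's `Image.thumbnail()`, which shrinks each image in place while preserving its proportions:

```python
# Hypothetical variant of utils/img_prepro.py; assumes the same folder layout.
from PIL import Image
import os


def resize_images_keep_aspect(folder_path: str, max_size: tuple = (800, 600)) -> None:
    """Shrink images to fit within max_size without distorting them."""
    for filename in os.listdir(folder_path):
        if filename.lower().endswith((".png", ".jpg", ".jpeg")):
            img_path = os.path.join(folder_path, filename)
            with Image.open(img_path) as img:
                # thumbnail() resizes in place, never upscales, and keeps
                # the original aspect ratio within the max_size bounding box
                img.thumbnail(max_size)
                img.save(img_path)
                print(f"Resized {filename} to {img.size}")
```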