├── .gitignore ├── LICENSE ├── README.md ├── forecast.py ├── models ├── FCN.py └── GTM.py ├── requirements.txt ├── train.py └── utils └── data_multitrends.py /.gitignore: -------------------------------------------------------------------------------- 1 | wandb/ 2 | ckpt/ 3 | dataset/ 4 | results/ 5 | gtm_venv/ 6 | log/ 7 | models/__pycache__/ 8 | utils/__pycache__/ 9 | train_all_GTM.sh 10 | .vscode/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 HumaticsLAB 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GTM-Transformer 2 | Official Pytorch Implementation of [**Well Googled is Half Done: Multimodal Forecasting of New Fashion Product Sales with Image-based Google Trends**](https://arxiv.org/abs/2109.09824) paper 3 | 4 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/well-googled-is-half-done-multimodal/new-product-sales-forecasting-on-visuelle)](https://paperswithcode.com/sota/new-product-sales-forecasting-on-visuelle?p=well-googled-is-half-done-multimodal) 5 | 6 | ## Installation 7 | 8 | We suggest the use of VirtualEnv. 9 | 10 | ```bash 11 | 12 | python3 -m venv gtm_venv 13 | source gtm_venv/bin/activate 14 | # gtm_venv\Scripts\activate.bat # If you're running on Windows 15 | 16 | pip install numpy pandas matplotlib opencv-python permetrics Pillow scikit-image scikit-learn scipy tqdm transformers fairseq wandb 17 | 18 | pip install torch torchvision 19 | 20 | # For CUDA11.1 (NVIDIA 3K Serie GPUs) 21 | # Check official pytorch installation guidelines for your system 22 | pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html 23 | 24 | pip install pytorch-lightning 25 | 26 | export INSTALL_DIR=$PWD 27 | 28 | cd $INSTALL_DIR 29 | git clone https://github.com/HumaticsLAB/GTM-Transformer.git 30 | cd GTM-Transformer 31 | mkdir ckpt 32 | mkdir dataset 33 | mkdir results 34 | 35 | unset INSTALL_DIR 36 | ``` 37 | 38 | ## Dataset 39 | 40 | **VISUELLE** dataset is publicly available to download [here](https://forms.gle/cVGQAmxhHf7eRJ937). Please download and extract it inside the dataset folder. 
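Based on the file paths read by `train.py`, `forecast.py` and `utils/data_multitrends.py`, the extracted folder should roughly look as follows (a sketch for orientation only; the exact contents depend on the VISUELLE release you download):

```
dataset/
├── train.csv                 # sales series plus release_date, category, color, fabric, image_path
├── test.csv
├── category_labels.pt        # torch-saved dicts mapping attribute strings to integer ids
├── color_labels.pt
├── fabric_labels.pt
├── gtrends.csv               # weekly Google Trends signals, indexed by date
├── normalization_scale.npy   # scale used to map normalized sales back to units sold
└── images/                   # product images referenced by the image_path column
```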
41 | 42 | ## Training 43 | To train the GTM-Transformer model, please use the following script. Please check the arguments inside the script before launching. 44 | 45 | ```bash 46 | python train.py --data_folder dataset 47 | ``` 48 | 49 | 50 | ## Inference 51 | To evaluate the GTM-Transformer model, please use the following script. Please check the arguments inside the script before launching. 52 | 53 | ```bash 54 | python forecast.py --data_folder dataset --ckpt_path ckpt/model.pth 55 | ``` 56 | 57 | ## Citation 58 | ``` 59 | @misc{skenderi2021googled, 60 | title={Well Googled is Half Done: Multimodal Forecasting of New Fashion Product Sales with Image-based Google Trends}, 61 | author={Geri Skenderi and Christian Joppi and Matteo Denitto and Marco Cristani}, 62 | year={2021}, 63 | eprint={2109.09824}, 64 | } 65 | ``` 66 | -------------------------------------------------------------------------------- /forecast.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import pandas as pd 4 | import numpy as np 5 | import pytorch_lightning as pl 6 | from tqdm import tqdm 7 | from models.GTM import GTM 8 | from models.FCN import FCN 9 | from utils.data_multitrends import ZeroShotDataset 10 | from pathlib import Path 11 | from sklearn.metrics import mean_absolute_error 12 | 13 | 14 | 15 | def cal_error_metrics(gt, forecasts): 16 | # Absolute errors 17 | mae = mean_absolute_error(gt, forecasts) 18 | wape = 100 * np.sum(np.sum(np.abs(gt - forecasts), axis=-1)) / np.sum(gt) 19 | 20 | return round(mae, 3), round(wape, 3) 21 | 22 | 23 | def print_error_metrics(y_test, y_hat, rescaled_y_test, rescaled_y_hat): 24 | mae, wape = cal_error_metrics(y_test, y_hat) 25 | rescaled_mae, rescaled_wape = cal_error_metrics(rescaled_y_test, rescaled_y_hat) 26 | print(mae, wape, rescaled_mae, rescaled_wape) 27 | 28 | def run(args): 29 | print(args) 30 | 31 | # Set up CUDA 32 | device = torch.device(f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu') 33 | 34 | # Seeds for reproducibility 35 | pl.seed_everything(args.seed) 36 | 37 | # Load sales data 38 | test_df = pd.read_csv(Path(args.data_folder + 'test.csv'), parse_dates=['release_date']) 39 | item_codes = test_df['external_code'].values 40 | 41 | # Load category, color and fabric encodings 42 | cat_dict = torch.load(Path(args.data_folder + 'category_labels.pt')) 43 | col_dict = torch.load(Path(args.data_folder + 'color_labels.pt')) 44 | fab_dict = torch.load(Path(args.data_folder + 'fabric_labels.pt')) 45 | 46 | # Load Google trends 47 | gtrends = pd.read_csv(Path(args.data_folder + 'gtrends.csv'), index_col=[0], parse_dates=True) 48 | 49 | test_loader = ZeroShotDataset(test_df, Path(args.data_folder + '/images'), gtrends, cat_dict, col_dict, \ 50 | fab_dict, args.trend_len).get_loader(batch_size=1, train=False) 51 | 52 | 53 | model_savename = f'{args.wandb_run}_{args.output_dim}' 54 | 55 | # Create model 56 | model = None 57 | if args.model_type == 'FCN': 58 | model = FCN( 59 | embedding_dim=args.embedding_dim, 60 | hidden_dim=args.hidden_dim, 61 | output_dim=args.output_dim, 62 | cat_dict=cat_dict, 63 | col_dict=col_dict, 64 | fab_dict=fab_dict, 65 | use_trends=args.use_trends, 66 | use_text=args.use_text, 67 | use_img=args.use_img, 68 | trend_len=args.trend_len, 69 | num_trends=args.num_trends, 70 | use_encoder_mask=args.use_encoder_mask, 71 | gpu_num=args.gpu_num 72 | ) 73 | else: 74 | model = GTM( 75 | embedding_dim=args.embedding_dim, 76 |
hidden_dim=args.hidden_dim, 77 | output_dim=args.output_dim, 78 | num_heads=args.num_attn_heads, 79 | num_layers=args.num_hidden_layers, 80 | cat_dict=cat_dict, 81 | col_dict=col_dict, 82 | fab_dict=fab_dict, 83 | use_text=args.use_text, 84 | use_img=args.use_img, 85 | trend_len=args.trend_len, 86 | num_trends=args.num_trends, 87 | use_encoder_mask=args.use_encoder_mask, 88 | autoregressive=args.autoregressive, 89 | gpu_num=args.gpu_num 90 | ) 91 | 92 | model.load_state_dict(torch.load(args.ckpt_path)['state_dict'], strict=False) 93 | 94 | # Forecast the testing set 95 | model.to(device) 96 | model.eval() 97 | gt, forecasts, attns = [], [],[] 98 | for test_data in tqdm(test_loader, total=len(test_loader), ascii=True): 99 | with torch.no_grad(): 100 | test_data = [tensor.to(device) for tensor in test_data] 101 | item_sales, category, color, textures, temporal_features, gtrends, images = test_data 102 | y_pred, att = model(category, color,textures, temporal_features, gtrends, images) 103 | forecasts.append(y_pred.detach().cpu().numpy().flatten()[:args.output_dim]) 104 | gt.append(item_sales.detach().cpu().numpy().flatten()[:args.output_dim]) 105 | attns.append(att.detach().cpu().numpy()) 106 | 107 | attns = np.stack(attns) 108 | forecasts = np.array(forecasts) 109 | gt = np.array(gt) 110 | 111 | rescale_vals = np.load(args.data_folder + 'normalization_scale.npy') 112 | rescaled_forecasts = forecasts * rescale_vals 113 | rescaled_gt = gt * rescale_vals 114 | print_error_metrics(gt, forecasts, rescaled_gt, rescaled_forecasts) 115 | 116 | 117 | torch.save({'results': forecasts* rescale_vals, 'gts': gt* rescale_vals, 'codes': item_codes.tolist()}, Path('results/' + model_savename+'.pth')) 118 | 119 | 120 | if __name__ == '__main__': 121 | parser = argparse.ArgumentParser(description='Zero-shot sales forecasting') 122 | 123 | # General arguments 124 | parser.add_argument('--data_folder', type=str, default='dataset/') 125 | parser.add_argument('--ckpt_path', type=str, default='log/path-to-model.ckpt') 126 | parser.add_argument('--gpu_num', type=int, default=0) 127 | parser.add_argument('--seed', type=int, default=21) 128 | 129 | # Model specific arguments 130 | parser.add_argument('--model_type', type=str, default='GTM', help='Choose between GTM or FCN') 131 | parser.add_argument('--use_trends', type=int, default=1) 132 | parser.add_argument('--use_img', type=int, default=1) 133 | parser.add_argument('--use_text', type=int, default=1) 134 | parser.add_argument('--trend_len', type=int, default=52) 135 | parser.add_argument('--num_trends', type=int, default=3) 136 | parser.add_argument('--embedding_dim', type=int, default=32) 137 | parser.add_argument('--hidden_dim', type=int, default=64) 138 | parser.add_argument('--output_dim', type=int, default=12) 139 | parser.add_argument('--use_encoder_mask', type=int, default=1) 140 | parser.add_argument('--autoregressive', type=int, default=0) 141 | parser.add_argument('--num_attn_heads', type=int, default=4) 142 | parser.add_argument('--num_hidden_layers', type=int, default=1) 143 | 144 | # wandb arguments 145 | parser.add_argument('--wandb_run', type=str, default='Run1') 146 | 147 | args = parser.parse_args() 148 | run(args) 149 | -------------------------------------------------------------------------------- /models/FCN.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import pytorch_lightning as pl 
7 | from transformers import pipeline 8 | from torchvision import models 9 | from fairseq.optim.adafactor import Adafactor 10 | 11 | class PositionalEncoding(nn.Module): 12 | def __init__(self, d_model, dropout=0.1, max_len=52): 13 | super(PositionalEncoding, self).__init__() 14 | self.dropout = nn.Dropout(p=dropout) 15 | 16 | pe = torch.zeros(max_len, d_model) 17 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 18 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 19 | pe[:, 0::2] = torch.sin(position * div_term) 20 | pe[:, 1::2] = torch.cos(position * div_term) 21 | pe = pe.unsqueeze(0).transpose(0, 1) 22 | self.register_buffer('pe', pe) 23 | 24 | def forward(self, x): 25 | x = x + self.pe[:x.size(0), :] 26 | return self.dropout(x) 27 | 28 | class TimeDistributed(nn.Module): 29 | # Takes any module and stacks the time dimension with the batch dimenison of inputs before applying the module 30 | # Insipired from https://keras.io/api/layers/recurrent_layers/time_distributed/ 31 | # https://discuss.pytorch.org/t/any-pytorch-function-can-work-as-keras-timedistributed/1346/4 32 | def __init__(self, module, batch_first=True): 33 | super(TimeDistributed, self).__init__() 34 | self.module = module # Can be any layer we wish to apply like Linear, Conv etc 35 | self.batch_first = batch_first 36 | 37 | def forward(self, x): 38 | if len(x.size()) <= 2: 39 | return self.module(x) 40 | 41 | # Squash samples and timesteps into a single axis 42 | x_reshape = x.contiguous().view(-1, x.size(-1)) 43 | 44 | y = self.module(x_reshape) 45 | 46 | # We have to reshape Y 47 | if self.batch_first: 48 | y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size) 49 | else: 50 | y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size) 51 | 52 | return y 53 | 54 | class FusionNetwork(nn.Module): 55 | def __init__(self, embedding_dim, hidden_dim, use_img, use_text, dropout=0.2): 56 | super(FusionNetwork, self).__init__() 57 | 58 | self.img_pool = nn.AdaptiveAvgPool2d((1,1)) 59 | self.img_linear = nn.Linear(2048, embedding_dim) 60 | self.use_img = use_img 61 | self.use_text = use_text 62 | input_dim = embedding_dim + (embedding_dim*use_img) + (embedding_dim*use_text) 63 | self.feature_fusion = nn.Sequential( 64 | nn.BatchNorm1d(input_dim), 65 | nn.Linear(input_dim, input_dim, bias=False), 66 | nn.ReLU(), 67 | nn.Dropout(dropout), 68 | nn.Linear(input_dim, hidden_dim) 69 | ) 70 | 71 | def forward(self, img_encoding, text_encoding, dummy_encoding): 72 | # Fuse static features together 73 | pooled_img = self.img_pool(img_encoding) 74 | condensed_img = self.img_linear(pooled_img.flatten(1)) 75 | 76 | # Build input 77 | decoder_inputs = [] 78 | if self.use_img == 1: 79 | decoder_inputs.append(condensed_img) 80 | if self.use_text == 1: 81 | decoder_inputs.append(text_encoding) 82 | decoder_inputs.append(dummy_encoding) 83 | concat_features = torch.cat(decoder_inputs, dim=1) 84 | 85 | final = self.feature_fusion(concat_features) 86 | 87 | return final 88 | 89 | class GTrendEmbedder(nn.Module): 90 | def __init__(self, forecast_horizon, embedding_dim, use_mask, trend_len, num_trends, gpu_num): 91 | super().__init__() 92 | self.forecast_horizon = forecast_horizon 93 | self.input_linear = TimeDistributed(nn.Linear(num_trends, embedding_dim)) 94 | self.pos_embedding = PositionalEncoding(embedding_dim, max_len=trend_len) 95 | encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=4, dropout=0.2) 96 | 
self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=2) 97 | self.use_mask = use_mask 98 | self.gpu_num = gpu_num 99 | 100 | def _generate_encoder_mask(self, size, forecast_horizon): 101 | mask = torch.zeros((size, size)) 102 | split = math.gcd(size, forecast_horizon) 103 | for i in range(0, size, split): 104 | mask[i:i+split, i:i+split] = 1 105 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 106 | return mask 107 | 108 | def _generate_square_subsequent_mask(self, size): 109 | mask = (torch.triu(torch.ones(size, size)) == 1).transpose(0, 1) 110 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 111 | return mask 112 | 113 | def forward(self, gtrends): 114 | gtrend_emb = self.input_linear(gtrends.permute(0,2,1)) 115 | gtrend_emb = self.pos_embedding(gtrend_emb.permute(1,0,2)) 116 | input_mask = self._generate_encoder_mask(gtrend_emb.shape[0], self.forecast_horizon) 117 | if self.use_mask == 1: 118 | gtrend_emb = self.encoder(gtrend_emb, input_mask) 119 | else: 120 | gtrend_emb = self.encoder(gtrend_emb) 121 | return gtrend_emb 122 | 123 | class TextEmbedder(nn.Module): 124 | def __init__(self, embedding_dim, cat_dict, col_dict, fab_dict, gpu_num): 125 | super().__init__() 126 | self.embedding_dim = embedding_dim 127 | self.cat_dict = {v: k for k, v in cat_dict.items()} 128 | self.col_dict = {v: k for k, v in col_dict.items()} 129 | self.fab_dict = {v: k for k, v in fab_dict.items()} 130 | self.word_embedder = pipeline('feature-extraction', model='bert-base-uncased') 131 | self.fc = nn.Linear(768, embedding_dim) 132 | self.dropout = nn.Dropout(0.1) 133 | self.gpu_num = gpu_num 134 | 135 | def forward(self, category, color, fabric): 136 | textual_description = [self.col_dict[color.detach().cpu().numpy().tolist()[i]] + ' ' \ 137 | + self.fab_dict[fabric.detach().cpu().numpy().tolist()[i]] + ' ' \ 138 | + self.cat_dict[category.detach().cpu().numpy().tolist()[i]] for i in range(len(category))] 139 | 140 | 141 | # Use BERT to extract features 142 | word_embeddings = self.word_embedder(textual_description) 143 | 144 | # BERT gives us embeddings for [CLS] .. 
[EOS], which is why we only average the embeddings in the range [1:-1] 145 | # We're not fine tuning BERT and we don't want the noise coming from [CLS] or [EOS] 146 | word_embeddings = [torch.FloatTensor(x[1:-1]).mean(axis=0) for x in word_embeddings] 147 | word_embeddings = torch.stack(word_embeddings).to('cuda:'+str(self.gpu_num)) 148 | 149 | # Embed to our embedding space 150 | word_embeddings = self.dropout(self.fc(word_embeddings)) 151 | 152 | return word_embeddings 153 | 154 | class ImageEmbedder(nn.Module): 155 | def __init__(self): 156 | super().__init__() 157 | # Img feature extraction 158 | resnet = models.resnet50(pretrained=True) 159 | modules = list(resnet.children())[:-2] 160 | self.resnet = nn.Sequential(*modules) 161 | for p in self.resnet.parameters(): 162 | p.requires_grad = False 163 | 164 | def forward(self, images): 165 | img_embeddings = self.resnet(images) 166 | size = img_embeddings.size() 167 | out = img_embeddings.view(*size[:2],-1) 168 | 169 | return out.view(*size).contiguous() # batch_size, 2048, image_size/32, image_size/32 170 | 171 | class DummyEmbedder(nn.Module): 172 | def __init__(self, embedding_dim): 173 | super().__init__() 174 | self.embedding_dim = embedding_dim 175 | self.day_embedding = nn.Linear(1, embedding_dim) 176 | self.week_embedding = nn.Linear(1, embedding_dim) 177 | self.month_embedding = nn.Linear(1, embedding_dim) 178 | self.year_embedding = nn.Linear(1, embedding_dim) 179 | self.dummy_fusion = nn.Linear(embedding_dim*4, embedding_dim) 180 | self.dropout = nn.Dropout(0.2) 181 | 182 | 183 | def forward(self, temporal_features): 184 | # Temporal dummy variables (day, week, month, year) 185 | d, w, m, y = temporal_features[:, 0].unsqueeze(1), temporal_features[:, 1].unsqueeze(1), \ 186 | temporal_features[:, 2].unsqueeze(1), temporal_features[:, 3].unsqueeze(1) 187 | d_emb, w_emb, m_emb, y_emb = self.day_embedding(d), self.week_embedding(w), self.month_embedding(m), self.year_embedding(y) 188 | temporal_embeddings = self.dummy_fusion(torch.cat([d_emb, w_emb, m_emb, y_emb], dim=1)) 189 | temporal_embeddings = self.dropout(temporal_embeddings) 190 | 191 | return temporal_embeddings 192 | 193 | class FCN(pl.LightningModule): 194 | def __init__(self, embedding_dim, hidden_dim, output_dim, cat_dict, col_dict, fab_dict, \ 195 | use_trends, use_text, use_img, trend_len, num_trends, use_encoder_mask=1, gpu_num=2): 196 | 197 | super().__init__() 198 | self.hidden_dim = hidden_dim 199 | self.embedding_dim = embedding_dim 200 | self.output_len = output_dim 201 | self.use_encoder_mask = use_encoder_mask 202 | self.gpu_num = gpu_num 203 | self.use_trends = use_trends 204 | self.save_hyperparameters() 205 | 206 | # Encoder 207 | self.dummy_encoder = DummyEmbedder(embedding_dim) 208 | self.image_encoder = ImageEmbedder() 209 | self.text_encoder = TextEmbedder(embedding_dim, cat_dict, col_dict, fab_dict, gpu_num) 210 | self.gtrend_encoder = GTrendEmbedder(output_dim, hidden_dim, use_encoder_mask, trend_len, num_trends, gpu_num) 211 | self.static_feature_encoder = FusionNetwork(embedding_dim, hidden_dim, use_img, use_text) 212 | 213 | # Decoder 214 | decoder_in = hidden_dim + (use_trends*(trend_len*hidden_dim)) 215 | self.decoder = nn.Sequential( 216 | nn.Linear(decoder_in, hidden_dim), 217 | nn.ReLU(), 218 | nn.Dropout(0.2), 219 | nn.Linear(hidden_dim, hidden_dim*4), 220 | nn.ReLU(), 221 | nn.Dropout(0.2), 222 | nn.Linear(hidden_dim*4, self.output_len) 223 | ) 224 | 225 | def forward(self, category, color, fabric, temporal_features, gtrends, images): 226 | 
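        # Note: expected tensor shapes, assuming the default arguments in train.py / forecast.py
        # and the tensors built by utils.data_multitrends.ZeroShotDataset:
        #   category, color, fabric : LongTensor [batch]              integer label ids
        #   temporal_features       : FloatTensor [batch, 4]          day / week / month / year dummies
        #   gtrends                 : FloatTensor [batch, num_trends, trend_len] (3 x 52 by default)
        #   images                  : FloatTensor [batch, 3, 256, 256] resized, normalized product images
        # The method returns a FloatTensor of shape [batch, output_dim] with the forecasted sales.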
# Encode features and get inputs 227 | img_encoding = self.image_encoder(images) 228 | dummy_encoding = self.dummy_encoder(temporal_features) 229 | text_encoding = self.text_encoder(category, color, fabric) 230 | gtrend_encoding = self.gtrend_encoder(gtrends) 231 | 232 | # Fuse static features together 233 | static_feature_fusion = self.static_feature_encoder(img_encoding, text_encoding, dummy_encoding) 234 | 235 | # Decode 236 | if self.use_trends == 1: 237 | tgt = torch.cat([static_feature_fusion, gtrend_encoding.reshape(static_feature_fusion.shape[0], -1)], dim=-1) 238 | else: 239 | tgt = static_feature_fusion 240 | 241 | forecast = self.decoder(tgt) 242 | 243 | return forecast.view(-1, self.output_len) 244 | 245 | def configure_optimizers(self): 246 | optimizer = Adafactor(self.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None) 247 | 248 | return optimizer 249 | 250 | def training_step(self, train_batch, batch_idx): 251 | item_sales, category, color, fabric, temporal_features, gtrends, images = train_batch 252 | forecasted_sales = self.forward(category, color, fabric, temporal_features, gtrends, images) 253 | forecasting_loss = F.mse_loss(item_sales, forecasted_sales.squeeze()) 254 | loss = forecasting_loss 255 | self.log('train_loss', loss) 256 | 257 | return loss 258 | 259 | def validation_step(self, val_batch, batch_idx): 260 | item_sales, category, color, fabric, temporal_features, gtrends, images = val_batch 261 | forecasted_sales = self.forward(category, color, fabric, temporal_features, gtrends, images) 262 | 263 | return item_sales.squeeze(), forecasted_sales.squeeze() 264 | 265 | def validation_epoch_end(self, val_step_outputs): 266 | item_sales, forecasted_sales = [x[0] for x in val_step_outputs], [x[1] for x in val_step_outputs] 267 | item_sales, forecasted_sales = torch.stack(item_sales), torch.stack(forecasted_sales) 268 | rescaled_item_sales, rescaled_forecasted_sales = item_sales*1065, forecasted_sales*1065 # 1065 is the normalization factor (max of the sales of the training set) 269 | mae = F.l1_loss(rescaled_item_sales, rescaled_forecasted_sales) 270 | loss = F.mse_loss(item_sales, forecasted_sales.squeeze()) 271 | self.log('val_loss', loss) 272 | self.log('val_mae', mae) 273 | print('Validation MAE:', mae.detach().cpu().numpy(), 'LR:', self.optimizers().param_groups[0]['lr']) -------------------------------------------------------------------------------- /models/GTM.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import pytorch_lightning as pl 6 | from transformers import pipeline 7 | from torchvision import models 8 | from fairseq.optim.adafactor import Adafactor 9 | 10 | 11 | class PositionalEncoding(nn.Module): 12 | def __init__(self, d_model, dropout=0.1, max_len=52): 13 | super(PositionalEncoding, self).__init__() 14 | self.dropout = nn.Dropout(p=dropout) 15 | 16 | pe = torch.zeros(max_len, d_model) 17 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 18 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 19 | pe[:, 0::2] = torch.sin(position * div_term) 20 | pe[:, 1::2] = torch.cos(position * div_term) 21 | pe = pe.unsqueeze(0).transpose(0, 1) 22 | self.register_buffer('pe', pe) 23 | 24 | def forward(self, x): 25 | x = x + self.pe[:x.size(0), :] 26 | return self.dropout(x) 27 | 28 | class TimeDistributed(nn.Module): 29 | # Takes any 
module and stacks the time dimension with the batch dimenison of inputs before applying the module 30 | # Insipired from https://keras.io/api/layers/recurrent_layers/time_distributed/ 31 | # https://discuss.pytorch.org/t/any-pytorch-function-can-work-as-keras-timedistributed/1346/4 32 | def __init__(self, module, batch_first=True): 33 | super(TimeDistributed, self).__init__() 34 | self.module = module # Can be any layer we wish to apply like Linear, Conv etc 35 | self.batch_first = batch_first 36 | 37 | def forward(self, x): 38 | if len(x.size()) <= 2: 39 | return self.module(x) 40 | 41 | # Squash samples and timesteps into a single axis 42 | x_reshape = x.contiguous().view(-1, x.size(-1)) 43 | 44 | y = self.module(x_reshape) 45 | 46 | # We have to reshape Y 47 | if self.batch_first: 48 | y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size) 49 | else: 50 | y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size) 51 | 52 | return y 53 | 54 | class FusionNetwork(nn.Module): 55 | def __init__(self, embedding_dim, hidden_dim, use_img, use_text, dropout=0.2): 56 | super(FusionNetwork, self).__init__() 57 | 58 | self.img_pool = nn.AdaptiveAvgPool2d((1,1)) 59 | self.img_linear = nn.Linear(2048, embedding_dim) 60 | self.use_img = use_img 61 | self.use_text = use_text 62 | input_dim = embedding_dim + (embedding_dim*use_img) + (embedding_dim*use_text) 63 | self.feature_fusion = nn.Sequential( 64 | nn.BatchNorm1d(input_dim), 65 | nn.Linear(input_dim, input_dim, bias=False), 66 | nn.ReLU(), 67 | nn.Dropout(dropout), 68 | nn.Linear(input_dim, hidden_dim) 69 | ) 70 | 71 | def forward(self, img_encoding, text_encoding, dummy_encoding): 72 | # Fuse static features together 73 | pooled_img = self.img_pool(img_encoding) 74 | condensed_img = self.img_linear(pooled_img.flatten(1)) 75 | 76 | # Build input 77 | decoder_inputs = [] 78 | if self.use_img == 1: 79 | decoder_inputs.append(condensed_img) 80 | if self.use_text == 1: 81 | decoder_inputs.append(text_encoding) 82 | decoder_inputs.append(dummy_encoding) 83 | concat_features = torch.cat(decoder_inputs, dim=1) 84 | 85 | final = self.feature_fusion(concat_features) 86 | # final = self.feature_fusion(dummy_encoding) 87 | 88 | return final 89 | 90 | class GTrendEmbedder(nn.Module): 91 | def __init__(self, forecast_horizon, embedding_dim, use_mask, trend_len, num_trends, gpu_num): 92 | super().__init__() 93 | self.forecast_horizon = forecast_horizon 94 | self.input_linear = TimeDistributed(nn.Linear(num_trends, embedding_dim)) 95 | self.pos_embedding = PositionalEncoding(embedding_dim, max_len=trend_len) 96 | encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=4, dropout=0.2) 97 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=2) 98 | self.use_mask = use_mask 99 | self.gpu_num = gpu_num 100 | 101 | def _generate_encoder_mask(self, size, forecast_horizon): 102 | mask = torch.zeros((size, size)) 103 | split = math.gcd(size, forecast_horizon) 104 | for i in range(0, size, split): 105 | mask[i:i+split, i:i+split] = 1 106 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 107 | return mask 108 | 109 | def _generate_square_subsequent_mask(self, size): 110 | mask = (torch.triu(torch.ones(size, size)) == 1).transpose(0, 1) 111 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 112 | return mask 113 | 114 | def forward(self, gtrends): 115 
| gtrend_emb = self.input_linear(gtrends.permute(0,2,1)) 116 | gtrend_emb = self.pos_embedding(gtrend_emb.permute(1,0,2)) 117 | input_mask = self._generate_encoder_mask(gtrend_emb.shape[0], self.forecast_horizon) 118 | if self.use_mask == 1: 119 | gtrend_emb = self.encoder(gtrend_emb, input_mask) 120 | else: 121 | gtrend_emb = self.encoder(gtrend_emb) 122 | return gtrend_emb 123 | 124 | class TextEmbedder(nn.Module): 125 | def __init__(self, embedding_dim, cat_dict, col_dict, fab_dict, gpu_num): 126 | super().__init__() 127 | self.embedding_dim = embedding_dim 128 | self.cat_dict = {v: k for k, v in cat_dict.items()} 129 | self.col_dict = {v: k for k, v in col_dict.items()} 130 | self.fab_dict = {v: k for k, v in fab_dict.items()} 131 | self.word_embedder = pipeline('feature-extraction', model='bert-base-uncased') 132 | self.fc = nn.Linear(768, embedding_dim) 133 | self.dropout = nn.Dropout(0.1) 134 | self.gpu_num = gpu_num 135 | 136 | def forward(self, category, color, fabric): 137 | textual_description = [self.col_dict[color.detach().cpu().numpy().tolist()[i]] + ' ' \ 138 | + self.fab_dict[fabric.detach().cpu().numpy().tolist()[i]] + ' ' \ 139 | + self.cat_dict[category.detach().cpu().numpy().tolist()[i]] for i in range(len(category))] 140 | 141 | 142 | # Use BERT to extract features 143 | word_embeddings = self.word_embedder(textual_description) 144 | 145 | # BERT gives us embeddings for [CLS] .. [EOS], which is why we only average the embeddings in the range [1:-1] 146 | # We're not fine tuning BERT and we don't want the noise coming from [CLS] or [EOS] 147 | word_embeddings = [torch.FloatTensor(x[0][1:-1]).mean(axis=0) for x in word_embeddings] 148 | word_embeddings = torch.stack(word_embeddings).to('cuda:'+str(self.gpu_num)) 149 | 150 | # Embed to our embedding space 151 | word_embeddings = self.dropout(self.fc(word_embeddings)) 152 | 153 | return word_embeddings 154 | 155 | class ImageEmbedder(nn.Module): 156 | def __init__(self): 157 | super().__init__() 158 | # Img feature extraction 159 | resnet = models.resnet50(pretrained=True) 160 | modules = list(resnet.children())[:-2] 161 | self.resnet = nn.Sequential(*modules) 162 | for p in self.resnet.parameters(): 163 | p.requires_grad = False 164 | 165 | # Fine tune resnet 166 | # for c in list(self.resnet.children())[6:]: 167 | # for p in c.parameters(): 168 | # p.requires_grad = True 169 | 170 | def forward(self, images): 171 | img_embeddings = self.resnet(images) 172 | size = img_embeddings.size() 173 | out = img_embeddings.view(*size[:2],-1) 174 | 175 | return out.view(*size).contiguous() # batch_size, 2048, image_size/32, image_size/32 176 | 177 | class DummyEmbedder(nn.Module): 178 | def __init__(self, embedding_dim): 179 | super().__init__() 180 | self.embedding_dim = embedding_dim 181 | self.day_embedding = nn.Linear(1, embedding_dim) 182 | self.week_embedding = nn.Linear(1, embedding_dim) 183 | self.month_embedding = nn.Linear(1, embedding_dim) 184 | self.year_embedding = nn.Linear(1, embedding_dim) 185 | self.dummy_fusion = nn.Linear(embedding_dim*4, embedding_dim) 186 | self.dropout = nn.Dropout(0.2) 187 | 188 | 189 | def forward(self, temporal_features): 190 | # Temporal dummy variables (day, week, month, year) 191 | d, w, m, y = temporal_features[:, 0].unsqueeze(1), temporal_features[:, 1].unsqueeze(1), \ 192 | temporal_features[:, 2].unsqueeze(1), temporal_features[:, 3].unsqueeze(1) 193 | d_emb, w_emb, m_emb, y_emb = self.day_embedding(d), self.week_embedding(w), self.month_embedding(m), self.year_embedding(y) 194 | 
temporal_embeddings = self.dummy_fusion(torch.cat([d_emb, w_emb, m_emb, y_emb], dim=1)) 195 | temporal_embeddings = self.dropout(temporal_embeddings) 196 | 197 | return temporal_embeddings 198 | 199 | class TransformerDecoderLayer(nn.Module): 200 | 201 | def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"): 202 | super(TransformerDecoderLayer, self).__init__() 203 | 204 | self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 205 | 206 | # Implementation of Feedforward model 207 | self.linear1 = nn.Linear(d_model, dim_feedforward) 208 | self.dropout = nn.Dropout(dropout) 209 | self.linear2 = nn.Linear(dim_feedforward, d_model) 210 | 211 | self.norm2 = nn.LayerNorm(d_model) 212 | self.norm3 = nn.LayerNorm(d_model) 213 | self.dropout2 = nn.Dropout(dropout) 214 | self.dropout3 = nn.Dropout(dropout) 215 | 216 | self.activation = F.relu 217 | 218 | def __setstate__(self, state): 219 | if 'activation' not in state: 220 | state['activation'] = F.relu 221 | super(TransformerDecoderLayer, self).__setstate__(state) 222 | 223 | def forward(self, tgt, memory, tgt_mask = None, memory_mask = None, tgt_key_padding_mask = None, 224 | memory_key_padding_mask = None): 225 | 226 | tgt2, attn_weights = self.multihead_attn(tgt, memory, memory) 227 | tgt = tgt + self.dropout2(tgt2) 228 | tgt = self.norm2(tgt) 229 | tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) 230 | tgt = tgt + self.dropout3(tgt2) 231 | tgt = self.norm3(tgt) 232 | return tgt, attn_weights 233 | 234 | class GTM(pl.LightningModule): 235 | def __init__(self, embedding_dim, hidden_dim, output_dim, num_heads, num_layers, use_text, use_img, \ 236 | cat_dict, col_dict, fab_dict, trend_len, num_trends, gpu_num, use_encoder_mask=1, autoregressive=False): 237 | super().__init__() 238 | self.hidden_dim = hidden_dim 239 | self.embedding_dim = embedding_dim 240 | self.output_len = output_dim 241 | self.use_encoder_mask = use_encoder_mask 242 | self.autoregressive = autoregressive 243 | self.gpu_num = gpu_num 244 | self.save_hyperparameters() 245 | 246 | # Encoder 247 | self.dummy_encoder = DummyEmbedder(embedding_dim) 248 | self.image_encoder = ImageEmbedder() 249 | self.text_encoder = TextEmbedder(embedding_dim, cat_dict, col_dict, fab_dict, gpu_num) 250 | self.gtrend_encoder = GTrendEmbedder(output_dim, hidden_dim, use_encoder_mask, trend_len, num_trends, gpu_num) 251 | self.static_feature_encoder = FusionNetwork(embedding_dim, hidden_dim, use_img, use_text) 252 | 253 | # Decoder 254 | self.decoder_linear = TimeDistributed(nn.Linear(1, hidden_dim)) 255 | decoder_layer = TransformerDecoderLayer(d_model=self.hidden_dim, nhead=num_heads, \ 256 | dim_feedforward=self.hidden_dim * 4, dropout=0.1) 257 | 258 | if self.autoregressive: self.pos_encoder = PositionalEncoding(hidden_dim, max_len=12) 259 | self.decoder = nn.TransformerDecoder(decoder_layer, num_layers) 260 | 261 | self.decoder_fc = nn.Sequential( 262 | nn.Linear(hidden_dim, self.output_len if not self.autoregressive else 1), 263 | nn.Dropout(0.2) 264 | ) 265 | def _generate_square_subsequent_mask(self, size): 266 | mask = (torch.triu(torch.ones(size, size)) == 1).transpose(0, 1) 267 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 268 | return mask 269 | 270 | def forward(self, category, color, fabric, temporal_features, gtrends, images): 271 | # Encode features and get inputs 272 | img_encoding = self.image_encoder(images) 273 | dummy_encoding = 
self.dummy_encoder(temporal_features) 274 | text_encoding = self.text_encoder(category, color, fabric) 275 | gtrend_encoding = self.gtrend_encoder(gtrends) 276 | 277 | # Fuse static features together 278 | static_feature_fusion = self.static_feature_encoder(img_encoding, text_encoding, dummy_encoding) 279 | 280 | if self.autoregressive == 1: 281 | # Decode 282 | tgt = torch.zeros(self.output_len, gtrend_encoding.shape[1], gtrend_encoding.shape[-1]).to('cuda:'+str(self.gpu_num)) 283 | tgt[0] = static_feature_fusion 284 | tgt = self.pos_encoder(tgt) 285 | tgt_mask = self._generate_square_subsequent_mask(self.output_len) 286 | memory = gtrend_encoding 287 | decoder_out, attn_weights = self.decoder(tgt, memory, tgt_mask) 288 | forecast = self.decoder_fc(decoder_out) 289 | else: 290 | # Decode (generatively/non-autoregressively) 291 | tgt = static_feature_fusion.unsqueeze(0) 292 | memory = gtrend_encoding 293 | decoder_out, attn_weights = self.decoder(tgt, memory) 294 | forecast = self.decoder_fc(decoder_out) 295 | 296 | return forecast.view(-1, self.output_len), attn_weights 297 | 298 | def configure_optimizers(self): 299 | optimizer = Adafactor(self.parameters(),scale_parameter=True, relative_step=True, warmup_init=True, lr=None) 300 | 301 | return [optimizer] 302 | 303 | 304 | def training_step(self, train_batch, batch_idx): 305 | item_sales, category, color, fabric, temporal_features, gtrends, images = train_batch 306 | forecasted_sales, _ = self.forward(category, color, fabric, temporal_features, gtrends, images) 307 | loss = F.mse_loss(item_sales, forecasted_sales.squeeze()) 308 | self.log('train_loss', loss) 309 | 310 | return loss 311 | 312 | def validation_step(self, test_batch, batch_idx): 313 | item_sales, category, color, fabric, temporal_features, gtrends, images = test_batch 314 | forecasted_sales, _ = self.forward(category, color, fabric, temporal_features, gtrends, images) 315 | 316 | return item_sales.squeeze(), forecasted_sales.squeeze() 317 | 318 | def validation_epoch_end(self, val_step_outputs): 319 | item_sales, forecasted_sales = [x[0] for x in val_step_outputs], [x[1] for x in val_step_outputs] 320 | item_sales, forecasted_sales = torch.stack(item_sales), torch.stack(forecasted_sales) 321 | rescaled_item_sales, rescaled_forecasted_sales = item_sales*1065, forecasted_sales*1065 # 1065 is the normalization factor (max of the sales of the training set) 322 | loss = F.mse_loss(item_sales, forecasted_sales.squeeze()) 323 | mae = F.l1_loss(rescaled_item_sales, rescaled_forecasted_sales) 324 | self.log('val_mae', mae) 325 | self.log('val_loss', loss) 326 | 327 | print('Validation MAE:', mae.detach().cpu().numpy(), 'LR:', self.optimizers().param_groups[0]['lr']) 328 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Python 3.8.12 2 | numpy==1.22.2 3 | pandas==1.2.5 4 | matplotlib==3.5.1 5 | torch==1.8.2 6 | torchvision==0.9.2 7 | pytorch-lightning==1.5.0 8 | Pillow==8.4.0 9 | scikit-learn==0.24.2 10 | scipy==1.7.1 11 | transformers==4.11.3 12 | tqdm==4.62.3 13 | fairseq==0.10.2 -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import wandb 4 | import torch 5 | import pandas as pd 6 | import pytorch_lightning as pl 7 | from pytorch_lightning import loggers as pl_loggers 8 | from pathlib 
import Path 9 | from datetime import datetime 10 | from models.GTM import GTM 11 | from models.FCN import FCN 12 | from utils.data_multitrends import ZeroShotDataset 13 | 14 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 15 | 16 | 17 | def run(args): 18 | print(args) 19 | # Seeds for reproducibility (By default we use the number 21) 20 | pl.seed_everything(args.seed) 21 | 22 | # Load sales data 23 | train_df = pd.read_csv(Path(args.data_folder + 'train.csv'), parse_dates=['release_date']) 24 | test_df = pd.read_csv(Path(args.data_folder + 'test.csv'), parse_dates=['release_date']) 25 | 26 | # Load category and color encodings 27 | cat_dict = torch.load(Path(args.data_folder + 'category_labels.pt')) 28 | col_dict = torch.load(Path(args.data_folder + 'color_labels.pt')) 29 | fab_dict = torch.load(Path(args.data_folder + 'fabric_labels.pt')) 30 | 31 | # Load Google trends 32 | gtrends = pd.read_csv(Path(args.data_folder + 'gtrends.csv'), index_col=[0], parse_dates=True) 33 | 34 | train_loader = ZeroShotDataset(train_df, Path(args.data_folder + '/images'), gtrends, cat_dict, col_dict, 35 | fab_dict, args.trend_len).get_loader(batch_size=args.batch_size, train=True) 36 | test_loader = ZeroShotDataset(test_df, Path(args.data_folder + '/images'), gtrends, cat_dict, col_dict, 37 | fab_dict, args.trend_len).get_loader(batch_size=1, train=False) 38 | 39 | # Create model 40 | if args.model_type == 'FCN': 41 | model = FCN( 42 | embedding_dim=args.embedding_dim, 43 | hidden_dim=args.hidden_dim, 44 | output_dim=args.output_dim, 45 | cat_dict=cat_dict, 46 | col_dict=col_dict, 47 | fab_dict=fab_dict, 48 | use_trends=args.use_trends, 49 | use_text=args.use_text, 50 | use_img=args.use_img, 51 | trend_len=args.trend_len, 52 | num_trends=args.num_trends, 53 | use_encoder_mask=args.use_encoder_mask, 54 | gpu_num=args.gpu_num 55 | ) 56 | else: 57 | model = GTM( 58 | embedding_dim=args.embedding_dim, 59 | hidden_dim=args.hidden_dim, 60 | output_dim=args.output_dim, 61 | num_heads=args.num_attn_heads, 62 | num_layers=args.num_hidden_layers, 63 | cat_dict=cat_dict, 64 | col_dict=col_dict, 65 | fab_dict=fab_dict, 66 | use_text=args.use_text, 67 | use_img=args.use_img, 68 | trend_len=args.trend_len, 69 | num_trends=args.num_trends, 70 | use_encoder_mask=args.use_encoder_mask, 71 | autoregressive=args.autoregressive, 72 | gpu_num=args.gpu_num 73 | ) 74 | 75 | # Model Training 76 | # Define model saving procedure 77 | dt_string = datetime.now().strftime("%d-%m-%Y-%H-%M-%S") 78 | 79 | model_savename = args.model_type + '_' + args.wandb_run 80 | 81 | checkpoint_callback = pl.callbacks.ModelCheckpoint( 82 | dirpath=args.log_dir + '/'+args.model_type, 83 | filename=model_savename+'---{epoch}---'+dt_string, 84 | monitor='val_mae', 85 | mode='min', 86 | save_top_k=1 87 | ) 88 | 89 | wandb.init(entity=args.wandb_entity, project=args.wandb_proj, name=args.wandb_run) 90 | wandb_logger = pl_loggers.WandbLogger() 91 | wandb_logger.watch(model) 92 | 93 | # If you wish to use Tensorboard you can change the logger to: 94 | # tb_logger = pl_loggers.TensorBoardLogger(args.log_dir+'/', name=model_savename) 95 | trainer = pl.Trainer(gpus=[args.gpu_num], max_epochs=args.epochs, check_val_every_n_epoch=5, 96 | logger=wandb_logger, callbacks=[checkpoint_callback]) 97 | 98 | # Fit model 99 | trainer.fit(model, train_dataloaders=train_loader, 100 | val_dataloaders=test_loader) 101 | 102 | # Print out path of best model 103 | print(checkpoint_callback.best_model_path) 104 | 105 | 106 | if __name__ == '__main__': 107 | parser = 
argparse.ArgumentParser(description='Zero-shot sales forecasting') 108 | 109 | # General arguments 110 | parser.add_argument('--data_folder', type=str, default='dataset/') 111 | parser.add_argument('--log_dir', type=str, default='log') 112 | parser.add_argument('--seed', type=int, default=21) 113 | parser.add_argument('--epochs', type=int, default=200) 114 | parser.add_argument('--gpu_num', type=int, default=0) 115 | 116 | # Model specific arguments 117 | parser.add_argument('--model_type', type=str, default='GTM', help='Choose between GTM or FCN') 118 | parser.add_argument('--use_trends', type=int, default=1) 119 | parser.add_argument('--use_img', type=int, default=1) 120 | parser.add_argument('--use_text', type=int, default=1) 121 | parser.add_argument('--trend_len', type=int, default=52) 122 | parser.add_argument('--num_trends', type=int, default=3) 123 | parser.add_argument('--batch_size', type=int, default=128) 124 | parser.add_argument('--embedding_dim', type=int, default=32) 125 | parser.add_argument('--hidden_dim', type=int, default=64) 126 | parser.add_argument('--output_dim', type=int, default=12) 127 | parser.add_argument('--use_encoder_mask', type=int, default=1) 128 | parser.add_argument('--autoregressive', type=int, default=0) 129 | parser.add_argument('--num_attn_heads', type=int, default=4) 130 | parser.add_argument('--num_hidden_layers', type=int, default=1) 131 | 132 | # wandb arguments 133 | parser.add_argument('--wandb_entity', type=str, default='username-here') 134 | parser.add_argument('--wandb_proj', type=str, default='GTM') 135 | parser.add_argument('--wandb_run', type=str, default='Run1') 136 | 137 | args = parser.parse_args() 138 | run(args) 139 | -------------------------------------------------------------------------------- /utils/data_multitrends.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import pandas as pd 4 | import numpy as np 5 | from tqdm import tqdm 6 | from PIL import Image, ImageFile 7 | from torch.utils.data import DataLoader, TensorDataset 8 | from torchvision.transforms import Resize, ToTensor, Normalize, Compose 9 | from sklearn.preprocessing import MinMaxScaler 10 | ImageFile.LOAD_TRUNCATED_IMAGES = True 11 | 12 | 13 | class ZeroShotDataset(): 14 | def __init__(self, data_df, img_root, gtrends, cat_dict, col_dict, fab_dict, trend_len): 15 | self.data_df = data_df 16 | self.gtrends = gtrends 17 | self.cat_dict = cat_dict 18 | self.col_dict = col_dict 19 | self.fab_dict = fab_dict 20 | self.trend_len = trend_len 21 | self.img_root = img_root 22 | 23 | def __len__(self): 24 | return len(self.data_df) 25 | 26 | def __getitem__(self, idx): 27 | return self.data_df.iloc[idx, :] 28 | 29 | def preprocess_data(self): 30 | data = self.data_df 31 | 32 | # Get the Gtrends time series associated with each product 33 | # Read the images (extracted image features) as well 34 | gtrends, image_features = [], [] 35 | img_transforms = Compose([Resize((256, 256)), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 36 | for (idx, row) in tqdm(data.iterrows(), total=len(data), ascii=True): 37 | cat, col, fab, fiq_attr, start_date, img_path = row['category'], row['color'], row['fabric'], row['extra'], \ 38 | row['release_date'], row['image_path'] 39 | 40 | # Get the gtrend signal up to the previous year (52 weeks) of the release date 41 | gtrend_start = start_date - pd.DateOffset(weeks=52) 42 | cat_gtrend = 
self.gtrends.loc[gtrend_start:start_date][cat][-52:].values[:self.trend_len] 43 | col_gtrend = self.gtrends.loc[gtrend_start:start_date][col][-52:].values[:self.trend_len] 44 | fab_gtrend = self.gtrends.loc[gtrend_start:start_date][fab][-52:].values[:self.trend_len] 45 | 46 | cat_gtrend = MinMaxScaler().fit_transform(cat_gtrend.reshape(-1,1)).flatten() 47 | col_gtrend = MinMaxScaler().fit_transform(col_gtrend.reshape(-1,1)).flatten() 48 | fab_gtrend = MinMaxScaler().fit_transform(fab_gtrend.reshape(-1,1)).flatten() 49 | multitrends = np.vstack([cat_gtrend, col_gtrend, fab_gtrend]) 50 | 51 | 52 | # Read images 53 | img = Image.open(os.path.join(self.img_root, img_path)).convert('RGB') 54 | 55 | # Append them to the lists 56 | gtrends.append(multitrends) 57 | image_features.append(img_transforms(img)) 58 | 59 | # Convert to numpy arrays 60 | gtrends = np.array(gtrends) 61 | 62 | # Remove non-numerical information 63 | data.drop(['external_code', 'season', 'release_date', 'image_path'], axis=1, inplace=True) 64 | 65 | # Create tensors for each part of the input/output 66 | item_sales, temporal_features = torch.FloatTensor(data.iloc[:, :12].values), torch.FloatTensor( 67 | data.iloc[:, 13:17].values) 68 | categories, colors, fabrics = [self.cat_dict[val] for val in data.iloc[:].category.values], \ 69 | [self.col_dict[val] for val in data.iloc[:].color.values], \ 70 | [self.fab_dict[val] for val in data.iloc[:].fabric.values] 71 | 72 | 73 | categories, colors, fabrics = torch.LongTensor(categories), torch.LongTensor(colors), torch.LongTensor(fabrics) 74 | gtrends = torch.FloatTensor(gtrends) 75 | images = torch.stack(image_features) 76 | 77 | return TensorDataset(item_sales, categories, colors, fabrics, temporal_features, gtrends, images) 78 | 79 | def get_loader(self, batch_size, train=True): 80 | print('Starting dataset creation process...') 81 | data_with_gtrends = self.preprocess_data() 82 | data_loader = None 83 | if train: 84 | data_loader = DataLoader(data_with_gtrends, batch_size=batch_size, shuffle=True, num_workers=4) 85 | else: 86 | data_loader = DataLoader(data_with_gtrends, batch_size=1, shuffle=False, num_workers=4) 87 | print('Done.') 88 | 89 | return data_loader 90 | 91 | --------------------------------------------------------------------------------
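As a quick sanity check of the data pipeline above, the following is a minimal sketch (not part of the original repository) that builds a test loader with `ZeroShotDataset` and prints the shapes of one batch. It assumes the dataset layout described in the README and the default hyperparameters (`trend_len=52`, `num_trends=3`, a 12-week sales horizon and the 256x256 image transform used in `preprocess_data`).

```python
import pandas as pd
import torch
from pathlib import Path

from utils.data_multitrends import ZeroShotDataset

data_folder = 'dataset/'

# Load the label dictionaries, sales data and Google Trends exactly as train.py does
cat_dict = torch.load(Path(data_folder + 'category_labels.pt'))
col_dict = torch.load(Path(data_folder + 'color_labels.pt'))
fab_dict = torch.load(Path(data_folder + 'fabric_labels.pt'))
test_df = pd.read_csv(Path(data_folder + 'test.csv'), parse_dates=['release_date'])
gtrends = pd.read_csv(Path(data_folder + 'gtrends.csv'), index_col=[0], parse_dates=True)

# Build a single-item test loader (train=False mirrors the forecast.py setup)
test_loader = ZeroShotDataset(test_df, Path(data_folder + '/images'), gtrends,
                              cat_dict, col_dict, fab_dict, 52).get_loader(batch_size=1, train=False)

# One batch is a 7-tuple; with the defaults the shapes should be roughly:
#   item_sales            [1, 12]             normalized weekly sales (the forecasting target)
#   category/color/fabric [1]                 integer label ids
#   temporal_features     [1, 4]              day / week / month / year dummy variables
#   gtrends               [1, 3, 52]          category, color and fabric trends for the year before release
#   images                [1, 3, 256, 256]    normalized product image
item_sales, category, color, fabric, temporal_features, gtrend_batch, images = next(iter(test_loader))
for name, tensor in [('item_sales', item_sales), ('category', category), ('color', color),
                     ('fabric', fabric), ('temporal_features', temporal_features),
                     ('gtrends', gtrend_batch), ('images', images)]:
    print(name, tuple(tensor.shape))
```

If the printed shapes match, the same tensors can be fed directly to `GTM.forward` / `FCN.forward`, as done in `forecast.py`.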