├── .gitignore ├── LICENSE ├── README.md ├── forecast.py ├── models ├── FCN.py └── GTM.py ├── requirements.txt ├── train.py └── utils └── data_multitrends.py /.gitignore: -------------------------------------------------------------------------------- 1 | wandb/ 2 | ckpt/ 3 | dataset/ 4 | results/ 5 | gtm_venv/ 6 | log/ 7 | models/__pycache__/ 8 | utils/__pycache__/ 9 | train_all_GTM.sh 10 | .vscode/ -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | The MIT License (MIT) 2 | 3 | Copyright (c) 2021 HumaticsLAB 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy of 6 | this software and associated documentation files (the "Software"), to deal in 7 | the Software without restriction, including without limitation the rights to 8 | use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of 9 | the Software, and to permit persons to whom the Software is furnished to do so, 10 | subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS 17 | FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR 18 | COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER 19 | IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN 20 | CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. 21 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # GTM-Transformer 2 | Official Pytorch Implementation of [**Well Googled is Half Done: Multimodal Forecasting of New Fashion Product Sales with Image-based Google Trends**](https://arxiv.org/abs/2109.09824) paper 3 | 4 | [![PWC](https://img.shields.io/endpoint.svg?url=https://paperswithcode.com/badge/well-googled-is-half-done-multimodal/new-product-sales-forecasting-on-visuelle)](https://paperswithcode.com/sota/new-product-sales-forecasting-on-visuelle?p=well-googled-is-half-done-multimodal) 5 | 6 | ## Installation 7 | 8 | We suggest the use of VirtualEnv. 9 | 10 | ```bash 11 | 12 | python3 -m venv gtm_venv 13 | source gtm_venv/bin/activate 14 | # gtm_venv\Scripts\activate.bat # If you're running on Windows 15 | 16 | pip install numpy pandas matplotlib opencv-python permetrics Pillow scikit-image scikit-learn scipy tqdm transformers fairseq wandb 17 | 18 | pip install torch torchvision 19 | 20 | # For CUDA11.1 (NVIDIA 3K Serie GPUs) 21 | # Check official pytorch installation guidelines for your system 22 | pip install torch==1.9.0+cu111 torchvision==0.10.0+cu111 -f https://download.pytorch.org/whl/torch_stable.html 23 | 24 | pip install pytorch-lightning 25 | 26 | export INSTALL_DIR=$PWD 27 | 28 | cd $INSTALL_DIR 29 | git clone https://github.com/HumaticsLAB/GTM-Transformer.git 30 | cd GTM-Transformer 31 | mkdir ckpt 32 | mkdir dataset 33 | mkdir results 34 | 35 | unset INSTALL_DIR 36 | ``` 37 | 38 | ## Dataset 39 | 40 | **VISUELLE** dataset is publicly available to download [here](https://forms.gle/cVGQAmxhHf7eRJ937). Please download and extract it inside the dataset folder. 
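Based on the file paths read by `train.py`, `forecast.py` and `utils/data_multitrends.py`, the extracted folder should roughly look as follows (a sketch for orientation only; the exact contents depend on the VISUELLE release you download):

```
dataset/
├── train.csv                 # sales series plus release_date, category, color, fabric, image_path
├── test.csv
├── category_labels.pt        # torch-saved dicts mapping attribute strings to integer ids
├── color_labels.pt
├── fabric_labels.pt
├── gtrends.csv               # weekly Google Trends signals, indexed by date
├── normalization_scale.npy   # scale used to map normalized sales back to units sold
└── images/                   # product images referenced by the image_path column
```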
41 | 42 | ## Training 43 | To train the GTM-Transformer model, please use the following script. Please check the arguments inside the script before launching. 44 | 45 | ```bash 46 | python train.py --data_folder dataset 47 | ``` 48 | 49 | 50 | ## Inference 51 | To evaluate the GTM-Transformer model, please use the following script. Please check the arguments inside the script before launching. 52 | 53 | ```bash 54 | python forecast.py --data_folder dataset --ckpt_path ckpt/model.pth 55 | ``` 56 | 57 | ## Citation 58 | ``` 59 | @misc{skenderi2021googled, 60 | title={Well Googled is Half Done: Multimodal Forecasting of New Fashion Product Sales with Image-based Google Trends}, 61 | author={Geri Skenderi and Christian Joppi and Matteo Denitto and Marco Cristani}, 62 | year={2021}, 63 | eprint={2109.09824}, 64 | } 65 | ``` 66 | -------------------------------------------------------------------------------- /forecast.py: -------------------------------------------------------------------------------- 1 | import argparse 2 | import torch 3 | import pandas as pd 4 | import numpy as np 5 | import pytorch_lightning as pl 6 | from tqdm import tqdm 7 | from models.GTM import GTM 8 | from models.FCN import FCN 9 | from utils.data_multitrends import ZeroShotDataset 10 | from pathlib import Path 11 | from sklearn.metrics import mean_absolute_error 12 | 13 | 14 | 15 | def cal_error_metrics(gt, forecasts): 16 | # Absolute errors 17 | mae = mean_absolute_error(gt, forecasts) 18 | wape = 100 * np.sum(np.sum(np.abs(gt - forecasts), axis=-1)) / np.sum(gt) 19 | 20 | return round(mae, 3), round(wape, 3) 21 | 22 | 23 | def print_error_metrics(y_test, y_hat, rescaled_y_test, rescaled_y_hat): 24 | mae, wape = cal_error_metrics(y_test, y_hat) 25 | rescaled_mae, rescaled_wape = cal_error_metrics(rescaled_y_test, rescaled_y_hat) 26 | print(mae, wape, rescaled_mae, rescaled_wape) 27 | 28 | def run(args): 29 | print(args) 30 | 31 | # Set up CUDA 32 | device = torch.device(f'cuda:{args.gpu_num}' if torch.cuda.is_available() else 'cpu') 33 | 34 | # Seeds for reproducibility 35 | pl.seed_everything(args.seed) 36 | 37 | # Load sales data 38 | test_df = pd.read_csv(Path(args.data_folder + 'test.csv'), parse_dates=['release_date']) 39 | item_codes = test_df['external_code'].values 40 | 41 | # Load category, color and fabric encodings 42 | cat_dict = torch.load(Path(args.data_folder + 'category_labels.pt')) 43 | col_dict = torch.load(Path(args.data_folder + 'color_labels.pt')) 44 | fab_dict = torch.load(Path(args.data_folder + 'fabric_labels.pt')) 45 | 46 | # Load Google trends 47 | gtrends = pd.read_csv(Path(args.data_folder + 'gtrends.csv'), index_col=[0], parse_dates=True) 48 | 49 | test_loader = ZeroShotDataset(test_df, Path(args.data_folder + '/images'), gtrends, cat_dict, col_dict, \ 50 | fab_dict, args.trend_len).get_loader(batch_size=1, train=False) 51 | 52 | 53 | model_savename = f'{args.wandb_run}_{args.output_dim}' 54 | 55 | # Create model 56 | model = None 57 | if args.model_type == 'FCN': 58 | model = FCN( 59 | embedding_dim=args.embedding_dim, 60 | hidden_dim=args.hidden_dim, 61 | output_dim=args.output_dim, 62 | cat_dict=cat_dict, 63 | col_dict=col_dict, 64 | fab_dict=fab_dict, 65 | use_trends=args.use_trends, 66 | use_text=args.use_text, 67 | use_img=args.use_img, 68 | trend_len=args.trend_len, 69 | num_trends=args.num_trends, 70 | use_encoder_mask=args.use_encoder_mask, 71 | gpu_num=args.gpu_num 72 | ) 73 | else: 74 | model = GTM( 75 | embedding_dim=args.embedding_dim, 76 |
hidden_dim=args.hidden_dim, 77 | output_dim=args.output_dim, 78 | num_heads=args.num_attn_heads, 79 | num_layers=args.num_hidden_layers, 80 | cat_dict=cat_dict, 81 | col_dict=col_dict, 82 | fab_dict=fab_dict, 83 | use_text=args.use_text, 84 | use_img=args.use_img, 85 | trend_len=args.trend_len, 86 | num_trends=args.num_trends, 87 | use_encoder_mask=args.use_encoder_mask, 88 | autoregressive=args.autoregressive, 89 | gpu_num=args.gpu_num 90 | ) 91 | 92 | model.load_state_dict(torch.load(args.ckpt_path)['state_dict'], strict=False) 93 | 94 | # Forecast the testing set 95 | model.to(device) 96 | model.eval() 97 | gt, forecasts, attns = [], [],[] 98 | for test_data in tqdm(test_loader, total=len(test_loader), ascii=True): 99 | with torch.no_grad(): 100 | test_data = [tensor.to(device) for tensor in test_data] 101 | item_sales, category, color, textures, temporal_features, gtrends, images = test_data 102 | y_pred, att = model(category, color,textures, temporal_features, gtrends, images) 103 | forecasts.append(y_pred.detach().cpu().numpy().flatten()[:args.output_dim]) 104 | gt.append(item_sales.detach().cpu().numpy().flatten()[:args.output_dim]) 105 | attns.append(att.detach().cpu().numpy()) 106 | 107 | attns = np.stack(attns) 108 | forecasts = np.array(forecasts) 109 | gt = np.array(gt) 110 | 111 | rescale_vals = np.load(args.data_folder + 'normalization_scale.npy') 112 | rescaled_forecasts = forecasts * rescale_vals 113 | rescaled_gt = gt * rescale_vals 114 | print_error_metrics(gt, forecasts, rescaled_gt, rescaled_forecasts) 115 | 116 | 117 | torch.save({'results': forecasts* rescale_vals, 'gts': gt* rescale_vals, 'codes': item_codes.tolist()}, Path('results/' + model_savename+'.pth')) 118 | 119 | 120 | if __name__ == '__main__': 121 | parser = argparse.ArgumentParser(description='Zero-shot sales forecasting') 122 | 123 | # General arguments 124 | parser.add_argument('--data_folder', type=str, default='dataset/') 125 | parser.add_argument('--ckpt_path', type=str, default='log/path-to-model.ckpt') 126 | parser.add_argument('--gpu_num', type=int, default=0) 127 | parser.add_argument('--seed', type=int, default=21) 128 | 129 | # Model specific arguments 130 | parser.add_argument('--model_type', type=str, default='GTM', help='Choose between GTM or FCN') 131 | parser.add_argument('--use_trends', type=int, default=1) 132 | parser.add_argument('--use_img', type=int, default=1) 133 | parser.add_argument('--use_text', type=int, default=1) 134 | parser.add_argument('--trend_len', type=int, default=52) 135 | parser.add_argument('--num_trends', type=int, default=3) 136 | parser.add_argument('--embedding_dim', type=int, default=32) 137 | parser.add_argument('--hidden_dim', type=int, default=64) 138 | parser.add_argument('--output_dim', type=int, default=12) 139 | parser.add_argument('--use_encoder_mask', type=int, default=1) 140 | parser.add_argument('--autoregressive', type=int, default=0) 141 | parser.add_argument('--num_attn_heads', type=int, default=4) 142 | parser.add_argument('--num_hidden_layers', type=int, default=1) 143 | 144 | # wandb arguments 145 | parser.add_argument('--wandb_run', type=str, default='Run1') 146 | 147 | args = parser.parse_args() 148 | run(args) 149 | -------------------------------------------------------------------------------- /models/FCN.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import numpy as np 4 | import torch.nn as nn 5 | import torch.nn.functional as F 6 | import pytorch_lightning as pl 
7 | from transformers import pipeline 8 | from torchvision import models 9 | from fairseq.optim.adafactor import Adafactor 10 | 11 | class PositionalEncoding(nn.Module): 12 | def __init__(self, d_model, dropout=0.1, max_len=52): 13 | super(PositionalEncoding, self).__init__() 14 | self.dropout = nn.Dropout(p=dropout) 15 | 16 | pe = torch.zeros(max_len, d_model) 17 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 18 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 19 | pe[:, 0::2] = torch.sin(position * div_term) 20 | pe[:, 1::2] = torch.cos(position * div_term) 21 | pe = pe.unsqueeze(0).transpose(0, 1) 22 | self.register_buffer('pe', pe) 23 | 24 | def forward(self, x): 25 | x = x + self.pe[:x.size(0), :] 26 | return self.dropout(x) 27 | 28 | class TimeDistributed(nn.Module): 29 | # Takes any module and stacks the time dimension with the batch dimenison of inputs before applying the module 30 | # Insipired from https://keras.io/api/layers/recurrent_layers/time_distributed/ 31 | # https://discuss.pytorch.org/t/any-pytorch-function-can-work-as-keras-timedistributed/1346/4 32 | def __init__(self, module, batch_first=True): 33 | super(TimeDistributed, self).__init__() 34 | self.module = module # Can be any layer we wish to apply like Linear, Conv etc 35 | self.batch_first = batch_first 36 | 37 | def forward(self, x): 38 | if len(x.size()) <= 2: 39 | return self.module(x) 40 | 41 | # Squash samples and timesteps into a single axis 42 | x_reshape = x.contiguous().view(-1, x.size(-1)) 43 | 44 | y = self.module(x_reshape) 45 | 46 | # We have to reshape Y 47 | if self.batch_first: 48 | y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size) 49 | else: 50 | y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size) 51 | 52 | return y 53 | 54 | class FusionNetwork(nn.Module): 55 | def __init__(self, embedding_dim, hidden_dim, use_img, use_text, dropout=0.2): 56 | super(FusionNetwork, self).__init__() 57 | 58 | self.img_pool = nn.AdaptiveAvgPool2d((1,1)) 59 | self.img_linear = nn.Linear(2048, embedding_dim) 60 | self.use_img = use_img 61 | self.use_text = use_text 62 | input_dim = embedding_dim + (embedding_dim*use_img) + (embedding_dim*use_text) 63 | self.feature_fusion = nn.Sequential( 64 | nn.BatchNorm1d(input_dim), 65 | nn.Linear(input_dim, input_dim, bias=False), 66 | nn.ReLU(), 67 | nn.Dropout(dropout), 68 | nn.Linear(input_dim, hidden_dim) 69 | ) 70 | 71 | def forward(self, img_encoding, text_encoding, dummy_encoding): 72 | # Fuse static features together 73 | pooled_img = self.img_pool(img_encoding) 74 | condensed_img = self.img_linear(pooled_img.flatten(1)) 75 | 76 | # Build input 77 | decoder_inputs = [] 78 | if self.use_img == 1: 79 | decoder_inputs.append(condensed_img) 80 | if self.use_text == 1: 81 | decoder_inputs.append(text_encoding) 82 | decoder_inputs.append(dummy_encoding) 83 | concat_features = torch.cat(decoder_inputs, dim=1) 84 | 85 | final = self.feature_fusion(concat_features) 86 | 87 | return final 88 | 89 | class GTrendEmbedder(nn.Module): 90 | def __init__(self, forecast_horizon, embedding_dim, use_mask, trend_len, num_trends, gpu_num): 91 | super().__init__() 92 | self.forecast_horizon = forecast_horizon 93 | self.input_linear = TimeDistributed(nn.Linear(num_trends, embedding_dim)) 94 | self.pos_embedding = PositionalEncoding(embedding_dim, max_len=trend_len) 95 | encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=4, dropout=0.2) 96 | 
self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=2) 97 | self.use_mask = use_mask 98 | self.gpu_num = gpu_num 99 | 100 | def _generate_encoder_mask(self, size, forecast_horizon): 101 | mask = torch.zeros((size, size)) 102 | split = math.gcd(size, forecast_horizon) 103 | for i in range(0, size, split): 104 | mask[i:i+split, i:i+split] = 1 105 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 106 | return mask 107 | 108 | def _generate_square_subsequent_mask(self, size): 109 | mask = (torch.triu(torch.ones(size, size)) == 1).transpose(0, 1) 110 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 111 | return mask 112 | 113 | def forward(self, gtrends): 114 | gtrend_emb = self.input_linear(gtrends.permute(0,2,1)) 115 | gtrend_emb = self.pos_embedding(gtrend_emb.permute(1,0,2)) 116 | input_mask = self._generate_encoder_mask(gtrend_emb.shape[0], self.forecast_horizon) 117 | if self.use_mask == 1: 118 | gtrend_emb = self.encoder(gtrend_emb, input_mask) 119 | else: 120 | gtrend_emb = self.encoder(gtrend_emb) 121 | return gtrend_emb 122 | 123 | class TextEmbedder(nn.Module): 124 | def __init__(self, embedding_dim, cat_dict, col_dict, fab_dict, gpu_num): 125 | super().__init__() 126 | self.embedding_dim = embedding_dim 127 | self.cat_dict = {v: k for k, v in cat_dict.items()} 128 | self.col_dict = {v: k for k, v in col_dict.items()} 129 | self.fab_dict = {v: k for k, v in fab_dict.items()} 130 | self.word_embedder = pipeline('feature-extraction', model='bert-base-uncased') 131 | self.fc = nn.Linear(768, embedding_dim) 132 | self.dropout = nn.Dropout(0.1) 133 | self.gpu_num = gpu_num 134 | 135 | def forward(self, category, color, fabric): 136 | textual_description = [self.col_dict[color.detach().cpu().numpy().tolist()[i]] + ' ' \ 137 | + self.fab_dict[fabric.detach().cpu().numpy().tolist()[i]] + ' ' \ 138 | + self.cat_dict[category.detach().cpu().numpy().tolist()[i]] for i in range(len(category))] 139 | 140 | 141 | # Use BERT to extract features 142 | word_embeddings = self.word_embedder(textual_description) 143 | 144 | # BERT gives us embeddings for [CLS] .. 
[EOS], which is why we only average the embeddings in the range [1:-1] 145 | # We're not fine tuning BERT and we don't want the noise coming from [CLS] or [EOS] 146 | word_embeddings = [torch.FloatTensor(x[1:-1]).mean(axis=0) for x in word_embeddings] 147 | word_embeddings = torch.stack(word_embeddings).to('cuda:'+str(self.gpu_num)) 148 | 149 | # Embed to our embedding space 150 | word_embeddings = self.dropout(self.fc(word_embeddings)) 151 | 152 | return word_embeddings 153 | 154 | class ImageEmbedder(nn.Module): 155 | def __init__(self): 156 | super().__init__() 157 | # Img feature extraction 158 | resnet = models.resnet50(pretrained=True) 159 | modules = list(resnet.children())[:-2] 160 | self.resnet = nn.Sequential(*modules) 161 | for p in self.resnet.parameters(): 162 | p.requires_grad = False 163 | 164 | def forward(self, images): 165 | img_embeddings = self.resnet(images) 166 | size = img_embeddings.size() 167 | out = img_embeddings.view(*size[:2],-1) 168 | 169 | return out.view(*size).contiguous() # batch_size, 2048, image_size/32, image_size/32 170 | 171 | class DummyEmbedder(nn.Module): 172 | def __init__(self, embedding_dim): 173 | super().__init__() 174 | self.embedding_dim = embedding_dim 175 | self.day_embedding = nn.Linear(1, embedding_dim) 176 | self.week_embedding = nn.Linear(1, embedding_dim) 177 | self.month_embedding = nn.Linear(1, embedding_dim) 178 | self.year_embedding = nn.Linear(1, embedding_dim) 179 | self.dummy_fusion = nn.Linear(embedding_dim*4, embedding_dim) 180 | self.dropout = nn.Dropout(0.2) 181 | 182 | 183 | def forward(self, temporal_features): 184 | # Temporal dummy variables (day, week, month, year) 185 | d, w, m, y = temporal_features[:, 0].unsqueeze(1), temporal_features[:, 1].unsqueeze(1), \ 186 | temporal_features[:, 2].unsqueeze(1), temporal_features[:, 3].unsqueeze(1) 187 | d_emb, w_emb, m_emb, y_emb = self.day_embedding(d), self.week_embedding(w), self.month_embedding(m), self.year_embedding(y) 188 | temporal_embeddings = self.dummy_fusion(torch.cat([d_emb, w_emb, m_emb, y_emb], dim=1)) 189 | temporal_embeddings = self.dropout(temporal_embeddings) 190 | 191 | return temporal_embeddings 192 | 193 | class FCN(pl.LightningModule): 194 | def __init__(self, embedding_dim, hidden_dim, output_dim, cat_dict, col_dict, fab_dict, \ 195 | use_trends, use_text, use_img, trend_len, num_trends, use_encoder_mask=1, gpu_num=2): 196 | 197 | super().__init__() 198 | self.hidden_dim = hidden_dim 199 | self.embedding_dim = embedding_dim 200 | self.output_len = output_dim 201 | self.use_encoder_mask = use_encoder_mask 202 | self.gpu_num = gpu_num 203 | self.use_trends = use_trends 204 | self.save_hyperparameters() 205 | 206 | # Encoder 207 | self.dummy_encoder = DummyEmbedder(embedding_dim) 208 | self.image_encoder = ImageEmbedder() 209 | self.text_encoder = TextEmbedder(embedding_dim, cat_dict, col_dict, fab_dict, gpu_num) 210 | self.gtrend_encoder = GTrendEmbedder(output_dim, hidden_dim, use_encoder_mask, trend_len, num_trends, gpu_num) 211 | self.static_feature_encoder = FusionNetwork(embedding_dim, hidden_dim, use_img, use_text) 212 | 213 | # Decoder 214 | decoder_in = hidden_dim + (use_trends*(trend_len*hidden_dim)) 215 | self.decoder = nn.Sequential( 216 | nn.Linear(decoder_in, hidden_dim), 217 | nn.ReLU(), 218 | nn.Dropout(0.2), 219 | nn.Linear(hidden_dim, hidden_dim*4), 220 | nn.ReLU(), 221 | nn.Dropout(0.2), 222 | nn.Linear(hidden_dim*4, self.output_len) 223 | ) 224 | 225 | def forward(self, category, color, fabric, temporal_features, gtrends, images): 226 | 
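        # Note: expected tensor shapes, assuming the default arguments in train.py / forecast.py
        # and the tensors built by utils.data_multitrends.ZeroShotDataset:
        #   category, color, fabric : LongTensor [batch]              integer label ids
        #   temporal_features       : FloatTensor [batch, 4]          day / week / month / year dummies
        #   gtrends                 : FloatTensor [batch, num_trends, trend_len] (3 x 52 by default)
        #   images                  : FloatTensor [batch, 3, 256, 256] resized, normalized product images
        # The method returns a FloatTensor of shape [batch, output_dim] with the forecasted sales.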
# Encode features and get inputs 227 | img_encoding = self.image_encoder(images) 228 | dummy_encoding = self.dummy_encoder(temporal_features) 229 | text_encoding = self.text_encoder(category, color, fabric) 230 | gtrend_encoding = self.gtrend_encoder(gtrends) 231 | 232 | # Fuse static features together 233 | static_feature_fusion = self.static_feature_encoder(img_encoding, text_encoding, dummy_encoding) 234 | 235 | # Decode 236 | if self.use_trends == 1: 237 | tgt = torch.cat([static_feature_fusion, gtrend_encoding.reshape(static_feature_fusion.shape[0], -1)], dim=-1) 238 | else: 239 | tgt = static_feature_fusion 240 | 241 | forecast = self.decoder(tgt) 242 | 243 | return forecast.view(-1, self.output_len) 244 | 245 | def configure_optimizers(self): 246 | optimizer = Adafactor(self.parameters(), scale_parameter=True, relative_step=True, warmup_init=True, lr=None) 247 | 248 | return optimizer 249 | 250 | def training_step(self, train_batch, batch_idx): 251 | item_sales, category, color, fabric, temporal_features, gtrends, images = train_batch 252 | forecasted_sales = self.forward(category, color, fabric, temporal_features, gtrends, images) 253 | forecasting_loss = F.mse_loss(item_sales, forecasted_sales.squeeze()) 254 | loss = forecasting_loss 255 | self.log('train_loss', loss) 256 | 257 | return loss 258 | 259 | def validation_step(self, val_batch, batch_idx): 260 | item_sales, category, color, fabric, temporal_features, gtrends, images = val_batch 261 | forecasted_sales = self.forward(category, color, fabric, temporal_features, gtrends, images) 262 | 263 | return item_sales.squeeze(), forecasted_sales.squeeze() 264 | 265 | def validation_epoch_end(self, val_step_outputs): 266 | item_sales, forecasted_sales = [x[0] for x in val_step_outputs], [x[1] for x in val_step_outputs] 267 | item_sales, forecasted_sales = torch.stack(item_sales), torch.stack(forecasted_sales) 268 | rescaled_item_sales, rescaled_forecasted_sales = item_sales*1065, forecasted_sales*1065 # 1065 is the normalization factor (max of the sales of the training set) 269 | mae = F.l1_loss(rescaled_item_sales, rescaled_forecasted_sales) 270 | loss = F.mse_loss(item_sales, forecasted_sales.squeeze()) 271 | self.log('val_loss', loss) 272 | self.log('val_mae', mae) 273 | print('Validation MAE:', mae.detach().cpu().numpy(), 'LR:', self.optimizers().param_groups[0]['lr']) -------------------------------------------------------------------------------- /models/GTM.py: -------------------------------------------------------------------------------- 1 | import math 2 | import torch 3 | import torch.nn as nn 4 | import torch.nn.functional as F 5 | import pytorch_lightning as pl 6 | from transformers import pipeline 7 | from torchvision import models 8 | from fairseq.optim.adafactor import Adafactor 9 | 10 | 11 | class PositionalEncoding(nn.Module): 12 | def __init__(self, d_model, dropout=0.1, max_len=52): 13 | super(PositionalEncoding, self).__init__() 14 | self.dropout = nn.Dropout(p=dropout) 15 | 16 | pe = torch.zeros(max_len, d_model) 17 | position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) 18 | div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) 19 | pe[:, 0::2] = torch.sin(position * div_term) 20 | pe[:, 1::2] = torch.cos(position * div_term) 21 | pe = pe.unsqueeze(0).transpose(0, 1) 22 | self.register_buffer('pe', pe) 23 | 24 | def forward(self, x): 25 | x = x + self.pe[:x.size(0), :] 26 | return self.dropout(x) 27 | 28 | class TimeDistributed(nn.Module): 29 | # Takes any 
module and stacks the time dimension with the batch dimenison of inputs before applying the module 30 | # Insipired from https://keras.io/api/layers/recurrent_layers/time_distributed/ 31 | # https://discuss.pytorch.org/t/any-pytorch-function-can-work-as-keras-timedistributed/1346/4 32 | def __init__(self, module, batch_first=True): 33 | super(TimeDistributed, self).__init__() 34 | self.module = module # Can be any layer we wish to apply like Linear, Conv etc 35 | self.batch_first = batch_first 36 | 37 | def forward(self, x): 38 | if len(x.size()) <= 2: 39 | return self.module(x) 40 | 41 | # Squash samples and timesteps into a single axis 42 | x_reshape = x.contiguous().view(-1, x.size(-1)) 43 | 44 | y = self.module(x_reshape) 45 | 46 | # We have to reshape Y 47 | if self.batch_first: 48 | y = y.contiguous().view(x.size(0), -1, y.size(-1)) # (samples, timesteps, output_size) 49 | else: 50 | y = y.view(-1, x.size(1), y.size(-1)) # (timesteps, samples, output_size) 51 | 52 | return y 53 | 54 | class FusionNetwork(nn.Module): 55 | def __init__(self, embedding_dim, hidden_dim, use_img, use_text, dropout=0.2): 56 | super(FusionNetwork, self).__init__() 57 | 58 | self.img_pool = nn.AdaptiveAvgPool2d((1,1)) 59 | self.img_linear = nn.Linear(2048, embedding_dim) 60 | self.use_img = use_img 61 | self.use_text = use_text 62 | input_dim = embedding_dim + (embedding_dim*use_img) + (embedding_dim*use_text) 63 | self.feature_fusion = nn.Sequential( 64 | nn.BatchNorm1d(input_dim), 65 | nn.Linear(input_dim, input_dim, bias=False), 66 | nn.ReLU(), 67 | nn.Dropout(dropout), 68 | nn.Linear(input_dim, hidden_dim) 69 | ) 70 | 71 | def forward(self, img_encoding, text_encoding, dummy_encoding): 72 | # Fuse static features together 73 | pooled_img = self.img_pool(img_encoding) 74 | condensed_img = self.img_linear(pooled_img.flatten(1)) 75 | 76 | # Build input 77 | decoder_inputs = [] 78 | if self.use_img == 1: 79 | decoder_inputs.append(condensed_img) 80 | if self.use_text == 1: 81 | decoder_inputs.append(text_encoding) 82 | decoder_inputs.append(dummy_encoding) 83 | concat_features = torch.cat(decoder_inputs, dim=1) 84 | 85 | final = self.feature_fusion(concat_features) 86 | # final = self.feature_fusion(dummy_encoding) 87 | 88 | return final 89 | 90 | class GTrendEmbedder(nn.Module): 91 | def __init__(self, forecast_horizon, embedding_dim, use_mask, trend_len, num_trends, gpu_num): 92 | super().__init__() 93 | self.forecast_horizon = forecast_horizon 94 | self.input_linear = TimeDistributed(nn.Linear(num_trends, embedding_dim)) 95 | self.pos_embedding = PositionalEncoding(embedding_dim, max_len=trend_len) 96 | encoder_layer = nn.TransformerEncoderLayer(d_model=embedding_dim, nhead=4, dropout=0.2) 97 | self.encoder = nn.TransformerEncoder(encoder_layer, num_layers=2) 98 | self.use_mask = use_mask 99 | self.gpu_num = gpu_num 100 | 101 | def _generate_encoder_mask(self, size, forecast_horizon): 102 | mask = torch.zeros((size, size)) 103 | split = math.gcd(size, forecast_horizon) 104 | for i in range(0, size, split): 105 | mask[i:i+split, i:i+split] = 1 106 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 107 | return mask 108 | 109 | def _generate_square_subsequent_mask(self, size): 110 | mask = (torch.triu(torch.ones(size, size)) == 1).transpose(0, 1) 111 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 112 | return mask 113 | 114 | def forward(self, gtrends): 115 
| gtrend_emb = self.input_linear(gtrends.permute(0,2,1)) 116 | gtrend_emb = self.pos_embedding(gtrend_emb.permute(1,0,2)) 117 | input_mask = self._generate_encoder_mask(gtrend_emb.shape[0], self.forecast_horizon) 118 | if self.use_mask == 1: 119 | gtrend_emb = self.encoder(gtrend_emb, input_mask) 120 | else: 121 | gtrend_emb = self.encoder(gtrend_emb) 122 | return gtrend_emb 123 | 124 | class TextEmbedder(nn.Module): 125 | def __init__(self, embedding_dim, cat_dict, col_dict, fab_dict, gpu_num): 126 | super().__init__() 127 | self.embedding_dim = embedding_dim 128 | self.cat_dict = {v: k for k, v in cat_dict.items()} 129 | self.col_dict = {v: k for k, v in col_dict.items()} 130 | self.fab_dict = {v: k for k, v in fab_dict.items()} 131 | self.word_embedder = pipeline('feature-extraction', model='bert-base-uncased') 132 | self.fc = nn.Linear(768, embedding_dim) 133 | self.dropout = nn.Dropout(0.1) 134 | self.gpu_num = gpu_num 135 | 136 | def forward(self, category, color, fabric): 137 | textual_description = [self.col_dict[color.detach().cpu().numpy().tolist()[i]] + ' ' \ 138 | + self.fab_dict[fabric.detach().cpu().numpy().tolist()[i]] + ' ' \ 139 | + self.cat_dict[category.detach().cpu().numpy().tolist()[i]] for i in range(len(category))] 140 | 141 | 142 | # Use BERT to extract features 143 | word_embeddings = self.word_embedder(textual_description) 144 | 145 | # BERT gives us embeddings for [CLS] .. [EOS], which is why we only average the embeddings in the range [1:-1] 146 | # We're not fine tuning BERT and we don't want the noise coming from [CLS] or [EOS] 147 | word_embeddings = [torch.FloatTensor(x[0][1:-1]).mean(axis=0) for x in word_embeddings] 148 | word_embeddings = torch.stack(word_embeddings).to('cuda:'+str(self.gpu_num)) 149 | 150 | # Embed to our embedding space 151 | word_embeddings = self.dropout(self.fc(word_embeddings)) 152 | 153 | return word_embeddings 154 | 155 | class ImageEmbedder(nn.Module): 156 | def __init__(self): 157 | super().__init__() 158 | # Img feature extraction 159 | resnet = models.resnet50(pretrained=True) 160 | modules = list(resnet.children())[:-2] 161 | self.resnet = nn.Sequential(*modules) 162 | for p in self.resnet.parameters(): 163 | p.requires_grad = False 164 | 165 | # Fine tune resnet 166 | # for c in list(self.resnet.children())[6:]: 167 | # for p in c.parameters(): 168 | # p.requires_grad = True 169 | 170 | def forward(self, images): 171 | img_embeddings = self.resnet(images) 172 | size = img_embeddings.size() 173 | out = img_embeddings.view(*size[:2],-1) 174 | 175 | return out.view(*size).contiguous() # batch_size, 2048, image_size/32, image_size/32 176 | 177 | class DummyEmbedder(nn.Module): 178 | def __init__(self, embedding_dim): 179 | super().__init__() 180 | self.embedding_dim = embedding_dim 181 | self.day_embedding = nn.Linear(1, embedding_dim) 182 | self.week_embedding = nn.Linear(1, embedding_dim) 183 | self.month_embedding = nn.Linear(1, embedding_dim) 184 | self.year_embedding = nn.Linear(1, embedding_dim) 185 | self.dummy_fusion = nn.Linear(embedding_dim*4, embedding_dim) 186 | self.dropout = nn.Dropout(0.2) 187 | 188 | 189 | def forward(self, temporal_features): 190 | # Temporal dummy variables (day, week, month, year) 191 | d, w, m, y = temporal_features[:, 0].unsqueeze(1), temporal_features[:, 1].unsqueeze(1), \ 192 | temporal_features[:, 2].unsqueeze(1), temporal_features[:, 3].unsqueeze(1) 193 | d_emb, w_emb, m_emb, y_emb = self.day_embedding(d), self.week_embedding(w), self.month_embedding(m), self.year_embedding(y) 194 | 
temporal_embeddings = self.dummy_fusion(torch.cat([d_emb, w_emb, m_emb, y_emb], dim=1)) 195 | temporal_embeddings = self.dropout(temporal_embeddings) 196 | 197 | return temporal_embeddings 198 | 199 | class TransformerDecoderLayer(nn.Module): 200 | 201 | def __init__(self, d_model, nhead, dim_feedforward=2048, dropout=0.1, activation="relu"): 202 | super(TransformerDecoderLayer, self).__init__() 203 | 204 | self.multihead_attn = nn.MultiheadAttention(d_model, nhead, dropout=dropout) 205 | 206 | # Implementation of Feedforward model 207 | self.linear1 = nn.Linear(d_model, dim_feedforward) 208 | self.dropout = nn.Dropout(dropout) 209 | self.linear2 = nn.Linear(dim_feedforward, d_model) 210 | 211 | self.norm2 = nn.LayerNorm(d_model) 212 | self.norm3 = nn.LayerNorm(d_model) 213 | self.dropout2 = nn.Dropout(dropout) 214 | self.dropout3 = nn.Dropout(dropout) 215 | 216 | self.activation = F.relu 217 | 218 | def __setstate__(self, state): 219 | if 'activation' not in state: 220 | state['activation'] = F.relu 221 | super(TransformerDecoderLayer, self).__setstate__(state) 222 | 223 | def forward(self, tgt, memory, tgt_mask = None, memory_mask = None, tgt_key_padding_mask = None, 224 | memory_key_padding_mask = None): 225 | 226 | tgt2, attn_weights = self.multihead_attn(tgt, memory, memory) 227 | tgt = tgt + self.dropout2(tgt2) 228 | tgt = self.norm2(tgt) 229 | tgt2 = self.linear2(self.dropout(self.activation(self.linear1(tgt)))) 230 | tgt = tgt + self.dropout3(tgt2) 231 | tgt = self.norm3(tgt) 232 | return tgt, attn_weights 233 | 234 | class GTM(pl.LightningModule): 235 | def __init__(self, embedding_dim, hidden_dim, output_dim, num_heads, num_layers, use_text, use_img, \ 236 | cat_dict, col_dict, fab_dict, trend_len, num_trends, gpu_num, use_encoder_mask=1, autoregressive=False): 237 | super().__init__() 238 | self.hidden_dim = hidden_dim 239 | self.embedding_dim = embedding_dim 240 | self.output_len = output_dim 241 | self.use_encoder_mask = use_encoder_mask 242 | self.autoregressive = autoregressive 243 | self.gpu_num = gpu_num 244 | self.save_hyperparameters() 245 | 246 | # Encoder 247 | self.dummy_encoder = DummyEmbedder(embedding_dim) 248 | self.image_encoder = ImageEmbedder() 249 | self.text_encoder = TextEmbedder(embedding_dim, cat_dict, col_dict, fab_dict, gpu_num) 250 | self.gtrend_encoder = GTrendEmbedder(output_dim, hidden_dim, use_encoder_mask, trend_len, num_trends, gpu_num) 251 | self.static_feature_encoder = FusionNetwork(embedding_dim, hidden_dim, use_img, use_text) 252 | 253 | # Decoder 254 | self.decoder_linear = TimeDistributed(nn.Linear(1, hidden_dim)) 255 | decoder_layer = TransformerDecoderLayer(d_model=self.hidden_dim, nhead=num_heads, \ 256 | dim_feedforward=self.hidden_dim * 4, dropout=0.1) 257 | 258 | if self.autoregressive: self.pos_encoder = PositionalEncoding(hidden_dim, max_len=12) 259 | self.decoder = nn.TransformerDecoder(decoder_layer, num_layers) 260 | 261 | self.decoder_fc = nn.Sequential( 262 | nn.Linear(hidden_dim, self.output_len if not self.autoregressive else 1), 263 | nn.Dropout(0.2) 264 | ) 265 | def _generate_square_subsequent_mask(self, size): 266 | mask = (torch.triu(torch.ones(size, size)) == 1).transpose(0, 1) 267 | mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)).to('cuda:'+str(self.gpu_num)) 268 | return mask 269 | 270 | def forward(self, category, color, fabric, temporal_features, gtrends, images): 271 | # Encode features and get inputs 272 | img_encoding = self.image_encoder(images) 273 | dummy_encoding = 
self.dummy_encoder(temporal_features) 274 | text_encoding = self.text_encoder(category, color, fabric) 275 | gtrend_encoding = self.gtrend_encoder(gtrends) 276 | 277 | # Fuse static features together 278 | static_feature_fusion = self.static_feature_encoder(img_encoding, text_encoding, dummy_encoding) 279 | 280 | if self.autoregressive == 1: 281 | # Decode 282 | tgt = torch.zeros(self.output_len, gtrend_encoding.shape[1], gtrend_encoding.shape[-1]).to('cuda:'+str(self.gpu_num)) 283 | tgt[0] = static_feature_fusion 284 | tgt = self.pos_encoder(tgt) 285 | tgt_mask = self._generate_square_subsequent_mask(self.output_len) 286 | memory = gtrend_encoding 287 | decoder_out, attn_weights = self.decoder(tgt, memory, tgt_mask) 288 | forecast = self.decoder_fc(decoder_out) 289 | else: 290 | # Decode (generatively/non-autoregressively) 291 | tgt = static_feature_fusion.unsqueeze(0) 292 | memory = gtrend_encoding 293 | decoder_out, attn_weights = self.decoder(tgt, memory) 294 | forecast = self.decoder_fc(decoder_out) 295 | 296 | return forecast.view(-1, self.output_len), attn_weights 297 | 298 | def configure_optimizers(self): 299 | optimizer = Adafactor(self.parameters(),scale_parameter=True, relative_step=True, warmup_init=True, lr=None) 300 | 301 | return [optimizer] 302 | 303 | 304 | def training_step(self, train_batch, batch_idx): 305 | item_sales, category, color, fabric, temporal_features, gtrends, images = train_batch 306 | forecasted_sales, _ = self.forward(category, color, fabric, temporal_features, gtrends, images) 307 | loss = F.mse_loss(item_sales, forecasted_sales.squeeze()) 308 | self.log('train_loss', loss) 309 | 310 | return loss 311 | 312 | def validation_step(self, test_batch, batch_idx): 313 | item_sales, category, color, fabric, temporal_features, gtrends, images = test_batch 314 | forecasted_sales, _ = self.forward(category, color, fabric, temporal_features, gtrends, images) 315 | 316 | return item_sales.squeeze(), forecasted_sales.squeeze() 317 | 318 | def validation_epoch_end(self, val_step_outputs): 319 | item_sales, forecasted_sales = [x[0] for x in val_step_outputs], [x[1] for x in val_step_outputs] 320 | item_sales, forecasted_sales = torch.stack(item_sales), torch.stack(forecasted_sales) 321 | rescaled_item_sales, rescaled_forecasted_sales = item_sales*1065, forecasted_sales*1065 # 1065 is the normalization factor (max of the sales of the training set) 322 | loss = F.mse_loss(item_sales, forecasted_sales.squeeze()) 323 | mae = F.l1_loss(rescaled_item_sales, rescaled_forecasted_sales) 324 | self.log('val_mae', mae) 325 | self.log('val_loss', loss) 326 | 327 | print('Validation MAE:', mae.detach().cpu().numpy(), 'LR:', self.optimizers().param_groups[0]['lr']) 328 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | # Python 3.8.12 2 | numpy==1.22.2 3 | pandas==1.2.5 4 | matplotlib==3.5.1 5 | torch==1.8.2 6 | torchvision==0.9.2 7 | pytorch-lightning==1.5.0 8 | Pillow==8.4.0 9 | scikit-learn==0.24.2 10 | scipy==1.7.1 11 | transformers==4.11.3 12 | tqdm==4.62.3 13 | fairseq==0.10.2 -------------------------------------------------------------------------------- /train.py: -------------------------------------------------------------------------------- 1 | import os 2 | import argparse 3 | import wandb 4 | import torch 5 | import pandas as pd 6 | import pytorch_lightning as pl 7 | from pytorch_lightning import loggers as pl_loggers 8 | from pathlib 
import Path 9 | from datetime import datetime 10 | from models.GTM import GTM 11 | from models.FCN import FCN 12 | from utils.data_multitrends import ZeroShotDataset 13 | 14 | os.environ["TOKENIZERS_PARALLELISM"] = "false" 15 | 16 | 17 | def run(args): 18 | print(args) 19 | # Seeds for reproducibility (By default we use the number 21) 20 | pl.seed_everything(args.seed) 21 | 22 | # Load sales data 23 | train_df = pd.read_csv(Path(args.data_folder + 'train.csv'), parse_dates=['release_date']) 24 | test_df = pd.read_csv(Path(args.data_folder + 'test.csv'), parse_dates=['release_date']) 25 | 26 | # Load category and color encodings 27 | cat_dict = torch.load(Path(args.data_folder + 'category_labels.pt')) 28 | col_dict = torch.load(Path(args.data_folder + 'color_labels.pt')) 29 | fab_dict = torch.load(Path(args.data_folder + 'fabric_labels.pt')) 30 | 31 | # Load Google trends 32 | gtrends = pd.read_csv(Path(args.data_folder + 'gtrends.csv'), index_col=[0], parse_dates=True) 33 | 34 | train_loader = ZeroShotDataset(train_df, Path(args.data_folder + '/images'), gtrends, cat_dict, col_dict, 35 | fab_dict, args.trend_len).get_loader(batch_size=args.batch_size, train=True) 36 | test_loader = ZeroShotDataset(test_df, Path(args.data_folder + '/images'), gtrends, cat_dict, col_dict, 37 | fab_dict, args.trend_len).get_loader(batch_size=1, train=False) 38 | 39 | # Create model 40 | if args.model_type == 'FCN': 41 | model = FCN( 42 | embedding_dim=args.embedding_dim, 43 | hidden_dim=args.hidden_dim, 44 | output_dim=args.output_dim, 45 | cat_dict=cat_dict, 46 | col_dict=col_dict, 47 | fab_dict=fab_dict, 48 | use_trends=args.use_trends, 49 | use_text=args.use_text, 50 | use_img=args.use_img, 51 | trend_len=args.trend_len, 52 | num_trends=args.num_trends, 53 | use_encoder_mask=args.use_encoder_mask, 54 | gpu_num=args.gpu_num 55 | ) 56 | else: 57 | model = GTM( 58 | embedding_dim=args.embedding_dim, 59 | hidden_dim=args.hidden_dim, 60 | output_dim=args.output_dim, 61 | num_heads=args.num_attn_heads, 62 | num_layers=args.num_hidden_layers, 63 | cat_dict=cat_dict, 64 | col_dict=col_dict, 65 | fab_dict=fab_dict, 66 | use_text=args.use_text, 67 | use_img=args.use_img, 68 | trend_len=args.trend_len, 69 | num_trends=args.num_trends, 70 | use_encoder_mask=args.use_encoder_mask, 71 | autoregressive=args.autoregressive, 72 | gpu_num=args.gpu_num 73 | ) 74 | 75 | # Model Training 76 | # Define model saving procedure 77 | dt_string = datetime.now().strftime("%d-%m-%Y-%H-%M-%S") 78 | 79 | model_savename = args.model_type + '_' + args.wandb_run 80 | 81 | checkpoint_callback = pl.callbacks.ModelCheckpoint( 82 | dirpath=args.log_dir + '/'+args.model_type, 83 | filename=model_savename+'---{epoch}---'+dt_string, 84 | monitor='val_mae', 85 | mode='min', 86 | save_top_k=1 87 | ) 88 | 89 | wandb.init(entity=args.wandb_entity, project=args.wandb_proj, name=args.wandb_run) 90 | wandb_logger = pl_loggers.WandbLogger() 91 | wandb_logger.watch(model) 92 | 93 | # If you wish to use Tensorboard you can change the logger to: 94 | # tb_logger = pl_loggers.TensorBoardLogger(args.log_dir+'/', name=model_savename) 95 | trainer = pl.Trainer(gpus=[args.gpu_num], max_epochs=args.epochs, check_val_every_n_epoch=5, 96 | logger=wandb_logger, callbacks=[checkpoint_callback]) 97 | 98 | # Fit model 99 | trainer.fit(model, train_dataloaders=train_loader, 100 | val_dataloaders=test_loader) 101 | 102 | # Print out path of best model 103 | print(checkpoint_callback.best_model_path) 104 | 105 | 106 | if __name__ == '__main__': 107 | parser = 
argparse.ArgumentParser(description='Zero-shot sales forecasting') 108 | 109 | # General arguments 110 | parser.add_argument('--data_folder', type=str, default='dataset/') 111 | parser.add_argument('--log_dir', type=str, default='log') 112 | parser.add_argument('--seed', type=int, default=21) 113 | parser.add_argument('--epochs', type=int, default=200) 114 | parser.add_argument('--gpu_num', type=int, default=0) 115 | 116 | # Model specific arguments 117 | parser.add_argument('--model_type', type=str, default='GTM', help='Choose between GTM or FCN') 118 | parser.add_argument('--use_trends', type=int, default=1) 119 | parser.add_argument('--use_img', type=int, default=1) 120 | parser.add_argument('--use_text', type=int, default=1) 121 | parser.add_argument('--trend_len', type=int, default=52) 122 | parser.add_argument('--num_trends', type=int, default=3) 123 | parser.add_argument('--batch_size', type=int, default=128) 124 | parser.add_argument('--embedding_dim', type=int, default=32) 125 | parser.add_argument('--hidden_dim', type=int, default=64) 126 | parser.add_argument('--output_dim', type=int, default=12) 127 | parser.add_argument('--use_encoder_mask', type=int, default=1) 128 | parser.add_argument('--autoregressive', type=int, default=0) 129 | parser.add_argument('--num_attn_heads', type=int, default=4) 130 | parser.add_argument('--num_hidden_layers', type=int, default=1) 131 | 132 | # wandb arguments 133 | parser.add_argument('--wandb_entity', type=str, default='username-here') 134 | parser.add_argument('--wandb_proj', type=str, default='GTM') 135 | parser.add_argument('--wandb_run', type=str, default='Run1') 136 | 137 | args = parser.parse_args() 138 | run(args) 139 | -------------------------------------------------------------------------------- /utils/data_multitrends.py: -------------------------------------------------------------------------------- 1 | import os 2 | import torch 3 | import pandas as pd 4 | import numpy as np 5 | from tqdm import tqdm 6 | from PIL import Image, ImageFile 7 | from torch.utils.data import DataLoader, TensorDataset 8 | from torchvision.transforms import Resize, ToTensor, Normalize, Compose 9 | from sklearn.preprocessing import MinMaxScaler 10 | ImageFile.LOAD_TRUNCATED_IMAGES = True 11 | 12 | 13 | class ZeroShotDataset(): 14 | def __init__(self, data_df, img_root, gtrends, cat_dict, col_dict, fab_dict, trend_len): 15 | self.data_df = data_df 16 | self.gtrends = gtrends 17 | self.cat_dict = cat_dict 18 | self.col_dict = col_dict 19 | self.fab_dict = fab_dict 20 | self.trend_len = trend_len 21 | self.img_root = img_root 22 | 23 | def __len__(self): 24 | return len(self.data_df) 25 | 26 | def __getitem__(self, idx): 27 | return self.data_df.iloc[idx, :] 28 | 29 | def preprocess_data(self): 30 | data = self.data_df 31 | 32 | # Get the Gtrends time series associated with each product 33 | # Read the images (extracted image features) as well 34 | gtrends, image_features = [], [] 35 | img_transforms = Compose([Resize((256, 256)), ToTensor(), Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])]) 36 | for (idx, row) in tqdm(data.iterrows(), total=len(data), ascii=True): 37 | cat, col, fab, fiq_attr, start_date, img_path = row['category'], row['color'], row['fabric'], row['extra'], \ 38 | row['release_date'], row['image_path'] 39 | 40 | # Get the gtrend signal up to the previous year (52 weeks) of the release date 41 | gtrend_start = start_date - pd.DateOffset(weeks=52) 42 | cat_gtrend = 
self.gtrends.loc[gtrend_start:start_date][cat][-52:].values[:self.trend_len] 43 | col_gtrend = self.gtrends.loc[gtrend_start:start_date][col][-52:].values[:self.trend_len] 44 | fab_gtrend = self.gtrends.loc[gtrend_start:start_date][fab][-52:].values[:self.trend_len] 45 | 46 | cat_gtrend = MinMaxScaler().fit_transform(cat_gtrend.reshape(-1,1)).flatten() 47 | col_gtrend = MinMaxScaler().fit_transform(col_gtrend.reshape(-1,1)).flatten() 48 | fab_gtrend = MinMaxScaler().fit_transform(fab_gtrend.reshape(-1,1)).flatten() 49 | multitrends = np.vstack([cat_gtrend, col_gtrend, fab_gtrend]) 50 | 51 | 52 | # Read images 53 | img = Image.open(os.path.join(self.img_root, img_path)).convert('RGB') 54 | 55 | # Append them to the lists 56 | gtrends.append(multitrends) 57 | image_features.append(img_transforms(img)) 58 | 59 | # Convert to numpy arrays 60 | gtrends = np.array(gtrends) 61 | 62 | # Remove non-numerical information 63 | data.drop(['external_code', 'season', 'release_date', 'image_path'], axis=1, inplace=True) 64 | 65 | # Create tensors for each part of the input/output 66 | item_sales, temporal_features = torch.FloatTensor(data.iloc[:, :12].values), torch.FloatTensor( 67 | data.iloc[:, 13:17].values) 68 | categories, colors, fabrics = [self.cat_dict[val] for val in data.iloc[:].category.values], \ 69 | [self.col_dict[val] for val in data.iloc[:].color.values], \ 70 | [self.fab_dict[val] for val in data.iloc[:].fabric.values] 71 | 72 | 73 | categories, colors, fabrics = torch.LongTensor(categories), torch.LongTensor(colors), torch.LongTensor(fabrics) 74 | gtrends = torch.FloatTensor(gtrends) 75 | images = torch.stack(image_features) 76 | 77 | return TensorDataset(item_sales, categories, colors, fabrics, temporal_features, gtrends, images) 78 | 79 | def get_loader(self, batch_size, train=True): 80 | print('Starting dataset creation process...') 81 | data_with_gtrends = self.preprocess_data() 82 | data_loader = None 83 | if train: 84 | data_loader = DataLoader(data_with_gtrends, batch_size=batch_size, shuffle=True, num_workers=4) 85 | else: 86 | data_loader = DataLoader(data_with_gtrends, batch_size=1, shuffle=False, num_workers=4) 87 | print('Done.') 88 | 89 | return data_loader 90 | 91 | --------------------------------------------------------------------------------
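As a quick sanity check of the data pipeline above, the following is a minimal sketch (not part of the original repository) that builds a test loader with `ZeroShotDataset` and prints the shapes of one batch. It assumes the dataset layout described in the README and the default hyperparameters (`trend_len=52`, `num_trends=3`, a 12-week sales horizon and the 256x256 image transform used in `preprocess_data`).

```python
import pandas as pd
import torch
from pathlib import Path

from utils.data_multitrends import ZeroShotDataset

data_folder = 'dataset/'

# Load the label dictionaries, sales data and Google Trends exactly as train.py does
cat_dict = torch.load(Path(data_folder + 'category_labels.pt'))
col_dict = torch.load(Path(data_folder + 'color_labels.pt'))
fab_dict = torch.load(Path(data_folder + 'fabric_labels.pt'))
test_df = pd.read_csv(Path(data_folder + 'test.csv'), parse_dates=['release_date'])
gtrends = pd.read_csv(Path(data_folder + 'gtrends.csv'), index_col=[0], parse_dates=True)

# Build a single-item test loader (train=False mirrors the forecast.py setup)
test_loader = ZeroShotDataset(test_df, Path(data_folder + '/images'), gtrends,
                              cat_dict, col_dict, fab_dict, 52).get_loader(batch_size=1, train=False)

# One batch is a 7-tuple; with the defaults the shapes should be roughly:
#   item_sales            [1, 12]             normalized weekly sales (the forecasting target)
#   category/color/fabric [1]                 integer label ids
#   temporal_features     [1, 4]              day / week / month / year dummy variables
#   gtrends               [1, 3, 52]          category, color and fabric trends for the year before release
#   images                [1, 3, 256, 256]    normalized product image
item_sales, category, color, fabric, temporal_features, gtrend_batch, images = next(iter(test_loader))
for name, tensor in [('item_sales', item_sales), ('category', category), ('color', color),
                     ('fabric', fabric), ('temporal_features', temporal_features),
                     ('gtrends', gtrend_batch), ('images', images)]:
    print(name, tuple(tensor.shape))
```

If the printed shapes match, the same tensors can be fed directly to `GTM.forward` / `FCN.forward`, as done in `forecast.py`.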