├── .gitignore ├── LICENSE ├── MTL ├── models.py └── modules.py ├── README.md ├── SECURITY.md ├── __pycache__ └── tools.cpython-36.pyc └── sequence ├── models.py ├── modules.py ├── tools.py └── train.py /.gitignore: -------------------------------------------------------------------------------- 1 | # Byte-compiled / optimized / DLL files 2 | __pycache__/ 3 | *.py[cod] 4 | *$py.class 5 | 6 | # C extensions 7 | *.so 8 | 9 | # Distribution / packaging 10 | .Python 11 | build/ 12 | develop-eggs/ 13 | dist/ 14 | downloads/ 15 | eggs/ 16 | .eggs/ 17 | lib/ 18 | lib64/ 19 | parts/ 20 | sdist/ 21 | var/ 22 | wheels/ 23 | pip-wheel-metadata/ 24 | share/python-wheels/ 25 | *.egg-info/ 26 | .installed.cfg 27 | *.egg 28 | MANIFEST 29 | 30 | # PyInstaller 31 | # Usually these files are written by a python script from a template 32 | # before PyInstaller builds the exe, so as to inject date/other infos into it. 33 | *.manifest 34 | *.spec 35 | 36 | # Installer logs 37 | pip-log.txt 38 | pip-delete-this-directory.txt 39 | 40 | # Unit test / coverage reports 41 | htmlcov/ 42 | .tox/ 43 | .nox/ 44 | .coverage 45 | .coverage.* 46 | .cache 47 | nosetests.xml 48 | coverage.xml 49 | *.cover 50 | *.py,cover 51 | .hypothesis/ 52 | .pytest_cache/ 53 | 54 | # Translations 55 | *.mo 56 | *.pot 57 | 58 | # Django stuff: 59 | *.log 60 | local_settings.py 61 | db.sqlite3 62 | db.sqlite3-journal 63 | 64 | # Flask stuff: 65 | instance/ 66 | .webassets-cache 67 | 68 | # Scrapy stuff: 69 | .scrapy 70 | 71 | # Sphinx documentation 72 | docs/_build/ 73 | 74 | # PyBuilder 75 | target/ 76 | 77 | # Jupyter Notebook 78 | .ipynb_checkpoints 79 | 80 | # IPython 81 | profile_default/ 82 | ipython_config.py 83 | 84 | # pyenv 85 | .python-version 86 | 87 | # pipenv 88 | # According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. 
89 | # However, in case of collaboration, if having platform-specific dependencies or dependencies 90 | # having no cross-platform support, pipenv may install dependencies that don't work, or not 91 | # install all needed dependencies. 92 | #Pipfile.lock 93 | 94 | # PEP 582; used by e.g. github.com/David-OConnor/pyflow 95 | __pypackages__/ 96 | 97 | # Celery stuff 98 | celerybeat-schedule 99 | celerybeat.pid 100 | 101 | # SageMath parsed files 102 | *.sage.py 103 | 104 | # Environments 105 | .env 106 | .venv 107 | env/ 108 | venv/ 109 | ENV/ 110 | env.bak/ 111 | venv.bak/ 112 | 113 | # Spyder project settings 114 | .spyderproject 115 | .spyproject 116 | 117 | # Rope project settings 118 | .ropeproject 119 | 120 | # mkdocs documentation 121 | /site 122 | 123 | # mypy 124 | .mypy_cache/ 125 | .dmypy.json 126 | dmypy.json 127 | 128 | # Pyre type checker 129 | .pyre/ 130 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 Jiang 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
class ESMM(nn.Module):
    """Entire Space Multi-task Model (ESMM, SIGIR'18).

    Jointly learns CTR (auxiliary tower) and CTCVR (= CTR * CVR), so that
    CVR is learned implicitly over the entire exposure space, mitigating
    sample selection bias and data sparsity.
    """

    def __init__(self, user_num, item_num, hidden_size_main,
                 hidden_size_auxiliary, embedding_size):
        """
        ESMM input parameters
        :param user_num: number of users
        :param item_num: number of items
        :param hidden_size_main: hidden size in main network for cvr
        :param hidden_size_auxiliary: hidden size in auxiliary network for ctcvr
        :param embedding_size: embedding size
        """
        super(ESMM, self).__init__()
        self.user_num = user_num
        self.item_num = item_num
        self.main = hidden_size_main
        self.aux = hidden_size_auxiliary
        self.embedding_size = embedding_size
        self.user_embedding = nn.Embedding(user_num, embedding_size)
        self.item_embedding = nn.Embedding(item_num, embedding_size)
        # CVR tower (main task). Fixed typo: nn.Sigmiod -> nn.Sigmoid,
        # which raised AttributeError at construction time.
        self.mlp_main = nn.Sequential(
            nn.Linear(embedding_size * 2, self.main),
            nn.ReLU(),
            nn.Linear(self.main, self.main),
            nn.ReLU(),
            nn.Linear(self.main, 1),
            nn.Sigmoid()
        )
        # CTR tower (auxiliary task).
        self.mlp_aux = nn.Sequential(
            nn.Linear(embedding_size * 2, self.aux),
            nn.ReLU(),
            nn.Linear(self.aux, self.aux),
            nn.ReLU(),
            nn.Linear(self.aux, 1),
            nn.Sigmoid()
        )

    def forward(self, user, item):
        """
        :param user: user id tensor
        :param item: item id tensor
        :return: (ctr, ctcvr) predictions; cvr = ctcvr / ctr implicitly
        """
        user = self.user_embedding(user)
        item = self.item_embedding(item)
        vector = torch.cat([user, item], -1)
        cvr = self.mlp_main(vector)
        ctr = self.mlp_aux(vector)
        ctcvr = ctr * cvr
        return ctr, ctcvr  # cvr = ctcvr / ctr
class MoE(nn.Module):
    """Mixture-of-Experts with a single softmax gate.

    Fixes vs. the original draft:
    * ``nn.SoftMax`` does not exist -> ``nn.Softmax(dim=-1)``; the module
      takes its axis at construction time, not at call time.
    * the gate-weighted sum used a wrongly shaped ``torch.zeros`` buffer
      and indexed the gate output by expert on the wrong axis; it is now
      done with broadcasting and supports an optional leading batch dim.
    """

    def __init__(self, expert_num, hidden_size, input_size):
        """
        MOE input parameters
        :param expert_num: int numbers of experts
        :param hidden_size: moe layer output dimension
        :param input_size: data embedding size
        """
        super(MoE, self).__init__()
        self.expert_num = expert_num
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.experts = nn.ModuleList([nn.Linear(input_size, hidden_size)
                                      for _ in range(expert_num)])
        self.gate = nn.Linear(input_size, expert_num)
        self.fc = nn.Linear(hidden_size, 1)
        self.softmax = nn.Softmax(dim=-1)
        self.relu = nn.ReLU()

    def forward(self, x):  # x: (..., input_size), e.g. [user_emb, item_emb]
        # (..., expert_num, hidden_size)
        expert_out = torch.stack([self.relu(expert(x)) for expert in self.experts],
                                 dim=-2)
        # (..., expert_num): one mixing weight per expert.
        gate_w = self.softmax(self.gate(x))
        # Weighted sum over the expert axis -> (..., hidden_size).
        mixed = (gate_w.unsqueeze(-1) * expert_out).sum(dim=-2)
        return self.fc(mixed)
class MMoE(nn.Module):
    """Multi-gate Mixture-of-Experts: shared experts, one gate per task.

    Fixes vs. the original draft:
    * ``nn.SoftMax`` does not exist -> ``nn.Softmax(dim=-1)``; the axis is
      a constructor argument, not a call-time keyword.
    * the per-task gate-weighted sums used mismatched shapes; they are now
      broadcasting reductions that also accept a leading batch dimension.
    """

    def __init__(self, expert_num, task_num, hidden_size, input_size):
        """
        MMOE input parameters
        :param expert_num: int numbers of experts
        :param task_num: int numbers of tasks
        :param hidden_size: moe layer output dimension
        :param input_size: data embedding size
        """
        super(MMoE, self).__init__()
        self.expert_num = expert_num
        self.task_num = task_num
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.experts = nn.ModuleList([nn.Linear(input_size, hidden_size)
                                      for _ in range(expert_num)])
        self.gates = nn.ModuleList([nn.Linear(input_size, expert_num)
                                    for _ in range(task_num)])
        self.fcs = nn.ModuleList([nn.Linear(hidden_size, 1)
                                  for _ in range(task_num)])
        self.relu = nn.ReLU()
        self.softmax = nn.Softmax(dim=-1)

    def forward(self, x):  # x: (..., input_size), e.g. [user_emb, item_emb]
        # Shared expert outputs: (..., expert_num, hidden_size).
        expert_out = torch.stack([self.relu(expert(x)) for expert in self.experts],
                                 dim=-2)
        outs = []
        for gate, fc in zip(self.gates, self.fcs):
            # (..., expert_num) mixing weights for this task.
            gate_w = self.softmax(gate(x))
            # Weighted sum over experts -> (..., hidden_size).
            mixed = (gate_w.unsqueeze(-1) * expert_out).sum(dim=-2)
            outs.append(fc(mixed))
        # (task_num, ..., 1): one prediction per task.
        return torch.stack(outs)
class CGC(nn.Module):
    """Customized Gate Control (PLE building block, RecSys'20).

    Each task has private experts plus a pool of shared experts; a per-task
    gate mixes them. Fixes vs. the original draft:
    * the task-specific experts lived in a plain Python list, so their
      parameters were never registered (invisible to ``.parameters()``,
      ``.to()`` and ``state_dict``) -> nested ``nn.ModuleList``.
    * ``nn.Softmax()`` now gets an explicit ``dim``.
    * the gate input was built with ``torch.cat`` on a single Tensor and
      mismatched dims; experts are now stacked along a new expert axis.
    """

    def __init__(self, expert_list, expert_num, task_num, hidden_size, input_size):
        """
        CGC input parameters
        :param expert_list: list of numbers of specific experts per task
        :param expert_num: int number of common (shared) experts
        :param task_num: int number of tasks
        :param hidden_size: expert output dimension
        :param input_size: data embedding size
        """
        super(CGC, self).__init__()
        self.expert_list = expert_list
        self.expert_num = expert_num
        self.task_num = task_num
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.softmax = nn.Softmax(dim=-1)
        self.relu = nn.ReLU()
        # Nested ModuleList so task-specific experts are registered.
        self.specific_experts = nn.ModuleList(
            nn.ModuleList(nn.Linear(input_size, hidden_size)
                          for _ in range(expert_list[i]))
            for i in range(task_num))
        self.common_experts = nn.ModuleList(nn.Linear(input_size, hidden_size)
                                            for _ in range(expert_num))
        self.towers = nn.ModuleList(nn.Linear(hidden_size, 1)
                                    for _ in range(task_num))
        gates = []
        for i in range(task_num):
            specific_num = expert_list[i]
            gates.append(nn.Sequential(
                nn.Linear((expert_num + specific_num) * hidden_size,
                          expert_num + specific_num),
                nn.Softmax(dim=-1)
            ))
        self.gates = nn.ModuleList(gates)

    def forward(self, x):  # x: flat (input_size,) sample, e.g. [user_emb, item_emb]
        # Shared expert outputs: (expert_num, hidden_size).
        common_out = torch.stack([expert(x) for expert in self.common_experts])
        res = []
        for i in range(self.task_num):
            # Task-private expert outputs: (expert_list[i], hidden_size).
            specific_out = torch.stack([expert(x)
                                        for expert in self.specific_experts[i]])
            # Candidate pool: (expert_list[i] + expert_num, hidden_size).
            cand = torch.cat([specific_out, common_out], dim=0)
            # Gate sees the flattened candidate pool, emits one weight each.
            weights = self.gates[i](torch.flatten(cand))
            fused = (weights.unsqueeze(-1) * cand).sum(dim=0)
            res.append(self.towers[i](fused))
        # (task_num, 1): one prediction per task.
        return torch.stack(res)
class PLE(nn.Module):
    """Progressive Layered Extraction (RecSys'20): one CGC extraction layer
    followed by a CGC prediction layer.

    Fixes vs. the original draft:
    * task-specific expert lists were plain Python lists, so their
      parameters were never registered -> nested ``nn.ModuleList``.
    * ``nn.Softmax()`` now gets an explicit ``dim``.
    * the common-path gate input concatenated a Python *list* of tensors
      inside ``torch.cat([...], -1)``; expert outputs are now stacked
      along an expert axis and concatenated there.
    """

    def __init__(self, expert_list, expert_num, task_num, hidden_size, input_size):
        """
        PLE input parameters
        :param expert_list: list of numbers of specific experts per task
        :param expert_num: int number of common (shared) experts
        :param task_num: int number of tasks
        :param hidden_size: expert output dimension
        :param input_size: data embedding size
        """
        super(PLE, self).__init__()
        self.expert_list = expert_list
        self.expert_num = expert_num
        self.task_num = task_num
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.softmax = nn.Softmax(dim=-1)
        self.relu = nn.ReLU()
        # ============ feature extractor (first CGC layer) ============
        self.specific_extractor = nn.ModuleList(
            nn.ModuleList(nn.Linear(input_size, hidden_size) for _ in range(n))
            for n in expert_list)
        self.common_extractors = nn.ModuleList(
            nn.Linear(input_size, hidden_size) for _ in range(expert_num))
        ex_gates = []
        # task gates + one common gate (the common path has a gate in the
        # extraction layer but not in the final layer).
        for i in range(task_num + 1):
            if i < task_num:
                specific_num = expert_list[i]
            else:
                specific_num = sum(expert_list)
            ex_gates.append(nn.Sequential(
                nn.Linear((expert_num + specific_num) * hidden_size,
                          expert_num + specific_num),
                nn.Softmax(dim=-1)
            ))
        self.ex_gates = nn.ModuleList(ex_gates)
        # ============ final predictor (second CGC layer) ============
        self.towers = nn.ModuleList(nn.Linear(hidden_size, 1)
                                    for _ in range(task_num))
        self.specific_experts = nn.ModuleList(
            nn.ModuleList(nn.Linear(hidden_size, hidden_size) for _ in range(n))
            for n in expert_list)
        self.common_experts = nn.ModuleList(
            nn.Linear(hidden_size, hidden_size) for _ in range(expert_num))
        final_gates = []
        for i in range(task_num):
            specific_num = expert_list[i]
            final_gates.append(nn.Sequential(
                nn.Linear((expert_num + specific_num) * hidden_size,
                          expert_num + specific_num),
                nn.Softmax(dim=-1)
            ))
        self.final_gates = nn.ModuleList(final_gates)

    def forward(self, x):  # x: flat (input_size,) sample, e.g. [user_emb, item_emb]
        # ===================== feature extraction =====================
        # (expert_num, hidden_size)
        common_feat = torch.stack([e(x) for e in self.common_extractors])
        # one (expert_list[i], hidden_size) tensor per task
        specific_feat = [torch.stack([e(x) for e in extractors])
                         for extractors in self.specific_extractor]
        features = []
        for i in range(self.task_num + 1):
            if i < self.task_num:
                # task path: own experts + shared experts
                cand = torch.cat([specific_feat[i], common_feat], dim=0)
            else:
                # common path: ALL specific experts + shared experts
                cand = torch.cat(specific_feat + [common_feat], dim=0)
            weights = self.ex_gates[i](torch.flatten(cand))
            features.append((weights.unsqueeze(-1) * cand).sum(dim=0))
        # ======================= final prediction =====================
        common_out = torch.stack([e(features[-1]) for e in self.common_experts])
        res = []
        for i in range(self.task_num):
            specific_out = torch.stack([e(features[i])
                                        for e in self.specific_experts[i]])
            cand = torch.cat([specific_out, common_out], dim=0)
            weights = self.final_gates[i](torch.flatten(cand))
            fused = (weights.unsqueeze(-1) * cand).sum(dim=0)
            res.append(self.towers[i](fused))
        # (task_num, 1): one prediction per task.
        return torch.stack(res)
class kuaishouEBR(nn.Module):
    # Multi-objective embedding-based retrieval (Kuaishou, WWW'23):
    # cluster the item pool into k buckets, prompt the user history with the
    # cluster embedding, recall top items per bucket, then weight the k
    # recall lists with the TAL head.
    def __init__(self,k,hidden_size,input_size,task_num,seq_len,recall_num):
        """
        kuaishouEBR input parameters
        :param k: the number of clusters for k-means
        :param hidden_size: mlp hidden_size
        :param input_size: data embedding size
        :param task_num: the number of tasks
        :param seq_len: the length of history sequence
        :param recall_num: the number of recall items
        """
        super(kuaishouEBR,self).__init__()
        self.k = k
        self.hidden_size = hidden_size
        self.input_size = input_size
        self.task_num = task_num # the task_num seems to be equal to k?
        self.seq_len = seq_len
        self.recall_num = recall_num
        # User tower input is the prompted history: seq_len item embeddings,
        # each concatenated with its cluster-prompt embedding (hence * 2).
        self.user_tower = nn.Sequential(
            nn.Linear((input_size * seq_len) * 2, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size)
        )
        self.item_tower = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size)
        )
        self.TAL = nn.Sequential( # the sample weight from k parts
            nn.Linear(hidden_size * (k+1), hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, k)
        )
        self.k_means = KMeans(n_clusters = k, random_state=0) # This step is better to be preprocessed in dataset preprocessing.
        self.prompt_embedding = nn.Embedding(k, input_size) # Here I just give a instance because of complexity.

    def forward(self,user,item):
        """
        :param user: user embedding, the concat of item embedding of history behavior of user
        :param item: list of embeddings of all items
        """
        # NOTE(review): fitting k-means inside forward re-clusters the whole
        # item pool on every call and `.numpy()` assumes a CPU tensor.
        clusters = self.k_means.fit(item.detach().numpy())
        prompts = clusters.labels_
        buckets = [[] for _ in range(self.k)]
        prompt_table = {} #record the cluster indicatior of item
        # NOTE(review): keying a dict by Tensor uses object identity, so the
        # lookup below only works if `user` holds the very same tensor
        # objects as rows of `item` — confirm against the caller.
        for i in range(len(prompts)): #split the item pool into k parts
            buckets[prompts[i]].append(item[i])
            prompt_table[item[i]] = prompts[i]

        prompted_user = []
        for his_item in user:
            prompt = prompt_table[his_item]
            # NOTE(review): `user[i]` looks like a bug — `i` is stale from the
            # previous loop; presumably this should be `his_item`. Also
            # torch.cat expects a sequence: torch.cat([...], -1).
            tmp = torch.flatten(torch.cat(user[i], self.prompt_embedding(prompt), -1))
            prompted_user.append(tmp)

        # NOTE(review): torch.flatten on a Python list raises; likely intends
        # torch.flatten(torch.stack(prompted_user)).
        prompted_user = torch.flatten(prompted_user)
        user = self.user_tower(prompted_user)
        k_ans = [] # kth top_k recall item
        for i in range(self.k): # this step can be run in parallel.
            heap = []
            heapq.heapify(heap)
            for j in range(len(buckets[i])):
                item = buckets[i][j]
                item = self.item_tower(item)
                sim = torch.cosine_similarity(user, item)
                # NOTE(review): heap entries (sim, item) compare tensors on
                # ties, which raises; a counter tie-breaker is needed. The
                # else-branch should be heappushpop to keep size bounded.
                if len(heap) < self.recall_num:
                    heapq.heappush(heap,(sim, item)) # here is better to return item_id
                else:
                    heapq.heappush(heap,(sim, item))
                    heapq.heappop(heap)
            # NOTE(review): heap is a Python list; `heap[:,1]` is a TypeError —
            # presumably [entry[1] for entry in heap] was intended.
            k_ans.append(heap[:,1])

        res = []
        for i in range(self.recall_num):
            kth_item = torch.cat([k_ans[j][i] for j in range(self.k)], -1)
            kth_inter = torch.flatten(torch.cat([user, kth_item], -1))
            res.append(self.TAL(kth_inter))

        res = torch.stack(res)
        return res
class AITM(nn.Module):
    """Adaptive Information Transfer Multi-task model (Meituan, KDD'21).

    Tasks are assumed to form a sequential funnel; the AIT module passes
    information from each task's tower output to the next task.
    """

    def __init__(self, user_num, item_num, hidden_size, task_num):
        """
        AITM input parameters
        :param user_num: int number of users
        :param item_num: int number of items
        :param hidden_size: embedding_size
        :param task_num: int number of tasks
        """
        super(AITM, self).__init__()
        self.user_num = user_num
        self.item_num = item_num
        self.task_num = task_num
        self.hidden_size = hidden_size
        self.user_embedding = nn.Embedding(user_num, hidden_size)
        self.item_embedding = nn.Embedding(item_num, hidden_size)
        # One tower per task over the concatenated user/item features.
        self.towers = nn.ModuleList(
            nn.Linear(hidden_size * 2, hidden_size) for _ in range(task_num))
        # Shared adaptive-information-transfer module between tasks.
        self.ait = m.AIT(hidden_size, hidden_size)
        # One scalar predictor head per task.
        self.predictor = nn.ModuleList(
            nn.Linear(hidden_size, 1) for _ in range(task_num))

    def forward(self, user, item):
        user_emb = self.user_embedding(user)
        item_emb = self.item_embedding(item)
        feats = torch.cat([torch.flatten(user_emb), torch.flatten(item_emb)], -1)
        outputs = []
        transferred = None
        for tower, head in zip(self.towers, self.predictor):
            tower_out = tower(feats)
            # The first task has no predecessor: seed the transfer with its
            # own tower output (matches p = q in the original formulation).
            prev = tower_out if transferred is None else transferred
            fused, transferred = self.ait(prev, tower_out)
            outputs.append(head(fused))
        return torch.stack(outputs)
class AIT(nn.Module):  # adaptive information transfer module for AITM
    def __init__(self, input_size, hidden_size):
        """
        AIT input parameters
        :param input_size: input_size of [p_{t-1}, qt]
        :param hidden_size: hidden_size

        NOTE(review): transfer_unit maps input_size -> hidden_size but is
        applied to z (hidden_size-dim), so this module assumes
        input_size == hidden_size, as AITM uses it — confirm for other callers.
        """
        super(AIT, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        # h1/h2/h3 play the roles of value/query/key projections, shared
        # between the transferred info p and the current tower output q.
        self.h1 = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size)
        )
        self.h2 = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size)
        )
        self.h3 = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size)
        )
        # Produces the information handed to the next task's AIT call.
        self.transfer_unit = nn.Sequential(
            nn.Linear(input_size, hidden_size),
            nn.ReLU(),
            nn.Linear(hidden_size, hidden_size)
        )

    def forward(self, p, q):
        """
        :param p: info transferred from the previous task (1-D tensor)
        :param q: current task's tower output (1-D tensor)
        :return: (z, p_next) — fused current info and info for the next task
        """
        p1, p2, p3 = self.h1(p), self.h2(p), self.h3(p)
        q1, q2, q3 = self.h1(q), self.h2(q), self.h3(q)
        # Scaled dot-product attention score. Fixed: torch.sqrt() rejects a
        # plain Python int (p2.shape[-1]); use a float exponent instead.
        scale = p2.shape[-1] ** 0.5
        wp = torch.dot(p2, p3) / scale
        wq = torch.dot(q2, q3) / scale
        # NOTE(review): the weights are normalized by their sum rather than a
        # softmax, so w == 0 would divide by zero — confirm intended.
        w = wp + wq
        wp, wq = wp / w, wq / w
        z = wp * p1 + wq * q1  # current info
        p = self.transfer_unit(z)  # transfer info to next task
        return z, p
Therefore, I decided to reproduce some of the more recent and representative works, record my learning process and share it with everyone. If there are any shortcomings, I would really appreciate your guidance. 14 | 15 | 模型列表/model list 16 | ======== 17 | 1 冷启动/cold start 18 | -- 19 | ❄ DropoutNet: coming soon... 20 | 21 | 2 多任务学习/multi-task models 22 | -- 23 | 🤔 ESMM: https://arxiv.org/pdf/1804.07931.pdf 24 | 25 | ESMM模型是一种多任务学习的方法,用于预测点击后的转化率。它同时学习两个任务:点击率和点击后转化率,并利用它们的乘积关系来隐式地学习转化率,解决了样本选择偏差和数据稀疏问题。 26 | 27 | ESMM model is a multi-task learning method for predicting post-click conversion rate. It simultaneously learns two tasks: click-through rate and post-click conversion rate, and uses their product relationship to implicitly learn conversion rate, solving the problems of sample selection bias and data sparsity. 28 | 29 | 🤔 MoE: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007 30 | 31 | MoE是由Google的研究人员提出的多任务学习模型,模型由多个专家网络和一个门控器组成。最后,所有专家的输出被加权求和,以生成最终输出。 32 | 33 | MoE is a multi-task learning model proposed by Google researchers. The model consists of multiple expert networks and one gate. Finally, the outputs of all experts are weighted and summed to generate the final output. 34 | 35 | 🤔 MMoE: https://dl.acm.org/doi/pdf/10.1145/3219819.3220007 36 | 37 | MMoE是由Google的研究人员提出的多任务学习模型,模型由多个专家网络和多个门控器组成。最后,所有专家的输出被加权求和,以生成最终输出。 38 | 39 | MMoE is a multi-task learning model proposed by Google researchers. The model consists of multiple expert networks and several gates. Finally, the outputs of all experts are weighted and summed to generate the final output. 40 | 41 | 🤔 CGC: https://dl.acm.org/doi/pdf/10.1145/3383313.3412236 42 | 43 | CGC是腾讯提出的多任务学习模块,旨在解决跷跷板问题(负迁移问题)。通过为不同任务引入独立的专家网络解耦学习目标。 44 | 45 | CGC is a multi-task learning module proposed by Tencent, aiming to solve the seesaw problem (negative transfer problem). It decouples the learning objectives by introducing independent expert networks for different tasks. 
46 | 47 | 🤔 PLE: https://dl.acm.org/doi/pdf/10.1145/3383313.3412236 48 | 49 | PLE是腾讯提出的多任务学习模型,旨在解决跷跷板问题(负迁移问题)。通过为不同任务引入独立的专家网络解耦学习目标。它可以被看做是堆叠了多层CGC模块渐进式分层抽取学习模型。 50 | 51 | PLE is a multi-task learning model proposed by Tencent, aiming to solve the seesaw problem (negative transfer problem). It decouples the learning objectives by introducing independent expert networks for different tasks. Moreover, it can be considered as a model stacking multiple CGC modules to progressively extract features. 52 | 53 | 🤔 Kuaishou-EBR: https://arxiv.org/pdf/2302.02657.pdf 54 | 55 | 快手在WWW2023最新提出的算法。文章从多任务学习的角度提出了embedding-based搜索召回的优化方案。该方法利用分而治之的思想提高EBR召回结果的多样性,新颖性等多个目标。 56 | 57 | The latest algorithm proposed by Kuaishou at WWW2023. The paper proposes an optimization scheme for embedding-based retrieval recall from the perspective of multi-task learning. The method uses the divide-and-conquer idea to improve the diversity, novelty and other objectives of EBR recall results. 58 | 59 | 🤔 AITM: https://arxiv.org/pdf/2105.08489.pdf 60 | 61 | AITM是美团发表在KDD2021的多任务学习算法。文章提出多个任务目标之间有先后的转化关系(曝光-点击-加购-付款),该模型使用自适应信息传递模块模拟多步转化过程中的顺序依赖关系,可以根据不同转化阶段自适应地学习要传递的信息和传递的程度。 62 | 63 | AITM is a multi-task learning algorithm published by Meituan at KDD2021. The paper proposes that there is a sequential transformation relationship between multiple task objectives (exposure-click-add to cart-payment), and the model uses an adaptive information transformation module to simulate the sequential dependency relationship in the multi-step transformation process, which can adaptively learn the information and degree of transmission according to different stages. 64 | 65 | 3 序列模型/sequence models 66 | -- 67 | ♥ STAMP: 68 | 69 | coming soon... 70 | 71 | ♥ base model for DIN: https://arxiv.org/abs/1706.06978 72 | 73 | DIN的base模型,对用户历史兴趣建模采用了简单的求和操作,没有考虑兴趣之间的关系。 74 | 75 | DIN’s base model uses a simple summation operation to user’s historical interests without the relationship between interests. 
76 | 77 | ♥ DIN: https://arxiv.org/abs/1706.06978 78 | 79 | DIN模型是阿里妈妈团队提出的CTR预估模型,它是一种基于注意力机制的深度兴趣网络模型,用于对用户行为序列数据建模。DIN模型通过引入注意力机制,将用户历史行为序列中的每个行为与候选广告进行交互,从而学习到用户的兴趣偏好,并预测用户是否会点击该广告。 80 | 81 | DIN (Deep Interest Network for Click-Through Rate Prediction) model is a CTR prediction model proposed by the Alibaba Mama team. It is a deep interest network model based on attention mechanism used to model user behavior sequence data. The DIN model interacts each behavior in the user’s historical behavior sequence with the candidate advertisement by introducing attention mechanism, thus learning the user’s interest preference and predicting whether the user will click the advertisement. 82 | 83 | ♥ DIEN: https://arxiv.org/pdf/1809.03672.pdf 84 | 85 | ♥ SIM: https://arxiv.org/pdf/2006.05639.pdf 86 | 87 | SIM模型是一种基于检索的CTR模型,由阿里妈妈提出。优点是可以处理长序列用户行为,同时具有较高的预测准确率和较低的计算复杂度。 88 | 89 | SIM model is a retrieval-based CTR model proposed by Alibaba Mama team. Its advantage is that it can handle long sequence user behaviors while having high prediction accuracy and low computational complexity. 90 | 91 | ♥ MIMN: coming soon... 92 | 93 | 94 | ♥ ETA: https://arxiv.org/pdf/2108.04468.pdf 95 | 96 | ETA模型是SIM模型的改进版,使用局部敏感哈希加速商品查找,适用于超长序列建模,同时具有较高的预测准确率和较低的计算复杂度。 97 | 98 | ETA model is an improved version of SIM, using locality-sensitive hashing to speed up item lookup, suitable for ultra-long sequence modeling, and has high prediction accuracy and low computational complexity. 99 | 100 | ♥ TiCoSeRec: https://arxiv.org/pdf/2212.08262.pdf 101 | 102 | TiCoSeRec是基于CoSeRec算法的,由阿里巴巴和东北大学提出。文章提出了五种不同的数据增强算法,提升序列模型推荐效果。因此,本仓库只实现数据增强算法而不给出具体推荐算法实现。 103 | 104 | TiCoSeRec, based on CoSeRec, is proposed by Alibaba and Northeast University. It presents five data argumentation algorithm to improve the performance of sequence recommender. Hence, here I just give the code of data argumentation instead of recommender. 
105 | 106 | 文件结构/document structure 107 | ======== 108 | MTL: 多任务学习文件夹/multi-task 109 | 110 | sequence:序列推荐文件夹/sequential recommender 111 | 112 | cold:冷启动文件夹/cold start 113 | 114 | 快速开始/quick start 115 | ======== 116 | pending... 117 | 118 | 致谢/acknowledgement 119 | ======== 120 | 感谢所有对此项目有过帮助的人! Thank you to everyone who has contributed to this project! 121 | 122 | -------------------------------------------------------------------------------- /SECURITY.md: -------------------------------------------------------------------------------- 1 | # Security Policy 2 | 3 | ## Supported Versions 4 | 5 | Use this section to tell people about which versions of your project are 6 | currently being supported with security updates. 7 | 8 | | Version | Supported | 9 | | ------- | ------------------ | 10 | | 5.1.x | :white_check_mark: | 11 | | 5.0.x | :x: | 12 | | 4.0.x | :white_check_mark: | 13 | | < 4.0 | :x: | 14 | 15 | ## Reporting a Vulnerability 16 | 17 | Use this section to tell people how to report a vulnerability. 18 | 19 | Tell them where to go, how often they can expect to get an update on a 20 | reported vulnerability, what to expect if the vulnerability is accepted or 21 | declined, etc. 
class base_model(nn.Module):
    def __init__(self, user_num, item_num, cate_num, hidden_size=64):
        """
        base model input parameters
        :param user_num: int numbers of users
        :param item_num: int numbers of items
        :param cate_num: int numbers of categories
        :param hidden_size: embedding_size
        """
        super(base_model, self).__init__()
        self.user_num = user_num
        self.item_num = item_num
        self.cate_num = cate_num
        self.u_emb = nn.Embedding(user_num, hidden_size)
        self.i_emb = nn.Embedding(item_num, hidden_size)
        self.c_emb = nn.Embedding(cate_num, hidden_size)
        self.linear = nn.Sequential(
            nn.Linear(hidden_size * 4, 80),
            m.Dice(80),
            nn.Linear(80, 40),
            m.Dice(40),
            nn.Linear(40, 2)
        )

    def forward(self, user, hist, item, cate):
        """
        :param user: user id
        :param hist: list of history behaviors of user
        :param item: item id
        :param cate: category id of item
        """
        user = self.u_emb(user).squeeze()
        item = self.i_emb(item).squeeze()
        cate = self.c_emb(cate).squeeze()
        # Sum-pool the history embeddings. Fixed: the original round-tripped
        # through .detach().numpy() and torch.tensor(), which silently cut
        # the autograd graph so i_emb never received gradients from history.
        hist_emb = torch.stack([self.i_emb(h).squeeze() for h in hist])
        cur = hist_emb.sum(dim=0)
        res = torch.cat([user, item, cate, cur], -1)
        res = self.linear(res)
        return res
self.c_emb(cate).squeeze() 53 | h = [] 54 | for i in range(len(hist)): 55 | h.append(self.i_emb(hist[i]).squeeze().detach().numpy()) 56 | 57 | h = torch.tensor(np.array(h),dtype = torch.float32) 58 | cur = torch.zeros_like(h[0]) 59 | for i in range(len(h)): 60 | cur += h[i] 61 | 62 | res = torch.cat([user,item,cate,cur],-1) 63 | res = self.linear(res) 64 | return res 65 | 66 | 67 | class DIN(nn.Module): 68 | def __init__(self,user_num,item_num,cate_num,hidden_size=64): 69 | """ 70 | DIN input parameters 71 | :param user_num: int numbers of users 72 | :param item_num: int numbers of items 73 | :param cate_num: int numbers of categories 74 | :param hidden_size: embedding_size 75 | """ 76 | super(DIN, self).__init__() 77 | self.user_num = user_num 78 | self.item_num = item_num 79 | self.cate_num = cate_num 80 | self.u_emb = nn.Embedding(user_num, hidden_size) 81 | self.i_emb = nn.Embedding(item_num, hidden_size) 82 | self.c_emb = nn.Embedding(cate_num, hidden_size) 83 | self.linear = nn.Sequential( 84 | nn.Linear(hidden_size * 4, 80), 85 | m.Dice(80), 86 | nn.Linear(80, 40), 87 | m.Dice(40), 88 | nn.Linear(40, 2) 89 | ) 90 | self.au = m.ActivationUnit(hidden_size) 91 | 92 | def forward(self,user,hist,item,cate): 93 | """ 94 | :param user: user id 95 | :param hist: list of history behaviors of user 96 | :param item: item id 97 | :param cate: category id of item 98 | """ 99 | user = self.u_emb(user).squeeze() 100 | item = self.i_emb(item).squeeze() 101 | cate = self.c_emb(cate).squeeze() 102 | h = [] 103 | weights = [] 104 | for i in range(len(hist)): 105 | hist_i = self.i_emb(hist[i]) 106 | h.append(hist_i.squeeze().detach().numpy()) 107 | weight = self.au(hist_i,item) 108 | weights.append(weight) 109 | 110 | cur = torch.zeros_like(h[0]) 111 | for i in range(len(h)): 112 | cur += torch.tensor(weights[i] * h[i], dtype=torch.float32) 113 | 114 | res = torch.cat([user,item,cate,cur],-1) 115 | res = self.linear(res) 116 | return res 117 | 118 | ''' 119 | coming 
class SIM(nn.Module):
    def __init__(self, user_num, item_num, cate_num, time_span, hidden_size=64, mode='hard', thre=0.8):
        """
        SIM (Search-based Interest Model) input parameters

        :param user_num: int, number of users
        :param item_num: int, number of items
        :param cate_num: int, number of categories
        :param time_span: number of distinct time stamps
        :param hidden_size: embedding size
        :param mode: sequence cutting strategy, 'hard' or 'soft'
        :param thre: similarity threshold for the 'soft' strategy
        """
        super(SIM, self).__init__()
        self.user_num = user_num
        self.item_num = item_num
        self.cate_num = cate_num
        self.time_span = time_span
        self.user_embedding = nn.Embedding(user_num, hidden_size)
        self.item_embedding = nn.Embedding(item_num, hidden_size)
        self.cate_embedding = nn.Embedding(cate_num, hidden_size)
        self.time_embedding = nn.Embedding(time_span, hidden_size)
        self.mode = mode
        self.thre = thre
        # Input is [user(h) ; item+time(2h) ; cate(h) ; pooled-history(2h)] = 6h.
        self.linear = nn.Sequential(
            nn.Linear(hidden_size * 6, 80),
            m.Dice(80),
            nn.Linear(80, 40),
            m.Dice(40),
            nn.Linear(40, 2)
        )
        self.au = m.ActivationUnit(hidden_size * 2)

    def forward(self, user, hist, item, cate, time):  # hist: [item, cate, time]
        """
        :param user: user id
        :param hist: list of history records, each [item_id, cate_id, time_id]
        :param item: candidate item id
        :param cate: category id of the candidate item
        :param time: current time stamp
        :return: [2] logits, or None for an unknown mode
        """
        if self.mode not in ('hard', 'soft'):
            print('you can just choose "soft" or "hard" mode for SIM')
            return
        user = self.user_embedding(user).squeeze()
        item = self.item_embedding(item).squeeze()
        cate = self.cate_embedding(cate).squeeze()
        time = self.time_embedding(time).squeeze()
        item = torch.cat([item, time], -1)
        h = []
        for rec in hist:
            cate_i = self.cate_embedding(rec[1]).squeeze()
            h_i = torch.cat([self.item_embedding(rec[0]).squeeze(),
                             self.time_embedding(rec[2]).squeeze()], -1)
            if self.mode == 'hard':
                # BUGFIX: `cate_i == cate` yields an element-wise bool tensor
                # and `if` on it raises; use torch.equal for an exact match.
                if torch.equal(cate_i, cate):
                    h.append(h_i)
            else:  # 'soft': keep behaviors cosine-similar to the candidate
                if torch.cosine_similarity(item, h_i, dim=0) >= self.thre:
                    h.append(h_i)

        if not h:
            # No history survived the cut: pool to zeros instead of crashing.
            cur = torch.zeros_like(item)
        else:
            # BUGFIX: the original appended numpy arrays (torch.cat over numpy
            # raised TypeError) and re-wrapped with torch.tensor, detaching
            # gradients. Stack tensors and keep everything on the graph.
            h = torch.stack(h)
            cur = torch.zeros_like(h[0])
            for i in range(len(h)):
                cur = cur + self.au(h[i], item) * h[i]

        res = torch.cat([user, item, cate, cur], -1)
        return self.linear(res)
class ETA(nn.Module):
    def __init__(self, user_num, item_num, hash_size, hidden_size=64, seq_len=100):
        """
        ETA (End-to-end Target Attention) input parameters

        :param user_num: int, number of users
        :param item_num: int, number of items
        :param hash_size: dimension of the hashed (binary) vector
        :param hidden_size: embedding size
        :param seq_len: length of the retrieved long-term sub-sequence
        """
        super(ETA, self).__init__()
        self.user_num = user_num
        self.item_num = item_num
        self.hidden_size = hidden_size
        self.seq_len = seq_len
        self.user_embedding = nn.Embedding(user_num, hidden_size)
        self.item_embedding = nn.Embedding(item_num, hidden_size)
        self.linear = nn.Sequential(
            nn.Linear(hidden_size * 4, 80),
            m.Dice(80),
            nn.Linear(80, 40),
            m.Dice(40),
            nn.Linear(40, 2)
        )
        self.au = m.ActivationUnit(hidden_size)
        self.hashing = nn.Linear(hidden_size, hash_size)

    def _attention_pool(self, vectors, target):
        # Weighted sum of `vectors`, scored against `target` by the
        # ActivationUnit. Kept on the autograd graph (the original wrapped
        # the products in torch.tensor(...), detaching gradients).
        pooled = torch.zeros_like(vectors[0])
        for vec in vectors:
            pooled = pooled + self.au(vec, target) * vec
        return pooled

    def forward(self, user, item, long_term, short_term):
        """
        :param user: user id
        :param item: candidate item id
        :param long_term: long-term behavior sequence (item ids)
        :param short_term: short-term behavior sequence (item ids)
        :return: [2] logits from the MLP head
        """
        user = torch.flatten(self.user_embedding(user))
        item = torch.flatten(self.item_embedding(item))
        # SimHash-style sign binarization of the target item.
        target_hash = torch.relu(torch.sign(self.hashing(item)))

        long_item = torch.stack([torch.flatten(self.item_embedding(x)) for x in long_term])
        short_item = torch.stack([torch.flatten(self.item_embedding(x)) for x in short_term])

        # Retrieve the seq_len MOST similar long-term items, i.e. those with
        # the smallest Hamming distance between binary hashes.
        # BUGFIX 1: the original min-heap popped the smallest distance,
        # keeping the LEAST similar items; negate so the heap root is the
        # current worst match and heappushpop evicts it.
        # BUGFIX 2: on distance ties heapq compared the tensor payloads and
        # raised; store the index and use it as a tie-breaker.
        # BUGFIX 3: `heap[:,1]` is not valid indexing on a Python list.
        heap = []
        for i in range(len(long_item)):
            cand_hash = torch.relu(torch.sign(self.hashing(long_item[i])))
            dist = t.Hamming_distance_list(target_hash, cand_hash)
            entry = (-dist, i)
            if len(heap) < self.seq_len:
                heapq.heappush(heap, entry)
            else:
                heapq.heappushpop(heap, entry)
        topK = [long_item[i] for _, i in heap]

        long = self._attention_pool(topK, item)
        short = self._attention_pool(short_item, item)

        res = torch.cat([user, item, long, short], -1)
        return self.linear(res)
class Attention(nn.Module):
    """
    Single-head scaled attention with a sigmoid gate on the scores
    (the original design uses sigmoid here; standard attention would use
    softmax — NOTE(review): confirm sigmoid is intentional before changing).
    """

    def __init__(self, inSize, outSize):
        """
        :param inSize: input feature dimension of queries/keys/values
        :param outSize: projected dimension d_k
        """
        super(Attention, self).__init__()
        self.name = 'attention'
        self.wq = nn.Linear(inSize, outSize)
        self.wk = nn.Linear(inSize, outSize)
        self.wv = nn.Linear(inSize, outSize)
        self.sig = nn.Sigmoid()

    def forward(self, q, x):
        """
        :param q: queries, [Bq, inSize]
        :param x: keys/values, [Bx, inSize]
        :return: attended values, [Bq, outSize]
        """
        # BUGFIX: the original multiplied tensors by nn.Linear modules
        # (`q * self.wq`), took `torch.sqrt(X.dim)` of a *method object*,
        # and combined scores with values element-wise — every call raised
        # TypeError. Apply the projections, scale by sqrt(d_k), matmul.
        Q = self.wq(q)                     # [Bq, d_k]
        K = self.wk(x)                     # [Bx, d_k]
        V = self.wv(x)                     # [Bx, d_k]
        scores = Q @ K.T                   # [Bq, Bx]
        attentionScore = self.sig(scores / (Q.size(-1) ** 0.5))
        return attentionScore @ V          # [Bq, d_k]
def get_var(tlist):
    """
    Return the population variance of the absolute differences between
    consecutive elements of *tlist* (typically time stamps).

    :param tlist: sequence of numbers
    :return: variance of consecutive abs-diffs; 0 when fewer than 2 elements
    """
    # BUGFIX: the original only special-cased length 1 and raised
    # ZeroDivisionError on an empty list. Fewer than two elements means
    # there are no consecutive differences, so the variance is 0.
    if len(tlist) < 2:
        return 0
    diffs = [abs(b - a) for a, b in zip(tlist, tlist[1:])]
    avg_diff = sum(diffs) / len(diffs)
    return sum((d - avg_diff) ** 2 for d in diffs) / len(diffs)
class Insert(object):
    """
    Insert similar items into the sequence on every call.

    Insertion spots are ranked by the time gap between neighbouring events:
      maximum -- insert where the gaps are largest
      minimum -- insert where the gaps are smallest
    """

    def __init__(self, item_similarity_model, mode, insert_rate=0.4, max_insert_num_per_pos=1):
        # A list of two similarity models enables ensemble lookup.
        if type(item_similarity_model) is list:
            self.item_sim_model_1, self.item_sim_model_2 = item_similarity_model
            self.ensemble = True
        else:
            self.item_similarity_model = item_similarity_model
            self.ensemble = False
        self.mode = mode
        self.insert_rate = insert_rate
        self.max_insert_num_per_pos = max_insert_num_per_pos

    def __call__(self, item_sequence, time_sequence):
        # Work on a copy so the caller's sequence is never mutated.
        seq = copy.deepcopy(item_sequence)
        n_insert = max(int(self.insert_rate * len(seq)), 1)

        # Gap i sits between items i and i+1.
        gaps = [abs(t2 - t1) for t1, t2 in zip(time_sequence, time_sequence[1:])]

        assert self.mode in ['maximum', 'minimum']
        if self.mode == 'maximum':
            # Largest gap first: argsort ascending, then reverse.
            ranked = np.argsort(gaps)[::-1].tolist()
        else:
            # Smallest gap first.
            ranked = np.argsort(gaps).tolist()
        chosen = ranked[:n_insert]

        # Emit the original item first so each similar item lands immediately
        # to the right of the item it was derived from.
        out = []
        for pos, itm in enumerate(seq):
            out.append(itm)
            if pos in chosen:
                k = random.randint(1, max(1, int(self.max_insert_num_per_pos / n_insert)))
                if self.ensemble:
                    one = self.item_sim_model_1.most_similar(itm, top_k=k, with_score=True)
                    two = self.item_sim_model_2.most_similar(itm, top_k=k, with_score=True)
                    out += _ensmeble_sim_models(one, two)
                else:
                    out += self.item_similarity_model.most_similar(itm, top_k=k)
        return out
class Substitute(object):
    """
    Replace items with similar ones.
      maximum -- substitute items around the largest time intervals
      minimum -- substitute items around the smallest time intervals
    """

    def __init__(self, item_similarity_model, mode, substitute_rate=0.1):
        # A list of two similarity models enables ensemble lookup.
        if type(item_similarity_model) is list:
            self.item_sim_model_1, self.item_sim_model_2 = item_similarity_model
            self.ensemble = True
        else:
            self.item_similarity_model = item_similarity_model
            self.ensemble = False
        self.substitute_rate = substitute_rate
        self.mode = mode

    def __call__(self, item_sequence, time_sequence):
        # Work on a copy so the caller's sequence is never mutated.
        seq = copy.deepcopy(item_sequence)
        if len(seq) <= 1:
            return seq
        n_sub = max(int(self.substitute_rate * len(seq)), 1)

        gaps = [abs(t2 - t1) for t1, t2 in zip(time_sequence, time_sequence[1:])]

        assert self.mode in ['maximum', 'minimum']
        if self.mode == 'maximum':
            # Largest gap first.
            ranked = np.argsort(gaps)[::-1].tolist()
        else:
            # Smallest gap first.
            ranked = np.argsort(gaps).tolist()

        for idx in ranked[:n_sub]:
            if self.ensemble:
                one = self.item_sim_model_1.most_similar(seq[idx], with_score=True)
                two = self.item_sim_model_2.most_similar(seq[idx], with_score=True)
                seq[idx] = _ensmeble_sim_models(one, two)[0]
            else:
                seq[idx] = self.item_similarity_model.most_similar(seq[idx])[0]
        return seq
class Crop(object):
    """
    Crop a sub-sequence out of the item sequence.
      maximum -- keep the window whose time intervals have the largest variance
      minimum -- keep the window whose time intervals have the smallest variance
    """

    def __init__(self, mode, tao=0.2):
        """
        :param mode: 'maximum' or 'minimum'
        :param tao: fraction of the sequence kept by the crop
        """
        self.tao = tao
        self.mode = mode

    def __call__(self, item_sequence, time_sequence):
        # Never mutate the caller's list.
        copied_sequence = copy.deepcopy(item_sequence)
        # BUGFIX: an empty sequence made randint(0, -1) below raise ValueError.
        if not copied_sequence:
            return copied_sequence
        sub_seq_length = int(self.tao * len(copied_sequence))
        # randint generates x with a <= x <= b
        start_index = random.randint(0, len(copied_sequence) - sub_seq_length - 1)
        if sub_seq_length <= 2:
            # Window too short to compare variances: random single-item crop.
            return [copied_sequence[start_index]]

        # Variance of the time gaps inside every candidate window
        # (windows enumerated right to left).
        cropped_vars = []
        crop_index = []
        for i in range(len(item_sequence)):
            if len(item_sequence) - i - sub_seq_length >= 0:
                left_index = len(item_sequence) - i - sub_seq_length
                right_index = left_index + sub_seq_length
                cropped_vars.append(get_var(time_sequence[left_index:right_index - 1]))
                crop_index.append(left_index)

        assert self.mode in ['maximum', 'minimum']
        if self.mode == 'maximum':
            best = cropped_vars.index(max(cropped_vars))
        else:
            best = cropped_vars.index(min(cropped_vars))
        # BUGFIX: the original used crop_index.index(best), which only worked
        # by accident because crop_index is a descending contiguous range;
        # index directly into the recorded window start positions.
        start_index = crop_index[best]

        return copied_sequence[start_index:start_index + sub_seq_length]
class Mask(object):
    """
    Mask (zero out) a fraction of the items in a sequence.
      maximum -- mask items around the largest time intervals
      minimum -- mask items around the smallest time intervals
      random  -- mask uniformly sampled positions
    """

    def __init__(self, mode, gamma=0.7):
        # gamma: fraction of the sequence to mask
        self.gamma = gamma
        self.mode = mode

    def __call__(self, item_sequence, time_sequence):
        # Never mutate the caller's list.
        seq = copy.deepcopy(item_sequence)
        n_mask = int(self.gamma * len(seq))

        if len(seq) <= 1:
            return seq

        gaps = [abs(t2 - t1) for t1, t2 in zip(time_sequence, time_sequence[1:])]

        assert self.mode in ['maximum', 'minimum', 'random']
        if self.mode == 'random':
            # Uniformly sampled positions, masked with 0.
            for idx in random.sample([i for i in range(len(seq))], k=n_mask):
                seq[idx] = 0
            return seq
        if self.mode == 'maximum':
            # Largest gap first.
            ranked = np.argsort(gaps)[::-1].tolist()
        else:
            # Smallest gap first.
            ranked = np.argsort(gaps).tolist()
        for idx in ranked[:n_mask]:
            seq[idx] = 0
        return seq
class Reorder(object):
    """
    Randomly shuffle one continuous sub-sequence.
      maximum -- shuffle the window whose time gaps have the largest variance
      minimum -- shuffle the window whose time gaps have the smallest variance
    """

    def __init__(self, mode, beta=0.2):
        # beta: fraction of the sequence that gets shuffled
        self.beta = beta
        self.mode = mode

    def __call__(self, item_sequence, time_sequence):
        # Never mutate the caller's list.
        seq = copy.deepcopy(item_sequence)
        window = int(self.beta * len(seq))
        if window < 2:
            # Nothing meaningful to shuffle.
            return seq

        # Time-gap variance of every candidate window (right to left).
        window_vars = []
        window_starts = []
        total = len(item_sequence)
        for i in range(total):
            if total - i - window >= 0:
                left = total - i - window
                window_vars.append(get_var(time_sequence[left:left + window - 1]))
                window_starts.append(left)

        assert self.mode in ['maximum', 'minimum']
        if self.mode == 'maximum':
            pick = window_vars.index(max(window_vars))
        else:
            pick = window_vars.index(min(window_vars))
        start = window_starts.index(pick)

        shuffled = seq[start:start + window]
        random.shuffle(shuffled)
        result = seq[:start] + shuffled + seq[start + window:]
        assert len(result) == len(seq)
        return result
def Hamming_distance(x1, x2):
    """
    Return the number of differing bits between two non-negative integers.

    :param x1: non-negative int
    :param x2: non-negative int
    :raises ValueError: if either argument is negative
    """
    # BUGFIX: for negative inputs the original `while z: z >>= 1` loop never
    # terminated (Python's arithmetic right shift keeps the sign bit set).
    if x1 < 0 or x2 < 0:
        raise ValueError('Hamming_distance expects non-negative integers')
    # Count the set bits of the XOR at C speed instead of a Python loop.
    return bin(x1 ^ x2).count('1')