├── Framework.pdf ├── README.md ├── RLmain.py ├── SLmain.py ├── agents ├── DDPG_ESMM.py ├── DDPG_ESMM_BC.py ├── ReplayBuffer.py └── __pycache__ │ ├── DDPG_ESMM.cpython-38.pyc │ ├── DDPG_ESMM_BC.cpython-38.pyc │ └── ReplayBuffer.cpython-38.pyc ├── doc.md ├── env.py ├── layers ├── __pycache__ │ ├── critic.cpython-38.pyc │ └── layers.cpython-38.pyc ├── critic.py ├── esmm.py └── layers.py ├── pretrain.zip ├── slmodels ├── __pycache__ │ ├── aitm.cpython-38.pyc │ ├── esmm.cpython-38.pyc │ ├── layers.cpython-38.pyc │ ├── mmoe.cpython-38.pyc │ ├── omoe.cpython-38.pyc │ ├── ple.cpython-38.pyc │ ├── sharedbottom.cpython-38.pyc │ └── singletask.cpython-38.pyc ├── aitm.py ├── esmm.py ├── layers.py ├── metaheac.py ├── mmoe.py ├── omoe.py ├── ple.py ├── sharedbottom.py └── singletask.py └── train ├── Arguments.py ├── __pycache__ ├── Arguments.cpython-38.pyc ├── run.cpython-38.pyc └── utils.cpython-38.pyc ├── run.py └── utils.py /Framework.pdf: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/Framework.pdf -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # Multi-Task Recommendations with Reinforcement Learning 2 | Source code of [Multi-Task Recommendations with Reinforcement Learning](https://dl.acm.org/doi/10.1145/3543507.3583467) 3 | 4 | Code for RetailRocket Dataset. 
5 | 6 | **Google Drive link for processed RetailRocket data:** https://drive.google.com/file/d/1THRWKttdpmcNaEc1DtKwxgYlV8RLMtV5/view?usp=sharing 7 | 8 | 9 | # Model Code 10 | + layers: stores common network structures 11 | + critic: critic network 12 | + esmm: esmm(actor) network, can introduce other MTL models as actor inside slmodels 13 | + layers: classical Embedding layers and MLP layers 14 | + slmodels: SL baseline models 15 | + agents: RL models 16 | + train: training-related configuration 17 | + env.py: offline sampling simulation environment 18 | + RLmain.py: main RL training program 19 | + SLmain.py: SL training main program 20 | 21 | 22 | + dataset 23 | + rtrl:retrailrocket dataset(Convert to MDP format:)[timestamp,sessionid,itemid,pay,click], [itemid,feature1,feature2,..],6:2:2 24 | 25 | # How to run it 26 | ## MTL baselines 27 | python3 SLmain.py --model_name=esmm 28 | 29 | ## RMTL 30 | python3 RLmain.py 31 | python3 SLmain.py --model_name=esmm --polish=1 32 | 33 | ## Result: 34 | 35 | test: best auc: 0.732444172986328 36 | 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 134/134 [00:07<00:00, 19.14it/s] 37 | task 0, AUC 0.7273702846096346, Log-loss 0.20675417715656488 38 | task 1, AUC 0.7247954179346048, Log-loss 0.048957254763240504 39 | 40 | # Citation: 41 | Please cite with the below bibTex if you find it helpful to your research. 
42 | 43 | ``` 44 | @inproceedings{liu2023multi, 45 | title={Multi-Task Recommendations with Reinforcement Learning}, 46 | author={Liu, Ziru and Tian, Jiejie and Cai, Qingpeng and Zhao, Xiangyu and Gao, Jingtong and Liu, Shuchang and Chen, Dayou and He, Tonghao and Zheng, Dong and Jiang, Peng and others}, 47 | booktitle={Proceedings of the ACM Web Conference 2023}, 48 | pages={1273--1282}, 49 | year={2023} 50 | } 51 | ``` 52 | 53 | -------------------------------------------------------------------------------- /RLmain.py: -------------------------------------------------------------------------------- 1 | from env import MTEnv 2 | from train.run import * 3 | from agents.DDPG_ESMM_BC import TD3_ESMMBCAgent 4 | from train.utils import ActionNormalizer 5 | from train.Arguments import Arguments 6 | 7 | def create_sub_agent(env, actor_name, agentcls, hyparams): 8 | hyparams.memory_size = 500000 9 | hyparams.init_episode = 200000 10 | hyparams.memory_path = "./pretrain/memory.pkl" 11 | hyparams.pretrain_path = f'chkpt/SL/rt_{actor_name}' 12 | hyparams.init_training_step = 1000 13 | hyparams.actor_reg = 0 14 | hyparams.critic_lr = 1e-3 15 | hyparams.ips = False 16 | 17 | agent = agentcls(env, actor_name, hyparams) 18 | hyparams.set_curpath(str(agent) + "_" + actor_name) 19 | 20 | return agent 21 | 22 | 23 | if __name__ == '__main__': 24 | hyparams = Arguments() 25 | 26 | # 1. RL environment (采样视数据集数量选择行数,已有offline memory可将train_rows调到极低) 27 | hyparams.train_rows = 500 28 | env = ActionNormalizer(MTEnv(hyparams.train_path, hyparams.features_path, hyparams.map_path, 29 | reward_type=hyparams.reward_type, nrows=hyparams.train_rows)) 30 | env.getMDP() 31 | 32 | # 2. Agent design 33 | agent = create_sub_agent(env, 'ple', TD3_ESMMBCAgent, hyparams) # TD3BC agent, 0 as ac loss by default 34 | 35 | # 3. 
offline training 36 | hyparams.epoch = 96 37 | train_and_test_offline(env, agent, hyparams.epoch, hyparams.init_episode, 38 | hyparams.save_dir, hyparams.memory_path) 39 | -------------------------------------------------------------------------------- /SLmain.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import random 3 | import tqdm 4 | from sklearn.metrics import roc_auc_score 5 | # pip install https://github.com/ufoym/imbalanced-dataset-sampler/archive/master.zip 6 | # from torchsampler import ImbalancedDatasetSampler 7 | from torch.utils.data import DataLoader 8 | import torch.nn.functional as F 9 | import os 10 | import numpy as np 11 | import pandas as pd 12 | 13 | # 导入数据集 14 | from dataset.rtrl import RetailRocketRLDataset 15 | 16 | # 导入MTL模型 17 | # from layers.esmm import ESMMModel 18 | from slmodels.esmm import ESMMModel 19 | from slmodels.singletask import SingleTaskModel 20 | from slmodels.ple import PLEModel 21 | from slmodels.mmoe import MMoEModel 22 | from slmodels.sharedbottom import SharedBottomModel 23 | from slmodels.aitm import AITMModel 24 | from slmodels.omoe import OMoEModel 25 | 26 | # 导入强化学习环境 27 | from train.utils import Catemapper,EarlyStopper,ActionNormalizer,RlLossPolisher 28 | from env import MTEnv 29 | from train.Arguments import Arguments 30 | from train.run import get_model, get_dataset, sltrain as train, sltest as test, slpred as pred 31 | 32 | def main(dataset_name, 33 | dataset_path, 34 | task_num, 35 | expert_num, 36 | model_name, 37 | epoch, 38 | learning_rate, 39 | feature_map_rate, 40 | batch_size, 41 | embed_dim, 42 | weight_decay, 43 | polish_lambda, 44 | device, 45 | save_dir): 46 | device = torch.device(device) 47 | # 装载数据集 48 | train_dataset = get_dataset(dataset_name, os.path.join(dataset_path, dataset_name) + '/train.csv') 49 | val_dataset = get_dataset(dataset_name, os.path.join(dataset_path, dataset_name) + '/test.csv') 50 | test_dataset = 
get_dataset(dataset_name, os.path.join(dataset_path, dataset_name) + '/val.csv') 51 | 52 | catemap = Catemapper(threshold=feature_map_rate) 53 | # 没有与训练的类别筛选就使用下面3行 54 | # catemap.make_mapper(os.path.join(dataset_path, dataset_name)+'/item_feadf.csv', 55 | # train_dataset.cate_cols,train_dataset.filter_cols) 56 | # catemap.save_mapper(save_dir) 57 | catemap.load_mapper("./pretrain") # ABSOLUTE path 58 | catemap.map_rt(train_dataset) 59 | catemap.map_rt(val_dataset) 60 | catemap.map_rt(test_dataset) 61 | print("categorical data map successfully!") 62 | 63 | # balance sampling,非平衡采样,没什么效果 64 | # train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=4,sampler=ImbalancedDatasetSampler(train_dataset)) 65 | train_data_loader = DataLoader(train_dataset, batch_size=batch_size, num_workers=4, shuffle=False) 66 | val_data_loader = DataLoader(val_dataset, batch_size=batch_size, num_workers=4, shuffle=False) 67 | test_data_loader = DataLoader(test_dataset, batch_size=batch_size, num_workers=4, shuffle=False) 68 | 69 | # 模型训练,二轮训练得删除下面环境注释 70 | """ 71 | # define test environment 72 | hyparams = Arguments() 73 | hyparams.test_rows = 50000 74 | test_env = ActionNormalizer(MTEnv("./dataset/rt/test_set.csv", hyparams.features_path, hyparams.map_path, 75 | reward_type=hyparams.reward_type, nrows=hyparams.test_rows, is_test=True)) 76 | test_env.getMDP() 77 | """ 78 | field_dims = train_dataset.field_dims 79 | numerical_num = train_dataset.numerical_num 80 | print("field_dims:",field_dims) 81 | model = get_model(model_name, field_dims, numerical_num, task_num, expert_num, embed_dim).to(device) 82 | print(model) 83 | criterion = torch.nn.BCELoss() 84 | optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate, weight_decay=weight_decay) 85 | save_dir = f'{save_dir}/{dataset_name}_{model_name}' 86 | if not os.path.isdir(save_dir): 87 | os.makedirs(save_dir) 88 | 89 | save_path = f'{save_dir}/{dataset_name}_{model_name}_{polish_lambda}.pt' 90 | # 
save_path = "./pretrain/rt_esmm.pt" 91 | # Contribution: use loss polisher 92 | # model.load_state_dict(torch.load(f'{save_dir}/{dataset_name}_ple_ple0.0.pt')) 93 | 94 | if polish_lambda != 0: 95 | hyparams = Arguments() 96 | hyparams.test_rows = 500 97 | test_env = ActionNormalizer(MTEnv(hyparams.test_path, hyparams.features_path, hyparams.map_path, 98 | reward_type=hyparams.reward_type, nrows=hyparams.test_rows, is_test=True)) 99 | test_env.getMDP() 100 | polisher = RlLossPolisher(test_env, model_name, lambda_=polish_lambda) 101 | model.load_state_dict(torch.load(f'{save_dir}/{dataset_name}_{model_name}_0.0.pt')) 102 | else: 103 | polisher = None 104 | # polisher = RlLossPolisher(test_env, "esmm", lambda_=polish_lambda) # test transibility 105 | early_stopper = EarlyStopper(num_trials=2, save_path=save_path) 106 | for epoch_i in range(epoch): 107 | train_loss = train(model, optimizer, train_data_loader, criterion, device, polisher) 108 | auc, loss, _, _ = test(model, val_data_loader, task_num, device) 109 | # auc, loss = env_test(test_env, model, save_dir, device) 110 | print('epoch:', epoch_i,'train loss:',train_loss, 'test: auc:', auc) 111 | # print('epoch:', epoch_i, 'train loss:', train_loss) 112 | for i in range(task_num): 113 | print('task {}, AUC {}, Log-loss {}'.format(i, auc[i], loss[i])) 114 | 115 | # auc stopper 116 | if not early_stopper.is_continuable(model, np.array(auc).mean()): 117 | print(f'test: best auc: {early_stopper.best_accuracy}') 118 | break 119 | 120 | 121 | # save_path = f'{save_dir}/{dataset_name}_mmoe_0.0.pt' # test directly 122 | model.load_state_dict(torch.load(save_path)) 123 | auc, loss, sloss, loss_df = test(model, test_data_loader, task_num, device) 124 | print("session_loss",sloss) 125 | # env test 126 | # auc, loss = env_test(test_env, model, save_dir, device) 127 | f = open(save_dir + '/{}_{}.txt'.format(model_name, dataset_name), 'a', encoding='utf-8') 128 | f.write('learning rate: {}\n'.format(learning_rate)) 129 | for i in 
range(task_num): 130 | print('task {}, AUC {}, Log-loss {}, session logloss {}'.format(i, auc[i], loss[i], sloss[i])) 131 | f.write('task {}, AUC {}, Log-loss {}, session logloss {}\n'.format(i, auc[i], loss[i], sloss[i])) 132 | print(loss_df.groupby(["session"]).mean()) 133 | print('\n') 134 | f.write('\n') 135 | f.close() 136 | 137 | 138 | # output the predictions 139 | # data_loader1 = DataLoader(train_dataset, batch_size=batch_size, num_workers=4) 140 | # data_loader2 = DataLoader(val_dataset, batch_size=batch_size, num_workers=4) 141 | # train_pred_df = pd.DataFrame(pred(model, data_loader1, task_num, device)) 142 | # test_pred_df = pd.DataFrame(pred(model, data_loader2, task_num, device)) 143 | # res = pd.concat([train_pred_df,test_pred_df],ignore_index=True) 144 | # res.to_csv(save_dir+"/res{}.csv".format(model_name),index=False) 145 | 146 | 147 | 148 | 149 | if __name__ == '__main__': 150 | import argparse 151 | seed = 2022 152 | random.seed(seed) 153 | np.random.seed(seed) 154 | torch.manual_seed(seed) 155 | torch.cuda.manual_seed(seed) 156 | parser = argparse.ArgumentParser() 157 | parser.add_argument('--dataset_name', default='rt', choices=['AliExpress_NL', 'AliExpress_ES', 'AliExpress_FR', 'AliExpress_US',"rt","rsc"]) 158 | parser.add_argument('--dataset_path', default='./dataset/') 159 | parser.add_argument('--model_name', default='esmm', choices=['singletask', 'sharedbottom', 'omoe', 'mmoe', 'ple', 'aitm','esmm']) 160 | parser.add_argument('--epoch', type=int, default=1000) 161 | parser.add_argument('--task_num', type=int, default=2) 162 | parser.add_argument('--expert_num', type=int, default=8) 163 | parser.add_argument('--polish', type=float, default=0.) 
164 | parser.add_argument('--learning_rate', type=float, default=0.001) 165 | parser.add_argument('--feature_map_rate', type=float, default=0.2) 166 | parser.add_argument('--batch_size', type=int, default=2048) 167 | parser.add_argument('--embed_dim', type=int, default=128) 168 | parser.add_argument('--weight_decay', type=float, default=1e-6) 169 | parser.add_argument('--device', default='cuda:0') 170 | parser.add_argument('--save_dir', default='./chkpt/SL') 171 | args = parser.parse_args() 172 | main(args.dataset_name, 173 | args.dataset_path, 174 | args.task_num, 175 | args.expert_num, 176 | args.model_name, 177 | args.epoch, 178 | args.learning_rate, 179 | args.feature_map_rate, 180 | args.batch_size, 181 | args.embed_dim, 182 | args.weight_decay, 183 | args.polish, 184 | args.device, 185 | args.save_dir) 186 | -------------------------------------------------------------------------------- /agents/DDPG_ESMM.py: -------------------------------------------------------------------------------- 1 | import os 2 | from types import MethodType 3 | from typing import Tuple 4 | from copy import deepcopy 5 | import gym 6 | import numpy as np 7 | import torch 8 | import torch.optim as optim 9 | import torch.nn.functional as F 10 | from torch.distributions import Normal 11 | from .ReplayBuffer import ReplayBuffer 12 | from layers.critic import Critic,CriticNeg 13 | from train.run import get_model 14 | from train.utils import get_optim_param 15 | import warnings 16 | 17 | warnings.filterwarnings("ignore") 18 | 19 | 20 | class DDPG_wESMMAgent: 21 | def __init__(self, 22 | env: gym.Env, 23 | actor_name="esmm", 24 | embed_dim=128, 25 | bottom_mlp_dims=(512, 256), 26 | tower_mlp_dims=(128, 64), 27 | ou_noise_theta=0.1, 28 | ou_noise_gamma=0.4, 29 | gamma=0.9, 30 | memory_size=100000, 31 | batch_size=512, 32 | drop_out=0.2, 33 | actor_lr=0, 34 | critic_lr=1e-3, 35 | actor_reg=3e-2, 36 | tau=0.2, 37 | soft_update_freq=2, 38 | actor_update_freq=2, 39 | init_training_step=10000, 40 
| ips=True, 41 | pretrain_path="../pretrain", 42 | ): 43 | # system parameters 44 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 45 | self.env = env 46 | self.gamma = gamma 47 | action_dim = self.env.action_space.shape[0] 48 | cate_dim = self.env.field_dims 49 | 50 | # training param 51 | self.actor_lr = actor_lr 52 | self.critic_lr = critic_lr 53 | self.pretrain_path = pretrain_path 54 | self.memory = ReplayBuffer(cate_dim.shape[0] + 1, action_dim, memory_size, batch_size) 55 | self.batch_size = batch_size 56 | self.tau = tau 57 | self.actor_reg = actor_reg 58 | self.soft_update_freq = soft_update_freq 59 | self.actor_update_freq = actor_update_freq 60 | self.init_training_step = init_training_step 61 | self.ips = ips 62 | 63 | # actor param 64 | self.categorical_field_dims = cate_dim 65 | self.num_dim = 1 66 | self.embed_dim = embed_dim 67 | self.bottom_mlp_dims = bottom_mlp_dims 68 | self.tower_mlp_dims = tower_mlp_dims 69 | self.task_num = action_dim 70 | self.drop_out = drop_out 71 | critic_model = CriticNeg 72 | 73 | # define actor network 74 | self.pretain_actor = get_model(actor_name,self.categorical_field_dims, self.num_dim, self.task_num, 8, 75 | self.embed_dim).to(self.device) 76 | 77 | self.critic1 = critic_model(self.categorical_field_dims, self.num_dim, self.embed_dim, self.bottom_mlp_dims, 78 | self.tower_mlp_dims, self.drop_out).to(self.device) 79 | 80 | self.critic2 = deepcopy(self.critic1) 81 | 82 | self.pretain_actor.load_state_dict(torch.load(self.pretrain_path+f"/rt_{actor_name}_0.0.pt")) 83 | self.pretain_actor.eval() 84 | self.actor = deepcopy(self.pretain_actor) 85 | self.critic1.embedding.load_state_dict(self.actor.embedding.state_dict()) 86 | self.critic2.embedding.load_state_dict(self.critic1.embedding.state_dict()) 87 | if os.path.exists(self.pretrain_path + "/critic1.pth") and os.path.exists(self.pretrain_path + "/critic2.pth"): 88 | state_dict1 = torch.load(self.pretrain_path + "/critic1.pth", 
map_location=lambda storage, loc: storage) 89 | self.critic1.load_state_dict(state_dict1) 90 | state_dict2 = torch.load(self.pretrain_path + "/critic2.pth", map_location=lambda storage, loc: storage) 91 | self.critic2.load_state_dict(state_dict2) 92 | 93 | self.actor_target = deepcopy(self.actor) 94 | self.actor_optimizer = optim.Adam(self.actor.parameters(), lr=self.actor_lr) 95 | # self.aloss_reg = torch.autograd.Variable(torch.FloatTensor([1.]).to(self.device), requires_grad=True) 96 | 97 | self.critic1_target = deepcopy(self.critic1) 98 | self.critic1_optimizer = optim.Adam(self.critic1.parameters(), lr=self.critic_lr) 99 | 100 | self.critic2_target = deepcopy(self.critic2) 101 | self.critic2_optimizer = optim.Adam(self.critic2.parameters(), lr=self.critic_lr) 102 | 103 | self.actor_optimizer.parameters = MethodType(get_optim_param, self.actor_optimizer) 104 | self.critic1_optimizer.parameters = MethodType(get_optim_param, self.critic1_optimizer) 105 | self.critic2_optimizer.parameters = MethodType(get_optim_param, self.critic2_optimizer) 106 | 107 | self.noise = Normal(ou_noise_theta * torch.ones(self.task_num), ou_noise_gamma * torch.ones(self.task_num)) 108 | 109 | self.transition = dict() 110 | 111 | self.total_step = 0 112 | self.is_test = False 113 | 114 | def select_action(self, state: np.ndarray) -> np.ndarray: 115 | if len(state.shape) == 1: # deal with 1-d dimension 116 | state = np.expand_dims(state, 0) 117 | cate_features, num_features = torch.LongTensor(state[:, :-1]).to(self.device), \ 118 | torch.FloatTensor(state[:, [-1]]).to(self.device) 119 | # print("features:",cate_features) 120 | selected_action = torch.stack(self.actor(cate_features, num_features), 1) 121 | # print(selected_action) 122 | selected_action = selected_action.cpu().detach().numpy() 123 | 124 | if not self.is_test: 125 | # attention: explore 126 | noise = np.clip(self.noise.sample().cpu().detach().numpy(), -3e-3, 3e-3) 127 | selected_action = selected_action + noise 128 | return 
selected_action.reshape(-1) 129 | 130 | def process_batch(self, transition): 131 | state = transition['state'] 132 | nstate = transition['nstate'] 133 | cate_features, num_features = torch.LongTensor(state[:, :-1]).to(self.device), \ 134 | torch.FloatTensor(state[:, [-1]]).to(self.device) 135 | ncate_features, nnum_features = torch.LongTensor(nstate[:, :-1]).to(self.device), \ 136 | torch.FloatTensor(nstate[:, [-1]]).to(self.device) 137 | 138 | action = torch.FloatTensor(transition['action']).to(self.device) 139 | reward = torch.FloatTensor(transition['reward'].reshape(-1, 2)).to(self.device) 140 | mask = torch.FloatTensor(1 - transition['done'].reshape(-1)).to(self.device) 141 | label = torch.FloatTensor(transition['label']).to(self.device) 142 | naction = torch.stack(self.actor_target(ncate_features, nnum_features), 1) 143 | reward1 = reward[:, 0] 144 | reward2 = reward[:, 1] 145 | action1, action2 = torch.unsqueeze(action[:, 0], 1), torch.unsqueeze(action[:, 1], 1) 146 | naction1, naction2 = torch.unsqueeze(naction[:, 0], 1), torch.unsqueeze(naction[:, 1], 1) 147 | 148 | res = dict( 149 | state=(cate_features, num_features), 150 | action=(action1, action2), 151 | reward=(reward1, reward2), 152 | nstate=(ncate_features, nnum_features), 153 | naction=(naction1, naction2), 154 | mask=mask, 155 | label=(label[:, 0], label[:, 1]) 156 | ) 157 | return res 158 | 159 | def get_closs(self, critic_id, critic, critic_target, transition): 160 | q_pred = critic(transition["state"][0], transition["state"][1], transition["action"][critic_id]) 161 | q_target = critic_target(transition["nstate"][0], transition["nstate"][1], transition["naction"][critic_id]) 162 | q_target = transition["reward"][critic_id] + self.gamma * q_target * transition["mask"] 163 | q_loss = torch.mean( 164 | torch.multiply(F.mse_loss(q_pred, q_target.detach(), reduce=False), transition["weight"][critic_id])) 165 | return q_loss 166 | 167 | def get_aloss(self, transition): 168 | ref_action = 
self.actor(transition["state"][0], transition["state"][1]) 169 | # seprate AC loss by q 170 | q1_loss_weight = -torch.multiply( 171 | self.critic1(transition["state"][0], transition["state"][1], 172 | torch.unsqueeze(ref_action[0], 1)), 173 | transition["weight"][0]) 174 | q2_loss_weight = - torch.multiply( 175 | self.critic2(transition["state"][0], transition["state"][1], 176 | torch.unsqueeze(ref_action[1], 1)), 177 | transition["weight"][1]) 178 | ac_loss = torch.mean(q1_loss_weight + q2_loss_weight) # refer to normal ac loss 179 | a_loss = ac_loss 180 | 181 | if self.actor_reg > 0: 182 | ref_loss = 0 183 | param_count = 0 184 | for param, value in self.actor.named_parameters(): 185 | param_count += 1 186 | ref_loss += F.mse_loss(value, 187 | self.pretain_actor.get_parameter(param)) 188 | ref_loss /= param_count 189 | a_loss += self.actor_reg * ref_loss 190 | return a_loss, ac_loss 191 | 192 | def update(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor]: 193 | update_steps = len(self.memory) // self.batch_size 194 | actor_lossls1 = [] 195 | actor_lossls2 = [] 196 | critic_lossls1 = [] 197 | critic_lossls2 = [] 198 | for i in range(update_steps): 199 | tb = self.memory.sample_batch() 200 | transition = self.process_batch(tb) 201 | # IPS weight 202 | if self.ips: 203 | label = torch.stack(transition["label"], dim=-1) 204 | pos = (self.batch_size + 1) / (torch.sum(label, dim=0) + torch.ones(2).to(self.device)) 205 | w_pos = torch.unsqueeze(pos, dim=0).repeat(self.batch_size, 1) 206 | w_neg = (self.batch_size + 1) / (self.batch_size - torch.sum(label, dim=0)) 207 | weight = label * w_pos + w_neg 208 | else: 209 | weight = torch.ones((self.batch_size, 2)).to(self.device) 210 | 211 | transition["weight"] = (weight[:, 0], weight[:, 1]) 212 | 213 | # update critic 214 | q1_loss = self.get_closs(critic_id=0, critic=self.critic1, critic_target=self.critic1_target, 215 | transition=transition) 216 | self.critic1_optimizer.zero_grad() 217 | 
q1_loss.backward() 218 | self.critic1_optimizer.step() 219 | 220 | q2_loss = self.get_closs(critic_id=1, critic=self.critic2, critic_target=self.critic2_target, 221 | transition=transition) 222 | self.critic2_optimizer.zero_grad() 223 | q2_loss.backward() 224 | self.critic2_optimizer.step() 225 | 226 | # update actor: nabla_{\theta}\pi_{\theta}nabla_{a}Q_{\pi}(s,a) 227 | a_loss, ac_loss = self.get_aloss(transition=transition) 228 | 229 | if (self.total_step + 1) % self.actor_update_freq == 0 and self.total_step > self.init_training_step: 230 | self.actor_optimizer.zero_grad() 231 | a_loss.backward() 232 | self.actor_optimizer.step() 233 | 234 | # update target networks 235 | self._target_soft_update(self.tau) 236 | 237 | critic_lossls1.append(q1_loss.item()) 238 | critic_lossls2.append(q2_loss.item()) 239 | actor_lossls1.append(ac_loss.item()) 240 | actor_lossls2.append(a_loss.item()) 241 | self.total_step += 1 242 | return np.mean(critic_lossls1), np.mean(critic_lossls2), np.mean(actor_lossls1), np.mean(actor_lossls2) 243 | 244 | def _target_soft_update(self, tau: float): 245 | if self.total_step % self.soft_update_freq == 0: 246 | for t_param, l_param in zip(self.critic1_target.parameters(), self.critic1.parameters()): 247 | t_param.data.copy_(tau * l_param.data + (1.0 - tau) * t_param.data) 248 | 249 | for t_param, l_param in zip(self.critic2_target.parameters(), self.critic2.parameters()): 250 | t_param.data.copy_(tau * l_param.data + (1.0 - tau) * t_param.data) 251 | 252 | for t_param, l_param in zip(self.actor_target.parameters(), self.actor.parameters()): 253 | t_param.data.copy_(tau * l_param.data + (1.0 - tau) * t_param.data) 254 | 255 | def save_or_load_agent(self, cwd: str, if_save: bool): 256 | """save or load training files for Agent 257 | 258 | :param cwd: Current Working Directory. ElegantRL save training files in CWD. 259 | :param if_save: True: save files. False: load files. 
260 | """ 261 | 262 | def load_torch_file(model_or_optim, _path): 263 | state_dict = torch.load(_path, map_location=lambda storage, loc: storage) 264 | model_or_optim.load_state_dict(state_dict) 265 | 266 | name_obj_list = [('actor', self.actor), ('act_optim', self.actor_optimizer), 267 | ('critic1', self.critic1), ('cri1_optim', self.critic1_optimizer), 268 | ('critic2', self.critic2), ('cri2_optim', self.critic2_optimizer)] 269 | name_obj_list = [(name, obj) for name, obj in name_obj_list if obj is not None] 270 | 271 | if if_save: 272 | for name, obj in name_obj_list: 273 | save_path = f"{cwd}/{name}.pth" 274 | torch.save(obj.state_dict(), save_path) 275 | else: 276 | for name, obj in name_obj_list: 277 | save_path = f"{cwd}/{name}.pth" 278 | load_torch_file(obj, save_path) if os.path.isfile(save_path) else None 279 | 280 | def __str__(self): 281 | return "DDPG_ESMM" 282 | -------------------------------------------------------------------------------- /agents/DDPG_ESMM_BC.py: -------------------------------------------------------------------------------- 1 | import gym 2 | import numpy as np 3 | import torch 4 | import torch.optim as optim 5 | import torch.nn.functional as F 6 | 7 | from .DDPG_ESMM import DDPG_wESMMAgent 8 | import warnings 9 | 10 | warnings.filterwarnings("ignore") 11 | 12 | 13 | class TD3_ESMMBCAgent(DDPG_wESMMAgent): 14 | def __init__(self, env: gym.Env, actor_name, arguments): 15 | super_args = dict( 16 | env=env, 17 | actor_name=actor_name, 18 | embed_dim=arguments.embed_dim, 19 | ou_noise_theta=arguments.ou_noise_theta, 20 | ou_noise_gamma=arguments.ou_noise_gamma, 21 | gamma=arguments.gamma, 22 | memory_size=arguments.memory_size, 23 | batch_size=arguments.batch_size, 24 | drop_out=arguments.drop_out, 25 | pretrain_path=arguments.pretrain_path, 26 | actor_lr=arguments.actor_lr, 27 | critic_lr=arguments.critic_lr, 28 | actor_reg=arguments.actor_reg, 29 | tau=arguments.tau, 30 | soft_update_freq=arguments.soft_update_freq, 31 | 
actor_update_freq=arguments.actor_update_freq, 32 | init_training_step=arguments.init_training_step, 33 | ips=arguments.ips, 34 | ) 35 | super(TD3_ESMMBCAgent, self).__init__(**super_args) 36 | self.actor_target_optimizer = optim.Adam(self.actor_target.parameters(), lr=5*self.actor_lr) 37 | 38 | def state_normalize(self, state): 39 | mu = state.mean(axis=0) 40 | std = state.std(axis=0) 41 | return (state-mu)/(std+1e-3) 42 | 43 | def process_batch(self, transition): 44 | state = transition['state'] 45 | nstate = transition['nstate'] 46 | # TD3BC approach 1: no need for cate features 47 | state, nstate = self.state_normalize(state), self.state_normalize(nstate) 48 | # end 49 | 50 | cate_features, num_features = torch.LongTensor(state[:, :-1]).to(self.device), \ 51 | torch.FloatTensor(state[:, [-1]]).to(self.device) 52 | ncate_features, nnum_features = torch.LongTensor(nstate[:, :-1]).to(self.device), \ 53 | torch.FloatTensor(nstate[:, [-1]]).to(self.device) 54 | 55 | action = torch.FloatTensor(transition['action']).to(self.device) 56 | reward = torch.FloatTensor(transition['reward'].reshape(-1, 2)).to(self.device) 57 | mask = torch.FloatTensor(1 - transition['done'].reshape(-1)).to(self.device) 58 | label = torch.FloatTensor(transition['label']).to(self.device) 59 | naction = torch.stack(self.actor_target(ncate_features, nnum_features), 1) 60 | reward1 = reward[:, 0] 61 | reward2 = reward[:, 1] 62 | action1, action2 = torch.unsqueeze(action[:, 0], 1), torch.unsqueeze(action[:, 1], 1) 63 | naction1, naction2 = torch.unsqueeze(naction[:, 0], 1), torch.unsqueeze(naction[:, 1], 1) 64 | 65 | # TD3 next action sample: 66 | noise = torch.clip(self.noise.sample(), -3e-3, 3e-3).to(self.device).detach() 67 | naction1, naction2 = naction1+noise[0], naction2+noise[1] 68 | # end 69 | 70 | res = dict( 71 | state=(cate_features, num_features), 72 | action=(action1, action2), 73 | reward=(reward1, reward2), 74 | nstate=(ncate_features, nnum_features), 75 | naction=(naction1, 
naction2), 76 | mask=mask, 77 | label=(label[:,0], label[:, 1]) 78 | ) 79 | return res 80 | 81 | def get_closs(self, critic_id, critic, critic_target, transition): 82 | q_pred = critic(transition["state"][0], transition["state"][1], transition["action"][critic_id]) 83 | # TD3 approach: 取小target 84 | q_target = torch.min( 85 | torch.stack( 86 | [critic_target(transition["nstate"][0], transition["nstate"][1], transition["naction"][critic_id]), 87 | critic(transition["nstate"][0], transition["nstate"][1], transition["naction"][critic_id])], 88 | dim=-1), 89 | dim=-1 90 | ).values 91 | q_target = transition["reward"][critic_id] + self.gamma * q_target * transition["mask"] 92 | q_loss = torch.mean( 93 | torch.multiply(F.mse_loss(q_pred, q_target.detach(), reduce=False), transition["weight"][critic_id])) 94 | return q_loss 95 | 96 | def get_aloss(self, transition): 97 | ref_action = self.actor(transition["state"][0], transition["state"][1]) 98 | # seprate AC loss by q 99 | q1_loss_weight = -torch.multiply( 100 | self.critic1(transition["state"][0], transition["state"][1], 101 | torch.unsqueeze(ref_action[0], 1)), 102 | transition["weight"][0]) 103 | q2_loss_weight = - torch.multiply( 104 | self.critic2(transition["state"][0], transition["state"][1], 105 | torch.unsqueeze(ref_action[1], 1)), 106 | transition["weight"][1]) 107 | ac_loss = torch.mean(q1_loss_weight + q2_loss_weight) # refer to normal ac loss 108 | a_loss = ac_loss 109 | a_loss = torch.mean(a_loss) 110 | 111 | if self.actor_reg > 0: 112 | ref_loss = 0 113 | param_count = 0 114 | for param, value in self.actor.named_parameters(): 115 | param_count += 1 116 | ref_loss += F.mse_loss(value, 117 | self.pretain_actor.state_dict()[param]) 118 | ref_loss /= param_count 119 | 120 | # TD3BC: approach 3 121 | # mask 122 | # a_loss = a_loss + self.actor_reg * ref_loss 123 | # add lines 124 | lambda_ = self.actor_reg / torch.mean(-q1_loss_weight - q2_loss_weight).abs().detach() 125 | a_loss = lambda_ * a_loss + ref_loss 
# reg the true actor? 126 | # ac_loss = lambda_ * ac_loss + ref_loss # no reg here over reference actor 127 | # self.actor_target_optimizer.zero_grad() 128 | # ac_loss.backward() 129 | # self.actor_target_optimizer.step() 130 | # end 131 | 132 | return a_loss, ac_loss 133 | 134 | def update(self): 135 | update_steps = len(self.memory) // self.batch_size 136 | actor_lossls1 = [] 137 | actor_lossls2 = [] 138 | critic_lossls1 = [] 139 | critic_lossls2 = [] 140 | for i in range(update_steps): 141 | tb = self.memory.sample_batch() 142 | transition = self.process_batch(tb) 143 | # IPS weight 144 | if self.ips: 145 | label = torch.stack(transition["label"], dim=-1) 146 | pos = (self.batch_size + 1) / (torch.sum(label, dim=0) + torch.ones(2).to(self.device)) 147 | w_pos = torch.unsqueeze(pos, dim=0).repeat(self.batch_size, 1) 148 | w_neg = (self.batch_size + 1) / (self.batch_size - torch.sum(label, dim=0)) 149 | weight = label * w_pos + w_neg 150 | else: 151 | weight = torch.ones((self.batch_size, 2)).to(self.device) 152 | 153 | transition["weight"] = (weight[:, 0], weight[:, 1]) 154 | 155 | # update critic 156 | q1_loss = self.get_closs(critic_id=0, critic=self.critic1, critic_target=self.critic1_target, 157 | transition=transition) 158 | self.critic1_optimizer.zero_grad() 159 | q1_loss.backward() 160 | self.critic1_optimizer.step() 161 | 162 | q2_loss = self.get_closs(critic_id=1, critic=self.critic2, critic_target=self.critic2_target, 163 | transition=transition) 164 | self.critic2_optimizer.zero_grad() 165 | q2_loss.backward() 166 | self.critic2_optimizer.step() 167 | 168 | # 不更新a,只做记录 169 | a_loss, ac_loss = self.get_aloss(transition=transition) 170 | # update target networks 171 | self._target_soft_update(self.tau) 172 | 173 | critic_lossls1.append(q1_loss.item()) 174 | critic_lossls2.append(q2_loss.item()) 175 | actor_lossls1.append(ac_loss.item()) 176 | actor_lossls2.append(a_loss.item()) 177 | self.total_step += 1 178 | return np.mean(critic_lossls1), 
import numpy as np
from typing import Dict, List, Tuple


class ReplayBuffer:
    """Fixed-capacity FIFO replay buffer for (s, a, s', r, done, label) tuples.

    Once full, the oldest transitions are overwritten in ring-buffer fashion.
    Rewards and labels are stored as 2-vectors (one entry per task).
    """

    def __init__(self, state_dim: int, action_dim: int, size: int, batch_size: int):
        shape_pairs = dict(
            state=(size, state_dim),
            action=(size, action_dim),
            label=(size, action_dim),
            nstate=(size, state_dim),
            reward=(size, 2),  # one reward per task (CTR / CTCVR)
        )
        self.state_buf = np.zeros(list(shape_pairs["state"]), dtype=np.float32)
        self.action_buf = np.zeros(list(shape_pairs["action"]), dtype=np.float32)
        self.label_buf = np.zeros(list(shape_pairs["label"]), dtype=np.float32)
        self.nstate_buf = np.zeros(list(shape_pairs["nstate"]), dtype=np.float32)
        self.reward_buf = np.zeros(list(shape_pairs["reward"]), dtype=np.float32)
        self.done_buf = np.zeros([size], dtype=np.float32)
        self.max_size, self.batch_size = size, batch_size
        self.idx, self.size = 0, 0

    def store(self, state: np.ndarray, action: np.ndarray, nstate: np.ndarray,
              reward: float, done: bool, label: float):
        """Write one transition at the current ring position and advance it."""
        slot = self.idx
        self.state_buf[slot] = state
        self.nstate_buf[slot] = nstate
        self.action_buf[slot] = action
        self.label_buf[slot] = label
        self.reward_buf[slot] = reward
        self.done_buf[slot] = done
        self.size = min(self.size + 1, self.max_size)
        self.idx = (self.idx + 1) % self.max_size

    def sample_batch(self) -> Dict[str, np.ndarray]:
        """Sample batch_size distinct transitions uniformly at random."""
        picks = np.random.choice(self.size, size=self.batch_size, replace=False)
        named_bufs = (
            ("state", self.state_buf),
            ("action", self.action_buf),
            ("nstate", self.nstate_buf),
            ("reward", self.reward_buf),
            ("done", self.done_buf),
            ("label", self.label_buf),
        )
        return {name: buf[picks] for name, buf in named_bufs}

    def __len__(self) -> int:
        return self.size
-------------------------------------------------------------------------------- /agents/__pycache__/DDPG_ESMM.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/agents/__pycache__/DDPG_ESMM.cpython-38.pyc -------------------------------------------------------------------------------- /agents/__pycache__/DDPG_ESMM_BC.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/agents/__pycache__/DDPG_ESMM_BC.cpython-38.pyc -------------------------------------------------------------------------------- /agents/__pycache__/ReplayBuffer.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/agents/__pycache__/ReplayBuffer.cpython-38.pyc -------------------------------------------------------------------------------- /doc.md: -------------------------------------------------------------------------------- 1 | # rlmtlrec code structure 2 | ## 模型代码 3 | + layers: 储存常用网络结构 4 | + critic: critic网络 5 | + esmm: esmm(actor)网络,可以在slmodels里面引入其他MTL模型作为actor 6 | + layers: 经典Embedding层和MLP层 7 | + slmodels: SL baseline模型 8 | + agents: RL模型 9 | + train: 训练相关配置 10 | + env.py: offline采样模拟环境 11 | + RLmain.py: RL训练主程序 12 | + SLmain.py: SL训练主程序 13 | 14 | ## 读写代码 15 | + dataset 16 | + rtrl:retrailrocket数据集(组织成MDP格式:)[timestamp,sessionid,itemid,pay,click], [itemid,feature1,feature2,..],6:2:2 17 | 18 | + chkpt 19 | + pretrain 20 | 21 | 22 | ## layers 23 | + layers: 24 | + EmbeddingLayer(field_dims, embed_dim) 25 | + MultiLayerPerceptron(input_dim, embed_dims, dropout, output_layer=True) 26 | 27 | + critic 28 | + Critic(categorical_field_dims, 
numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, 29 | dropout): 30 | + 参数说明: 31 | + categorical_field_dims, numerical_num 分别表示类别特征维度(对应embedding)和数值特征维度 32 | + embed_dim, bottom_mlp_dims, tower_mlp_dims 为网络结构参数,一般和baseline选择同一套参数(后续可调) 33 | + dropout 默认值为0.2,可调 34 | + 结构:类别特征、数值特征以及action(1维)的embedding拼接,输入bottom_mlp层再经过tower_mlp层输出1维的Critic打分q值 35 | + forward(self, categorical_x, numerical_x, action) -> 1维长度为1的tensor 36 | + CriticNeg(categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, 37 | dropout): 38 | + 结构同上,改激活函数为ReLU,使得forward输出为负值 39 | 40 | + esmm 41 | + ESMM(categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, 42 | dropout) 43 | + 参数说明: 44 | + task_num: 任务数量(本paper设定为2,只讨论CTR和CTCVR任务) 45 | + 结构:类别特征、数值特征的embedding拼接输入到相同的bottom_mlp层,然后从task_num个tower_mlp输出1维预测值 46 | + forward(self, categorical_x, numerical_x) -> 1维长度为task_num的tensor 47 | 48 | ## slmodels 49 | 直接搬用开源的MTL模型整合包,[链接](https://github.com/easezyc/Multitask-Recommendation-Library.git) 50 | ,均为上述ESMM结构:输入输出一致,网络结构参数有部分不同,不过都预设好了 51 | 52 | 53 | ## SLmain.py: 54 | ### 导入模块: 55 | + DL基础包 56 | + 数据(例): from dataset.rtrl import RetailRocketRLDataset 57 | + MTL模型: slmodels目录 58 | + RL相关: 用于二阶段训练 59 | 60 | ### 函数: 61 | + 需要从train.run中导入的训练环境函数: 62 | + get_dataset(name, path) 63 | + get_model(name, categorical_field_dims, numerical_num, task_num, expert_num, embed_dim) 64 | + sltrain(model, optimizer, data_loader, criterion, device, polisher=None, log_interval=100) 65 | + 返回epoch_loss 66 | + sltest(model, data_loader, task_num, device) 67 | + 返回测试auc和logloss 68 | + slpred(model, data_loader, task_num, device) 69 | + 返回多目标预测值 70 | + main(dataset_name, 71 | dataset_path, 72 | task_num, 73 | expert_num, 74 | model_name, 75 | epoch, 76 | learning_rate, 77 | feature_map_rate, 78 | batch_size, 79 | embed_dim, 80 | weight_decay, 81 | polish_lambda, 82 | device, 83 | save_dir) 84 | + 参数说明: 85 | + 数据加载参数:dataset_name, dataset_path, 
feature_map_rate (防止类别标签爆炸) 86 | + 模型参数: task_num, expert_num, model_name 87 | + 常见训练可调节参数:epoch,learning_rate,batch_size,embed_dim,weight_decay 88 | + **本模型特色参数**:polish_lambda (将新权重以一定比率添加到原来Loss) 89 | + 外部参数:save_dir, device 90 | ### 运行实例 91 | python3 SLmain.py --model_name=esmm 92 | python3 RLmain.py 93 | python3 SLmain.py --model_name=ple --polish=1 94 | 95 | 返回情况: 96 | 97 | test: best auc: 0.732444172986328 98 | 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 134/134 [00:07<00:00, 19.14it/s] 99 | task 0, AUC 0.7273702846096346, Log-loss 0.20675417715656488 100 | task 1, AUC 0.7247954179346048, Log-loss 0.048957254763240504 101 | 102 | 同时chkpt文件新增 .pt的最优模型储存文件 103 | 104 | ## agents 105 | ### ReplayBuffer.py 106 | + ReplayBuffer 107 | + init(state_dim, action_dim, size, batch_size): 生成[state, action, next_state, reward, done, label]的buffer 108 | + store: 将探索产生的transition存到buffer 109 | + sample_batch: 按照batch_size不重复地从buffer采样 110 | 111 | ### DDPG_ESMM类继承体系 112 | + DDPG_ESMM 113 | + init: 114 | 参数表格: 115 | 116 | |参数名|用途|默认值| 117 | |---|---|---| 118 | |env| 环境类 | 无 | 119 | |actor_name|MTL模型名|"esmm"| 120 | |gamma|discount|0.9| 121 | |pretrain_path|预训练地址|"./pretrain"| 122 | 123 | |参数名|用途|默认值| 124 | |---|---|---| 125 | |**embed_dim**|同SL|128| 126 | |bottom_mlp_dims|共享层维度|(512,256)| 127 | |tower_mlp_dims|任务层维度|(128,64)| 128 | |ou_noise_theta|噪声均值|0| 129 | |ou_noise_gamma|噪声方差|0.4| 130 | |memory_size|replay buffer大小|500000| 131 | |actor_reg|BC权重|0| 132 | |tau|软更新比例|0.2| 133 | |soft_update_freq|软更新频率|2| 134 | |actor_update_freq|actor更新频率|2| 135 | |init_training_step|初始训练步数|10000| 136 | |*ips*|ips样本权重|False| 137 | |batch_size| |512| 138 | |drop_out| |0.2| 139 | |actor_lr| |1e-3| 140 | |critic_lr| |1e-3| 141 | 142 | 143 | 144 | + DDPG_ESMM 145 | + init,初始化7个网络,具体参数参考上表: 146 | + pretrain_actor: 预训练的MTL模型 147 | + actor, actor_target 148 | + critic1/2, critic1/2_target 149 
| + select_action(self, state: np.ndarray) -> np.ndarray 150 | + process_batch(self, transition: Dict[np.ndarry,...]) -> Dict[torch.Tensor,...] 151 | + get_closs(self, critic_id: int, critic: nn.Module, critic_target: nn.Module, transition: Dict) -> torch.Tensor 152 | + get_aloss(self, transition: Dict) -> Tuple[torch.Tensor,torch.Tensor] 153 | + update(self) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor, torch.Tensor] 154 | + 正常DDPG更新逻辑,注意init_training_step后再更新actor网络 155 | + _target_soft_update(self, tau:float) 156 | + save_or_load_agent(self, cwd: str, if_save: bool) 157 | 158 | + DDPG_ESMM_BC(DDPG_ESMM_multiweight) 159 | + 针对offline数据进行训练,在process_batch,get_closs,get_aloss按照TD3BC的修改方式进行了修改,同时update只更新Critic网络至收敛 160 | 161 | ## train 162 | + Arguments 163 | + 参数类:模型参数,训练参数,文件参数 164 | 165 | + utils 工具函数 166 | + get_optim_param: 固定用于存储模型 167 | + Catemapper: 大量类别特征会导致embedding爆炸,此处会设定一个阈值将长尾的类别都判定为1个类(需要对不同数据集调整) 168 | + EarlyStopper: SL模型停止条件,验证集指标连续数次不提升就停止 169 | + ConvergeStopper: (原创)RL的loss连续多次变化小于一定范围就判定为收敛,随后终止训练保存模型 170 | + ActionNormalizer: 环境状态标准化程序,gym常见,可作为component调节项 171 | + RlLossPolisher: 我们的模型,根据Critic的分值给出各个item在不同目标下的权重,权重组合方式可调 172 | 173 | + run 训练整合(此处仅描述RL相关) 174 | + one_iter_offline(agent): 用agent采样原始数据集一次,产生transition存入replaybuffer 175 | + train_and_test_offline(environment: gym.Env, agent, epoch, init_episode, save_dir, memory_path) 176 | + test(environment, agent, save_dir): 可以使用测试环境查看动态RL训练效果,本模型用不上 177 | + plot_loss: 绘制RL训练过程中score,critic_loss,actor_loss以及actor_loss_reg的变化曲线(可以进一步修改) 178 | 179 | ## RLmain.py 180 | + create_sub_agent(env, actor_name, agentcls, hyparams):定义agent 181 | 182 | ## 全部实验训练流程 -------------------------------------------------------------------------------- /env.py: -------------------------------------------------------------------------------- 1 | # mtrec env 2 | import gym 3 | import pandas as pd 4 | import numpy as np 5 | from collections import defaultdict 6 | from train.utils import Catemapper 7 | 8 | 
class Visitor:
    """Replays one user's session as an offline episode.

    ``session`` is a dict with 'states' (array of shape [T, state_dim]) and
    'labels' (array of shape [T, 2] with click / pay targets) -- built by
    ``MTEnv.getMDP``; TODO confirm schema stays in sync with that builder.

    :param session: the per-visitor state/label arrays
    :param reward_type: "norm" (negative absolute error) or "bce"
        (clipped binary cross-entropy reward)
    """

    def __init__(self, session, reward_type="norm"):
        self.session = session
        self.reward_type = reward_type
        self.states = session['states']
        self.state = self.states[0]  # initial state

        self.labels = session['labels']

        # guard against states/labels length mismatch
        self.session_len = min(self.states.shape[0], self.labels.shape[0])
        self.timestep = 0

    def step(self, action):
        """Advance one interaction.

        :param action: length-2 array-like of predicted (CTR, CTCVR) scores
        :return: (next_state, reward, done, label)
        """
        t = self.timestep
        label = self.labels[t]
        cvaction = np.array([action[0], action[1]])  # CTR/CTCVR accu
        # cvaction = np.array([action[0], action[0]*action[1]])  # CTR/CVR accu

        # TODO: weighted reward
        # reward = np.clip(1 - abs(cvaction - label), 0, 1)
        # default reward: negative absolute error per task
        reward = -np.abs(cvaction - label)

        # BCE reward, clipped for log() stability
        if self.reward_type == "bce":
            reward = label*np.log(np.clip(cvaction,1e-4,1))+(1-label)*np.log(np.clip(1-cvaction,1e-4,1))

        if t + 1 < self.session_len:
            nstate = self.states[t + 1]
            done = False
        else:
            nstate = self.states[t]  # terminal: repeat the last state
            done = True
        self.timestep += 1
        return nstate, reward, done, label

    def __len__(self):
        return self.labels.shape[0]

    def __str__(self):
        # FIX: __str__ must return a string.  The original printed the session
        # and implicitly returned None, so str(visitor) raised
        # "TypeError: __str__ returned non-string".
        return str(self.session)
class seqVisitor(Visitor):
    """Sequence variant of Visitor: states accumulate over time, so the
    observation at step t is the prefix states[:t+1] (2-D array) rather than
    a single row."""

    def __init__(self, session):
        super(seqVisitor, self).__init__(session)
        self.session = session
        self.states = session['states']
        self.state = self.states[[0]]  # initial state, kept 2-D via fancy index

        self.labels = session['labels']

        self.session_len = self.labels.shape[0]
        self.timestep = 0

    def step(self, action):
        """Advance one interaction; returns a growing state prefix.

        NOTE(review): original comment says this needs changing, or a
        separate environment should be built for it.
        """
        t = self.timestep
        label = self.labels[t]
        cvaction = np.array([action[0], action[0]*action[1]])  # CTR/CTCVR accu
        # TODO: single-head additive models would not need the clipping here
        reward = np.clip(1 - abs(cvaction - label), 0, 1)
        if t + 1 < self.session_len:
            nstate = self.states[:(t + 2),:]
            done = False
        else:
            nstate = self.states[:(t + 1),:]
            done = True
        self.timestep += 1
        return nstate, reward, done, label



class MTEnv(gym.Env):
    """Offline multi-task recommendation environment.

    Sessions are reconstructed from an MDP-format CSV (one row per
    interaction) and replayed through Visitor/seqVisitor objects.

    :param mdp_path: CSV with columns timestamp, visitorid, itemid, click,
        pay, state, next_state
    :param features_path: CSV of per-item categorical features
    :param map_path: directory holding the fitted Catemapper
    :param nrows: cap on the number of MDP rows loaded
    :param reward_type: passed through to Visitor ("norm" or "bce")
    :param is_test: if True, iterate visitors deterministically in reset()
    :param is_seq: if True, use seqVisitor (growing state prefix)
    """

    def __init__(self, mdp_path, features_path, map_path, nrows=10000,reward_type="norm",is_test=False,is_seq=False):
        super(MTEnv, self).__init__()
        self.mdp_path = mdp_path
        self.nrows = nrows
        self.features_dict, self.idmap = self.get_features(features_path, map_path)
        self.field_dims = self.idmap.field_dims

        self.is_test = is_test
        self.reward_type = reward_type
        self.test_step = 0
        self.is_seq = is_seq

        # 2-dim action in [0, 1]: predicted (CTR, CTCVR)
        self.action_space = gym.spaces.Box(0, 1, shape=(2,), dtype=np.float32)
        # self.observation_space = gym.spaces.Discrete()  # t*feature_len, dynamic; hard to represent here

    def get_features(self, features_path, map_path):
        """Load per-item features, dedupe by itemid, and map categories
        through the pre-fitted Catemapper (collapses long-tail categories).

        :return: (itemid -> feature-vector dict, fitted Catemapper)
        """
        feature_cols = ['785', '591', '814', 'available', 'categoryid', '364', '776']
        features = pd.read_csv(features_path, usecols=feature_cols + ['itemid'])
        features.drop_duplicates('itemid', inplace=True)
        features.fillna(0, inplace=True)
        idmap = Catemapper(threshold=0.2)
        idmap.load_mapper(map_path)
        idmap.map(features)
        features_dict = dict(zip(features['itemid'].tolist(), features[feature_cols].values))
        return features_dict, idmap


    def getMDP(self):
        """Group the MDP rows by visitor into per-session state/label arrays.

        Populates self.visitors, self.mdp_dataset and self.datalen.
        """
        mdp_data = pd.read_csv(self.mdp_path, usecols=['timestamp', 'visitorid', 'itemid', 'click', 'pay',
                                                       'state', 'next_state'], nrows=self.nrows)  # timestamp, itemid
        len_items = len(self.features_dict)
        self.visitors = mdp_data.visitorid.unique().tolist()
        mdp_dataset = defaultdict(dict)
        # zero-vector fallback for items with no feature row
        pad = [0]*self.field_dims.shape[0]
        for i, d in mdp_data.groupby('visitorid'):
            d.sort_values(by='timestamp', inplace=True)
            labels = d[['click', 'pay']].values.astype(np.float32)
            # SECURITY NOTE(review): eval() on a CSV column -- the
            # 'next_state' strings are trusted preprocessed output here, but
            # this would execute arbitrary code if the file were tampered with;
            # ast.literal_eval would be the safe equivalent.
            s = [self.features_dict[j[0]].tolist() if j[0] in self.features_dict else pad for j in eval(d['next_state'].tolist()[-1])]
            cate_fea = np.array(s,dtype=np.int64)
            # pad a single all-zero numerical feature column
            states = np.c_[cate_fea,np.zeros((cate_fea.shape[0],1))]
            # print(labels.shape,states.shape)
            mdp_dataset[i] = dict(
                labels=labels,
                states=states.astype(np.int64)
            )

        self.mdp_dataset = mdp_dataset
        self.datalen = len(self.visitors)
        print("visitors number:",len(self.visitors))

    def reset(self):
        """Start a new session (random visitor, or round-robin in test mode)
        and return its initial state."""
        visitorid = np.random.choice(self.visitors, size=1)[0]
        if self.is_test:
            visitorid = self.visitors[self.test_step%self.datalen]
            self.test_step += 1
        if self.is_seq:
            self.cur_session = seqVisitor(self.mdp_dataset[visitorid])
        else:
            self.cur_session = Visitor(self.mdp_dataset[visitorid],self.reward_type)
        return self.cur_session.state

    def step(self, action):  # offline behaviour, no need for action
        """Delegate to the current session's step()."""
        nstate, reward, done, label = self.cur_session.step(action)
        return nstate, reward, done, label

    def render(self):
        pass


if __name__ == '__main__':
    data_path = "./dataset/train.csv"
    features_path = "./dataset/rt/item_feadf.csv"
    map_path = "./chkpt"
    env = MTEnv(data_path, features_path, map_path, is_seq=False)
    env.getMDP()
    for i in range(10):
        state = env.reset()
        while True:
            action = env.action_space.sample()
            nstate, reward, done, _ = env.step(action)
            print("nstate:{},reward:{}".format(nstate,reward))
            if done:
                break
# layers/layers.py + layers/critic.py + layers/esmm.py (consolidated span;
# shared layers defined first so the critics/actor can use them directly)
import torch
import numpy as np


class EmbeddingLayer(torch.nn.Module):
    """One shared embedding table covering several categorical fields.

    Field i owns a contiguous slice of the table; ``offsets`` shifts the raw
    per-field index into that slice.
    """

    def __init__(self, field_dims, embed_dim):
        super().__init__()
        self.embedding = torch.nn.Embedding(sum(field_dims), embed_dim, padding_idx=0)
        # FIX: np.long was deprecated in NumPy 1.20 and removed in 1.24;
        # use the explicit fixed-width alias instead.
        self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.int64)
        torch.nn.init.xavier_uniform_(self.embedding.weight.data)

    def forward(self, x):
        """
        :param x: Long tensor of size ``(batch_size, num_fields)``
        :return: Float tensor of size ``(batch_size, num_fields, embed_dim)``
        """
        x = x + x.new_tensor(self.offsets).unsqueeze(0)
        return self.embedding(x)


class MultiLayerPerceptron(torch.nn.Module):
    """Stack of Linear+ReLU+Dropout blocks, optionally ending in Linear(., 1)."""

    def __init__(self, input_dim, embed_dims, dropout, output_layer=True):
        super().__init__()
        layers = list()
        for embed_dim in embed_dims:
            layers.append(torch.nn.Linear(input_dim, embed_dim))
            # layers.append(torch.nn.BatchNorm1d(embed_dim))
            layers.append(torch.nn.ReLU())
            layers.append(torch.nn.Dropout(p=dropout))
            input_dim = embed_dim
        if output_layer:
            layers.append(torch.nn.Linear(input_dim, 1))
        self.mlp = torch.nn.Sequential(*layers)

    def forward(self, x):
        """
        :param x: Float tensor of size ``(batch_size, input_dim)``
        """
        return self.mlp(x)


class Critic(torch.nn.Module):
    """Q(s, a) scorer: embeds categorical, numerical and 1-d action inputs,
    concatenates them and regresses a scalar through bottom+tower MLPs."""

    def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims,
                 dropout):
        super(Critic, self).__init__()
        self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim)
        self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim)
        self.action_layer = torch.nn.Linear(1, embed_dim)
        # FIX: the numerical features are projected to a SINGLE embed_dim
        # vector, so the flattened width is
        # (num_fields + 1 numerical + 1 action) * embed_dim.  The original
        # `(len(...) + numerical_num + 1)` only matched when numerical_num == 1
        # and made view() crash for any other numerical width.
        self.embed_output_dim = (len(categorical_field_dims) + 2) * embed_dim
        self.bottom = MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False)
        self.tower = MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout)

    def forward(self, categorical_x, numerical_x, action):
        """
        :param categorical_x: Long tensor ``(batch_size, num_fields)``
        :param numerical_x: Float tensor ``(batch_size, numerical_num)``
        :param action: Float tensor ``(batch_size, 1)``
        :return: 1-D Float tensor of length batch_size with Q-values
        """
        emb = self.embedding(categorical_x)
        numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1)
        action_emb = self.action_layer(action).unsqueeze(1)
        emb = torch.cat([emb, numerical_emb, action_emb], dim=1)
        emb = emb.view(emb.size(0), self.embed_output_dim)
        fea = self.bottom(emb)
        return self.tower(fea).squeeze(1)


class CriticNeg(torch.nn.Module):
    """Same structure as Critic, but the output is forced non-positive via
    -ReLU (used where Q-values must stay <= 0)."""

    def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims,
                 dropout):
        super(CriticNeg, self).__init__()
        self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim)
        self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim)
        self.action_layer = torch.nn.Linear(1, embed_dim)
        # FIX: same width reasoning as Critic -- one projected numerical
        # embedding plus one action embedding.
        self.embed_output_dim = (len(categorical_field_dims) + 2) * embed_dim
        self.bottom = MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False)
        self.tower = MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout)
        self.activate = torch.nn.ReLU()

    def forward(self, categorical_x, numerical_x, action):
        """Return a 1-D tensor of non-positive Q-values."""
        emb = self.embedding(categorical_x)
        numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1)
        action_emb = self.action_layer(action).unsqueeze(1)
        emb = torch.cat([emb, numerical_emb, action_emb], dim=1)
        emb = emb.view(emb.size(0), self.embed_output_dim)
        fea = self.bottom(emb)
        return -self.activate(self.tower(fea)).squeeze(1)


class ESMMModel(torch.nn.Module):
    """ESMM actor: per-task bottom/tower MLPs over shared embeddings.

    Task 1 (CTCVR) multiplies its sigmoid output by task 0's (CTR), so
    CTCVR <= CTR by construction.
    """

    def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num,
                 dropout):
        super().__init__()
        self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim)
        self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim)
        # FIX: one projected numerical embedding -> width is
        # (num_fields + 1) * embed_dim; the original
        # `(len(...) + numerical_num)` only matched when numerical_num == 1.
        self.embed_output_dim = (len(categorical_field_dims) + 1) * embed_dim
        self.task_num = task_num

        self.bottom = torch.nn.ModuleList(
            [MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False) for i in
             range(task_num)])
        self.tower = torch.nn.ModuleList(
            [MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(task_num)])

    def forward(self, categorical_x, numerical_x=None):
        """
        :param categorical_x: Long tensor of size ``(batch_size, num_fields)``
        :param numerical_x: Float tensor of size ``(batch_size, numerical_num)``
        :return: list of ``task_num`` 1-D tensors of per-task probabilities
        """
        results = list()
        emb = self.embedding(categorical_x)
        numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1)
        emb = torch.cat([emb, numerical_emb], 1).view(-1, self.embed_output_dim)

        for i in range(self.task_num):
            fea = self.bottom[i](emb)
            if i == 1:
                # CTCVR = p(CVR) * p(CTR)
                results.append(torch.sigmoid(self.tower[i](fea).squeeze(1)) * results[0])
            else:
                results.append(torch.sigmoid(self.tower[i](fea).squeeze(1)))
        return results
-------------------------------------------------------------------------------- /slmodels/__pycache__/layers.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/slmodels/__pycache__/layers.cpython-38.pyc -------------------------------------------------------------------------------- /slmodels/__pycache__/mmoe.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/slmodels/__pycache__/mmoe.cpython-38.pyc -------------------------------------------------------------------------------- /slmodels/__pycache__/omoe.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/slmodels/__pycache__/omoe.cpython-38.pyc -------------------------------------------------------------------------------- /slmodels/__pycache__/ple.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/slmodels/__pycache__/ple.cpython-38.pyc -------------------------------------------------------------------------------- /slmodels/__pycache__/sharedbottom.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/slmodels/__pycache__/sharedbottom.cpython-38.pyc -------------------------------------------------------------------------------- /slmodels/__pycache__/singletask.cpython-38.pyc: 
import torch
import numpy as np
from .layers import EmbeddingLayer, MultiLayerPerceptron


class AITMModel(torch.nn.Module):
    """
    A pytorch implementation of Adaptive Information Transfer Multi-task Model.

    Each task has its own bottom MLP; before the towers, task i's bottom
    feature is fused with task i-1's (projected through g) via a small
    scaled dot-product attention over the two candidates.

    Reference:
        Xi, Dongbo, et al. Modeling the sequential dependence among audience multi-step conversions with multi-task learning in targeted display advertising. KDD 2021.
    """

    def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, dropout):
        super().__init__()
        self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim)
        self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim)
        # numerical features are projected to a single embed_dim vector,
        # hence the "+ 1"
        self.embed_output_dim = (len(categorical_field_dims) + 1) * embed_dim
        self.task_num = task_num
        self.hidden_dim = bottom_mlp_dims[-1]

        # g projects the previous task's feature into the transfer space;
        # h1/h2/h3 are the shared V/K/Q projections of the attention
        self.g = torch.nn.ModuleList([torch.nn.Linear(bottom_mlp_dims[-1], bottom_mlp_dims[-1]) for i in range(task_num - 1)])
        self.h1 = torch.nn.Linear(bottom_mlp_dims[-1], bottom_mlp_dims[-1])
        self.h2 = torch.nn.Linear(bottom_mlp_dims[-1], bottom_mlp_dims[-1])
        self.h3 = torch.nn.Linear(bottom_mlp_dims[-1], bottom_mlp_dims[-1])

        self.bottom = torch.nn.ModuleList([MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False) for i in range(task_num)])
        self.tower = torch.nn.ModuleList([MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(task_num)])

    def forward(self, categorical_x, numerical_x):
        """
        :param
            categorical_x: Long tensor of size ``(batch_size, categorical_field_dims)``
            numerical_x: Long tensor of size ``(batch_size, numerical_num)``
        :return: list of ``task_num`` 1-D tensors of per-task probabilities
        """
        categorical_emb = self.embedding(categorical_x)
        numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1)
        emb = torch.cat([categorical_emb, numerical_emb], 1).view(-1, self.embed_output_dim)
        fea = [self.bottom[i](emb) for i in range(self.task_num)]

        # sequentially transfer information from task i-1 to task i
        for i in range(1, self.task_num):
            p = self.g[i - 1](fea[i - 1]).unsqueeze(1)  # transferred previous-task feature
            q = fea[i].unsqueeze(1)                     # current task's own feature
            x = torch.cat([p, q], dim = 1)              # two candidates to attend over
            V = self.h1(x)
            K = self.h2(x)
            Q = self.h3(x)
            # scaled dot-product attention over the two candidates
            fea[i] = torch.sum(torch.nn.functional.softmax(torch.sum(K * Q, 2, True) / np.sqrt(self.hidden_dim), dim=1) * V, 1)

        results = [torch.sigmoid(self.tower[i](fea[i]).squeeze(1)) for i in range(self.task_num)]
        return results
8 | """ 9 | 10 | def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, 11 | dropout): 12 | super().__init__() 13 | self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim) 14 | self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim) 15 | self.embed_output_dim = (len(categorical_field_dims) + numerical_num) * embed_dim 16 | self.task_num = task_num 17 | 18 | self.bottom = torch.nn.ModuleList( 19 | [MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False) for i in 20 | range(task_num)]) 21 | self.tower = torch.nn.ModuleList( 22 | [MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(task_num)]) 23 | 24 | def forward(self, categorical_x, numerical_x=None): 25 | """ 26 | :param 27 | categorical_x: Long tensor of size ``(batch_size, categorical_field_dims)`` 28 | numerical_x: Long tensor of size ``(batch_size, numerical_num)`` 29 | """ 30 | results = list() 31 | emb = self.embedding(categorical_x) 32 | numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1) 33 | emb = torch.cat([emb, numerical_emb], 1).view(-1, self.embed_output_dim) 34 | 35 | for i in range(self.task_num): 36 | fea = self.bottom[i](emb) 37 | if i == 1: 38 | results.append(torch.sigmoid(self.tower[i](fea).squeeze(1)) * results[0]) 39 | else: 40 | results.append(torch.sigmoid(self.tower[i](fea).squeeze(1))) 41 | return results 42 | -------------------------------------------------------------------------------- /slmodels/layers.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | class EmbeddingLayer(torch.nn.Module): 5 | 6 | def __init__(self, field_dims, embed_dim): 7 | super().__init__() 8 | self.embedding = torch.nn.Embedding(sum(field_dims), embed_dim, padding_idx=0) 9 | self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.long) 10 | 
torch.nn.init.xavier_uniform_(self.embedding.weight.data) 11 | 12 | def forward(self, x): 13 | """ 14 | :param x: Long tensor of size ``(batch_size, num_fields)`` 15 | """ 16 | x = x + x.new_tensor(self.offsets).unsqueeze(0) 17 | return self.embedding(x) 18 | 19 | class MultiLayerPerceptron(torch.nn.Module): 20 | 21 | def __init__(self, input_dim, embed_dims, dropout, output_layer=True): 22 | super().__init__() 23 | layers = list() 24 | for embed_dim in embed_dims: 25 | layers.append(torch.nn.Linear(input_dim, embed_dim)) 26 | # layers.append(torch.nn.BatchNorm1d(embed_dim)) 27 | layers.append(torch.nn.ReLU()) 28 | layers.append(torch.nn.Dropout(p=dropout)) 29 | input_dim = embed_dim 30 | if output_layer: 31 | layers.append(torch.nn.Linear(input_dim, 1)) 32 | self.mlp = torch.nn.Sequential(*layers) 33 | 34 | def forward(self, x): 35 | """ 36 | :param x: Float tensor of size ``(batch_size, embed_dim)`` 37 | """ 38 | return self.mlp(x) -------------------------------------------------------------------------------- /slmodels/metaheac.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import numpy as np 3 | 4 | class Meta_Linear(torch.nn.Linear): #used in MAML to forward input with fast weight 5 | def __init__(self, in_features, out_features): 6 | super(Meta_Linear, self).__init__(in_features, out_features) 7 | self.weight.fast = None #Lazy hack to add fast weight link 8 | self.bias.fast = None 9 | 10 | def forward(self, x): 11 | if self.weight.fast is not None and self.bias.fast is not None: 12 | out = torch.nn.functional.linear(x, self.weight.fast, self.bias.fast) #weight.fast (fast weight) is the temporaily adapted weight 13 | else: 14 | out = super(Meta_Linear, self).forward(x) 15 | return out 16 | 17 | class Meta_Embedding(torch.nn.Embedding): #used in MAML to forward input with fast weight 18 | def __init__(self, num_embedding, embedding_dim): 19 | super(Meta_Embedding, self).__init__(num_embedding, 
embedding_dim) 20 | self.weight.fast = None 21 | 22 | def forward(self, x): 23 | if self.weight.fast is not None: 24 | out = torch.nn.functional.embedding( 25 | x, self.weight.fast, self.padding_idx, self.max_norm, 26 | self.norm_type, self.scale_grad_by_freq, self.sparse) 27 | else: 28 | out = torch.nn.functional.embedding( 29 | x, self.weight, self.padding_idx, self.max_norm, 30 | self.norm_type, self.scale_grad_by_freq, self.sparse) 31 | return out 32 | 33 | class EmbeddingLayer(torch.nn.Module): 34 | 35 | def __init__(self, field_dims, embed_dim): 36 | super().__init__() 37 | self.embedding = torch.nn.Embedding(sum(field_dims), embed_dim) 38 | self.offsets = np.array((0, *np.cumsum(field_dims)[:-1]), dtype=np.long) 39 | torch.nn.init.xavier_uniform_(self.embedding.weight.data) 40 | 41 | def forward(self, x): 42 | """ 43 | :param x: Long tensor of size ``(batch_size, num_fields)`` 44 | """ 45 | x = x + x.new_tensor(self.offsets).unsqueeze(0) 46 | return self.embedding(x) 47 | 48 | class MultiLayerPerceptron(torch.nn.Module): 49 | 50 | def __init__(self, input_dim, embed_dims, dropout, output_layer=True): 51 | super().__init__() 52 | layers = list() 53 | for embed_dim in embed_dims: 54 | layers.append(torch.nn.Linear(input_dim, embed_dim)) 55 | layers.append(torch.nn.BatchNorm1d(embed_dim)) 56 | layers.append(torch.nn.ReLU()) 57 | layers.append(torch.nn.Dropout(p=dropout)) 58 | input_dim = embed_dim 59 | if output_layer: 60 | layers.append(torch.nn.Linear(input_dim, 1)) 61 | self.mlp = torch.nn.Sequential(*layers) 62 | 63 | def forward(self, x): 64 | """ 65 | :param x: Float tensor of size ``(batch_size, embed_dim)`` 66 | """ 67 | return self.mlp(x) 68 | 69 | class HeacModel(torch.nn.Module): 70 | """ 71 | A pytorch implementation of Hybrid Expert and Critic Model. 72 | 73 | Reference: 74 | Zhu, Yongchun, et al. Learning to Expand Audience via Meta Hybrid Experts and Critics for Recommendation and Advertising. KDD 2021. 
75 | """ 76 | 77 | def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, expert_num, critic_num, dropout): 78 | super().__init__() 79 | self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim) 80 | self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim) 81 | self.embed_output_dim = (len(categorical_field_dims) + 1) * embed_dim 82 | self.task_embedding = Meta_Embedding(task_num, embed_dim) 83 | self.task_num = task_num 84 | self.expert_num = expert_num 85 | self.critic_num = critic_num 86 | 87 | self.expert = torch.nn.ModuleList([MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False) for i in range(expert_num)]) 88 | self.critic = torch.nn.ModuleList([MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(critic_num)]) 89 | self.expert_gate = torch.nn.Sequential(torch.nn.Linear(embed_dim * 2, expert_num), torch.nn.Softmax(dim=1)) 90 | self.critic_gate = torch.nn.Sequential(torch.nn.Linear(embed_dim * 2, critic_num), torch.nn.Softmax(dim=1)) 91 | 92 | def forward(self, categorical_x, numerical_x): 93 | """ 94 | :param 95 | categorical_x: Long tensor of size ``(batch_size, categorical_field_dims)`` 96 | numerical_x: Long tensor of size ``(batch_size, numerical_num)`` 97 | """ 98 | categorical_emb = self.embedding(categorical_x) 99 | numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1) 100 | emb = torch.cat([categorical_emb, numerical_emb], 1) 101 | batch_size = emb.size(0) 102 | 103 | gate_input_emb = [] 104 | for i in range(self.task_num): 105 | idxs = torch.tensor([i for j in range(batch_size)]).view(-1, 1).cuda() 106 | task_emb = self.task_embedding(idxs).squeeze(1) 107 | gate_input_emb.append(torch.cat([task_emb, torch.mean(emb, dim=1)], dim=1).view(batch_size, -1)) 108 | 109 | emb = emb.view(-1, self.embed_output_dim) 110 | 111 | expert_gate_value = [self.expert_gate(gate_input_emb[i]).unsqueeze(1) for i in 
range(self.task_num)] 112 | fea = torch.cat([self.expert[i](emb).unsqueeze(1) for i in range(self.expert_num)], dim = 1) 113 | task_fea = [torch.bmm(expert_gate_value[i], fea).squeeze(1) for i in range(self.task_num)] 114 | 115 | critic_gate_value = [self.critic_gate(gate_input_emb[i]) for i in range(self.task_num)] 116 | results = [] 117 | for i in range(self.task_num): 118 | output = [torch.sigmoid(self.critic[j](task_fea[i])) for j in range(self.critic_num)] 119 | output = torch.cat(output, dim=1) 120 | results.append(torch.mean(critic_gate_value[i] * output, dim=1)) 121 | 122 | return results 123 | 124 | class MetaHeacModel(torch.nn.Module): 125 | def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, expert_num, critic_num, dropout): 126 | super(MetaHeacModel, self).__init__() 127 | self.model = HeacModel(categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, expert_num, critic_num, dropout) 128 | self.local_lr = 0.0002 129 | self.criterion = torch.nn.BCELoss() 130 | 131 | def forward(self, categorical_x, numerical_x): 132 | return self.model(categorical_x, numerical_x) 133 | 134 | def local_update(self, support_set_categorical, support_set_numerical, support_set_y): 135 | fast_parameters = list(self.model.parameters()) 136 | for weight in fast_parameters: 137 | weight.fast = None 138 | support_set_y_pred = self.model(support_set_categorical, support_set_numerical) 139 | loss_list = [self.criterion(support_set_y_pred[j], support_set_y[:, j].float()) for j in range(support_set_y.size(1))] 140 | loss = 0 141 | for item in loss_list: 142 | loss += item 143 | loss /= len(loss_list) 144 | 145 | self.model.zero_grad() 146 | grad = torch.autograd.grad(loss, fast_parameters, create_graph=True, allow_unused=True) 147 | fast_parameters = [] 148 | for k, weight in enumerate(self.model.parameters()): 149 | if grad[k] is None: 150 | continue 151 | # for usage of weight.fast, 
please see Linear_fw, Conv_fw in backbone.py 152 | if weight.fast is None: 153 | weight.fast = weight - self.local_lr * grad[k] # create weight.fast 154 | else: 155 | weight.fast = weight.fast - self.local_lr * grad[k] 156 | fast_parameters.append(weight.fast) 157 | 158 | return loss 159 | 160 | def global_update(self, list_sup_categorical, list_sup_numerical, list_sup_y, list_qry_categorical, list_qry_numerical, list_qry_y): 161 | batch_sz = len(list_sup_categorical) 162 | losses_q = [] 163 | for i in range(batch_sz): 164 | loss_sup = self.local_update(list_sup_categorical[i], list_sup_numerical[i], list_sup_y[i]) 165 | query_set_y_pred = self.model(list_qry_categorical[i], list_qry_numerical[i]) 166 | 167 | loss_list = [self.criterion(query_set_y_pred[j], list_qry_y[i][:, j].float()) for j in range(list_qry_y[i].size(1))] 168 | loss = 0 169 | for item in loss_list: 170 | loss += item 171 | loss /= len(loss_list) 172 | 173 | losses_q.append(loss) 174 | losses_q = torch.stack(losses_q).mean(0) 175 | return losses_q -------------------------------------------------------------------------------- /slmodels/mmoe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .layers import EmbeddingLayer, MultiLayerPerceptron 3 | 4 | 5 | class MMoEModel(torch.nn.Module): 6 | """ 7 | A pytorch implementation of MMoE Model. 8 | 9 | Reference: 10 | Ma, Jiaqi, et al. Modeling task relationships in multi-task learning with multi-gate mixture-of-experts. KDD 2018. 
11 | """ 12 | 13 | def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, expert_num, dropout): 14 | super().__init__() 15 | self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim) 16 | self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim) 17 | self.embed_output_dim = (len(categorical_field_dims) + 1) * embed_dim 18 | self.task_num = task_num 19 | self.expert_num = expert_num 20 | 21 | self.expert = torch.nn.ModuleList([MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False) for i in range(expert_num)]) 22 | self.tower = torch.nn.ModuleList([MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(task_num)]) 23 | self.gate = torch.nn.ModuleList([torch.nn.Sequential(torch.nn.Linear(self.embed_output_dim, expert_num), torch.nn.Softmax(dim=1)) for i in range(task_num)]) 24 | 25 | def forward(self, categorical_x, numerical_x): 26 | """ 27 | :param 28 | categorical_x: Long tensor of size ``(batch_size, categorical_field_dims)`` 29 | numerical_x: Long tensor of size ``(batch_size, numerical_num)`` 30 | """ 31 | categorical_emb = self.embedding(categorical_x) 32 | numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1) 33 | emb = torch.cat([categorical_emb, numerical_emb], 1).view(-1, self.embed_output_dim) 34 | gate_value = [self.gate[i](emb).unsqueeze(1) for i in range(self.task_num)] 35 | fea = torch.cat([self.expert[i](emb).unsqueeze(1) for i in range(self.expert_num)], dim = 1) 36 | task_fea = [torch.bmm(gate_value[i], fea).squeeze(1) for i in range(self.task_num)] 37 | 38 | results = [torch.sigmoid(self.tower[i](task_fea[i]).squeeze(1)) for i in range(self.task_num)] 39 | return results -------------------------------------------------------------------------------- /slmodels/omoe.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .layers import EmbeddingLayer, 
MultiLayerPerceptron 3 | 4 | 5 | class OMoEModel(torch.nn.Module): 6 | """ 7 | A pytorch implementation of one-gate MoE Model. 8 | 9 | Reference: 10 | Jacobs, Robert A., et al. "Adaptive mixtures of local experts." Neural computation 3.1 (1991): 79-87. 11 | Ma, Jiaqi, et al. Modeling task relationships in multi-task learning with multi-gate mixture-of-experts. KDD 2018. 12 | """ 13 | 14 | def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, expert_num, dropout): 15 | super().__init__() 16 | self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim) 17 | self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim) 18 | self.embed_output_dim = (len(categorical_field_dims) + 1) * embed_dim 19 | self.task_num = task_num 20 | self.expert_num = expert_num 21 | 22 | self.expert = torch.nn.ModuleList([MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False) for i in range(expert_num)]) 23 | self.tower = torch.nn.ModuleList([MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(task_num)]) 24 | self.gate = torch.nn.Sequential(torch.nn.Linear(self.embed_output_dim, expert_num), torch.nn.Softmax(dim=1)) 25 | 26 | def forward(self, categorical_x, numerical_x): 27 | """ 28 | :param 29 | categorical_x: Long tensor of size ``(batch_size, categorical_field_dims)`` 30 | numerical_x: Long tensor of size ``(batch_size, numerical_num)`` 31 | """ 32 | categorical_emb = self.embedding(categorical_x) 33 | numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1) 34 | emb = torch.cat([categorical_emb, numerical_emb], 1).view(-1, self.embed_output_dim) 35 | gate_value = self.gate(emb).unsqueeze(1) 36 | fea = torch.cat([self.expert[i](emb).unsqueeze(1) for i in range(self.expert_num)], dim = 1) 37 | fea = torch.bmm(gate_value, fea).squeeze(1) 38 | 39 | results = [torch.sigmoid(self.tower[i](fea).squeeze(1)) for i in range(self.task_num)] 40 | return 
results -------------------------------------------------------------------------------- /slmodels/ple.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .layers import EmbeddingLayer, MultiLayerPerceptron 3 | 4 | 5 | class PLEModel(torch.nn.Module): 6 | """ 7 | A pytorch implementation of PLE Model. 8 | 9 | Reference: 10 | Tang, Hongyan, et al. Progressive layered extraction (ple): A novel multi-task learning (mtl) model for personalized recommendations. RecSys 2020. 11 | """ 12 | 13 | def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, shared_expert_num, specific_expert_num, dropout): 14 | super().__init__() 15 | self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim) 16 | self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim) 17 | self.embed_output_dim = (len(categorical_field_dims) + 1) * embed_dim 18 | self.task_num = task_num 19 | self.shared_expert_num = shared_expert_num 20 | self.specific_expert_num = specific_expert_num 21 | self.layers_num = len(bottom_mlp_dims) 22 | 23 | self.expert = list() 24 | for i in range(self.layers_num): 25 | if i == 0: 26 | self.expert.append(torch.nn.ModuleList([MultiLayerPerceptron(self.embed_output_dim, [bottom_mlp_dims[i]], dropout, output_layer=False) for j in range(self.specific_expert_num * self.task_num + self.shared_expert_num)])) 27 | else: 28 | self.expert.append(torch.nn.ModuleList([MultiLayerPerceptron(bottom_mlp_dims[i - 1], [bottom_mlp_dims[i]], dropout, output_layer=False) for j in range(self.specific_expert_num * self.task_num + self.shared_expert_num)])) 29 | self.expert = torch.nn.ModuleList(self.expert) 30 | 31 | self.gate = list() 32 | for i in range(self.layers_num): 33 | if i == 0: 34 | input_dim = self.embed_output_dim 35 | else: 36 | input_dim = bottom_mlp_dims[i - 1] 37 | gate_list = [torch.nn.Sequential(torch.nn.Linear(input_dim, shared_expert_num + 
specific_expert_num), torch.nn.Softmax(dim=1)) for j in range(self.task_num)] 38 | gate_list.append(torch.nn.Sequential(torch.nn.Linear(input_dim, shared_expert_num + task_num * specific_expert_num), torch.nn.Softmax(dim=1))) 39 | self.gate.append(torch.nn.ModuleList(gate_list)) 40 | self.gate = torch.nn.ModuleList(self.gate) 41 | 42 | self.task_expert_index = list() 43 | for i in range(task_num): 44 | index_list = list() 45 | index_list.extend(range(i * self.specific_expert_num, (1 + i) * self.specific_expert_num)) 46 | index_list.extend(range(task_num * self.specific_expert_num, task_num * self.specific_expert_num + self.shared_expert_num)) 47 | self.task_expert_index.append(index_list) 48 | self.task_expert_index.append(range(task_num * self.specific_expert_num + self.shared_expert_num)) 49 | 50 | self.tower = torch.nn.ModuleList([MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(task_num)]) 51 | 52 | def forward(self, categorical_x, numerical_x): 53 | """ 54 | :param 55 | categorical_x: Long tensor of size ``(batch_size, categorical_field_dims)`` 56 | numerical_x: Long tensor of size ``(batch_size, numerical_num)`` 57 | """ 58 | categorical_emb = self.embedding(categorical_x) 59 | numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1) 60 | emb = torch.cat([categorical_emb, numerical_emb], 1).view(-1, self.embed_output_dim) 61 | 62 | task_fea = [emb for i in range(self.task_num + 1)] 63 | for i in range(self.layers_num): 64 | for j in range(self.task_num + 1): 65 | fea = torch.cat([self.expert[i][index](task_fea[j]).unsqueeze(1) for index in self.task_expert_index[j]], dim = 1) 66 | gate_value = self.gate[i][j](task_fea[j]).unsqueeze(1) 67 | task_fea[j] = torch.bmm(gate_value, fea).squeeze(1) 68 | 69 | results = [torch.sigmoid(self.tower[i](task_fea[i]).squeeze(1)) for i in range(self.task_num)] 70 | return results -------------------------------------------------------------------------------- /slmodels/sharedbottom.py: 
-------------------------------------------------------------------------------- 1 | import torch 2 | from .layers import EmbeddingLayer, MultiLayerPerceptron 3 | 4 | 5 | class SharedBottomModel(torch.nn.Module): 6 | """ 7 | A pytorch implementation of Shared-Bottom Model. 8 | """ 9 | 10 | def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, dropout): 11 | super().__init__() 12 | self.embedding = EmbeddingLayer(categorical_field_dims, embed_dim) 13 | self.numerical_layer = torch.nn.Linear(numerical_num, embed_dim) 14 | self.embed_output_dim = (len(categorical_field_dims) + 1) * embed_dim 15 | self.task_num = task_num 16 | 17 | self.bottom = MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False) 18 | self.tower = torch.nn.ModuleList([MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(task_num)]) 19 | 20 | def forward(self, categorical_x, numerical_x): 21 | """ 22 | :param 23 | categorical_x: Long tensor of size ``(batch_size, categorical_field_dims)`` 24 | numerical_x: Long tensor of size ``(batch_size, numerical_num)`` 25 | """ 26 | categorical_emb = self.embedding(categorical_x) 27 | numerical_emb = self.numerical_layer(numerical_x).unsqueeze(1) 28 | emb = torch.cat([categorical_emb, numerical_emb], 1).view(-1, self.embed_output_dim) 29 | fea = self.bottom(emb) 30 | 31 | results = [torch.sigmoid(self.tower[i](fea).squeeze(1)) for i in range(self.task_num)] 32 | return results -------------------------------------------------------------------------------- /slmodels/singletask.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from .layers import EmbeddingLayer, MultiLayerPerceptron 3 | 4 | 5 | class SingleTaskModel(torch.nn.Module): 6 | """ 7 | A pytorch implementation of Single Task Model. 
8 | """ 9 | 10 | def __init__(self, categorical_field_dims, numerical_num, embed_dim, bottom_mlp_dims, tower_mlp_dims, task_num, dropout): 11 | super().__init__() 12 | self.embedding = torch.nn.ModuleList([EmbeddingLayer(categorical_field_dims, embed_dim) for i in range(task_num)]) 13 | self.numerical_layer = torch.nn.ModuleList([torch.nn.Linear(numerical_num, embed_dim) for i in range(task_num)]) 14 | self.embed_output_dim = (len(categorical_field_dims) + 1) * embed_dim 15 | self.task_num = task_num 16 | 17 | self.bottom = torch.nn.ModuleList([MultiLayerPerceptron(self.embed_output_dim, bottom_mlp_dims, dropout, output_layer=False) for i in range(task_num)]) 18 | self.tower = torch.nn.ModuleList([MultiLayerPerceptron(bottom_mlp_dims[-1], tower_mlp_dims, dropout) for i in range(task_num)]) 19 | 20 | def forward(self, categorical_x, numerical_x): 21 | """ 22 | :param 23 | categorical_x: Long tensor of size ``(batch_size, categorical_field_dims)`` 24 | numerical_x: Long tensor of size ``(batch_size, numerical_num)`` 25 | """ 26 | results = list() 27 | for i in range(self.task_num): 28 | categorical_emb = self.embedding[i](categorical_x) 29 | numerical_emb = self.numerical_layer[i](numerical_x).unsqueeze(1) 30 | emb = torch.cat([categorical_emb, numerical_emb], 1).view(-1, self.embed_output_dim) 31 | fea = self.bottom[i](emb) 32 | results.append(torch.sigmoid(self.tower[i](fea).squeeze(1))) 33 | return results -------------------------------------------------------------------------------- /train/Arguments.py: -------------------------------------------------------------------------------- 1 | import os 2 | 3 | 4 | class Arguments: 5 | def __init__(self, *args): 6 | # env related 7 | self.gamma = 0.9 8 | self.reward_type = "bce" 9 | self.ips = True 10 | 11 | # network related 12 | self.embed_dim = 128 13 | self.drop_out = 0.2 14 | self.ou_noise_theta = 0 15 | self.ou_noise_gamma = 0.4 16 | self.actor_lr = 1e-3 # 学习速度偏慢 17 | self.critic_lr = 1e-3 18 | self.actor_reg = 
0 # 每个模型都有个无底洞的reg rate要调整,从1到0的顺序吧 19 | self.actor_update_freq = 1 # critic学完后完全可以直接每步都更新 20 | self.soft_update_freq = 2 21 | self.tau = 0.2 22 | 23 | # training related 24 | self.epoch = 1000 25 | self.batch_size = 2048 26 | self.memory_size = 500000 27 | self.init_episode = 10000 # 初始采样轮数 28 | self.init_training_step = 10000 # 初始Critic训练步数 29 | 30 | self.train_rows = 100000 31 | self.test_rows = 50000 32 | 33 | # path related 34 | self.test_path = "./dataset/rt/test.csv" 35 | self.val_path = "./dataset/rt/val.csv" 36 | self.train_path = "./dataset/rt/train.csv" 37 | self.features_path = "./dataset/rt/item_feadf.csv" 38 | self.map_path = "./pretrain" 39 | self.pretrain_path = "./pretrain/" 40 | self.memory_path = "./pretrain/memory.pkl" 41 | 42 | def set_curpath(self, model_name): 43 | self.cur_path = f"./chkpt/RL/res_{model_name}" 44 | if not os.path.isdir(self.cur_path): 45 | os.makedirs(self.cur_path) 46 | self.save_dir = self.cur_path 47 | -------------------------------------------------------------------------------- /train/__pycache__/Arguments.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/train/__pycache__/Arguments.cpython-38.pyc -------------------------------------------------------------------------------- /train/__pycache__/run.cpython-38.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/train/__pycache__/run.cpython-38.pyc -------------------------------------------------------------------------------- /train/__pycache__/utils.cpython-38.pyc: -------------------------------------------------------------------------------- 
https://raw.githubusercontent.com/Applied-Machine-Learning-Lab/RMTL/5b8b13c21f449f044e9b09fa75574f63a6431dcc/train/__pycache__/utils.cpython-38.pyc -------------------------------------------------------------------------------- /train/run.py: -------------------------------------------------------------------------------- 1 | import os 2 | import time 3 | from typing import List 4 | import pickle 5 | import random 6 | import gym 7 | import numpy as np 8 | import matplotlib.pyplot as plt 9 | import pandas as pd 10 | import tqdm 11 | from IPython.display import clear_output 12 | from torch.utils.data import DataLoader 13 | import torch 14 | import torch.nn.functional as F 15 | from sklearn.metrics import roc_auc_score 16 | 17 | from dataset.rtrl import RetailRocketRLDataset 18 | from slmodels.esmm import ESMMModel 19 | from slmodels.singletask import SingleTaskModel 20 | from slmodels.ple import PLEModel 21 | from slmodels.mmoe import MMoEModel 22 | from slmodels.sharedbottom import SharedBottomModel 23 | from slmodels.aitm import AITMModel 24 | from slmodels.omoe import OMoEModel 25 | 26 | """ 27 | SL run begin here 28 | """ 29 | 30 | def get_dataset(name, path): 31 | if 'rt' in name: # 当前只支持一个数据集 32 | return RetailRocketRLDataset(path) 33 | elif 'kuai' in name: 34 | return KuaiRLDataset(path) 35 | else: 36 | raise ValueError('unknown dataset name: ' + name) 37 | 38 | def get_model(name, categorical_field_dims, numerical_num, task_num, expert_num, embed_dim): 39 | """ 40 | Hyperparameters are empirically determined, not opitmized. 
41 | """ 42 | 43 | if name == 'sharedbottom': 44 | print("Model: Shared-Bottom") 45 | return SharedBottomModel(categorical_field_dims, numerical_num, embed_dim=embed_dim, bottom_mlp_dims=(512, 256), tower_mlp_dims=(128, 64), task_num=task_num, dropout=0.2) 46 | elif name == 'singletask': 47 | print("Model: SingleTask") 48 | return SingleTaskModel(categorical_field_dims, numerical_num, embed_dim=embed_dim, bottom_mlp_dims=(512, 256), tower_mlp_dims=(128, 64), task_num=task_num, dropout=0.2) 49 | elif name == 'esmm': 50 | print("Model: ESMM") 51 | return ESMMModel(categorical_field_dims, numerical_num, embed_dim=embed_dim, bottom_mlp_dims=(512, 256), tower_mlp_dims=(128, 64), task_num=task_num, dropout=0.2) 52 | elif name == 'omoe': 53 | print("Model: OMoE") 54 | return OMoEModel(categorical_field_dims, numerical_num, embed_dim=embed_dim, bottom_mlp_dims=(512, 256), tower_mlp_dims=(128, 64), task_num=task_num, expert_num=expert_num, dropout=0.2) 55 | elif name == 'mmoe': 56 | print("Model: MMoE") 57 | return MMoEModel(categorical_field_dims, numerical_num, embed_dim=embed_dim, bottom_mlp_dims=(512, 256), tower_mlp_dims=(128, 64), task_num=task_num, expert_num=expert_num, dropout=0.2) 58 | elif name == 'ple': 59 | print("Model: PLE") 60 | return PLEModel(categorical_field_dims, numerical_num, embed_dim=embed_dim, bottom_mlp_dims=(512, 256), tower_mlp_dims=(128, 64), task_num=task_num, shared_expert_num=int(expert_num / 2), specific_expert_num=int(expert_num / 2), dropout=0.2) 61 | elif name == 'aitm': 62 | print("Model: AITM") 63 | return AITMModel(categorical_field_dims, numerical_num, embed_dim=embed_dim, bottom_mlp_dims=(512, 256), tower_mlp_dims=(128, 64), task_num=task_num, dropout=0.2) 64 | else: 65 | raise ValueError('unknown model name: ' + name) 66 | 67 | 68 | def sltrain(model, optimizer, data_loader, criterion, device, polisher=None, log_interval=100): 69 | model.train() 70 | total_loss = 0 71 | epoch_loss = [] 72 | loader = tqdm.tqdm(data_loader, 
smoothing=0, mininterval=1.0) 73 | for i, (_, categorical_fields, numerical_fields, labels) in enumerate(loader): 74 | categorical_fields, numerical_fields, labels = categorical_fields.to(device), numerical_fields.to(device), labels.to(device) 75 | y = model(categorical_fields, numerical_fields) 76 | if polisher is not None: 77 | loss = polisher.polish_loss(categorical_fields, numerical_fields, labels, y) 78 | else: 79 | loss_list = [criterion(y[i], labels[:, i].float()) for i in range(labels.size(1))] 80 | loss = 0 81 | for item in loss_list: 82 | loss += item 83 | loss /= len(loss_list) 84 | model.zero_grad() 85 | loss.backward() 86 | optimizer.step() 87 | total_loss += loss.item() 88 | epoch_loss.append(loss.item()) 89 | if (i + 1) % log_interval == 0: 90 | loader.set_postfix(loss=total_loss / log_interval) 91 | total_loss = 0 92 | return np.mean(epoch_loss) 93 | 94 | 95 | def sltest(model, data_loader, task_num, device): 96 | model.eval() 97 | labels_dict, predicts_dict, loss_dict = {}, {}, {} 98 | sessions = [] 99 | for i in range(task_num): 100 | labels_dict[i], predicts_dict[i], loss_dict[i] = list(), list(), list() 101 | with torch.no_grad(): 102 | for session_id, categorical_fields, numerical_fields, labels in tqdm.tqdm(data_loader, smoothing=0, mininterval=1.0): 103 | sessions.extend(session_id.tolist()) 104 | categorical_fields, numerical_fields, labels = categorical_fields.to(device), numerical_fields.to(device), labels.to(device) 105 | y = model(categorical_fields, numerical_fields) 106 | for i in range(task_num): 107 | labels_dict[i].extend(labels[:, i].tolist()) 108 | predicts_dict[i].extend(y[i].tolist()) 109 | loss_dict[i].extend(torch.nn.functional.binary_cross_entropy(y[i], labels[:, i].float(), reduction='none').tolist()) 110 | auc_results, loss_results = list(), list() 111 | for i in range(task_num): 112 | auc_results.append(roc_auc_score(labels_dict[i], predicts_dict[i])) 113 | loss_results.append(np.array(loss_dict[i]).mean()) 114 | 115 | # 
compute session logloss 116 | loss_dict["session"] = sessions 117 | loss_df = pd.DataFrame(loss_dict) 118 | s_avg = loss_df.groupby(["session"]).mean().mean().tolist() 119 | return auc_results, loss_results, s_avg, loss_df 120 | 121 | def slpred(model, data_loader, task_num, device): 122 | model.eval() 123 | labels_dict, predicts_dict, loss_dict = {}, {}, {} 124 | for i in range(task_num): 125 | predicts_dict[i]= list() 126 | with torch.no_grad(): 127 | for categorical_fields, numerical_fields, labels in tqdm.tqdm(data_loader, smoothing=0, mininterval=1.0): 128 | categorical_fields, numerical_fields, labels = categorical_fields.to(device), numerical_fields.to(device), labels.to(device) 129 | y = model(categorical_fields, numerical_fields) 130 | for i in range(task_num): 131 | predicts_dict[i].extend(y[i].tolist()) 132 | return predicts_dict 133 | 134 | 135 | """ 136 | RL run begin here 137 | """ 138 | def one_iter_offline(agent): 139 | agent.is_test = False 140 | critic_loss, critic_loss2, actor_loss1, actor_loss2 = agent.update() 141 | return critic_loss, critic_loss2, actor_loss1, actor_loss2 142 | 143 | 144 | def train_and_test_offline(environment: gym.Env, agent, epoch, init_episode, save_dir, memory_path): 145 | seed = 2022 146 | random.seed(seed) 147 | np.random.seed(seed) 148 | torch.manual_seed(seed) 149 | torch.cuda.manual_seed(seed) 150 | 151 | # warm replay memory 152 | if os.path.isfile(memory_path): 153 | with open(memory_path, "rb") as f: 154 | agent.memory = pickle.load(f) 155 | else: 156 | for _ in range(init_episode): 157 | state = environment.reset() 158 | while True: 159 | action = environment.action_space.sample() 160 | nstate, reward, done, label = environment.step(action) 161 | transition = dict( 162 | state=state.reshape(-1), 163 | action=action.reshape(-1), 164 | nstate=nstate.reshape(-1), 165 | reward=reward, 166 | done=done, 167 | label=label 168 | ) 169 | # print(transition) 170 | agent.memory.store(**transition) 171 | state = nstate 172 
| if done: 173 | break 174 | with open(memory_path, "wb") as f: 175 | pickle.dump(agent.memory, f) 176 | 177 | print("memory size:", agent.memory.size) 178 | print("epoch | score | q_loss | ac_loss | a_loss | time") 179 | critic_lossls1, critic_lossls2, actor_lossls1, actor_lossls2, scores = [], [], [], [], [] 180 | # best_auc = 0 181 | # early_stopper = EarlyStopper(save_path=save_dir,num_trials=2) 182 | start = time.time() 183 | for i in range(0, epoch): 184 | # if i >=50 and (i%5 == 0 or i == epoch - 1): 185 | # print(i, agent.total_step, "testing performance") 186 | # test_auc = np.sum(test(test_env, agent, save_dir)) 187 | # if test_auc > best_auc: 188 | # best_auc = test_auc 189 | # agent.save_or_load_agent(save_dir, if_save=True) 190 | # res_df = pd.DataFrame(np.array([critic_lossls1, critic_lossls2, actor_lossls1, actor_lossls2]).T, 191 | # columns=["c1", "c2", "a1", "a2"]) 192 | # # TODO: tensorboarc sum 193 | # res_df.to_csv(save_dir + "/losses.csv") 194 | 195 | critic_loss, critic_loss2, actor_loss1, actor_loss2 = one_iter_offline(agent) 196 | 197 | with torch.no_grad(): 198 | critic_lossls1.append(critic_loss) 199 | critic_lossls2.append(critic_loss2) 200 | actor_lossls1.append(actor_loss1) 201 | actor_lossls2.append(actor_loss2) 202 | 203 | scores.append((0,0)) 204 | critic_lossls = [critic_lossls1[j]+critic_lossls2[j] for j in range(i)] 205 | # print the train and test performance, can plot here 206 | if i % 2 == 0 or i == epoch - 1: 207 | agent.save_or_load_agent(save_dir, if_save=True) 208 | end = time.time() 209 | with torch.no_grad(): # draw without considering separate reward 210 | print(i, np.mean(np.array(scores), axis=0), np.mean(critic_lossls), np.mean(actor_lossls1), 211 | np.mean(actor_lossls2), end - start) 212 | plot_loss(i, np.sum(np.array(scores), axis=1), critic_lossls, actor_lossls1, actor_lossls2) 213 | start = time.time() 214 | 215 | environment.close() 216 | 217 | 218 | def test(environment, agent, save_dir): 219 | seed = 2022 220 
| random.seed(seed) 221 | np.random.seed(seed) 222 | torch.manual_seed(seed) 223 | torch.cuda.manual_seed(seed) 224 | agent.is_test = True 225 | agent.actor.eval() 226 | pos_score = [] 227 | 228 | preds = [] 229 | labels = [] 230 | for _ in range(environment.datalen): 231 | # for i in range(1000): 232 | state = environment.reset() 233 | score = np.zeros(2) 234 | nsteps = 0 235 | # pred = [] 236 | while True: 237 | action = agent.select_action(state) 238 | nstate, reward, done, label = environment.step(action) 239 | score += reward 240 | 241 | # store the CTR/CVR and true label 242 | # 注意:此处为separate learning 243 | preds.append([action[0], action[1]]) 244 | labels.append(label) 245 | state = nstate 246 | nsteps += 1 247 | if done: 248 | break 249 | pos_score.append(score / nsteps) 250 | preds = np.array(preds) 251 | labels = np.array(labels) 252 | res = pd.DataFrame(np.c_[preds, labels]) 253 | res.to_csv(f"{save_dir}/RLpreds.csv", index=False) 254 | test_loss = [F.binary_cross_entropy(torch.tensor(labels[:, j]), torch.tensor(preds[:, j])).item() for j in range(2)] 255 | test_auc = [roc_auc_score(labels[:, j], preds[:, j]) for j in range(2)] 256 | print("score:{}, test logloss:{}, test auc:{}".format(np.mean(np.array(pos_score), axis=0), test_loss, test_auc)) 257 | # environment.close() 258 | return test_auc 259 | 260 | 261 | def plot_loss( 262 | frame_idx: int, 263 | scores: List[float], 264 | critic_losses: List[float], 265 | actor_losses1: List[float], 266 | actor_losses2: List[float] 267 | ): 268 | """Plot the training progresses.""" 269 | 270 | def subplot(loc: int, title: str, values: List[float]): 271 | plt.subplot(loc) 272 | plt.title(title) 273 | plt.plot(values) 274 | 275 | subplot_params = [ 276 | (221, f"frame {frame_idx}. 
score: {np.mean(scores[-10:])}", scores), 277 | (222, "critic_loss", critic_losses), 278 | (223, "ac_loss", actor_losses1), 279 | (224, "a_loss", actor_losses2), 280 | ] 281 | 282 | clear_output(True) 283 | plt.figure(figsize=(30, 5)) 284 | for loc, title, values in subplot_params: 285 | subplot(loc, title, values) 286 | 287 | plt.show() 288 | -------------------------------------------------------------------------------- /train/utils.py: -------------------------------------------------------------------------------- 1 | import os.path 2 | 3 | import numpy as np 4 | import pickle 5 | from collections import defaultdict 6 | import pandas as pd 7 | import torch 8 | torch.cuda.current_device() 9 | import torch.nn.functional as F 10 | import gym 11 | from layers.critic import CriticNeg 12 | from slmodels.esmm import ESMMModel 13 | from slmodels.mmoe import MMoEModel 14 | 15 | 16 | def get_optim_param(optim): # optim = torch.optim.Adam(network_param, learning_rate) 17 | params_list = list() 18 | for params_dict in optim.state_dict()['state'].values(): 19 | params_list.extend([t for t in params_dict.values() if isinstance(t, torch.Tensor)]) 20 | return params_list 21 | 22 | 23 | class Catemapper: 24 | def __init__(self, threshold): 25 | self.threshold = threshold 26 | self.catemap = defaultdict(dict) 27 | self.field_dims = None 28 | 29 | def make_mapper(self, fea_path, columns, filter_cols): 30 | df = pd.read_csv(fea_path, usecols=columns) 31 | df.drop_duplicates(inplace=True) 32 | df.fillna(0, inplace=True) 33 | 34 | length_df = len(df) 35 | dims = [] 36 | for col in columns: 37 | df[col].value_counts() 38 | self.catemap[col] = dict() 39 | count = 0 40 | idx = 0 41 | if col in filter_cols: 42 | for i, v in df[col].value_counts().to_dict().items(): 43 | count += v 44 | ad = 1 45 | if count / length_df > self.threshold: 46 | ad = 0 47 | self.catemap[col][i] = idx 48 | idx += ad 49 | dims.append(idx) 50 | else: 51 | for i, v in df[col].value_counts().to_dict().items(): 
52 | self.catemap[col][i] = idx 53 | idx += 1 54 | dims.append(idx - 1) 55 | 56 | self.field_dims = np.array(dims).astype(np.int64) + 1 57 | for col in self.catemap: 58 | mapper = self.catemap[col] 59 | origin, processed = len(set(mapper.keys())), len(set(mapper.values())) 60 | print("{}: {} to {}".format(col, origin, processed)) 61 | 62 | def save_mapper(self, save_path): 63 | with open(save_path + "/catemap.pkl", 'wb') as f: 64 | pickle.dump(self.catemap, f) 65 | 66 | def load_mapper(self, save_path): 67 | with open(save_path + "/catemap.pkl", 'rb') as f: 68 | self.catemap = pickle.load(f) 69 | dims = [] 70 | for i in self.catemap: 71 | dims.append(list(self.catemap[i].values())[-1]) 72 | self.field_dims = np.array(dims).astype(np.int64) + 1 73 | 74 | def map_rt(self, dataset): 75 | tmp_df = pd.DataFrame(dataset.categorical_data, columns=dataset.cate_cols) 76 | for col in self.catemap: 77 | tmp_df[col] = tmp_df[col].apply(lambda x: self.catemap[col][x] if x in self.catemap[col] else 0) 78 | dataset.categorical_data = tmp_df.values.astype(np.int) 79 | dataset.field_dims = self.field_dims 80 | 81 | def map(self, df): 82 | for col in self.catemap: 83 | df[col] = df[col].apply( 84 | lambda x: self.catemap[col][x] if x in self.catemap[col] else 0) 85 | 86 | 87 | class EarlyStopper(object): 88 | def __init__(self, num_trials, save_path): 89 | self.num_trials = num_trials 90 | self.trial_counter = 0 91 | self.best_accuracy = 0 92 | self.save_path = save_path 93 | 94 | def is_continuable(self, model, accuracy): 95 | if accuracy > self.best_accuracy: 96 | self.best_accuracy = accuracy 97 | self.trial_counter = 0 98 | torch.save(model.state_dict(), self.save_path) 99 | return True 100 | elif self.trial_counter + 1 < self.num_trials: 101 | self.trial_counter += 1 102 | return True 103 | else: 104 | return False 105 | 106 | 107 | class ConvergeStopper(object): 108 | def __init__(self, save_path, num_trials=2, eps=3e-3): 109 | self.num_trials = num_trials 110 | 
self.trial_counter = 0 111 | self.last_loss = 1e3 112 | # self.accuracy = 0 113 | self.eps = eps 114 | self.save_path = save_path 115 | 116 | def is_continuable(self, agent, loss): 117 | if np.abs(np.mean(loss - self.last_loss)) > self.eps: 118 | self.last_loss = loss 119 | self.trial_counter = 0 120 | # if accu > self.accuracy: 121 | # self.accuracy = accu 122 | # torch.save(model.state_dict(), self.save_path) 123 | return True 124 | elif self.trial_counter + 1 < self.num_trials: 125 | # self.last_loss = loss # maybe not useful 126 | self.trial_counter += 1 127 | return True 128 | else: 129 | return False 130 | 131 | 132 | class ActionNormalizer(gym.ActionWrapper): 133 | """Rescale and relocate the actions.""" 134 | 135 | def action(self, action: np.ndarray) -> np.ndarray: 136 | """Change the range (-1, 1) to (low, high).""" 137 | low = self.action_space.low 138 | high = self.action_space.high 139 | 140 | scale_factor = (high - low) / 2 141 | reloc_factor = high - scale_factor 142 | 143 | action = action * scale_factor + reloc_factor 144 | action = np.clip(action, low, high) 145 | 146 | return action 147 | 148 | def reverse_action(self, action: np.ndarray) -> np.ndarray: 149 | """Change the range (low, high) to (-1, 1).""" 150 | low = self.action_space.low 151 | high = self.action_space.high 152 | 153 | scale_factor = (high - low) / 2 154 | reloc_factor = high - scale_factor 155 | 156 | action = (action - reloc_factor) / scale_factor 157 | action = np.clip(action, 0.0, 1.0) 158 | 159 | return action 160 | 161 | 162 | class RlLossPolisher: 163 | def __init__(self, env, model_name, lambda_=0.5): 164 | # tuning param 165 | self.lambda_ = lambda_ 166 | 167 | # dynamic path 168 | self.rl_path = f"./chkpt/RL/res_TD3BC_{model_name}" 169 | if not os.path.isdir(self.rl_path): 170 | raise FileNotFoundError 171 | # fixed params 172 | self.device = torch.device("cuda" if torch.cuda.is_available() else "cpu") 173 | 174 | self.categorical_field_dims = env.field_dims 175 | 
self.num_dim = 1 176 | self.task_num = env.action_space.shape[0] 177 | self.pretrain_path = self.rl_path+f"/rt_{model_name}.pt" 178 | self.embed_dim = 128 179 | self.bottom_mlp_dims = (512, 256) 180 | self.tower_mlp_dims = (128, 64) 181 | self.drop_out = 0.2 182 | 183 | # define the network 184 | # self.pretain_actor = ESMMModel(self.categorical_field_dims, self.num_dim, self.embed_dim, self.bottom_mlp_dims, 185 | # self.tower_mlp_dims, 186 | # self.task_num, self.drop_out).to(self.device) 187 | # self.pretain_actor.load_state_dict(torch.load(self.pretrain_path)) 188 | # self.pretain_actor.eval() 189 | 190 | # TODO: polish to code, use a general critic network to contain multiple task; 191 | # 以及是否丧失了MDP特性,critic是R^t_{\pi},此处无pi;暂时作为logging policy对待 192 | self.critic1 = CriticNeg(self.categorical_field_dims, self.num_dim, self.embed_dim, self.bottom_mlp_dims, 193 | self.tower_mlp_dims, self.drop_out).to(self.device) 194 | 195 | self.critic2 = CriticNeg(self.categorical_field_dims, self.num_dim, self.embed_dim, self.bottom_mlp_dims, 196 | self.tower_mlp_dims, self.drop_out).to(self.device) 197 | 198 | state_dict1 = torch.load(self.rl_path + "/critic1.pth", map_location=lambda storage, loc: storage) 199 | self.critic1.load_state_dict(state_dict1) 200 | state_dict2 = torch.load(self.rl_path + "/critic2.pth", map_location=lambda storage, loc: storage) 201 | self.critic2.load_state_dict(state_dict2) 202 | 203 | def polish_loss(self, categorical_fields, numerical_fields, labels, y): 204 | # default two task here 205 | slloss = [torch.nn.BCELoss(reduction='none')(y[i],labels[:,i]) for i in range(2)] 206 | 207 | q_weight = [self.critic1(categorical_fields, numerical_fields, torch.unsqueeze(y[0], 1)), 208 | self.critic2(categorical_fields, numerical_fields, torch.unsqueeze(y[1], 1))] 209 | 210 | # method 1 211 | # loss_list = [(1 - self.lambda_ * labels[:, i] * q_weight[i].detach()) * 212 | # slloss[i] for i in range(2)] 213 | 214 | # method 2 215 | # loss_list = [0.5 * 
slloss[i] for i in range(2)] 216 | 217 | loss_list = [(1 - self.lambda_ * q_weight[i].detach()) * 218 | slloss[i] for i in range(2)] 219 | 220 | # method 3 221 | #loss_list = [(1-self.lambda_ * q_weight[i].detach()) * 222 | # slloss[i] for i in range(2)] 223 | 224 | # method 4 225 | # loss_list = [(0-q_weight[i].detach()) * slloss[i] for i in range(2)] 226 | 227 | loss = 0 228 | for item in loss_list: 229 | loss += torch.mean(item) 230 | loss /= len(loss_list) 231 | 232 | 233 | # method 4 plus BC 234 | # ref_loss = 0 # no need to give mode generalization 235 | # param_count = 0 236 | # for param, value in self.slmodel.named_parameters(): 237 | # param_count += 1 238 | # ref_loss += F.mse_loss(value, self.pretain_actor.state_dict()[param]) 239 | # ref_loss /= param_count 240 | 241 | # lambda_ = self.actor_reg / torch.mean(-q1_loss_weight - q2_loss_weight).abs().detach() 242 | # ac_loss = lambda_ * ac_loss + ref_loss 243 | # loss += self.reg_rate * ref_loss 244 | return loss 245 | 246 | 247 | if __name__ == '__main__': 248 | features_path = "../dataset/item_feadf.csv" 249 | columns = ['785', '591', '814', 'available', 'categoryid', '364', '776'] 250 | filter_cols = ['776', '364'] 251 | cm = Catemapper(0.2) 252 | cm.make_mapper(features_path, columns, filter_cols) 253 | cm.save_mapper("./chkpt") 254 | cm.load_mapper("./chkpt") 255 | --------------------------------------------------------------------------------