├── models
    ├── __pycache__
    │   ├── model.cpython-38.pyc
    │   ├── Predictor.cpython-37.pyc
    │   ├── Predictor.cpython-38.pyc
    │   ├── RGCNModel.cpython-38.pyc
    │   ├── model_test.cpython-38.pyc
    │   ├── RGCNModel_steam.cpython-38.pyc
    │   ├── SimpleRGCNModel.cpython-38.pyc
    │   ├── RGCNModel_steam_rank.cpython-37.pyc
    │   └── RGCNModel_steam_rank.cpython-38.pyc
    ├── Predictor.py
    └── model.py
├── utils
    ├── __pycache__
    │   ├── parser.cpython-37.pyc
    │   ├── parser.cpython-38.pyc
    │   ├── metrics.cpython-37.pyc
    │   ├── metrics.cpython-38.pyc
    │   ├── dataloader.cpython-37.pyc
    │   ├── dataloader.cpython-38.pyc
    │   ├── NegativeSampler.cpython-37.pyc
    │   ├── NegativeSampler.cpython-38.pyc
    │   ├── dataloader_steam.cpython-37.pyc
    │   ├── dataloader_steam.cpython-38.pyc
    │   ├── dataloader_item_graph.cpython-38.pyc
    │   ├── dataloader_steam_filtered.cpython-37.pyc
    │   └── dataloader_steam_filtered.cpython-38.pyc
    ├── NegativeSampler.py
    ├── parser.py
    ├── dataloader_item_graph.py
    ├── metrics.py
    └── dataloader_steam.py
├── README.md
└── main.py


/models/__pycache__/model.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/model.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/parser.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/parser.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/parser.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/parser.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/metrics.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/metrics.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/metrics.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/metrics.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/Predictor.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/Predictor.cpython-37.pyc


--------------------------------------------------------------------------------
/models/__pycache__/Predictor.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/Predictor.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/RGCNModel.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/RGCNModel.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/model_test.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/model_test.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/dataloader.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/dataloader.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/dataloader.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/dataloader.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/RGCNModel_steam.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/RGCNModel_steam.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/SimpleRGCNModel.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/SimpleRGCNModel.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/NegativeSampler.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/NegativeSampler.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/NegativeSampler.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/NegativeSampler.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/dataloader_steam.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/dataloader_steam.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/dataloader_steam.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/dataloader_steam.cpython-38.pyc


--------------------------------------------------------------------------------
/models/__pycache__/RGCNModel_steam_rank.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/RGCNModel_steam_rank.cpython-37.pyc


--------------------------------------------------------------------------------
/models/__pycache__/RGCNModel_steam_rank.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/models/__pycache__/RGCNModel_steam_rank.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/dataloader_item_graph.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/dataloader_item_graph.cpython-38.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/dataloader_steam_filtered.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/dataloader_steam_filtered.cpython-37.pyc


--------------------------------------------------------------------------------
/utils/__pycache__/dataloader_steam_filtered.cpython-38.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/YangLiangwei/SCGRec/HEAD/utils/__pycache__/dataloader_steam_filtered.cpython-38.pyc


--------------------------------------------------------------------------------
/models/Predictor.py:
--------------------------------------------------------------------------------
1 | import torch.nn as nn
2 | import dgl.function as fn
class HeteroDotProductPredictor(nn.Module):
    """Edge scorer: dot product of the 'h' features of each edge's endpoints."""

    def forward(self, graph, h, etype):
        """Return the per-edge scores for edges of type ``etype``.

        Args:
            graph: graph object whose edges of type ``etype`` are scored.
            h: node features assigned to ``graph.ndata['h']`` before scoring.
            etype: edge type passed to ``apply_edges`` and used to read the result.

        Returns:
            The ``'score'`` edge data of ``etype`` produced by ``u_dot_v``.
        """
        # The feature assignment and the 'score' field are made inside
        # graph.local_scope() so they are scoped to this call.
        with graph.local_scope():
            graph.ndata['h'] = h
            dot_product = fn.u_dot_v('h', 'h', 'score')
            graph.apply_edges(dot_product, etype = etype)
            edge_scores = graph.edges[etype].data['score']
            return edge_scores
9 | 


--------------------------------------------------------------------------------
/utils/NegativeSampler.py:
--------------------------------------------------------------------------------
 1 | import numpy as np
 2 | import pdb
 3 | import torch
 4 | 
 5 | class NegativeSampler(object):
 6 |     def __init__(self, dic):
 7 |         self.dic_user_game = dic
 8 | 
 9 |     def __call__(self, g, eids_dict):
10 |         result_dict = {}
11 |         for etype, eids in eids_dict.items():
12 |             src_type, edge, dst_type = etype
13 |             src, _ = g.find_edges(eids, etype = etype)
14 |             dst = []
15 |             for i in range(src.shape[0]):
16 |                 s = int(src[i])
17 |                 while True:
18 |                     negitem = np.random.randint(0, g.num_nodes(dst_type))
19 |                     if negitem in self.dic_user_game[s]:
20 |                         continue
21 |                     else:
22 |                         break
23 |                 dst.append(negitem)
24 |         dst = torch.tensor(dst)
25 |         result_dict[etype] = (src, dst)
26 |         return result_dict
27 | 
28 | 
29 | 


--------------------------------------------------------------------------------
/utils/parser.py:
--------------------------------------------------------------------------------
 1 | import argparse
 2 | 
 3 | def parse_args():
 4 |     parser = argparse.ArgumentParser()
 5 |     parser.add_argument('--data', default = 'Ciao', type = str,
 6 |                         help = 'Dataset to use')
 7 |     parser.add_argument('--train_percent', default = 0.8, type = float,
 8 |                         help = 'training_percent')
 9 |     parser.add_argument('--embed_size', default = 32, type = int,
10 |                         help = 'embedding size for all layer')
11 |     parser.add_argument('--lr', default = 0.03, type = float,
12 |                         help = 'learning rate')
13 |     parser.add_argument('--model', default = 'RGCN', type = str,
14 |                         help = 'model selection')
15 |     parser.add_argument('--epoch', default = 1000, type = int,
16 |                         help = 'epoch number')
17 |     parser.add_argument('--early_stop', default = 10, type = int,
18 |                         help = 'early_stop validation')
19 |     parser.add_argument('--batch_size', default = 1024, type = int,
20 |                         help = 'batch size')
21 |     parser.add_argument('--layers', default = 1, type = int,
22 |                         help = 'layer number')
23 |     parser.add_argument('--gpu', default = -1, type = int,
24 |                         help = '-1 for cpu, 0 for gpu:0')
25 |     parser.add_argument('--k', default = [5, 10, 20], type = list,
26 |                         help = 'negative sampler number for each node')
27 |     parser.add_argument('--g', default = 0.1, type = float,
28 |                         help = 'hyper-parameter for aggregation weight')
29 |     parser.add_argument('--social_g', default = 0.1, type = float,
30 |                         help = 'hyper-parameter for aggregation weight')
31 |     parser.add_argument('--item_g', default = 0.1, type = float,
32 |                         help = 'hyper-parameter for aggregation weight')
33 | 
34 |     args = parser.parse_args()
35 |     return args
36 | 
37 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | ## Large-scale Personalized Video Game Recommendation via Social-aware Contextualized Graph Neural Network
 2 | 
 3 | > Authors: Liangwei Yang, Zhiwei Liu, Yu Wang, Chen Wang, Ziwei Fan, Philip S. Yu
 4 | > Affiliation: University of Illinois at Chicago
 5 | 
 6 | ![](./assets/draftrec_model.png)
 7 | 
 8 | 
 9 | > **Abstract:** 
 10 | Because of the large number of online games available nowadays, online game recommender systems are necessary for users and online game platforms. The former can discover more potential online games of their interests, and the latter can attract users to dwell longer in the platform. This paper investigates the characteristics of user behaviors with respect to the online games on the Steam platform. Based on the observations, we argue that a satisfying recommender system for online games is able to characterize: personalization, game contextualization and social connection. However, simultaneously solving all is rather challenging for game recommendation. Firstly, personalization for game recommendation requires the incorporation of the dwelling time of engaged games, which is ignored by existing methods.
 11 | Secondly, game contextualization should reflect the complex and high-order properties of those relations. Last but not least, it is problematic to use social connections directly for game recommendations due to the massive noise within social connections. To this end, we propose a Social-aware Contextualized Graph Neural Recommender System (SCGRec), which harnesses three perspectives to improve game recommendation. We conduct a comprehensive analysis of users' online game behaviors, which motivates the necessity of handling those three characteristics in the online game recommendation.
12 | 
13 | ## Dataset
14 | 
15 | [Google drive link](https://drive.google.com/file/d/1F9kr_YWimBtexJEH-zkDzCOwl1q7GmFp/view)
16 | 
17 | ![](./assets/dataset.png)
18 | 
19 | ## How to run
20 | python main.py
21 | 
22 | ## Cite
23 | 
24 | 
25 | ```
26 | @inproceedings{SCGRec,
27 |   author    = {Liangwei Yang and
28 |                Zhiwei Liu and
29 |                Yu Wang and
30 |                Chen Wang and
31 |                Ziwei Fan and
32 |                Philip S. Yu},
33 |   title     = {Large-scale Personalized Video Game Recommendation via Social-aware
34 |                Contextualized Graph Neural Network},
35 |   booktitle = {{WWW} '22: The {ACM} Web Conference 2022, Virtual Event, Lyon, France,
36 |                April 25 - 29, 2022},
37 |   pages     = {3376--3386},
38 |   publisher = {{ACM}},
39 |   year      = {2022},
40 |   url       = {https://doi.org/10.1145/3485447.3512273},
41 |   doi       = {10.1145/3485447.3512273},
42 |   timestamp = {Tue, 26 Apr 2022 16:02:09 +0200},
43 |   biburl    = {https://dblp.org/rec/conf/www/YangLWWFY22.bib},
44 |   bibsource = {dblp computer science bibliography, https://dblp.org}
45 | }
46 | ```
47 | 
48 | 
49 | 


--------------------------------------------------------------------------------
/utils/dataloader_item_graph.py:
--------------------------------------------------------------------------------
 1 | import os
 2 | import sys
 3 | from dgl.data.utils import save_graphs
 4 | from tqdm import tqdm
 5 | from scipy import stats
 6 | from .NegativeSampler import NegativeSampler
 7 | import pdb
 8 | import torch
 9 | import logging
10 | logging.basicConfig(stream = sys.stdout, level = logging.INFO)
11 | import numpy as np
12 | import dgl
13 | from dgl.data import DGLDataset
14 | import pandas as pd
15 | from sklearn import preprocessing
16 | from dgl.data import DGLDataset
17 | 
class Dataloader_item_graph(DGLDataset):
    """Build a game-to-game heterograph from shared game attributes.

    Two games are linked (in both directions) under ``co_publisher``,
    ``co_developer`` or ``co_genre`` when they share at least one value in the
    corresponding attribute file. Game node features ``'h'`` are copied from
    the ``'game'`` features of the graph passed to the constructor.
    """

    def __init__(self, graph, app_id_path, publisher_path, developer_path, genre_path):
        """Read the attribute files and assemble ``self.graph``.

        Args:
            graph: graph whose ``ndata['h']['game']`` supplies the game features.
            app_id_path: file with one raw app id per line.
            publisher_path: comma-separated ``app_id,publisher`` file.
            developer_path: comma-separated ``app_id,developer`` file.
            genre_path: comma-separated ``app_id,genre`` file.
        """
        self.app_id_path = app_id_path
        self.publisher_path = publisher_path
        self.developer_path = developer_path
        self.genre_path = genre_path

        logging.info("reading item graph")
        # The id mapping must exist before read_mapping is called.
        self.app_id_mapping = self.read_id_mapping(self.app_id_path)
        self.publisher = self.read_mapping(self.publisher_path)
        self.developer = self.read_mapping(self.developer_path)
        self.genre = self.read_mapping(self.genre_path)

        graph_data = {
            ('game', 'co_publisher', 'game'): self.publisher,
            ('game', 'co_developer', 'game'): self.developer,
            ('game', 'co_genre', 'game'): self.genre
        }
        self.graph = dgl.heterograph(graph_data)
        self.graph.nodes['game'].data['h'] = graph.ndata['h']['game'].float()


    def read_mapping(self, path):
        """Turn an ``app_id,value`` file into bidirectional game-game edges.

        Args:
            path: comma-separated file; the first field is a raw app id, the
                second the attribute value. Lines with fewer than two fields
                or an empty second field are ignored.

        Returns:
            ``(src, dst)`` int64 tensors; every pair of games sharing at least
            one value contributes the edges ``a -> b`` and ``b -> a``.

        Raises:
            KeyError: if an app id is not present in ``self.app_id_mapping``.
        """
        attributes = {}
        with open(path, 'r') as f:
            for line in f:
                fields = line.strip().split(',')
                if len(fields) < 2 or fields[1] == '':
                    continue
                # Key on the mapped id. The original tested the raw string id
                # against int keys, so each game kept only its last value.
                game = self.app_id_mapping[fields[0]]
                attributes.setdefault(game, set()).add(fields[1])

        src = []
        dst = []
        games = list(attributes.items())
        for i in range(len(games) - 1):
            game1, values1 = games[i]
            for j in range(i + 1, len(games)):
                game2, values2 = games[j]
                if not values1.isdisjoint(values2):
                    src.extend([game1, game2])
                    dst.extend([game2, game1])
        # Explicit dtype keeps the tensors integral even when no edge exists.
        return (torch.tensor(src, dtype = torch.long), torch.tensor(dst, dtype = torch.long))

    def read_id_mapping(self, path):
        """Assign consecutive integer ids to the distinct lines of ``path``.

        Args:
            path: text file with one identifier per line.

        Returns:
            dict mapping each stripped line to its index of first appearance.
        """
        mapping = {}
        with open(path, 'r') as f:
            for line in f:
                identifier = line.strip()
                if identifier not in mapping:
                    mapping[identifier] = len(mapping)
        return mapping
74 | 


--------------------------------------------------------------------------------
/models/model.py:
--------------------------------------------------------------------------------
 1 | import torch.nn as nn
 2 | from tqdm import tqdm
 3 | import pdb
 4 | import torch.nn.functional as F
 5 | import torch
 6 | from dgl.nn import SAGEConv
 7 | import dgl
 8 | import dgl.function as fn
 9 | import dgl.nn as dglnn
10 | from dgl.nn import GATConv
11 | from dgl.nn import GraphConv
12 | 
class Proposed_model(nn.Module):
    """Recommender combining item-graph, play-graph and social-graph signals.

    User and game embeddings are trainable parameters initialised from tensors
    stored under ``./baselines``. The forward pass blends the raw user
    embedding with a play-weighted aggregation of game representations and an
    attention-weighted aggregation over the social graph.
    """

    def __init__(self, args, graph, item_graph):
        """Create parameters and sub-modules.

        Args:
            args: namespace providing ``embed_size``, ``layers``, ``social_g``
                and ``item_g``.
            graph: accepted for interface compatibility; not read here.
            item_graph: game-game graph used to size the item-graph layers.
        """
        super().__init__()
        self.args = args
        self.hid_dim = args.embed_size
        self.layer_num = args.layers

        # Embeddings start from tensors saved on disk rather than random values.
        stored_user = torch.load('./baselines/user_embedding.pt')
        self.user_embedding = torch.nn.Parameter(stored_user)
        stored_item = torch.load('./baselines/item_embedding.pt')
        self.item_embedding = torch.nn.Parameter(stored_item)

        dim = self.hid_dim
        self.item_conv = SAGEConv(dim, dim, 'mean')
        self.social_GAT = GATConv(dim, dim, num_heads = 1, allow_zero_in_degree = True)
        self.social_conv = SAGEConv(dim, dim, 'mean')
        self.linear = torch.nn.Linear(3 * dim, dim)

        self.build_model(item_graph)

    def build_layer(self, idx, graph):
        """Return one HeteroGraphConv layer with a GraphConv per edge type.

        The first layer (``idx == 0``) takes the width of ``graph.ndata['h']``
        as input size; later layers use ``self.hid_dim``.
        """
        input_dim = graph.ndata['h'].shape[1] if idx == 0 else self.hid_dim
        per_relation = {}
        for rel in graph.etypes:
            per_relation[rel] = GraphConv(input_dim, self.hid_dim, weight = True, bias = False)
        return dglnn.HeteroGraphConv(per_relation, aggregate = 'mean')

    def build_model(self, graph):
        """Populate ``self.layers`` with ``self.layer_num`` item-graph layers."""
        self.layers = nn.ModuleList()
        for idx in range(self.layer_num):
            self.layers.append(self.build_layer(idx, graph))

    def forward(self, graph, item_graph, social_graph):
        """Compute the user and game representations.

        Args:
            graph: user-game graph with 'played by' edges and a ``'weight'``
                edge feature on ``('game', 'played by', 'user')``.
            item_graph: game-game graph carrying game features in ``ndata['h']``.
            social_graph: user-user graph.

        Returns:
            dict with the blended ``"user"`` representation and the raw
            ``"game"`` embedding parameter.
        """
        # Propagate game features over the item graph.
        game_repr = item_graph.ndata['h']
        for conv in self.layers:
            game_repr = conv(item_graph, {'game': game_repr})['game']

        # Aggregate game representations into users along weighted 'played by' edges.
        played_by = dgl.edge_type_subgraph(graph, ['played by'])
        play_weight = graph.edata['weight'][('game', 'played by', 'user')]
        h_user_aggregate = self.item_conv(played_by, (game_repr, self.user_embedding), edge_weight = play_weight)

        # Attention values from the GAT layer serve as edge weights for the
        # social aggregation; the GAT node output itself is discarded.
        _, attention = self.social_GAT(social_graph, h_user_aggregate, get_attention = True)
        attention = attention.sum(1)
        h_user_social = self.social_conv(social_graph, self.user_embedding, edge_weight = attention)

        social_g = self.args.social_g
        item_g = self.args.item_g
        user_embed = (1 - social_g - item_g) * self.user_embedding + item_g * h_user_aggregate + social_g * h_user_social

        return {"user": user_embed, "game": self.item_embedding}
68 | 


--------------------------------------------------------------------------------
/utils/metrics.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import numpy as np
  3 | from sklearn.metrics import roc_auc_score, log_loss, mean_squared_error
  4 | 
  5 | denominator_table = np.log2(np.arange(2, 102))
  6 | 
  7 | def MAE(score, label):
  8 |     return torch.mean(torch.abs(score - label))
  9 | 
 10 | def RMSE(score, label):
 11 |     return torch.sqrt(torch.mean((score - label) ** 2))
 12 | 
 13 | def recall(rank, ground_truth, N):
 14 |     return len(set(rank[:N]) & set(ground_truth)) / float(len(set(ground_truth)))
 15 | 
 16 | def precision_at_k(r, k):
 17 |     """Score is precision @ k
 18 |     Relevance is binary (nonzero is relevant).
 19 |     Returns:
 20 |         Precision @ k
 21 |     Raises:
 22 |         ValueError: len(r) must be >= k
 23 |     """
 24 |     assert k >= 1
 25 |     r = np.asarray(r)[:k]
 26 |     return np.mean(r)
 27 | 
 28 | def average_precision(r,cut):
 29 |     """Score is average precision (area under PR curve)
 30 |     Relevance is binary (nonzero is relevant).
 31 |     Returns:
 32 |         Average precision
 33 |     """
 34 |     r = np.asarray(r)
 35 |     out = [precision_at_k(r, k + 1) for k in range(cut) if r[k]]
 36 |     if not out:
 37 |         return 0.
 38 |     return np.sum(out)/float(min(cut, np.sum(r)))
 39 | 
 40 | 
 41 | def mean_average_precision(rs):
 42 |     """Score is mean average precision
 43 |     Relevance is binary (nonzero is relevant).
 44 |     Returns:
 45 |         Mean average precision
 46 |     """
 47 |     return np.mean([average_precision(r) for r in rs])
 48 | 
 49 | 
 50 | def dcg_at_k(r, k, method=1):
 51 |     """Score is discounted cumulative gain (dcg)
 52 |     Relevance is positive real values.  Can use binary
 53 |     as the previous methods.
 54 |     Returns:
 55 |         Discounted cumulative gain
 56 |     """
 57 |     r = np.asfarray(r)[:k]
 58 |     if r.size:
 59 |         if method == 0:
 60 |             return r[0] + np.sum(r[1:] / np.log2(np.arange(2, r.size + 1)))
 61 |         elif method == 1:
 62 |             return np.sum(r / np.log2(np.arange(2, r.size + 2)))
 63 |         else:
 64 |             raise ValueError('method must be 0 or 1.')
 65 |     return 0.
 66 | 
 67 | 
 68 | def ndcg_at_k(r, k, method=1):
 69 |     """Score is normalized discounted cumulative gain (ndcg)
 70 |     Relevance is positive real values.  Can use binary
 71 |     as the previous methods.
 72 |     Returns:
 73 |         Normalized discounted cumulative gain
 74 |     """
 75 |     dcg_max = dcg_at_k(sorted(r, reverse=True), k, method)
 76 |     if not dcg_max:
 77 |         return 0.
 78 |     return dcg_at_k(r, k, method) / dcg_max
 79 | 
 80 | 
 81 | def recall_at_k(r, k):
 82 |     all_pos_num = sum(r)
 83 |     r = np.asfarray(r)[:k]
 84 |     return np.sum(r) / all_pos_num
 85 | 
 86 | 
 87 | def hit_at_k(r, k):
 88 |     r = np.array(r)[:k]
 89 |     if np.sum(r) > 0:
 90 |         return 1.
 91 |     else:
 92 |         return 0.
 93 | 
 94 | def F1(pre, rec):
 95 |     if pre + rec > 0:
 96 |         return (2.0 * pre * rec) / (pre + rec)
 97 |     else:
 98 |         return 0.
 99 | 
def auc(ground_truth, prediction):
    """Return ``roc_auc_score`` for the given labels and scores.

    Any exception raised by the scorer is swallowed and reported as 0.
    """
    try:
        return roc_auc_score(y_true=ground_truth, y_score=prediction)
    except Exception:
        return 0.
106 | 
def logloss(ground_truth, prediction):
    """Return ``log_loss`` of ``prediction`` against ``ground_truth``.

    Both inputs are converted with ``np.asarray``; predictions are passed on
    without clipping.
    """
    y_true = np.asarray(ground_truth)
    y_pred = np.asarray(prediction)
    return log_loss(y_true, y_pred)
111 | 


--------------------------------------------------------------------------------
/main.py:
--------------------------------------------------------------------------------
  1 | import sys
  2 | import dgl
  3 | import dgl.function as fn
  4 | sys.path.append('../')
  5 | import os
  6 | import multiprocessing as mp
  7 | # mp.set_start_method('spawn')
  8 | from tqdm import tqdm
  9 | import pdb
 10 | import random
 11 | import numpy as np
 12 | import torch
 13 | import torch.nn as nn
 14 | import logging
 15 | logging.basicConfig(stream = sys.stdout, level = logging.INFO)
 16 | from utils.parser import parse_args
 17 | from utils.metrics import MAE, RMSE, ndcg_at_k, recall_at_k, hit_at_k, precision_at_k
 18 | from utils.dataloader_steam import Dataloader_steam
 19 | from utils.dataloader_item_graph import Dataloader_item_graph
 20 | from models.RGCNModel_steam_rank import RGCNModel_steam_rank
 21 | from models.Predictor import HeteroDotProductPredictor
 22 | from models.model import Proposed_model
 23 | 
 24 | def validate(train_mask, dic, h, ls_k):
 25 |     users = torch.tensor(list(dic.keys())).long()
 26 |     user_embedding = h['user'][users]
 27 |     game_embedding = h['game']
 28 |     rating = torch.mm(user_embedding, game_embedding.t())
 29 |     rating[train_mask] = -float('inf')
 30 | 
 31 |     valid_mask = torch.zeros_like(train_mask)
 32 |     for i in range(users.shape[0]):
 33 |         user = int(users[i])
 34 |         items = torch.tensor(dic[user])
 35 |         valid_mask[i, items] = 1
 36 | 
 37 |     _, indices = torch.sort(rating, descending = True)
 38 |     ls = [valid_mask[i,:][indices[i, :]] for i in range(valid_mask.shape[0])]
 39 |     result = torch.stack(ls).float()
 40 | 
 41 |     res = []
 42 |     for k in ls_k:
 43 |         discount = (torch.tensor([i for i in range(k)]) + 2).log2()
 44 |         ideal, _ = result.sort(descending = True)
 45 |         idcg = (ideal[:, :k] / discount).sum(dim = 1)
 46 |         dcg = (result[:, :k] / discount).sum(dim = 1)
 47 |         ndcg = torch.mean(dcg / idcg)
 48 | 
 49 |         recall = torch.mean(result[:, :k].sum(1) / result.sum(1))
 50 |         hit = torch.mean((result[:, :k].sum(1) > 0).float())
 51 |         precision = torch.mean(result[:, :k].mean(1))
 52 | 
 53 |         logging_result = "For k = {}, ndcg = {}, recall = {}, hit = {}, precision = {}".format(k, ndcg, recall, hit, precision)
 54 |         logging.info(logging_result)
 55 |         res.append(logging_result)
 56 |     return ndcg, str(res)
 57 | 
 58 | 
 59 | def construct_negative_graph(graph, etype):
 60 |     utype, _ , vtype = etype
 61 |     src, _ = graph.edges(etype = etype)
 62 |     dst = torch.randint(graph.num_nodes(vtype), size = src.shape)
 63 |     return dgl.heterograph({etype: (src, dst)}, num_nodes_dict = {ntype: graph.number_of_nodes(ntype) for ntype in graph.ntypes})
 64 | 
 65 | def setup_seed(seed):
 66 |     torch.manual_seed(seed)
 67 |     torch.cuda.manual_seed_all(seed)
 68 |     np.random.seed(seed)
 69 |     random.seed(seed)
 70 |     torch.backends.cudnn.deterministic = True
 71 | 
 72 | if __name__ == '__main__':
 73 |     args = parse_args()
 74 |     setup_seed(2020)
 75 | 
 76 |     if args.gpu >= 0 and torch.cuda.is_available():
 77 |         device = 'cuda:{}'.format(args.gpu)
 78 |     else:
 79 |         device = 'cpu'
 80 | 
 81 |     path = '/home/yangliangwei/datasets/steam/'
 82 | 
 83 |     user_id_path = path + '/users.txt'
 84 |     app_id_path = path + '/app_id.txt'
 85 |     app_info_path = path + '/App_ID_Info.txt'
 86 |     friends_path = path + '/friends.txt'
 87 |     developer_path = path + '/Games_Developers.txt'
 88 |     publisher_path = path + '/Games_Publishers.txt'
 89 |     genres_path = path + '/Games_Genres.txt'
 90 | 
 91 |     DataLoader = Dataloader_steam(args, path, user_id_path, app_id_path, app_info_path, friends_path, developer_path, publisher_path, genres_path)
 92 | 
 93 |     graph = DataLoader.graph
 94 |     DataLoader_item = Dataloader_item_graph(graph, app_id_path, publisher_path, developer_path, genres_path)
 95 | 
 96 |     graph_item = DataLoader_item.graph
 97 | 
 98 |     graph_social = dgl.edge_type_subgraph(graph, [('user', 'friend of', 'user')])
 99 | 
100 |     graph = dgl.edge_type_subgraph(graph, [('user', 'play', 'game'), ('game', 'played by', 'user')])
101 |     graph.update_all(fn.copy_edge('percentile', 'm'), fn.sum('m', 'total'), etype = 'played by')
102 |     graph.apply_edges(func = fn.e_div_v('percentile', 'total', 'weight'), etype = 'played by')
103 | 
104 |     valid_user = list(DataLoader.valid_data.keys())
105 |     train_mask = torch.zeros(len(valid_user), graph.num_nodes('game'))
106 |     for i in range(len(valid_user)):
107 |         user = valid_user[i]
108 |         item_train = torch.tensor(DataLoader.dic_user_game[user])
109 |         train_mask[i, :][item_train] = 1
110 |     train_mask = train_mask.bool()
111 | 
112 |     model = Proposed_model(args, graph, graph_item)
113 | 
114 |     predictor = HeteroDotProductPredictor()
115 |     model.to(device)
116 |     opt = torch.optim.Adam(model.parameters(), lr = args.lr)
117 | 
118 |     stop_count = 0
119 |     ndcg_val_best = 0
120 |     ls_k = args.k
121 | 
122 |     total_epoch = 0
123 |     for epoch in range(args.epoch):
124 |         model.train()
125 |         graph_neg = construct_negative_graph(graph, ('user', 'play', 'game'))
126 |         h = model(graph, graph_item, graph_social)
127 | 
128 |         score = predictor(graph, h, ('user', 'play', 'game'))
129 |         score_neg = predictor(graph_neg, h, ('user', 'play', 'game'))
130 |         loss = -(score - score_neg).sigmoid().log().sum()
131 |         logging.info("loss = {}".format(loss))
132 |         opt.zero_grad()
133 |         loss.backward()
134 |         opt.step()
135 |         total_epoch += 1
136 | 
137 |         # score, h = model.forward_all(graph, 'play')
138 |         logging.info('Epoch {}'.format(epoch))
139 |         if total_epoch > 1:
140 |             model.eval()
141 |             logging.info("begin validation")
142 | 
143 |             ndcg, _ = validate(train_mask, DataLoader.valid_data, h, ls_k)
144 | 
145 |             if ndcg > ndcg_val_best:
146 |                 ndcg_val_best = ndcg
147 |                 stop_count = 0
148 |                 logging.info("begin test")
149 |                 ndcg_test, test_result = validate(train_mask, DataLoader.test_data, h, ls_k)
150 |             else:
151 |                 stop_count += 1
152 |                 if stop_count > args.early_stop:
153 |                     logging.info('early stop')
154 |                     break
155 | 
156 |     logging.info('Final ndcg {}'.format(ndcg_test))
157 |     logging.info(test_result)
158 | 


--------------------------------------------------------------------------------
/utils/dataloader_steam.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | import sys
  3 | from dgl.data.utils import save_graphs
  4 | from tqdm import tqdm
  5 | from scipy import stats
  6 | from .NegativeSampler import NegativeSampler
  7 | import pdb
  8 | import torch
  9 | import logging
 10 | logging.basicConfig(stream = sys.stdout, level = logging.INFO)
 11 | import numpy as np
 12 | import dgl
 13 | from dgl.data import DGLDataset
 14 | import pandas as pd
 15 | from sklearn import preprocessing
 16 | 
 17 | class Dataloader_steam_filtered(DGLDataset):
 18 |     def __init__(self, args, root_path, user_id_path, app_id_path, app_info_path, friends_path, developer_path, publisher_path, genres_path, device = 'cpu', name = 'steam'):
 19 |         logging.info("steam dataloader init")
 20 | 
 21 |         self.args = args
 22 |         self.root_path = root_path
 23 |         self.user_id_path = user_id_path
 24 |         self.app_id_path = app_id_path
 25 |         self.app_info_path = app_info_path
 26 |         self.friends_path = friends_path
 27 |         self.developer_path = developer_path
 28 |         self.publisher_path = publisher_path
 29 |         self.genres_path = genres_path
 30 |         self.device = device
 31 |         self.graph_path = self.root_path + '/graph.bin'
 32 |         self.game_path = self.root_path + '/train_game.txt'
 33 |         self.time_path = self.root_path + '/train_time.txt'
 34 |         self.valid_path = self.root_path + '/valid_game.txt'
 35 |         self.test_path = self.root_path + '/test_game.txt'
 36 | 
 37 |         logging.info("reading user id mapping from {}".format(self.user_id_path))
 38 |         self.user_id_mapping = self.read_id_mapping(self.user_id_path)
 39 |         logging.info("reading app id mapping from {}".format(self.app_id_path))
 40 |         self.app_id_mapping = self.read_id_mapping(self.app_id_path)
 41 | 
 42 |         logging.info("build valid data")
 43 |         self.valid_data = self.build_valid_data(self.valid_path)
 44 | 
 45 |         logging.info("build test data")
 46 |         self.test_data = self.build_valid_data(self.test_path)
 47 | 
 48 |         if os.path.exists(self.graph_path):
 49 |             logging.info("loading preprocessed data")
 50 |             self.graph = dgl.load_graphs(self.graph_path)
 51 |             self.graph = self.graph[0][0]
 52 |             logging.info("reading user game information")
 53 |             self.dic_user_game = self.read_dic_user_game(self.game_path)
 54 | 
 55 |         else:
 56 |             self.process()
 57 |             dgl.save_graphs(self.graph_path, self.graph)
 58 | 
 59 |         self.dataloader = self.build_dataloader(self.args, self.graph)
 60 | 
 61 |     def build_valid_data(self, path):
 62 |         users = {}
 63 |         with open(path, 'r') as f:
 64 |             lines = f.readlines()
 65 |             for line in lines:
 66 |                 line = line.strip().split(',')
 67 |                 user = self.user_id_mapping[line[0]]
 68 |                 games = [self.app_id_mapping[game] for game in line[1:]]
 69 |                 users[user] = games
 70 |         return users
 71 | 
 72 |     def build_dataloader(self, args, graph):
 73 |         sampler = dgl.dataloading.MultiLayerFullNeighborSampler(args.layers, return_eids = False)
 74 |         train_id = torch.tensor([i for i in range(graph.edges(etype = 'play')[0].shape[0])], dtype = torch.long)
 75 |         dataloader = dgl.dataloading.EdgeDataLoader(
 76 |             graph, {('user', 'play', 'game'): train_id},
 77 |             sampler, negative_sampler = NegativeSampler(self.dic_user_game), batch_size = args.batch_size, shuffle = True, num_workers = 2
 78 |         )
 79 |         return dataloader
 80 | 
 81 | 
 82 |     def process(self):
 83 |         logging.info("reading app info from {}".format(self.app_info_path))
 84 |         self.app_info = self.read_app_info(self.app_info_path)
 85 | 
 86 |         logging.info("reading publisher from {}".format(self.publisher_path))
 87 |         self.publisher = self.read_mapping(self.publisher_path)
 88 | 
 89 |         logging.info("reading developer from {}".format(self.developer_path))
 90 |         self.developer = self.read_mapping(self.developer_path)
 91 | 
 92 |         logging.info("reading genre from {}".format(self.genres_path))
 93 |         self.genre = self.read_mapping(self.genres_path)
 94 | 
 95 |         logging.info("reading user item play time from {}".format(self.game_path))
 96 | 
 97 |         self.user_game, self.dic_user_game = self.read_play_time_rank(self.game_path, self.time_path)
 98 | 
 99 |         logging.info("reading friend list from {}".format(self.friends_path))
100 |         self.friends = self.read_friends(self.friends_path)
101 | 
102 |         graph_data = {
103 |             ('user', 'friend of', 'user'): (self.friends[:, 0], self.friends[:, 1]),
104 | 
105 |             ('game', 'developed by', 'developer'): (torch.tensor(list(self.developer.keys())), torch.tensor(list(self.developer.values()))),
106 | 
107 |             ('developer', 'develop', 'game'): (torch.tensor(list(self.developer.values())), torch.tensor(list(self.developer.keys()))),
108 | 
109 |             ('game', 'published by', 'publisher'): (torch.tensor(list(self.publisher.keys())), torch.tensor(list(self.publisher.values()))),
110 | 
111 |             ('publisher', 'publish', 'game'): (torch.tensor(list(self.publisher.values())), torch.tensor(list(self.publisher.keys()))),
112 | 
113 |             ('game', 'genre', 'type'): (torch.tensor(list(self.genre.keys())), torch.tensor(list(self.genre.values()))),
114 | 
115 |             ('type', 'genred', 'game'): (torch.tensor(list(self.genre.values())), torch.tensor(list(self.genre.keys()))),
116 | 
117 |             ('user', 'play', 'game'): (self.user_game[:, 0].long(), self.user_game[:, 1].long()),
118 | 
119 |             ('game', 'played by', 'user'): (self.user_game[:, 1].long(), self.user_game[:, 0].long())
120 |         }
121 |         graph = dgl.heterograph(graph_data)
122 | 
123 |         ls_feature = []
124 | 
125 |         for node in graph.nodes('game'):
126 |             node = int(node)
127 |             if node in self.app_info:
128 |                 ls_feature.append(self.app_info[node])
129 | 
130 |         ls_feature = np.vstack(ls_feature)
131 |         feature_mean = ls_feature.mean(0)
132 | 
133 |         ls_feature = []
134 | 
135 |         count_total = 0
136 |         count_without_feature = 0
137 |         for node in graph.nodes('game'):
138 |             count_total += 1
139 |             node = int(node)
140 |             if node in self.app_info:
141 |                 ls_feature.append(self.app_info[node])
142 |             else:
143 |                 count_without_feature += 1
144 |                 ls_feature.append(feature_mean)
145 |         logging.info("total game number is {}, games without features number is {}".format(count_total,count_without_feature ))
146 | 
147 |         graph.nodes['game'].data['h'] = torch.tensor(np.vstack(ls_feature))
148 |         graph.edges['play'].data['time'] = self.user_game[:, 2]
149 |         graph.edges['played by'].data['time'] = self.user_game[:, 2]
150 |         graph.edges['play'].data['percentile'] = self.user_game[:, 3]
151 |         graph.edges['played by'].data['percentile'] = self.user_game[:, 3]
152 |         self.graph = graph
153 | 
154 |     def __getitem__(self, i):
155 |         pass
156 | 
157 |     def __len__(self):
158 |         pass
159 | 
160 |     def generate_percentile(self, ls):
161 |         dic = {}
162 |         for ls_i in ls:
163 |             if ls_i[1] in dic:
164 |                 dic[ls_i[1]].append(ls_i[2])
165 |             else:
166 |                 dic[ls_i[1]] = [ls_i[2]]
167 |         for key in tqdm(dic):
168 |             dic[key] = sorted(list(set(dic[key])))
169 |         dic_percentile = {}
170 | 
171 |         for key in tqdm(dic):
172 |             dic_percentile[key] = {}
173 |             length = len(dic[key])
174 |             for i in range(len(dic[key])):
175 |                 time = dic[key][i]
176 |                 dic_percentile[key][time] = (i + 1) / length
177 | 
178 | 
179 |         for i in tqdm(range(len(ls))):
180 |             ls[i].append(dic_percentile[ls[i][1]][ls[i][2]])
181 |         return ls
182 | 
183 | 
184 |     def read_dic_user_game(self, game_path):
185 |         dic_game = {}
186 |         with open(game_path, 'r') as f_game:
187 |             lines_game = f_game.readlines()
188 |             for i in tqdm(range(len(lines_game))):
189 |                 line_game = lines_game[i].strip().split(',')
190 |                 user = self.user_id_mapping[line_game[0]]
191 | 
192 |                 dic_game[user] = []
193 |                 for j in range(1, len(line_game)):
194 |                     game = self.app_id_mapping[line_game[j]]
195 |                     dic_game[user].append(game)
196 |         return dic_game
197 | 
198 | 
199 |     def read_play_time_rank(self, game_path, time_path):
200 |         ls = []
201 |         dic_game = {}
202 |         dic_time = {}
203 |         with open(game_path, 'r') as f_game:
204 |             with open(time_path, 'r') as f_time:
205 |                 lines_game = f_game.readlines()
206 |                 lines_time = f_time.readlines()
207 |                 for i in tqdm(range(len(lines_game))):
208 |                     line_game = lines_game[i].strip().split(',')
209 |                     line_time = lines_time[i].strip().split(',')
210 |                     user = self.user_id_mapping[line_game[0]]
211 |                     dic_game[user] = []
212 | 
213 |                     for j in range(1, len(line_game)):
214 |                         game = self.app_id_mapping[line_game[j]]
215 |                         dic_game[user].append(game)
216 |                         time = line_time[j]
217 |                         if time == r'\N':
218 |                             ls.append([user, game, 0])
219 |                         else:
220 |                             ls.append([user, game, float(time)])
221 |         logging.info('generate percentiles')
222 |         ls = self.generate_percentile(ls)
223 |         return torch.tensor(ls), dic_game
224 | 
225 |     def read_play_time(self, path):
226 |         ls = []
227 |         with open(path, 'r', encoding = 'utf8') as f:
228 |             for line in f:
229 |                 line = line.strip().split(',')
230 |                 if line[-1] == r'\N':
231 |                     ls.append([self.user_id_mapping[line[0]], self.app_id_mapping[line[1]], 0])
232 |                 else:
233 |                     ls.append([self.user_id_mapping[line[0]], self.app_id_mapping[line[1]], int(line[2])])
234 |         logging.info('generate percentiles')
235 |         ls = self.generate_percentile(ls)
236 |         return torch.tensor(ls)
237 | 
238 |     def read_id_mapping(self, path):
239 |         mapping = {}
240 |         count = 0
241 |         with open(path, 'r') as f:
242 |             for line in f:
243 |                 line = line.strip()
244 |                 if line not in mapping:
245 |                     mapping[line] = count
246 |                     count += 1
247 |         return mapping
248 | 
    def read_app_info(self, path):
        """Build per-app feature vectors from the header-less app-info CSV.

        All column handling below is positional (``iloc``), so it depends on
        the exact column layout of the CSV -- TODO confirm the schema against
        the data file before changing any index.

        Args:
            path: CSV file without a header row; positional column 0 holds
                the raw app id (looked up in ``self.app_id_mapping`` as a
                string).

        Returns:
            dict mapping app index to a ``float64`` numpy feature vector, plus
            the extra key ``'feature_num'`` holding the vector length.
        """
        dic = {}
        df = pd.read_csv(path, header = None)
        # Replace the column labelled 2 by dummy/indicator columns.
        df = pd.get_dummies(df, columns = [2])
        # Positional column 3 is parsed as a date and turned into the number
        # of days between that date and the fixed reference date 2013-06-25.
        df_time = pd.to_datetime(df.iloc[:, 3])
        date_end = pd.to_datetime('2013-06-25')
        time_sub = date_end - df_time
        time_sub = time_sub.dt.days
        # The day count is appended as the last column of the frame.
        df = pd.concat([df, time_sub], axis = 1)
        column_num = len(df.columns)
        # Feature columns: position 2 and every position from 4 to the end
        # (positions 0, 1 and 3 are left out of the feature vector).
        column_index = [2]
        column_index.extend([i for i in range(4, column_num)])

        logging.info("begin feature engineering")
        # Intent: treat -1 in positional column 4 as missing and impute it
        # with the mean of the remaining values.
        # NOTE(review): both replaces act in place on the object returned by
        # df.iloc[:, 4]; whether that writes back into df depends on the
        # pandas version / copy semantics -- verify the imputation actually
        # reaches df.
        df.iloc[:, 4].replace(to_replace = -1, value = np.nan, inplace = True)
        mean = df.iloc[:, 4].mean()
        df.iloc[:, 4].replace(to_replace = np.nan, value = mean, inplace = True)
        # Standardise (subtract mean, divide by std) these positional columns.
        columns_norm = [2, 4, 5, 11]
        mean = df.iloc[:, columns_norm].mean()
        std = df.iloc[:, columns_norm].std()
        df.iloc[:, columns_norm] = (df.iloc[:, columns_norm] - mean) / std

        for i in range(len(df)):
            app_id = self.app_id_mapping[str(df.iloc[i, 0])]
            feature = df.iloc[i, column_index].to_numpy()
            feature = feature.astype(np.float64)
            dic[app_id] = feature
        # Uses the last row's vector; `feature` is unbound (and this line
        # raises) if the CSV has no rows.
        dic['feature_num'] = len(feature)
        return dic
278 | 
279 |     def read_friends(self, path):
280 |         ls = []
281 |         with open(path, 'r') as f:
282 |             for line in f:
283 |                 line = line.strip().split(',')
284 |                 ls.append([self.user_id_mapping[line[0]], self.user_id_mapping[line[1]]])
285 |         return torch.tensor(ls)
286 | 
287 |     def read_mapping(self, path):
288 |         mapping = {}
289 |         with open(path, 'r') as f:
290 |             for line in f:
291 |                 line = line.strip().split(',')
292 |                 if line[0] not in mapping:
293 |                     if line[1] != '':
294 |                         mapping[self.app_id_mapping[line[0]]] = line[1]
295 |         mapping_value2id = {}
296 |         count = 0
297 |         for value in mapping.values():
298 |             if value not in mapping_value2id:
299 |                 mapping_value2id[value] = count
300 |                 count += 1
301 |         for key in mapping:
302 |             mapping[key] = mapping_value2id[mapping[key]]
303 |         return mapping
304 | 


--------------------------------------------------------------------------------