├── .gitattributes
├── .idea
├── GraphNN.iml
├── encodings.xml
├── misc.xml
├── modules.xml
├── vcs.xml
└── workspace.xml
├── DGLtest.py
├── GraphNCF
├── GCFmodel.py
├── __init__.py
├── __pycache__
│ ├── GCFmodel.cpython-37.pyc
│ ├── __init__.cpython-37.pyc
│ └── dataPreprosessing.cpython-37.pyc
├── dataPreprosessing.py
└── run.py
├── README.md
└── toyDataset
├── 1K
├── README
├── allbut.pl
├── mku.sh
├── ml-100k.zip
├── u.data
├── u.genre
├── u.info
├── u.item
├── u.occupation
├── u.user
├── u1.base
├── u1.test
├── u2.base
├── u2.test
├── u3.base
├── u3.test
├── u4.base
├── u4.test
├── u5.base
├── u5.test
├── ua.base
├── ua.test
├── ub.base
└── ub.test
├── __init__.py
├── __pycache__
├── __init__.cpython-37.pyc
└── loaddata.cpython-37.pyc
└── loaddata.py
/.gitattributes:
--------------------------------------------------------------------------------
1 | # Auto detect text files and perform LF normalization
2 | * text=auto
3 |
--------------------------------------------------------------------------------
/.idea/GraphNN.iml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
--------------------------------------------------------------------------------
/.idea/encodings.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/misc.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
--------------------------------------------------------------------------------
/.idea/modules.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
--------------------------------------------------------------------------------
/.idea/vcs.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
--------------------------------------------------------------------------------
/.idea/workspace.xml:
--------------------------------------------------------------------------------
1 |
2 |
3 |
4 |
5 |
6 |
7 |
8 |
9 |
10 |
11 |
12 |
13 |
14 |
15 |
16 |
17 |
18 |
19 |
20 |
21 |
22 |
23 |
24 |
25 |
26 |
27 |
28 |
29 |
30 |
31 |
32 |
33 |
34 |
35 |
36 |
37 |
38 |
39 |
40 |
41 |
42 |
43 |
44 |
45 |
46 |
47 |
48 |
49 |
50 |
51 |
52 |
53 |
54 |
55 |
56 |
57 |
58 |
59 |
60 |
61 |
62 |
63 |
64 |
65 |
66 |
67 |
68 |
69 |
70 |
71 |
72 |
73 |
74 |
75 |
76 |
77 |
78 |
79 |
80 |
85 |
86 |
87 |
88 | selfLoop
89 |
90 |
91 |
92 |
93 |
94 |
95 |
96 |
97 |
98 |
99 |
100 |
101 |
102 |
103 |
104 |
105 |
106 |
107 |
108 |
109 |
110 |
111 |
112 |
113 |
114 |
115 |
116 |
117 |
118 |
119 |
120 |
121 |
122 |
123 |
124 |
125 |
126 |
127 |
128 |
129 |
130 |
131 |
132 |
133 |
134 |
135 |
136 |
137 |
138 |
139 |
140 |
141 |
142 |
143 |
144 |
145 |
146 |
147 |
148 |
149 |
150 |
151 |
152 |
155 |
156 |
157 |
158 |
159 |
160 |
161 |
162 |
163 |
164 |
165 |
166 |
167 |
168 |
169 |
170 |
171 |
172 |
173 |
174 |
175 |
176 |
177 |
178 |
179 |
180 |
181 |
182 |
183 |
184 |
185 |
186 |
187 |
188 |
189 |
190 |
191 |
192 |
193 |
194 |
195 |
196 |
197 |
198 |
199 |
200 |
201 |
202 |
203 |
204 |
205 |
206 |
207 |
208 |
209 |
210 |
211 |
212 |
213 |
214 |
215 |
216 |
217 |
218 |
219 |
220 |
221 |
222 |
223 |
224 |
225 |
226 |
227 |
228 |
229 |
230 |
231 |
232 |
233 |
234 |
235 |
236 |
237 |
238 |
239 |
240 |
241 |
242 |
243 |
244 |
245 | 1558497373319
246 |
247 |
248 | 1558497373319
249 |
250 |
251 |
252 |
253 |
254 |
255 |
256 |
257 |
258 |
259 |
260 |
261 |
262 |
263 |
264 |
265 |
266 |
267 |
268 |
269 |
270 |
271 |
272 |
273 |
274 |
275 |
276 |
277 |
278 |
279 |
280 |
281 |
282 |
283 |
284 |
285 |
286 |
287 |
288 |
289 |
290 |
291 |
292 |
293 |
294 |
295 |
296 |
297 |
298 |
299 |
300 |
301 |
302 |
303 |
304 |
305 |
306 |
307 |
308 |
309 |
310 |
311 |
312 |
313 |
314 |
315 |
316 |
317 |
318 |
319 |
320 |
321 |
322 |
323 |
324 |
325 |
326 |
327 |
328 |
329 |
330 |
331 |
332 |
333 |
334 |
335 |
336 |
337 |
338 |
339 |
340 |
341 |
342 |
343 |
344 |
345 |
346 |
347 |
348 |
349 |
350 |
351 |
352 |
353 |
354 |
355 |
356 |
357 |
--------------------------------------------------------------------------------
/DGLtest.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import dgl
3 |
4 | # dgl test
5 |
def build_karate_club_graph():
    """Build the karate-club toy graph as a bi-directional DGL graph.

    The graph has 34 nodes (ids 0..33). Every pair in the edge table is
    inserted twice, once per direction, because DGL edges are directed.

    Returns:
        The populated ``dgl.DGLGraph``.
    """
    # Edge table, one (source, destination) pair per undirected edge.
    edges = [(1, 0), (2, 0), (2, 1), (3, 0), (3, 1), (3, 2),
             (4, 0), (5, 0), (6, 0), (6, 4), (6, 5), (7, 0), (7, 1),
             (7, 2), (7, 3), (8, 0), (8, 2), (9, 2), (10, 0), (10, 4),
             (10, 5), (11, 0), (12, 0), (12, 3), (13, 0), (13, 1), (13, 2),
             (13, 3), (16, 5), (16, 6), (17, 0), (17, 1), (19, 0), (19, 1),
             (21, 0), (21, 1), (25, 23), (25, 24), (27, 2), (27, 23),
             (27, 24), (28, 2), (29, 23), (29, 26), (30, 1), (30, 8),
             (31, 0), (31, 24), (31, 25), (31, 28), (32, 2), (32, 8),
             (32, 14), (32, 15), (32, 18), (32, 20), (32, 22), (32, 23),
             (32, 29), (32, 30), (32, 31), (33, 8), (33, 9), (33, 13),
             (33, 14), (33, 15), (33, 18), (33, 19), (33, 20), (33, 22),
             (33, 23), (33, 26), (33, 27), (33, 28), (33, 29), (33, 30),
             (33, 31), (33, 32)]

    # Split the pairs into two parallel tuples of endpoints.
    sources = tuple(pair[0] for pair in edges)
    targets = tuple(pair[1] for pair in edges)

    graph = dgl.DGLGraph()
    graph.add_nodes(34)
    # Forward direction first, then the mirrored direction.
    graph.add_edges(sources, targets)
    graph.add_edges(targets, sources)
    return graph
--------------------------------------------------------------------------------
/GraphNCF/GCFmodel.py:
--------------------------------------------------------------------------------
1 | import torch
2 | import torch.nn as nn
3 | from torch.nn import Module
4 | from scipy.sparse import coo_matrix
5 | from scipy.sparse import vstack
6 | from scipy import sparse
7 | import numpy as np
8 |
9 |
10 | # several models for recommendations
11 |
12 | # RMSE
13 | # SVD dim = 50 50 epoch RMSE = 0.931
14 | # GNCF dim = 64 layer = [64,64,64] nn = [128,64,32,] 50 epoch RMSE = 0.916/RMSE =0.914
15 | # NCF dim = 64 50 nn = [128,54,32] epoch 50 RMSE = 0.928
16 |
class SVD(Module):
    """Biased matrix-factorisation rating predictor.

    A rating is predicted as the dot product of a user embedding and an
    item embedding plus a per-user bias, a per-item bias and one global
    bias term.
    """

    def __init__(self, userNum, itemNum, dim):
        """Create the embedding and bias tables.

        Args:
            userNum: number of rows in the user tables.
            itemNum: number of rows in the item tables.
            dim: width of the user and item embeddings.
        """
        super().__init__()
        self.uEmbd = nn.Embedding(num_embeddings=userNum, embedding_dim=dim)
        self.iEmbd = nn.Embedding(num_embeddings=itemNum, embedding_dim=dim)
        # Bias tables are embeddings of width 1 so they can be looked up
        # with the same index tensors as the latent factors.
        self.uBias = nn.Embedding(num_embeddings=userNum, embedding_dim=1)
        self.iBias = nn.Embedding(num_embeddings=itemNum, embedding_dim=1)
        self.overAllBias = nn.Parameter(torch.Tensor([0]))

    def forward(self, userIdx, itemIdx):
        """Return one predicted score per (userIdx[k], itemIdx[k]) pair."""
        user_vec = self.uEmbd(userIdx)
        item_vec = self.iEmbd(itemIdx)
        interaction = (user_vec * item_vec).sum(dim=1)

        offset = self.uBias(userIdx) + self.iBias(itemIdx) + self.overAllBias
        return interaction + offset.flatten()
37 |
class NCF(Module):
    """Neural collaborative filtering: an MLP over concatenated embeddings.

    The user and item embeddings (each ``dim`` wide) are concatenated and
    passed through a stack of ``Linear`` + ReLU layers whose widths are given
    by ``layers``, followed by a final ``Linear`` that produces one score.
    """

    def __init__(self, userNum, itemNum, dim, layers=(128, 64, 32, 8)):
        """Build the embeddings and the MLP tower.

        Args:
            userNum: number of rows in the user embedding table.
            itemNum: number of rows in the item embedding table.
            dim: width of each embedding.
            layers: MLP widths. ``layers[0]`` is the input width and must
                equal ``2 * dim`` because the input is the concatenation of
                one user and one item embedding.

        Raises:
            ValueError: if ``layers`` is empty or ``layers[0] != 2 * dim``.
        """
        super(NCF, self).__init__()
        # Copy so a caller-owned (or default) sequence is never shared.
        layers = list(layers)
        if not layers:
            raise ValueError("layers must contain at least one width")
        if layers[0] != 2 * dim:
            # Fail here instead of with an opaque shape error in forward().
            raise ValueError(
                "layers[0] must equal 2 * dim (%d), got %d" % (2 * dim, layers[0]))

        self.uEmbd = nn.Embedding(userNum, dim)
        self.iEmbd = nn.Embedding(itemNum, dim)
        self.fc_layers = torch.nn.ModuleList()
        self.finalLayer = torch.nn.Linear(layers[-1], 1)

        for From, To in zip(layers[:-1], layers[1:]):
            self.fc_layers.append(nn.Linear(From, To))

    def forward(self, userIdx, itemIdx):
        """Return one predicted score per (userIdx[k], itemIdx[k]) pair."""
        uembd = self.uEmbd(userIdx)
        iembd = self.iEmbd(itemIdx)
        x = torch.cat([uembd, iembd], dim=1)
        for layer in self.fc_layers:
            # Functional ReLU avoids allocating a new module on every call.
            x = torch.relu(layer(x))

        prediction = self.finalLayer(x)
        return prediction.flatten()
61 |
62 |
class GNNLayer(Module):
    """One graph propagation layer used by ``GCF``.

    Given a sparse normalised adjacency ``L``, a sparse identity ``I`` and a
    dense feature matrix ``E``, the layer returns

        linear((L + I) @ E) + interActTransform(L @ ((L @ E) * E))

    where ``*`` is the element-wise product.
    """

    def __init__(self, inF, outF):
        """Create the two linear maps.

        Args:
            inF: width of the incoming features.
            outF: width of the produced features.
        """
        super(GNNLayer, self).__init__()
        self.inF = inF
        self.outF = outF
        self.linear = torch.nn.Linear(in_features=inF, out_features=outF)
        self.interActTransform = torch.nn.Linear(in_features=inF, out_features=outF)

    def forward(self, laplacianMat, selfLoop, features):
        """Propagate ``features`` over the graph.

        Args:
            laplacianMat: sparse (N+M) x (N+M) matrix D^-1/2 A D^-1/2.
            selfLoop: sparse identity of the same size.
            features: dense (N+M) x inF matrix.

        Returns:
            Dense (N+M) x outF matrix.
        """
        # Follow the device of the features instead of forcing CUDA, so the
        # layer also runs on CPU-only machines. ``.to`` is a no-op when the
        # matrices already live on that device.
        device = features.device
        L2 = laplacianMat.to(device)
        L1 = L2 + selfLoop.to(device)

        inter_feature = torch.sparse.mm(L2, features)
        inter_feature = torch.mul(inter_feature, features)

        inter_part1 = self.linear(torch.sparse.mm(L1, features))
        # NOTE(review): the interaction term is multiplied by L a second
        # time here; kept as-is to preserve the trained behaviour.
        inter_part2 = self.interActTransform(torch.sparse.mm(L2, inter_feature))

        return inter_part1 + inter_part2


class GCF(Module):
    """Graph collaborative filtering model that predicts ratings.

    User and item embeddings are propagated through a stack of ``GNNLayer``
    modules over the user-item graph. The initial embeddings and the output
    of every layer are concatenated per node; the concatenated user and item
    vectors are then fed to three linear transforms that yield one score.
    """

    def __init__(self, userNum, itemNum, rt, embedSize=100, layers=(100, 80, 50), useCuda=True):
        """Build embeddings, graph matrices and prediction head.

        Args:
            userNum: number of users (user ids are 0..userNum-1).
            itemNum: number of items (item ids are 0..itemNum-1).
            rt: mapping with 'userId', 'itemId' and 'rating' columns
                (e.g. a pandas DataFrame) describing the observed ratings.
            embedSize: width of the initial embeddings.
            layers: widths of the propagation stack; consecutive pairs
                define one ``GNNLayer`` each, so ``layers[0]`` is the input
                width of the first layer.
            useCuda: kept for backward compatibility and stored on the
                instance; tensors are placed on the device of the model
                parameters, whatever this flag says.

        Raises:
            ValueError: if propagation layers are requested and
                ``layers[0] != embedSize``.
        """
        super(GCF, self).__init__()
        # Copy so a caller-owned (or default) sequence is never shared.
        layers = list(layers)
        if len(layers) > 1 and layers[0] != embedSize:
            raise ValueError(
                "layers[0] (%d) must equal embedSize (%d)" % (layers[0], embedSize))

        self.useCuda = useCuda
        self.userNum = userNum
        self.itemNum = itemNum
        self.uEmbd = nn.Embedding(userNum, embedSize)
        self.iEmbd = nn.Embedding(itemNum, embedSize)
        self.GNNlayers = torch.nn.ModuleList()
        self.LaplacianMat = self.buildLaplacianMat(rt)  # sparse format
        self.leakyRelu = nn.LeakyReLU()  # not used by forward(); kept for compatibility
        self.selfLoop = self.getSparseEye(self.userNum + self.itemNum)

        # forward() concatenates the initial embedding with the output of
        # every GNN layer, so each node vector is embedSize + sum(layers[1:])
        # wide; the head sees a user and an item vector side by side. This
        # equals the former layers[-1] * len(layers) * 2 whenever all widths
        # are equal, and is also correct when they are not.
        nodeWidth = embedSize + sum(layers[1:])
        self.transForm1 = nn.Linear(in_features=2 * nodeWidth, out_features=64)
        self.transForm2 = nn.Linear(in_features=64, out_features=32)
        self.transForm3 = nn.Linear(in_features=32, out_features=1)

        for From, To in zip(layers[:-1], layers[1:]):
            self.GNNlayers.append(GNNLayer(From, To))

    def getSparseEye(self, num):
        """Return a sparse ``num`` x ``num`` float identity matrix."""
        num = int(num)
        idx = torch.arange(num, dtype=torch.long)
        return torch.sparse_coo_tensor(
            torch.stack([idx, idx]), torch.ones(num), (num, num)).coalesce()

    def buildLaplacianMat(self, rt):
        """Build the normalised adjacency D^-1/2 A D^-1/2 as a sparse tensor.

        ``A`` is the symmetric (userNum+itemNum) square matrix whose
        user-item and item-user blocks hold the ratings. ``D`` is the
        diagonal matrix of node degrees, where the degree counts the
        positive entries of a row.

        Args:
            rt: mapping with 'userId', 'itemId' and 'rating' columns.

        Returns:
            Sparse float tensor of shape (userNum+itemNum, userNum+itemNum).
        """
        n = int(self.userNum + self.itemNum)
        users = np.asarray(rt['userId'], dtype=np.int64)
        # Item nodes are placed after the user nodes.
        items = np.asarray(rt['itemId'], dtype=np.int64) + int(self.userNum)
        ratings = np.asarray(rt['rating'], dtype=np.float64)

        # Explicit shape: inferring it from the largest index breaks as soon
        # as the last user or item has no rating.
        A = coo_matrix(
            (np.concatenate([ratings, ratings]),
             (np.concatenate([users, items]), np.concatenate([items, users]))),
            shape=(n, n)).tocsr()

        degree = np.asarray((A > 0).sum(axis=1)).ravel().astype(np.float64)
        # Isolated nodes keep a zero scale instead of producing inf.
        invSqrt = np.zeros_like(degree)
        connected = degree > 0
        invSqrt[connected] = np.power(degree[connected], -0.5)
        D = sparse.diags(invSqrt)

        L = sparse.coo_matrix(D.dot(A).dot(D))
        indices = torch.from_numpy(np.vstack([L.row, L.col]).astype(np.int64))
        values = torch.from_numpy(L.data.astype(np.float32))
        return torch.sparse_coo_tensor(indices, values, (n, n)).coalesce()

    def getFeatureMat(self):
        """Return all user embeddings stacked on top of all item embeddings."""
        # Index tensors are created on the device of the embedding weights so
        # the lookup works on CPU and GPU alike.
        device = self.uEmbd.weight.device
        uidx = torch.arange(int(self.userNum), dtype=torch.long, device=device)
        iidx = torch.arange(int(self.itemNum), dtype=torch.long, device=device)

        userEmbd = self.uEmbd(uidx)
        itemEmbd = self.iEmbd(iidx)
        features = torch.cat([userEmbd, itemEmbd], dim=0)
        return features

    def forward(self, userIdx, itemIdx):
        """Return one predicted score per (userIdx[k], itemIdx[k]) pair.

        Args:
            userIdx: 1-D integer tensor of user ids.
            itemIdx: 1-D integer tensor of item ids (without user offset).
        """
        device = self.uEmbd.weight.device
        # Move the graph matrices once, the first time the model is used on
        # a new device, rather than on every layer call.
        if self.LaplacianMat.device != device:
            self.LaplacianMat = self.LaplacianMat.to(device)
            self.selfLoop = self.selfLoop.to(device)

        # Item nodes are stored after the user nodes in the feature matrix.
        itemIdx = (itemIdx + self.userNum).to(device).long()
        userIdx = userIdx.to(device).long()

        # gcf data propagation
        features = self.getFeatureMat()
        perLayer = [features]
        for gnn in self.GNNlayers:
            features = gnn(self.LaplacianMat, self.selfLoop, features)
            features = torch.relu(features)
            perLayer.append(features)
        finalEmbd = torch.cat(perLayer, dim=1)

        userEmbd = finalEmbd[userIdx]
        itemEmbd = finalEmbd[itemIdx]
        embd = torch.cat([userEmbd, itemEmbd], dim=1)

        embd = torch.relu(self.transForm1(embd))
        embd = self.transForm2(embd)
        embd = self.transForm3(embd)
        prediction = embd.flatten()

        return prediction
173 |
if __name__ == '__main__':
    # Smoke test: load the MovieLens 100k ratings and construct a GCF model.
    from toyDataset.loaddata import load100KRatings

    rt = load100KRatings()
    # Counts are read before the ids are shifted, while ids are still 1-based.
    userNum, itemNum = rt['userId'].max(), rt['itemId'].max()

    # Shift both id columns to be 0-based.
    for column in ('userId', 'itemId'):
        rt[column] = rt[column] - 1

    gcf = GCF(userNum, itemNum, rt)
184 |
--------------------------------------------------------------------------------
/GraphNCF/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/GraphNCF/__init__.py
--------------------------------------------------------------------------------
/GraphNCF/__pycache__/GCFmodel.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/GraphNCF/__pycache__/GCFmodel.cpython-37.pyc
--------------------------------------------------------------------------------
/GraphNCF/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/GraphNCF/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/GraphNCF/__pycache__/dataPreprosessing.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/GraphNCF/__pycache__/dataPreprosessing.cpython-37.pyc
--------------------------------------------------------------------------------
/GraphNCF/dataPreprosessing.py:
--------------------------------------------------------------------------------
1 | from torch.utils.data import Dataset
2 |
3 | # movielens 1k
4 |
class ML1K(Dataset):
    """Dataset of (userId, itemId, rating) triples taken from a ratings table."""

    def __init__(self, rt):
        """Copy the three columns of ``rt`` into plain Python lists.

        Args:
            rt: mapping with 'userId', 'itemId' and 'rating' columns
                (e.g. a pandas DataFrame).
        """
        # super(Dataset, self) would start the MRO lookup *after* Dataset and
        # skip its initialiser; name this class instead.
        super(ML1K, self).__init__()
        self.uId = list(rt['userId'])
        self.iId = list(rt['itemId'])
        self.rt = list(rt['rating'])

    def __len__(self):
        """Return the number of ratings."""
        return len(self.uId)

    def __getitem__(self, item):
        """Return the ``(userId, itemId, rating)`` triple at position ``item``."""
        return (self.uId[item], self.iId[item], self.rt[item])
--------------------------------------------------------------------------------
/GraphNCF/run.py:
--------------------------------------------------------------------------------
1 | import torch
2 | from torch import nn as nn
3 | from toyDataset.loaddata import load100KRatings
4 | from scipy.sparse import coo_matrix
5 | import pandas as pd
6 | import numpy as np
7 | from numpy import diag
8 | from GraphNCF.GCFmodel import GCF
9 | from torch.utils.data import DataLoader
10 | from GraphNCF.dataPreprosessing import ML1K
11 | from torch.utils.data import random_split
12 | from torch.optim import Adam
13 | from torch.nn import MSELoss
14 | from GraphNCF.GCFmodel import SVD
15 | from GraphNCF.GCFmodel import NCF
16 |
# Train a GCF model on MovieLens 100k and report the MSE on a held-out split.

rt = load100KRatings()
# Ids in the raw data are 1-based, so the maximum id is also the count.
userNum = rt['userId'].max()
itemNum = rt['itemId'].max()

# Shift ids to be 0-based for embedding lookups.
rt['userId'] = rt['userId'] - 1
rt['itemId'] = rt['itemId'] - 1

# The normalised adjacency matrix is built by GCF.buildLaplacianMat; the
# dense prototype that used to be commented out here was removed.

para = {
    'epoch':60,
    'lr':0.01,
    'batch_size':2048,
    'train':0.8
}

# Use the GPU when one is present. On a CPU-only machine the model itself
# must also be able to run on CPU.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
useCuda = device.type == 'cuda'

ds = ML1K(rt)
trainLen = int(para['train']*len(ds))
train,test = random_split(ds,[trainLen,len(ds)-trainLen])
dl = DataLoader(train,batch_size=para['batch_size'],shuffle=True,pin_memory=useCuda)

model = GCF(userNum, itemNum, rt, 80, layers=[80,80,], useCuda=useCuda).to(device)
# model = SVD(userNum,itemNum,50).to(device)
# model = NCF(userNum,itemNum,64,layers=[128,64,32,16,8]).to(device)
optim = Adam(model.parameters(), lr=para['lr'],weight_decay=0.001)
lossfn = MSELoss()

for i in range(para['epoch']):

    # batchIdx instead of `id`, which shadows the builtin.
    for batchIdx,batch in enumerate(dl):
        print('epoch:',i,' batch:',batchIdx)
        optim.zero_grad()
        prediction = model(batch[0].to(device), batch[1].to(device))
        # MSELoss takes (input, target); the value is the same either way.
        loss = lossfn(prediction, batch[2].float().to(device))
        loss.backward()
        optim.step()
        print(loss)


# Evaluate on the whole test split in one batch. no_grad() keeps autograd
# from recording a graph that is never back-propagated.
model.eval()
testdl = DataLoader(test,batch_size=len(test),)
with torch.no_grad():
    for data in testdl:
        prediction = model(data[0].to(device),data[1].to(device))
        loss = lossfn(prediction, data[2].float().to(device))
print(loss) # MSEloss
81 |
82 |
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # NGCF
2 | A pytorch toy implementation of Neural Graph Collaborative filtering
3 |
4 | link:https://arxiv.org/pdf/1905.08108.pdf
5 |
6 | Tested on dataset movielens 100k
7 |
8 | # Details
9 |
10 | Adds three transform layers to yield rating predictions
11 |
12 | No node or message dropout
13 |
14 | # Evaluation
15 | Train 0.8 test 0.2
16 |
17 | SVD dim 50 RMSE 0.931
18 |
19 | NCF dim 64 layers [128,64,32,8] RMSE 0.928
20 |
21 | NGCF dim 64 layers [64,64,64] RMSE 0.896
22 |
23 |
--------------------------------------------------------------------------------
/toyDataset/1K/README:
--------------------------------------------------------------------------------
1 | SUMMARY & USAGE LICENSE
2 | =============================================
3 |
4 | MovieLens data sets were collected by the GroupLens Research Project
5 | at the University of Minnesota.
6 |
7 | This data set consists of:
8 | * 100,000 ratings (1-5) from 943 users on 1682 movies.
9 | * Each user has rated at least 20 movies.
10 | * Simple demographic info for the users (age, gender, occupation, zip)
11 |
12 | The data was collected through the MovieLens web site
13 | (movielens.umn.edu) during the seven-month period from September 19th,
14 | 1997 through April 22nd, 1998. This data has been cleaned up - users
15 | who had less than 20 ratings or did not have complete demographic
16 | information were removed from this data set. Detailed descriptions of
17 | the data file can be found at the end of this file.
18 |
19 | Neither the University of Minnesota nor any of the researchers
20 | involved can guarantee the correctness of the data, its suitability
21 | for any particular purpose, or the validity of results based on the
22 | use of the data set. The data set may be used for any research
23 | purposes under the following conditions:
24 |
25 | * The user may not state or imply any endorsement from the
26 | University of Minnesota or the GroupLens Research Group.
27 |
28 | * The user must acknowledge the use of the data set in
29 | publications resulting from the use of the data set
30 | (see below for citation information).
31 |
32 | * The user may not redistribute the data without separate
33 | permission.
34 |
35 | * The user may not use this information for any commercial or
36 | revenue-bearing purposes without first obtaining permission
37 | from a faculty member of the GroupLens Research Project at the
38 | University of Minnesota.
39 |
40 | If you have any further questions or comments, please contact GroupLens
41 | .
42 |
43 | CITATION
44 | ==============================================
45 |
46 | To acknowledge use of the dataset in publications, please cite the
47 | following paper:
48 |
49 | F. Maxwell Harper and Joseph A. Konstan. 2015. The MovieLens Datasets:
50 | History and Context. ACM Transactions on Interactive Intelligent
51 | Systems (TiiS) 5, 4, Article 19 (December 2015), 19 pages.
52 | DOI=http://dx.doi.org/10.1145/2827872
53 |
54 |
55 | ACKNOWLEDGEMENTS
56 | ==============================================
57 |
58 | Thanks to Al Borchers for cleaning up this data and writing the
59 | accompanying scripts.
60 |
61 | PUBLISHED WORK THAT HAS USED THIS DATASET
62 | ==============================================
63 |
64 | Herlocker, J., Konstan, J., Borchers, A., Riedl, J.. An Algorithmic
65 | Framework for Performing Collaborative Filtering. Proceedings of the
66 | 1999 Conference on Research and Development in Information
67 | Retrieval. Aug. 1999.
68 |
69 | FURTHER INFORMATION ABOUT THE GROUPLENS RESEARCH PROJECT
70 | ==============================================
71 |
72 | The GroupLens Research Project is a research group in the Department
73 | of Computer Science and Engineering at the University of Minnesota.
74 | Members of the GroupLens Research Project are involved in many
75 | research projects related to the fields of information filtering,
76 | collaborative filtering, and recommender systems. The project is led
77 | by professors John Riedl and Joseph Konstan. The project began to
78 | explore automated collaborative filtering in 1992, but is most well
79 | known for its world wide trial of an automated collaborative filtering
80 | system for Usenet news in 1996. The technology developed in the
81 | Usenet trial formed the base for the formation of Net Perceptions,
82 | Inc., which was founded by members of GroupLens Research. Since then
83 | the project has expanded its scope to research overall information
84 | filtering solutions, integrating in content-based methods as well as
85 | improving current collaborative filtering technology.
86 |
87 | Further information on the GroupLens Research project, including
88 | research publications, can be found at the following web site:
89 |
90 | http://www.grouplens.org/
91 |
92 | GroupLens Research currently operates a movie recommender based on
93 | collaborative filtering:
94 |
95 | http://www.movielens.org/
96 |
97 | DETAILED DESCRIPTIONS OF DATA FILES
98 | ==============================================
99 |
100 | Here are brief descriptions of the data.
101 |
102 | ml-data.tar.gz -- Compressed tar file. To rebuild the u data files do this:
103 | gunzip ml-data.tar.gz
104 | tar xvf ml-data.tar
105 | mku.sh
106 |
107 | u.data -- The full u data set, 100000 ratings by 943 users on 1682 items.
108 | Each user has rated at least 20 movies. Users and items are
109 | numbered consecutively from 1. The data is randomly
110 | ordered. This is a tab separated list of
111 | user id | item id | rating | timestamp.
112 | The time stamps are unix seconds since 1/1/1970 UTC
113 |
114 | u.info -- The number of users, items, and ratings in the u data set.
115 |
116 | u.item -- Information about the items (movies); this is a tab separated
117 | list of
118 | movie id | movie title | release date | video release date |
119 | IMDb URL | unknown | Action | Adventure | Animation |
120 | Children's | Comedy | Crime | Documentary | Drama | Fantasy |
121 | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi |
122 | Thriller | War | Western |
123 | The last 19 fields are the genres, a 1 indicates the movie
124 | is of that genre, a 0 indicates it is not; movies can be in
125 | several genres at once.
126 | The movie ids are the ones used in the u.data data set.
127 |
128 | u.genre -- A list of the genres.
129 |
130 | u.user -- Demographic information about the users; this is a tab
131 | separated list of
132 | user id | age | gender | occupation | zip code
133 | The user ids are the ones used in the u.data data set.
134 |
135 | u.occupation -- A list of the occupations.
136 |
137 | u1.base -- The data sets u1.base and u1.test through u5.base and u5.test
138 | u1.test are 80%/20% splits of the u data into training and test data.
139 | u2.base Each of u1, ..., u5 have disjoint test sets; this is for
140 | u2.test 5 fold cross validation (where you repeat your experiment
141 | u3.base with each training and test set and average the results).
142 | u3.test These data sets can be generated from u.data by mku.sh.
143 | u4.base
144 | u4.test
145 | u5.base
146 | u5.test
147 |
148 | ua.base -- The data sets ua.base, ua.test, ub.base, and ub.test
149 | ua.test split the u data into a training set and a test set with
150 | ub.base exactly 10 ratings per user in the test set. The sets
151 | ub.test ua.test and ub.test are disjoint. These data sets can
152 | be generated from u.data by mku.sh.
153 |
154 | allbut.pl -- The script that generates training and test sets where
155 | all but n of a user's ratings are in the training data.
156 |
157 | mku.sh -- A shell script to generate all the u data sets from u.data.
158 |
--------------------------------------------------------------------------------
/toyDataset/1K/allbut.pl:
--------------------------------------------------------------------------------
#!/usr/local/bin/perl

# Split a stream of ratings into "<base_name>.test" and "<base_name>.base".
# A line goes to the test file when it is the user's start-th through
# stop-th rating seen so far and the test file has not yet reached max_test
# lines (a max_test of zero or less means no limit). Every other line goes
# to the base file.

# get args
unless (@ARGV >= 3) {
    print STDERR "Usage: $0 base_name start stop max_test [ratings ...]\n";
    exit 1;
}
($basename, $start, $stop, $maxtest) = splice(@ARGV, 0, 4);

# open files
open( TESTFILE, ">$basename.test" ) or die "Cannot open $basename.test for writing\n";
open( BASEFILE, ">$basename.base" ) or die "Cannot open $basename.base for writing\n";

# init variables
$testcnt = 0;

while ($line = <>) {
    # the first whitespace-separated field is the user id
    ($user) = split ' ', $line;
    $ratingcnt{$user} = 0 unless defined $ratingcnt{$user};
    $ratingcnt{$user} += 1;

    $room_left = ($testcnt < $maxtest || $maxtest <= 0);
    $in_window = ($ratingcnt{$user} >= $start && $ratingcnt{$user} <= $stop);

    if ($room_left && $in_window) {
        $testcnt += 1;
        print TESTFILE $line;
    }
    else {
        print BASEFILE $line;
    }
}
35 |
--------------------------------------------------------------------------------
/toyDataset/1K/mku.sh:
--------------------------------------------------------------------------------
#!/bin/sh
#
# Regenerate the u1..u5, ua and ub base/test splits from u.data.

# Single quotes defer the cleanup until a signal arrives. With backticks the
# command ran immediately at startup and the trap action was left empty.
trap 'rm -f tmp.$$; exit 1' 1 2 15

# u.data is tab separated; spell the delimiter out so it cannot be lost to
# whitespace conversion.
TAB=`printf '\t'`

for i in 1 2 3 4 5
do
	# Fold i: the i-th block of 20000 lines is the test set ...
	head -n `expr $i \* 20000` u.data | tail -n 20000 > tmp.$$
	sort -t"$TAB" -k 1,1n -k 2,2n tmp.$$ > u$i.test
	# ... and everything before and after that block is the base set.
	head -n `expr \( $i - 1 \) \* 20000` u.data > tmp.$$
	tail -n `expr \( 5 - $i \) \* 20000` u.data >> tmp.$$
	sort -t"$TAB" -k 1,1n -k 2,2n tmp.$$ > u$i.base
done

# ua: each user's 1st..10th rating goes to the test set.
allbut.pl ua 1 10 100000 u.data
sort -t"$TAB" -k 1,1n -k 2,2n ua.base > tmp.$$
mv tmp.$$ ua.base
sort -t"$TAB" -k 1,1n -k 2,2n ua.test > tmp.$$
mv tmp.$$ ua.test

# ub: each user's 11th..20th rating goes to the test set.
allbut.pl ub 11 20 100000 u.data
sort -t"$TAB" -k 1,1n -k 2,2n ub.base > tmp.$$
mv tmp.$$ ub.base
sort -t"$TAB" -k 1,1n -k 2,2n ub.test > tmp.$$
mv tmp.$$ ub.test
25 |
26 |
--------------------------------------------------------------------------------
/toyDataset/1K/ml-100k.zip:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/toyDataset/1K/ml-100k.zip
--------------------------------------------------------------------------------
/toyDataset/1K/u.genre:
--------------------------------------------------------------------------------
1 | unknown|0
2 | Action|1
3 | Adventure|2
4 | Animation|3
5 | Children's|4
6 | Comedy|5
7 | Crime|6
8 | Documentary|7
9 | Drama|8
10 | Fantasy|9
11 | Film-Noir|10
12 | Horror|11
13 | Musical|12
14 | Mystery|13
15 | Romance|14
16 | Sci-Fi|15
17 | Thriller|16
18 | War|17
19 | Western|18
20 |
21 |
--------------------------------------------------------------------------------
/toyDataset/1K/u.info:
--------------------------------------------------------------------------------
1 | 943 users
2 | 1682 items
3 | 100000 ratings
4 |
--------------------------------------------------------------------------------
/toyDataset/1K/u.item:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/toyDataset/1K/u.item
--------------------------------------------------------------------------------
/toyDataset/1K/u.occupation:
--------------------------------------------------------------------------------
1 | administrator
2 | artist
3 | doctor
4 | educator
5 | engineer
6 | entertainment
7 | executive
8 | healthcare
9 | homemaker
10 | lawyer
11 | librarian
12 | marketing
13 | none
14 | other
15 | programmer
16 | retired
17 | salesman
18 | scientist
19 | student
20 | technician
21 | writer
22 |
--------------------------------------------------------------------------------
/toyDataset/1K/u.user:
--------------------------------------------------------------------------------
1 | 1|24|M|technician|85711
2 | 2|53|F|other|94043
3 | 3|23|M|writer|32067
4 | 4|24|M|technician|43537
5 | 5|33|F|other|15213
6 | 6|42|M|executive|98101
7 | 7|57|M|administrator|91344
8 | 8|36|M|administrator|05201
9 | 9|29|M|student|01002
10 | 10|53|M|lawyer|90703
11 | 11|39|F|other|30329
12 | 12|28|F|other|06405
13 | 13|47|M|educator|29206
14 | 14|45|M|scientist|55106
15 | 15|49|F|educator|97301
16 | 16|21|M|entertainment|10309
17 | 17|30|M|programmer|06355
18 | 18|35|F|other|37212
19 | 19|40|M|librarian|02138
20 | 20|42|F|homemaker|95660
21 | 21|26|M|writer|30068
22 | 22|25|M|writer|40206
23 | 23|30|F|artist|48197
24 | 24|21|F|artist|94533
25 | 25|39|M|engineer|55107
26 | 26|49|M|engineer|21044
27 | 27|40|F|librarian|30030
28 | 28|32|M|writer|55369
29 | 29|41|M|programmer|94043
30 | 30|7|M|student|55436
31 | 31|24|M|artist|10003
32 | 32|28|F|student|78741
33 | 33|23|M|student|27510
34 | 34|38|F|administrator|42141
35 | 35|20|F|homemaker|42459
36 | 36|19|F|student|93117
37 | 37|23|M|student|55105
38 | 38|28|F|other|54467
39 | 39|41|M|entertainment|01040
40 | 40|38|M|scientist|27514
41 | 41|33|M|engineer|80525
42 | 42|30|M|administrator|17870
43 | 43|29|F|librarian|20854
44 | 44|26|M|technician|46260
45 | 45|29|M|programmer|50233
46 | 46|27|F|marketing|46538
47 | 47|53|M|marketing|07102
48 | 48|45|M|administrator|12550
49 | 49|23|F|student|76111
50 | 50|21|M|writer|52245
51 | 51|28|M|educator|16509
52 | 52|18|F|student|55105
53 | 53|26|M|programmer|55414
54 | 54|22|M|executive|66315
55 | 55|37|M|programmer|01331
56 | 56|25|M|librarian|46260
57 | 57|16|M|none|84010
58 | 58|27|M|programmer|52246
59 | 59|49|M|educator|08403
60 | 60|50|M|healthcare|06472
61 | 61|36|M|engineer|30040
62 | 62|27|F|administrator|97214
63 | 63|31|M|marketing|75240
64 | 64|32|M|educator|43202
65 | 65|51|F|educator|48118
66 | 66|23|M|student|80521
67 | 67|17|M|student|60402
68 | 68|19|M|student|22904
69 | 69|24|M|engineer|55337
70 | 70|27|M|engineer|60067
71 | 71|39|M|scientist|98034
72 | 72|48|F|administrator|73034
73 | 73|24|M|student|41850
74 | 74|39|M|scientist|T8H1N
75 | 75|24|M|entertainment|08816
76 | 76|20|M|student|02215
77 | 77|30|M|technician|29379
78 | 78|26|M|administrator|61801
79 | 79|39|F|administrator|03755
80 | 80|34|F|administrator|52241
81 | 81|21|M|student|21218
82 | 82|50|M|programmer|22902
83 | 83|40|M|other|44133
84 | 84|32|M|executive|55369
85 | 85|51|M|educator|20003
86 | 86|26|M|administrator|46005
87 | 87|47|M|administrator|89503
88 | 88|49|F|librarian|11701
89 | 89|43|F|administrator|68106
90 | 90|60|M|educator|78155
91 | 91|55|M|marketing|01913
92 | 92|32|M|entertainment|80525
93 | 93|48|M|executive|23112
94 | 94|26|M|student|71457
95 | 95|31|M|administrator|10707
96 | 96|25|F|artist|75206
97 | 97|43|M|artist|98006
98 | 98|49|F|executive|90291
99 | 99|20|M|student|63129
100 | 100|36|M|executive|90254
101 | 101|15|M|student|05146
102 | 102|38|M|programmer|30220
103 | 103|26|M|student|55108
104 | 104|27|M|student|55108
105 | 105|24|M|engineer|94043
106 | 106|61|M|retired|55125
107 | 107|39|M|scientist|60466
108 | 108|44|M|educator|63130
109 | 109|29|M|other|55423
110 | 110|19|M|student|77840
111 | 111|57|M|engineer|90630
112 | 112|30|M|salesman|60613
113 | 113|47|M|executive|95032
114 | 114|27|M|programmer|75013
115 | 115|31|M|engineer|17110
116 | 116|40|M|healthcare|97232
117 | 117|20|M|student|16125
118 | 118|21|M|administrator|90210
119 | 119|32|M|programmer|67401
120 | 120|47|F|other|06260
121 | 121|54|M|librarian|99603
122 | 122|32|F|writer|22206
123 | 123|48|F|artist|20008
124 | 124|34|M|student|60615
125 | 125|30|M|lawyer|22202
126 | 126|28|F|lawyer|20015
127 | 127|33|M|none|73439
128 | 128|24|F|marketing|20009
129 | 129|36|F|marketing|07039
130 | 130|20|M|none|60115
131 | 131|59|F|administrator|15237
132 | 132|24|M|other|94612
133 | 133|53|M|engineer|78602
134 | 134|31|M|programmer|80236
135 | 135|23|M|student|38401
136 | 136|51|M|other|97365
137 | 137|50|M|educator|84408
138 | 138|46|M|doctor|53211
139 | 139|20|M|student|08904
140 | 140|30|F|student|32250
141 | 141|49|M|programmer|36117
142 | 142|13|M|other|48118
143 | 143|42|M|technician|08832
144 | 144|53|M|programmer|20910
145 | 145|31|M|entertainment|V3N4P
146 | 146|45|M|artist|83814
147 | 147|40|F|librarian|02143
148 | 148|33|M|engineer|97006
149 | 149|35|F|marketing|17325
150 | 150|20|F|artist|02139
151 | 151|38|F|administrator|48103
152 | 152|33|F|educator|68767
153 | 153|25|M|student|60641
154 | 154|25|M|student|53703
155 | 155|32|F|other|11217
156 | 156|25|M|educator|08360
157 | 157|57|M|engineer|70808
158 | 158|50|M|educator|27606
159 | 159|23|F|student|55346
160 | 160|27|M|programmer|66215
161 | 161|50|M|lawyer|55104
162 | 162|25|M|artist|15610
163 | 163|49|M|administrator|97212
164 | 164|47|M|healthcare|80123
165 | 165|20|F|other|53715
166 | 166|47|M|educator|55113
167 | 167|37|M|other|L9G2B
168 | 168|48|M|other|80127
169 | 169|52|F|other|53705
170 | 170|53|F|healthcare|30067
171 | 171|48|F|educator|78750
172 | 172|55|M|marketing|22207
173 | 173|56|M|other|22306
174 | 174|30|F|administrator|52302
175 | 175|26|F|scientist|21911
176 | 176|28|M|scientist|07030
177 | 177|20|M|programmer|19104
178 | 178|26|M|other|49512
179 | 179|15|M|entertainment|20755
180 | 180|22|F|administrator|60202
181 | 181|26|M|executive|21218
182 | 182|36|M|programmer|33884
183 | 183|33|M|scientist|27708
184 | 184|37|M|librarian|76013
185 | 185|53|F|librarian|97403
186 | 186|39|F|executive|00000
187 | 187|26|M|educator|16801
188 | 188|42|M|student|29440
189 | 189|32|M|artist|95014
190 | 190|30|M|administrator|95938
191 | 191|33|M|administrator|95161
192 | 192|42|M|educator|90840
193 | 193|29|M|student|49931
194 | 194|38|M|administrator|02154
195 | 195|42|M|scientist|93555
196 | 196|49|M|writer|55105
197 | 197|55|M|technician|75094
198 | 198|21|F|student|55414
199 | 199|30|M|writer|17604
200 | 200|40|M|programmer|93402
201 | 201|27|M|writer|E2A4H
202 | 202|41|F|educator|60201
203 | 203|25|F|student|32301
204 | 204|52|F|librarian|10960
205 | 205|47|M|lawyer|06371
206 | 206|14|F|student|53115
207 | 207|39|M|marketing|92037
208 | 208|43|M|engineer|01720
209 | 209|33|F|educator|85710
210 | 210|39|M|engineer|03060
211 | 211|66|M|salesman|32605
212 | 212|49|F|educator|61401
213 | 213|33|M|executive|55345
214 | 214|26|F|librarian|11231
215 | 215|35|M|programmer|63033
216 | 216|22|M|engineer|02215
217 | 217|22|M|other|11727
218 | 218|37|M|administrator|06513
219 | 219|32|M|programmer|43212
220 | 220|30|M|librarian|78205
221 | 221|19|M|student|20685
222 | 222|29|M|programmer|27502
223 | 223|19|F|student|47906
224 | 224|31|F|educator|43512
225 | 225|51|F|administrator|58202
226 | 226|28|M|student|92103
227 | 227|46|M|executive|60659
228 | 228|21|F|student|22003
229 | 229|29|F|librarian|22903
230 | 230|28|F|student|14476
231 | 231|48|M|librarian|01080
232 | 232|45|M|scientist|99709
233 | 233|38|M|engineer|98682
234 | 234|60|M|retired|94702
235 | 235|37|M|educator|22973
236 | 236|44|F|writer|53214
237 | 237|49|M|administrator|63146
238 | 238|42|F|administrator|44124
239 | 239|39|M|artist|95628
240 | 240|23|F|educator|20784
241 | 241|26|F|student|20001
242 | 242|33|M|educator|31404
243 | 243|33|M|educator|60201
244 | 244|28|M|technician|80525
245 | 245|22|M|student|55109
246 | 246|19|M|student|28734
247 | 247|28|M|engineer|20770
248 | 248|25|M|student|37235
249 | 249|25|M|student|84103
250 | 250|29|M|executive|95110
251 | 251|28|M|doctor|85032
252 | 252|42|M|engineer|07733
253 | 253|26|F|librarian|22903
254 | 254|44|M|educator|42647
255 | 255|23|M|entertainment|07029
256 | 256|35|F|none|39042
257 | 257|17|M|student|77005
258 | 258|19|F|student|77801
259 | 259|21|M|student|48823
260 | 260|40|F|artist|89801
261 | 261|28|M|administrator|85202
262 | 262|19|F|student|78264
263 | 263|41|M|programmer|55346
264 | 264|36|F|writer|90064
265 | 265|26|M|executive|84601
266 | 266|62|F|administrator|78756
267 | 267|23|M|engineer|83716
268 | 268|24|M|engineer|19422
269 | 269|31|F|librarian|43201
270 | 270|18|F|student|63119
271 | 271|51|M|engineer|22932
272 | 272|33|M|scientist|53706
273 | 273|50|F|other|10016
274 | 274|20|F|student|55414
275 | 275|38|M|engineer|92064
276 | 276|21|M|student|95064
277 | 277|35|F|administrator|55406
278 | 278|37|F|librarian|30033
279 | 279|33|M|programmer|85251
280 | 280|30|F|librarian|22903
281 | 281|15|F|student|06059
282 | 282|22|M|administrator|20057
283 | 283|28|M|programmer|55305
284 | 284|40|M|executive|92629
285 | 285|25|M|programmer|53713
286 | 286|27|M|student|15217
287 | 287|21|M|salesman|31211
288 | 288|34|M|marketing|23226
289 | 289|11|M|none|94619
290 | 290|40|M|engineer|93550
291 | 291|19|M|student|44106
292 | 292|35|F|programmer|94703
293 | 293|24|M|writer|60804
294 | 294|34|M|technician|92110
295 | 295|31|M|educator|50325
296 | 296|43|F|administrator|16803
297 | 297|29|F|educator|98103
298 | 298|44|M|executive|01581
299 | 299|29|M|doctor|63108
300 | 300|26|F|programmer|55106
301 | 301|24|M|student|55439
302 | 302|42|M|educator|77904
303 | 303|19|M|student|14853
304 | 304|22|F|student|71701
305 | 305|23|M|programmer|94086
306 | 306|45|M|other|73132
307 | 307|25|M|student|55454
308 | 308|60|M|retired|95076
309 | 309|40|M|scientist|70802
310 | 310|37|M|educator|91711
311 | 311|32|M|technician|73071
312 | 312|48|M|other|02110
313 | 313|41|M|marketing|60035
314 | 314|20|F|student|08043
315 | 315|31|M|educator|18301
316 | 316|43|F|other|77009
317 | 317|22|M|administrator|13210
318 | 318|65|M|retired|06518
319 | 319|38|M|programmer|22030
320 | 320|19|M|student|24060
321 | 321|49|F|educator|55413
322 | 322|20|M|student|50613
323 | 323|21|M|student|19149
324 | 324|21|F|student|02176
325 | 325|48|M|technician|02139
326 | 326|41|M|administrator|15235
327 | 327|22|M|student|11101
328 | 328|51|M|administrator|06779
329 | 329|48|M|educator|01720
330 | 330|35|F|educator|33884
331 | 331|33|M|entertainment|91344
332 | 332|20|M|student|40504
333 | 333|47|M|other|V0R2M
334 | 334|32|M|librarian|30002
335 | 335|45|M|executive|33775
336 | 336|23|M|salesman|42101
337 | 337|37|M|scientist|10522
338 | 338|39|F|librarian|59717
339 | 339|35|M|lawyer|37901
340 | 340|46|M|engineer|80123
341 | 341|17|F|student|44405
342 | 342|25|F|other|98006
343 | 343|43|M|engineer|30093
344 | 344|30|F|librarian|94117
345 | 345|28|F|librarian|94143
346 | 346|34|M|other|76059
347 | 347|18|M|student|90210
348 | 348|24|F|student|45660
349 | 349|68|M|retired|61455
350 | 350|32|M|student|97301
351 | 351|61|M|educator|49938
352 | 352|37|F|programmer|55105
353 | 353|25|M|scientist|28480
354 | 354|29|F|librarian|48197
355 | 355|25|M|student|60135
356 | 356|32|F|homemaker|92688
357 | 357|26|M|executive|98133
358 | 358|40|M|educator|10022
359 | 359|22|M|student|61801
360 | 360|51|M|other|98027
361 | 361|22|M|student|44074
362 | 362|35|F|homemaker|85233
363 | 363|20|M|student|87501
364 | 364|63|M|engineer|01810
365 | 365|29|M|lawyer|20009
366 | 366|20|F|student|50670
367 | 367|17|M|student|37411
368 | 368|18|M|student|92113
369 | 369|24|M|student|91335
370 | 370|52|M|writer|08534
371 | 371|36|M|engineer|99206
372 | 372|25|F|student|66046
373 | 373|24|F|other|55116
374 | 374|36|M|executive|78746
375 | 375|17|M|entertainment|37777
376 | 376|28|F|other|10010
377 | 377|22|M|student|18015
378 | 378|35|M|student|02859
379 | 379|44|M|programmer|98117
380 | 380|32|M|engineer|55117
381 | 381|33|M|artist|94608
382 | 382|45|M|engineer|01824
383 | 383|42|M|administrator|75204
384 | 384|52|M|programmer|45218
385 | 385|36|M|writer|10003
386 | 386|36|M|salesman|43221
387 | 387|33|M|entertainment|37412
388 | 388|31|M|other|36106
389 | 389|44|F|writer|83702
390 | 390|42|F|writer|85016
391 | 391|23|M|student|84604
392 | 392|52|M|writer|59801
393 | 393|19|M|student|83686
394 | 394|25|M|administrator|96819
395 | 395|43|M|other|44092
396 | 396|57|M|engineer|94551
397 | 397|17|M|student|27514
398 | 398|40|M|other|60008
399 | 399|25|M|other|92374
400 | 400|33|F|administrator|78213
401 | 401|46|F|healthcare|84107
402 | 402|30|M|engineer|95129
403 | 403|37|M|other|06811
404 | 404|29|F|programmer|55108
405 | 405|22|F|healthcare|10019
406 | 406|52|M|educator|93109
407 | 407|29|M|engineer|03261
408 | 408|23|M|student|61755
409 | 409|48|M|administrator|98225
410 | 410|30|F|artist|94025
411 | 411|34|M|educator|44691
412 | 412|25|M|educator|15222
413 | 413|55|M|educator|78212
414 | 414|24|M|programmer|38115
415 | 415|39|M|educator|85711
416 | 416|20|F|student|92626
417 | 417|27|F|other|48103
418 | 418|55|F|none|21206
419 | 419|37|M|lawyer|43215
420 | 420|53|M|educator|02140
421 | 421|38|F|programmer|55105
422 | 422|26|M|entertainment|94533
423 | 423|64|M|other|91606
424 | 424|36|F|marketing|55422
425 | 425|19|M|student|58644
426 | 426|55|M|educator|01602
427 | 427|51|M|doctor|85258
428 | 428|28|M|student|55414
429 | 429|27|M|student|29205
430 | 430|38|M|scientist|98199
431 | 431|24|M|marketing|92629
432 | 432|22|M|entertainment|50311
433 | 433|27|M|artist|11211
434 | 434|16|F|student|49705
435 | 435|24|M|engineer|60007
436 | 436|30|F|administrator|17345
437 | 437|27|F|other|20009
438 | 438|51|F|administrator|43204
439 | 439|23|F|administrator|20817
440 | 440|30|M|other|48076
441 | 441|50|M|technician|55013
442 | 442|22|M|student|85282
443 | 443|35|M|salesman|33308
444 | 444|51|F|lawyer|53202
445 | 445|21|M|writer|92653
446 | 446|57|M|educator|60201
447 | 447|30|M|administrator|55113
448 | 448|23|M|entertainment|10021
449 | 449|23|M|librarian|55021
450 | 450|35|F|educator|11758
451 | 451|16|M|student|48446
452 | 452|35|M|administrator|28018
453 | 453|18|M|student|06333
454 | 454|57|M|other|97330
455 | 455|48|M|administrator|83709
456 | 456|24|M|technician|31820
457 | 457|33|F|salesman|30011
458 | 458|47|M|technician|Y1A6B
459 | 459|22|M|student|29201
460 | 460|44|F|other|60630
461 | 461|15|M|student|98102
462 | 462|19|F|student|02918
463 | 463|48|F|healthcare|75218
464 | 464|60|M|writer|94583
465 | 465|32|M|other|05001
466 | 466|22|M|student|90804
467 | 467|29|M|engineer|91201
468 | 468|28|M|engineer|02341
469 | 469|60|M|educator|78628
470 | 470|24|M|programmer|10021
471 | 471|10|M|student|77459
472 | 472|24|M|student|87544
473 | 473|29|M|student|94708
474 | 474|51|M|executive|93711
475 | 475|30|M|programmer|75230
476 | 476|28|M|student|60440
477 | 477|23|F|student|02125
478 | 478|29|M|other|10019
479 | 479|30|M|educator|55409
480 | 480|57|M|retired|98257
481 | 481|73|M|retired|37771
482 | 482|18|F|student|40256
483 | 483|29|M|scientist|43212
484 | 484|27|M|student|21208
485 | 485|44|F|educator|95821
486 | 486|39|M|educator|93101
487 | 487|22|M|engineer|92121
488 | 488|48|M|technician|21012
489 | 489|55|M|other|45218
490 | 490|29|F|artist|V5A2B
491 | 491|43|F|writer|53711
492 | 492|57|M|educator|94618
493 | 493|22|M|engineer|60090
494 | 494|38|F|administrator|49428
495 | 495|29|M|engineer|03052
496 | 496|21|F|student|55414
497 | 497|20|M|student|50112
498 | 498|26|M|writer|55408
499 | 499|42|M|programmer|75006
500 | 500|28|M|administrator|94305
501 | 501|22|M|student|10025
502 | 502|22|M|student|23092
503 | 503|50|F|writer|27514
504 | 504|40|F|writer|92115
505 | 505|27|F|other|20657
506 | 506|46|M|programmer|03869
507 | 507|18|F|writer|28450
508 | 508|27|M|marketing|19382
509 | 509|23|M|administrator|10011
510 | 510|34|M|other|98038
511 | 511|22|M|student|21250
512 | 512|29|M|other|20090
513 | 513|43|M|administrator|26241
514 | 514|27|M|programmer|20707
515 | 515|53|M|marketing|49508
516 | 516|53|F|librarian|10021
517 | 517|24|M|student|55454
518 | 518|49|F|writer|99709
519 | 519|22|M|other|55320
520 | 520|62|M|healthcare|12603
521 | 521|19|M|student|02146
522 | 522|36|M|engineer|55443
523 | 523|50|F|administrator|04102
524 | 524|56|M|educator|02159
525 | 525|27|F|administrator|19711
526 | 526|30|M|marketing|97124
527 | 527|33|M|librarian|12180
528 | 528|18|M|student|55104
529 | 529|47|F|administrator|44224
530 | 530|29|M|engineer|94040
531 | 531|30|F|salesman|97408
532 | 532|20|M|student|92705
533 | 533|43|M|librarian|02324
534 | 534|20|M|student|05464
535 | 535|45|F|educator|80302
536 | 536|38|M|engineer|30078
537 | 537|36|M|engineer|22902
538 | 538|31|M|scientist|21010
539 | 539|53|F|administrator|80303
540 | 540|28|M|engineer|91201
541 | 541|19|F|student|84302
542 | 542|21|M|student|60515
543 | 543|33|M|scientist|95123
544 | 544|44|F|other|29464
545 | 545|27|M|technician|08052
546 | 546|36|M|executive|22911
547 | 547|50|M|educator|14534
548 | 548|51|M|writer|95468
549 | 549|42|M|scientist|45680
550 | 550|16|F|student|95453
551 | 551|25|M|programmer|55414
552 | 552|45|M|other|68147
553 | 553|58|M|educator|62901
554 | 554|32|M|scientist|62901
555 | 555|29|F|educator|23227
556 | 556|35|F|educator|30606
557 | 557|30|F|writer|11217
558 | 558|56|F|writer|63132
559 | 559|69|M|executive|10022
560 | 560|32|M|student|10003
561 | 561|23|M|engineer|60005
562 | 562|54|F|administrator|20879
563 | 563|39|F|librarian|32707
564 | 564|65|M|retired|94591
565 | 565|40|M|student|55422
566 | 566|20|M|student|14627
567 | 567|24|M|entertainment|10003
568 | 568|39|M|educator|01915
569 | 569|34|M|educator|91903
570 | 570|26|M|educator|14627
571 | 571|34|M|artist|01945
572 | 572|51|M|educator|20003
573 | 573|68|M|retired|48911
574 | 574|56|M|educator|53188
575 | 575|33|M|marketing|46032
576 | 576|48|M|executive|98281
577 | 577|36|F|student|77845
578 | 578|31|M|administrator|M7A1A
579 | 579|32|M|educator|48103
580 | 580|16|M|student|17961
581 | 581|37|M|other|94131
582 | 582|17|M|student|93003
583 | 583|44|M|engineer|29631
584 | 584|25|M|student|27511
585 | 585|69|M|librarian|98501
586 | 586|20|M|student|79508
587 | 587|26|M|other|14216
588 | 588|18|F|student|93063
589 | 589|21|M|lawyer|90034
590 | 590|50|M|educator|82435
591 | 591|57|F|librarian|92093
592 | 592|18|M|student|97520
593 | 593|31|F|educator|68767
594 | 594|46|M|educator|M4J2K
595 | 595|25|M|programmer|31909
596 | 596|20|M|artist|77073
597 | 597|23|M|other|84116
598 | 598|40|F|marketing|43085
599 | 599|22|F|student|R3T5K
600 | 600|34|M|programmer|02320
601 | 601|19|F|artist|99687
602 | 602|47|F|other|34656
603 | 603|21|M|programmer|47905
604 | 604|39|M|educator|11787
605 | 605|33|M|engineer|33716
606 | 606|28|M|programmer|63044
607 | 607|49|F|healthcare|02154
608 | 608|22|M|other|10003
609 | 609|13|F|student|55106
610 | 610|22|M|student|21227
611 | 611|46|M|librarian|77008
612 | 612|36|M|educator|79070
613 | 613|37|F|marketing|29678
614 | 614|54|M|educator|80227
615 | 615|38|M|educator|27705
616 | 616|55|M|scientist|50613
617 | 617|27|F|writer|11201
618 | 618|15|F|student|44212
619 | 619|17|M|student|44134
620 | 620|18|F|writer|81648
621 | 621|17|M|student|60402
622 | 622|25|M|programmer|14850
623 | 623|50|F|educator|60187
624 | 624|19|M|student|30067
625 | 625|27|M|programmer|20723
626 | 626|23|M|scientist|19807
627 | 627|24|M|engineer|08034
628 | 628|13|M|none|94306
629 | 629|46|F|other|44224
630 | 630|26|F|healthcare|55408
631 | 631|18|F|student|38866
632 | 632|18|M|student|55454
633 | 633|35|M|programmer|55414
634 | 634|39|M|engineer|T8H1N
635 | 635|22|M|other|23237
636 | 636|47|M|educator|48043
637 | 637|30|M|other|74101
638 | 638|45|M|engineer|01940
639 | 639|42|F|librarian|12065
640 | 640|20|M|student|61801
641 | 641|24|M|student|60626
642 | 642|18|F|student|95521
643 | 643|39|M|scientist|55122
644 | 644|51|M|retired|63645
645 | 645|27|M|programmer|53211
646 | 646|17|F|student|51250
647 | 647|40|M|educator|45810
648 | 648|43|M|engineer|91351
649 | 649|20|M|student|39762
650 | 650|42|M|engineer|83814
651 | 651|65|M|retired|02903
652 | 652|35|M|other|22911
653 | 653|31|M|executive|55105
654 | 654|27|F|student|78739
655 | 655|50|F|healthcare|60657
656 | 656|48|M|educator|10314
657 | 657|26|F|none|78704
658 | 658|33|M|programmer|92626
659 | 659|31|M|educator|54248
660 | 660|26|M|student|77380
661 | 661|28|M|programmer|98121
662 | 662|55|M|librarian|19102
663 | 663|26|M|other|19341
664 | 664|30|M|engineer|94115
665 | 665|25|M|administrator|55412
666 | 666|44|M|administrator|61820
667 | 667|35|M|librarian|01970
668 | 668|29|F|writer|10016
669 | 669|37|M|other|20009
670 | 670|30|M|technician|21114
671 | 671|21|M|programmer|91919
672 | 672|54|F|administrator|90095
673 | 673|51|M|educator|22906
674 | 674|13|F|student|55337
675 | 675|34|M|other|28814
676 | 676|30|M|programmer|32712
677 | 677|20|M|other|99835
678 | 678|50|M|educator|61462
679 | 679|20|F|student|54302
680 | 680|33|M|lawyer|90405
681 | 681|44|F|marketing|97208
682 | 682|23|M|programmer|55128
683 | 683|42|M|librarian|23509
684 | 684|28|M|student|55414
685 | 685|32|F|librarian|55409
686 | 686|32|M|educator|26506
687 | 687|31|F|healthcare|27713
688 | 688|37|F|administrator|60476
689 | 689|25|M|other|45439
690 | 690|35|M|salesman|63304
691 | 691|34|M|educator|60089
692 | 692|34|M|engineer|18053
693 | 693|43|F|healthcare|85210
694 | 694|60|M|programmer|06365
695 | 695|26|M|writer|38115
696 | 696|55|M|other|94920
697 | 697|25|M|other|77042
698 | 698|28|F|programmer|06906
699 | 699|44|M|other|96754
700 | 700|17|M|student|76309
701 | 701|51|F|librarian|56321
702 | 702|37|M|other|89104
703 | 703|26|M|educator|49512
704 | 704|51|F|librarian|91105
705 | 705|21|F|student|54494
706 | 706|23|M|student|55454
707 | 707|56|F|librarian|19146
708 | 708|26|F|homemaker|96349
709 | 709|21|M|other|N4T1A
710 | 710|19|M|student|92020
711 | 711|22|F|student|15203
712 | 712|22|F|student|54901
713 | 713|42|F|other|07204
714 | 714|26|M|engineer|55343
715 | 715|21|M|technician|91206
716 | 716|36|F|administrator|44265
717 | 717|24|M|technician|84105
718 | 718|42|M|technician|64118
719 | 719|37|F|other|V0R2H
720 | 720|49|F|administrator|16506
721 | 721|24|F|entertainment|11238
722 | 722|50|F|homemaker|17331
723 | 723|26|M|executive|94403
724 | 724|31|M|executive|40243
725 | 725|21|M|student|91711
726 | 726|25|F|administrator|80538
727 | 727|25|M|student|78741
728 | 728|58|M|executive|94306
729 | 729|19|M|student|56567
730 | 730|31|F|scientist|32114
731 | 731|41|F|educator|70403
732 | 732|28|F|other|98405
733 | 733|44|F|other|60630
734 | 734|25|F|other|63108
735 | 735|29|F|healthcare|85719
736 | 736|48|F|writer|94618
737 | 737|30|M|programmer|98072
738 | 738|35|M|technician|95403
739 | 739|35|M|technician|73162
740 | 740|25|F|educator|22206
741 | 741|25|M|writer|63108
742 | 742|35|M|student|29210
743 | 743|31|M|programmer|92660
744 | 744|35|M|marketing|47024
745 | 745|42|M|writer|55113
746 | 746|25|M|engineer|19047
747 | 747|19|M|other|93612
748 | 748|28|M|administrator|94720
749 | 749|33|M|other|80919
750 | 750|28|M|administrator|32303
751 | 751|24|F|other|90034
752 | 752|60|M|retired|21201
753 | 753|56|M|salesman|91206
754 | 754|59|F|librarian|62901
755 | 755|44|F|educator|97007
756 | 756|30|F|none|90247
757 | 757|26|M|student|55104
758 | 758|27|M|student|53706
759 | 759|20|F|student|68503
760 | 760|35|F|other|14211
761 | 761|17|M|student|97302
762 | 762|32|M|administrator|95050
763 | 763|27|M|scientist|02113
764 | 764|27|F|educator|62903
765 | 765|31|M|student|33066
766 | 766|42|M|other|10960
767 | 767|70|M|engineer|00000
768 | 768|29|M|administrator|12866
769 | 769|39|M|executive|06927
770 | 770|28|M|student|14216
771 | 771|26|M|student|15232
772 | 772|50|M|writer|27105
773 | 773|20|M|student|55414
774 | 774|30|M|student|80027
775 | 775|46|M|executive|90036
776 | 776|30|M|librarian|51157
777 | 777|63|M|programmer|01810
778 | 778|34|M|student|01960
779 | 779|31|M|student|K7L5J
780 | 780|49|M|programmer|94560
781 | 781|20|M|student|48825
782 | 782|21|F|artist|33205
783 | 783|30|M|marketing|77081
784 | 784|47|M|administrator|91040
785 | 785|32|M|engineer|23322
786 | 786|36|F|engineer|01754
787 | 787|18|F|student|98620
788 | 788|51|M|administrator|05779
789 | 789|29|M|other|55420
790 | 790|27|M|technician|80913
791 | 791|31|M|educator|20064
792 | 792|40|M|programmer|12205
793 | 793|22|M|student|85281
794 | 794|32|M|educator|57197
795 | 795|30|M|programmer|08610
796 | 796|32|F|writer|33755
797 | 797|44|F|other|62522
798 | 798|40|F|writer|64131
799 | 799|49|F|administrator|19716
800 | 800|25|M|programmer|55337
801 | 801|22|M|writer|92154
802 | 802|35|M|administrator|34105
803 | 803|70|M|administrator|78212
804 | 804|39|M|educator|61820
805 | 805|27|F|other|20009
806 | 806|27|M|marketing|11217
807 | 807|41|F|healthcare|93555
808 | 808|45|M|salesman|90016
809 | 809|50|F|marketing|30803
810 | 810|55|F|other|80526
811 | 811|40|F|educator|73013
812 | 812|22|M|technician|76234
813 | 813|14|F|student|02136
814 | 814|30|M|other|12345
815 | 815|32|M|other|28806
816 | 816|34|M|other|20755
817 | 817|19|M|student|60152
818 | 818|28|M|librarian|27514
819 | 819|59|M|administrator|40205
820 | 820|22|M|student|37725
821 | 821|37|M|engineer|77845
822 | 822|29|F|librarian|53144
823 | 823|27|M|artist|50322
824 | 824|31|M|other|15017
825 | 825|44|M|engineer|05452
826 | 826|28|M|artist|77048
827 | 827|23|F|engineer|80228
828 | 828|28|M|librarian|85282
829 | 829|48|M|writer|80209
830 | 830|46|M|programmer|53066
831 | 831|21|M|other|33765
832 | 832|24|M|technician|77042
833 | 833|34|M|writer|90019
834 | 834|26|M|other|64153
835 | 835|44|F|executive|11577
836 | 836|44|M|artist|10018
837 | 837|36|F|artist|55409
838 | 838|23|M|student|01375
839 | 839|38|F|entertainment|90814
840 | 840|39|M|artist|55406
841 | 841|45|M|doctor|47401
842 | 842|40|M|writer|93055
843 | 843|35|M|librarian|44212
844 | 844|22|M|engineer|95662
845 | 845|64|M|doctor|97405
846 | 846|27|M|lawyer|47130
847 | 847|29|M|student|55417
848 | 848|46|M|engineer|02146
849 | 849|15|F|student|25652
850 | 850|34|M|technician|78390
851 | 851|18|M|other|29646
852 | 852|46|M|administrator|94086
853 | 853|49|M|writer|40515
854 | 854|29|F|student|55408
855 | 855|53|M|librarian|04988
856 | 856|43|F|marketing|97215
857 | 857|35|F|administrator|V1G4L
858 | 858|63|M|educator|09645
859 | 859|18|F|other|06492
860 | 860|70|F|retired|48322
861 | 861|38|F|student|14085
862 | 862|25|M|executive|13820
863 | 863|17|M|student|60089
864 | 864|27|M|programmer|63021
865 | 865|25|M|artist|11231
866 | 866|45|M|other|60302
867 | 867|24|M|scientist|92507
868 | 868|21|M|programmer|55303
869 | 869|30|M|student|10025
870 | 870|22|M|student|65203
871 | 871|31|M|executive|44648
872 | 872|19|F|student|74078
873 | 873|48|F|administrator|33763
874 | 874|36|M|scientist|37076
875 | 875|24|F|student|35802
876 | 876|41|M|other|20902
877 | 877|30|M|other|77504
878 | 878|50|F|educator|98027
879 | 879|33|F|administrator|55337
880 | 880|13|M|student|83702
881 | 881|39|M|marketing|43017
882 | 882|35|M|engineer|40503
883 | 883|49|M|librarian|50266
884 | 884|44|M|engineer|55337
885 | 885|30|F|other|95316
886 | 886|20|M|student|61820
887 | 887|14|F|student|27249
888 | 888|41|M|scientist|17036
889 | 889|24|M|technician|78704
890 | 890|32|M|student|97301
891 | 891|51|F|administrator|03062
892 | 892|36|M|other|45243
893 | 893|25|M|student|95823
894 | 894|47|M|educator|74075
895 | 895|31|F|librarian|32301
896 | 896|28|M|writer|91505
897 | 897|30|M|other|33484
898 | 898|23|M|homemaker|61755
899 | 899|32|M|other|55116
900 | 900|60|M|retired|18505
901 | 901|38|M|executive|L1V3W
902 | 902|45|F|artist|97203
903 | 903|28|M|educator|20850
904 | 904|17|F|student|61073
905 | 905|27|M|other|30350
906 | 906|45|M|librarian|70124
907 | 907|25|F|other|80526
908 | 908|44|F|librarian|68504
909 | 909|50|F|educator|53171
910 | 910|28|M|healthcare|29301
911 | 911|37|F|writer|53210
912 | 912|51|M|other|06512
913 | 913|27|M|student|76201
914 | 914|44|F|other|08105
915 | 915|50|M|entertainment|60614
916 | 916|27|M|engineer|N2L5N
917 | 917|22|F|student|20006
918 | 918|40|M|scientist|70116
919 | 919|25|M|other|14216
920 | 920|30|F|artist|90008
921 | 921|20|F|student|98801
922 | 922|29|F|administrator|21114
923 | 923|21|M|student|E2E3R
924 | 924|29|M|other|11753
925 | 925|18|F|salesman|49036
926 | 926|49|M|entertainment|01701
927 | 927|23|M|programmer|55428
928 | 928|21|M|student|55408
929 | 929|44|M|scientist|53711
930 | 930|28|F|scientist|07310
931 | 931|60|M|educator|33556
932 | 932|58|M|educator|06437
933 | 933|28|M|student|48105
934 | 934|61|M|engineer|22902
935 | 935|42|M|doctor|66221
936 | 936|24|M|other|32789
937 | 937|48|M|educator|98072
938 | 938|38|F|technician|55038
939 | 939|26|F|student|33319
940 | 940|32|M|administrator|02215
941 | 941|20|M|student|97229
942 | 942|48|F|librarian|78209
943 | 943|22|M|student|77841
944 |
--------------------------------------------------------------------------------
/toyDataset/__init__.py:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/toyDataset/__init__.py
--------------------------------------------------------------------------------
/toyDataset/__pycache__/__init__.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/toyDataset/__pycache__/__init__.cpython-37.pyc
--------------------------------------------------------------------------------
/toyDataset/__pycache__/loaddata.cpython-37.pyc:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/talkingwallace/NGCF-pytorch/13515f7d09e3ef580fa500fad337683ce21f3186/toyDataset/__pycache__/loaddata.cpython-37.pyc
--------------------------------------------------------------------------------
/toyDataset/loaddata.py:
--------------------------------------------------------------------------------
1 | import pandas as pd
2 | from os import path
3 |
# Directory that holds the 100k dataset files (u.data, u.item, u.user).
# It is resolved relative to this module and assembled with path.join so the
# separator matches the host OS; a literal backslash is only a separator on
# Windows and would otherwise become part of the directory name.
path100k = path.join(path.dirname(path.abspath(__file__)), '1K')
7 |
def load100KRatings():
    """Load the ratings file ``u.data`` from the 100k dataset directory.

    The file is read as tab-separated text without a header row; its four
    columns are labelled ``userId``, ``itemId``, ``rating`` and ``timestamp``.

    Returns:
        pandas.DataFrame: one row per line of ``u.data``.
    """
    # path.join picks the separator of the host OS; a literal backslash only
    # acts as a path separator on Windows.
    ratings_file = path.join(path100k, 'u.data')
    return pd.read_table(
        ratings_file,
        sep='\t',
        names=['userId', 'itemId', 'rating', 'timestamp'],
    )
11 |
def load100KItemSide():
    """Load the item (movie) side information from ``u.item``.

    The file is parsed as ``|``-delimited text without a header row and is
    labelled with 24 column names: ``itemId``, four descriptive fields and
    19 genre columns.

    NOTE: apart from ``itemId`` the labels keep the padding spaces of the
    header string they are split from (e.g. ``' movie title '``). They are
    left untouched on purpose, since callers may index by those exact labels.

    Returns:
        pandas.DataFrame: one row per line of ``u.item``.
    """
    column_names = "itemId| movie title | release date | video release date | IMDb URL | unknown | Action | Adventure | Animation | Children's | Comedy | Crime | Documentary | Drama | Fantasy | Film-Noir | Horror | Musical | Mystery | Romance | Sci-Fi | Thriller | War | Western ".split('|')
    item_file = path.join(path100k, 'u.item')
    # Decode as UTF-8 and silently drop bytes that are not valid UTF-8.
    # The built-in open() is used because codecs.open() is deprecated in
    # recent Python releases.
    with open(item_file, 'r', encoding='utf-8', errors='ignore') as f:
        movies = pd.read_table(f, delimiter='|', header=None, names=column_names)
    return movies
17 |
def load100kUserSide():
    """Load the user side information from ``u.user``.

    The file is parsed as ``|``-delimited text without a header row. The
    resulting columns are ``'userId'``, ``' age '``, ``' gender '``,
    ``' occupation '`` and ``' zip code'``.

    NOTE: the padding spaces in the labels come from the header string they
    are split from and are kept as-is, since callers may index by those exact
    labels.

    Returns:
        pandas.DataFrame: one row per line of ``u.user``.
    """
    column_names = "userId| age | gender | occupation | zip code".split('|')
    user_file = path.join(path100k, 'u.user')
    # Decode as UTF-8 and silently drop bytes that are not valid UTF-8.
    # The built-in open() is used because codecs.open() is deprecated in
    # recent Python releases.
    with open(user_file, 'r', encoding='utf-8', errors='ignore') as f:
        users = pd.read_table(f, delimiter='|', header=None, names=column_names)
    return users
--------------------------------------------------------------------------------