├── Data
    ├── MGTAB
    │   └── MGTAB.txt
    ├── Cresci15
    │   └── Cresci15.txt
    └── Twibot20
    │   └── Twibot20.txt
├── config
    ├── MGTAB.ini
    ├── Cresci15.ini
    └── Twibot20.ini
├── config.py
├── LICENSE
├── README.md
├── utils.py
├── Dataset.py
├── GNN.py
├── RFGNNmodels.py
├── RF-GNN.py
└── models.py


/Data/MGTAB/MGTAB.txt:
--------------------------------------------------------------------------------
1 | https://drive.google.com/uc?export=download&id=1XfLYIz4M3KPnVpsEUwRMddSs548y29a5
2 | 


--------------------------------------------------------------------------------
/Data/Cresci15/Cresci15.txt:
--------------------------------------------------------------------------------
1 | https://drive.google.com/uc?export=download&id=13J-UkHZ6tuZedOI0RUgEoHiMIJRGAdNC
2 | 


--------------------------------------------------------------------------------
/Data/Twibot20/Twibot20.txt:
--------------------------------------------------------------------------------
1 |  https://drive.google.com/uc?export=download&id=1VtpWZzzRyze_5xIy2f1T6jV5lzyj1Oc9
2 | 


--------------------------------------------------------------------------------
/config/MGTAB.ini:
--------------------------------------------------------------------------------
 1 | [Model_Setup]
 2 | model_num = 10
 3 | relation_select = [0,1]
 4 | random_seed = [0,1,2,3,4]
 5 | epochs = 200
 6 | lr = 1e-3
 7 | weight_decay = 5e-4
 8 | hidden_dimension = 128
 9 | dropout = 0.3
10 | alpha = 0.6
11 | beta = 0.9
12 | gamma = 0.8
13 | 


--------------------------------------------------------------------------------
/config/Cresci15.ini:
--------------------------------------------------------------------------------
 1 | [Model_Setup]
 2 | model_num = 10
 3 | relation_select = [0,1]
 4 | random_seed = [0,1,2,3,4]
 5 | epochs = 200
 6 | lr = 1e-3
 7 | weight_decay = 5e-4
 8 | hidden_dimension = 128
 9 | dropout = 0.3
10 | alpha = 0.95
11 | beta = 0.95
12 | gamma = 0.95
13 | 


--------------------------------------------------------------------------------
/config/Twibot20.ini:
--------------------------------------------------------------------------------
 1 | [Model_Setup]
 2 | model_num = 10
 3 | relation_select = [0,1]
 4 | random_seed = [0,1,2,3,4]
 5 | epochs = 200
 6 | lr = 1e-3
 7 | weight_decay = 5e-4
 8 | hidden_dimension = 128
 9 | dropout = 0.3
10 | alpha = 0.8
11 | beta = 0.8
12 | gamma = 0.9
13 | 


--------------------------------------------------------------------------------
/config.py:
--------------------------------------------------------------------------------
 1 | import configparser
 2 | 
 3 | class Config(object):
 4 |     def __init__(self, config_file):
 5 |         conf = configparser.ConfigParser()
 6 |         try:
 7 |             conf.read(config_file)
 8 |         except:
 9 |             print("loading config: %s failed" % (config_file))
10 |         
11 |         #Hyper-parameter
12 |         self.model_num = conf.getint("Model_Setup", "model_num")
13 |         self.relation_select = conf.get("Model_Setup", "relation_select")
14 |         self.random_seed = conf.get("Model_Setup", "random_seed")
15 |         self.epochs = conf.getint("Model_Setup", "epochs")
16 |         self.lr = conf.getfloat("Model_Setup", "lr")
17 |         self.weight_decay = conf.getfloat("Model_Setup", "weight_decay")
18 |         self.hidden_dimension = conf.getint("Model_Setup", "hidden_dimension")
19 |         self.dropout = conf.getfloat("Model_Setup", "dropout")
20 |         self.alpha = conf.getfloat("Model_Setup", "alpha")
21 |         self.beta = conf.getfloat("Model_Setup", "beta")
22 |         self.gamma = conf.getfloat("Model_Setup", "gamma")
23 | 


--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
 1 | MIT License
 2 | 
 3 | Copyright (c) 2023 GraphDetec
 4 | 
 5 | Permission is hereby granted, free of charge, to any person obtaining a copy
 6 | of this software and associated documentation files (the "Software"), to deal
 7 | in the Software without restriction, including without limitation the rights
 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
 9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
22 | 


--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
 1 | # RF-GNN
 2 | RF-GNN: Random Forest Boosted Graph Neural Network for Social Bot Detection
 3 | 
 4 | # Environment Settings 
 5 | * python == 3.7   
 6 | * torch == 1.8.1+cu102	  
 7 | * numpy == 1.21.6  
 8 | * scipy == 1.7.2  
 9 | * pandas == 1.3.5	
10 | * scikit-learn == 1.0.2	 
11 | * torch-cluster == 1.5.9	
12 | * torch-geometric == 2.0.4	
13 | * torch-scatter == 2.0.8	
14 | * torch-sparse ==	0.6.12	
15 | * torch-spline-conv	== 1.2.1	
16 | 
17 | 
18 | # Usage 
19 | 
20 | ### RF-GNN
21 | 
22 | * **dataset**: including \[MGTAB, Twibot20, Cresci15\].  
23 | * **model**: including \['GCN', 'GAT', 'SAGE', 'RGCN', 'SGC'\].  
 24 | * **labelrate**: fraction of nodes used as labeled training data. (default = 0.1)
25 | 
26 | e.g.
27 | ````
28 | #run RF-GCN on MGTAB (label rate 0.05)
29 | python RF-GNN.py -dataset MGTAB -model GCN --labelrate 0.05
 30 | #run RF-GAT on Twibot-20
31 | python RF-GNN.py -dataset Twibot20 -model GAT -smote True
32 | ````
33 | 
34 | 
35 | ### RF-GNN-E and GNN
36 | 
37 | * **dataset**: including \[MGTAB, Twibot20, Cresci15\].  
38 | * **model**: including \['GCN', 'GAT', 'SAGE', 'RGCN', 'SGC'\].  
39 | * **ensemble**: including \[True, False\].  
 40 | * **labelrate**: fraction of nodes used as labeled training data. (default = 0.1)
41 | 
42 | e.g.
43 | ````
44 | #run RF-GCN-E on MGTAB
45 | python GNN.py -dataset MGTAB -model GCN -ensemble True
 46 | #run GCN on Cresci15
47 | python GNN.py -dataset Cresci15 -model GCN -ensemble False
48 | ````
49 | 
50 | 
51 | # Dataset
52 | 
53 | For TwiBot-20, please visit the [Twibot-20 github repository](https://github.com/BunsenFeng/TwiBot-20).
54 | For MGTAB please visit the [MGTAB github repository](https://github.com/GraphDetec/MGTAB).
 55 | For Cresci-15, please visit the [MGTAB github repository](https://github.com/GraphDetec/MGTAB).
56 | 
57 | 
58 | We also offer the processed data set: [Cresci-15](https://drive.google.com/uc?export=download&id=13J-UkHZ6tuZedOI0RUgEoHiMIJRGAdNC), [MGTAB](https://drive.google.com/uc?export=download&id=1XfLYIz4M3KPnVpsEUwRMddSs548y29a5), [Twibot-20](https://drive.google.com/uc?export=download&id=1VtpWZzzRyze_5xIy2f1T6jV5lzyj1Oc9).
59 | 
60 | 


--------------------------------------------------------------------------------
/utils.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | import torch.nn as nn
  3 | import scipy.sparse as sp
  4 | import numpy as np
  5 | import pandas as pd
  6 | import random
  7 | from sklearn.neighbors import NearestNeighbors
  8 | 
  9 | 
 10 | def sample_mask(idx, l):
 11 |     """Create mask."""
 12 |     mask = torch.zeros(l)
 13 |     mask[idx] = 1
 14 |     return torch.as_tensor(mask, dtype=torch.bool)
 15 | 
 16 | def init_weights(m):
 17 |     if type(m) == nn.Linear:
 18 |         nn.init.kaiming_uniform_(m.weight)
 19 | 
 20 | 
 21 | def normalize(mx):
 22 |     """Row-normalize sparse matrix"""
 23 |     rowsum = np.array(mx.sum(1))
 24 |     r_inv = np.power(rowsum, -1).flatten()
 25 |     r_inv[np.isinf(r_inv)] = 0
 26 |     r_mat_inv = sp.diags(r_inv)
 27 |     mx = r_mat_inv.dot(mx)
 28 | 
 29 |     return mx
 30 | 
 31 | 
 32 | def accuracy(output, labels):
 33 |     preds = output.max(1)[1].type_as(labels)
 34 | 
 35 |     correct = preds.eq(labels).double()
 36 |     correct = correct.sum()
 37 |     return correct / len(labels)
 38 | 
 39 | 
 40 | def sparse_mx_to_torch_sparse_tensor(sparse_mx):
 41 |     """Convert a scipy sparse matrix to a torch sparse tensor."""
 42 |     sparse_mx = sparse_mx.tocoo().astype(np.float32)
 43 | 
 44 |     indices = torch.from_numpy(
 45 |         np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64))
 46 | 
 47 |     values = torch.from_numpy(sparse_mx.data)
 48 |     shape = torch.Size(sparse_mx.shape)
 49 |     return torch.sparse.FloatTensor(indices, values, shape)
 50 | 
 51 | 
 52 | def mixup_data(X, Y, alpha=0.1, mul_factor=2):
 53 | 
 54 |     rs = np.random.RandomState(39)
 55 |     n = X.shape[0]
 56 | 
 57 |     mixed_X = torch.tensor(np.empty((n*(mul_factor-1), X.shape[1]))).cuda()
 58 |     mixed_Y = torch.tensor(np.empty(n*(mul_factor-1))).cuda()
 59 | 
 60 |     for i in range(mul_factor-1):
 61 | 
 62 |         # sample more than needed as some will be filtered out
 63 |         lam = np.random.beta(alpha, alpha, size=round(n*2))
 64 | 
 65 |         # original data vectors will be concatenated later
 66 |         lam = lam[(lam!=0.0) & (lam!=1.0)][:n][:, None]  # shape nx1
 67 | 
 68 |         shuffle_idx = rs.choice(np.arange(n), n, replace=False)
 69 | 
 70 |         mixed_X[i*n : (i+1)*n] = torch.tensor(lam).cuda() * X + (1 - torch.tensor(lam).cuda()) * X[shuffle_idx, :]
 71 |         mixed_Y[i*n : (i+1)*n] = torch.mul(torch.tensor(np.squeeze(lam)).cuda(), Y) + torch.mul((1 - torch.tensor(np.squeeze(lam)).cuda()), Y[shuffle_idx])
 72 | 
 73 |     # concatenate original data vectors
 74 |     # mixed_X = np.append(mixed_X, X, axis=0)
 75 |     # mixed_Y = np.append(mixed_Y, Y, axis=0)
 76 | 
 77 |     return mixed_X, mixed_Y
 78 | 
 79 | 
 80 | def get_tail_label(df):
 81 |     """
 82 |     Give tail label colums of the given target dataframe
 83 | 
 84 |     args
 85 |     df: pandas.DataFrame, target label df whose tail label has to identified
 86 | 
 87 |     return
 88 |     tail_label: list, a list containing column name of all the tail label
 89 |     """
 90 |     columns = df.columns
 91 |     n = len(columns)
 92 |     irpl = np.zeros(n)
 93 |     for column in range(n):
 94 |         irpl[column] = df[columns[column]].value_counts()[1]
 95 |     irpl = max(irpl) / irpl
 96 |     mir = np.average(irpl)
 97 |     tail_label = []
 98 |     for i in range(n):
 99 |         if irpl[i] > mir:
100 |             tail_label.append(columns[i])
101 |     return tail_label
102 | 
103 | 
104 | 
def nearest_neighbour(X):
    """
    Return the indices of the 5 nearest neighbours of every instance.

    The query set is the fitted set itself (Euclidean metric, KD-tree search).

    args
    X: np.array, instances whose nearest neighbours are to be found

    return
    indices: 2-D array, one row of 5 neighbour indices per row of X
    """
    knn = NearestNeighbors(n_neighbors=5, metric='euclidean', algorithm='kd_tree')
    knn.fit(X)
    _, indices = knn.kneighbors(X)
    return indices
118 | 
119 | 
def MLSMOTE(X, y, n_sample):
    """
    Augment the data with ``n_sample`` synthetic rows using MLSMOTE.

    args
    X: pandas.DataFrame (or array-like), input feature vectors
    y: pandas.DataFrame of one-hot targets (anything else is one-hot encoded
       with ``pd.get_dummies``)
    n_sample: int, number of newly generated samples

    return
    new_X: np.ndarray, original features followed by the synthetic ones
    target: np.ndarray, class index (arg-max of the one-hot target) per row
    """
    if not isinstance(X, pd.DataFrame):
        X = pd.DataFrame(X)
    if not isinstance(y, pd.DataFrame):
        y = pd.get_dummies(np.array(y))

    indices2 = nearest_neighbour(X)
    n = len(indices2)
    new_X = np.zeros((n_sample, X.shape[1]))
    target = np.zeros((n_sample, y.shape[1]))
    for i in range(n_sample):
        reference = random.randint(0, n - 1)
        # Column 0 is skipped when choosing the partner (presumably the
        # reference point itself, since the query set is the fitted set).
        neighbour = random.choice(indices2[reference, 1:])
        all_point = indices2[reference]
        # The neighbour indices are row POSITIONS, so use iloc: label-based
        # lookups would pick wrong rows for a non-default index.
        label_votes = y.iloc[all_point].sum(axis=0, skipna=True)
        # A label is set when more than 2 of the neighbourhood rows carry it.
        target[i] = np.array([1 if val > 2 else 0 for val in label_votes])
        ratio = random.random()
        reference_row = X.iloc[reference]
        # NOTE(review): the gap is (reference - neighbour), so the new point
        # lies on the far side of the reference rather than between the two
        # points as in textbook SMOTE. Kept as-is to preserve results.
        gap = reference_row - X.iloc[neighbour]
        new_X[i] = np.array(reference_row + ratio * gap)
    new_X = pd.DataFrame(new_X, columns=X.columns)
    target = pd.DataFrame(target, columns=y.columns)
    new_X = pd.concat([X, new_X], axis=0)
    target = pd.concat([y, target], axis=0)
    return new_X.values, np.argmax(target.values, axis=1)
157 | 
158 | 
def balance_MLSMOTE(labeled_X, labeled_y, n_sample):
    """Oversample every class with MLSMOTE up to a common target size.

    Args:
        labeled_X: 2-D numpy array of features.
        labeled_y: 1-D numpy array of integer class labels ``0..K-1``.
        n_sample: target number of samples per class, or ``None`` to use the
            size of the largest class.

    Returns:
        ``(X, y)`` numpy arrays grouped by class (class 0 first). Classes that
        already have at least the target size are returned unchanged.
    """
    num_classes = max(labeled_y) + 1
    one_hot_codes = np.eye(num_classes)

    X_list = []
    y_list = []
    for cls in range(num_classes):
        in_class = labeled_y == cls
        X_list.append(labeled_X[in_class, :])
        y_list.append(labeled_y[in_class])

    if n_sample is None:
        smote_num = max(len(y_cls) for y_cls in y_list)
    else:
        smote_num = n_sample

    X_parts = []
    y_parts = []
    for X_cls, y_cls in zip(X_list, y_list):
        missing = smote_num - len(y_cls)
        if missing > 0:
            # Full-width one-hot targets so that MLSMOTE's arg-max maps back
            # to the original class index.
            df_y = pd.DataFrame(one_hot_codes[y_cls])
            X_res, y_res = MLSMOTE(X_cls, df_y, missing)
        else:
            X_res, y_res = X_cls, y_cls
        X_parts.append(X_res)
        y_parts.append(y_res)

    X_smo = np.concatenate(X_parts, axis=0)
    y_smo = np.concatenate(y_parts, axis=0)
    return X_smo, np.squeeze(y_smo)


--------------------------------------------------------------------------------
/Dataset.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch_geometric.data import InMemoryDataset
  3 | from torch_geometric.data import Data
  4 | from utils import sample_mask
  5 | 
  6 | class cresci15(InMemoryDataset):
  7 |     def __init__(self, root, transform=None, pre_transform=None):
  8 |         super().__init__(root, transform, pre_transform)
  9 |         self.data, self.slices = torch.load(self.processed_paths[0])
 10 |         self.root = root
 11 | 
 12 |     @property
 13 |     def raw_file_names(self):
 14 |         return ['some_file_1', 'some_file_2', ...]
 15 | 
 16 |     @property
 17 |     def processed_file_names(self):
 18 |         return ['data.pt']
 19 | 
 20 | 
 21 |     def sample_mask(self, idx, l):
 22 |         """Create mask."""
 23 |         mask = torch.zeros(l)
 24 |         mask[idx] = 1
 25 |         return torch.as_tensor(mask, dtype=torch.bool)
 26 | 
 27 | 
 28 |     def process(self):
 29 |         # Read data into huge `Data` list.
 30 | 
 31 |         edge_index = torch.load(self.root + "/edge_index.pt")
 32 |         edge_type = torch.load(self.root + "/edge_type.pt")
 33 |         label = torch.load(self.root + "/label.pt")
 34 |         cat_prop = torch.load(self.root + "/cat_properties_tensor.pt")
 35 |         num_prop = torch.load(self.root + "/num_properties_tensor.pt")
 36 |         des_tensor = torch.load(self.root + "/des_tensor.pt")
 37 |         tweets_tensor = torch.load(self.root + "/tweets_tensor.pt")
 38 | 
 39 |         features = torch.cat([cat_prop, num_prop, des_tensor, tweets_tensor], axis=1)
 40 |         data = Data(x=features, y =label, edge_index=edge_index)
 41 |         data.edge_type = edge_type
 42 | 
 43 | 
 44 |         sample_number = len(data.y)
 45 | 
 46 |         train_idx = torch.load(self.root + "/train_idx.pt")
 47 |         val_idx = torch.load(self.root + "/test_idx.pt")
 48 |         test_idx = torch.load(self.root + "/val_idx.pt")
 49 | 
 50 |         data.train_mask = self.sample_mask(train_idx, sample_number)
 51 |         data.val_mask = self.sample_mask(val_idx, sample_number)
 52 |         data.test_mask = self.sample_mask(test_idx, sample_number)
 53 | 
 54 |         data_list = [data]
 55 | 
 56 |         if self.pre_filter is not None:
 57 |             data_list = [data for data in data_list if self.pre_filter(data)]
 58 | 
 59 |         if self.pre_transform is not None:
 60 |             data_list = [self.pre_transform(data) for data in data_list]
 61 | 
 62 |         data, slices = self.collate(data_list)
 63 |         torch.save((data, slices), self.processed_paths[0])
 64 | 
 65 | 
 66 | class Twibot20(InMemoryDataset):
 67 |     def __init__(self, root, transform=None, pre_transform=None):
 68 |         super().__init__(root, transform, pre_transform)
 69 |         self.data, self.slices = torch.load(self.processed_paths[0])
 70 |         self.root = root
 71 | 
 72 | 
 73 |     @property
 74 |     def raw_file_names(self):
 75 |         return ['some_file_1', 'some_file_2', ...]
 76 | 
 77 | 
 78 |     @property
 79 |     def processed_file_names(self):
 80 |         return ['data.pt']
 81 | 
 82 | 
 83 | 
 84 |     def process(self):
 85 |         labels = torch.load(self.root + "/label.pt")
 86 |         des_tensor = torch.load(self.root + "/des_tensor.pt")
 87 |         tweets_tensor1 = torch.load(self.root + "/tweets_tensor_p1.pt")
 88 |         tweets_tensor2 = torch.load(self.root + "/tweets_tensor_p2.pt")
 89 |         tweets_tensor = torch.cat([tweets_tensor1, tweets_tensor2], 0)
 90 |         num_prop = torch.load(self.root + "/num_prop.pt")
 91 |         category_prop = torch.load(self.root + "/category_prop.pt")
 92 |         edge_index = torch.load(self.root + "/edge_index.pt")
 93 |         edge_type = torch.load(self.root + "/edge_type.pt")
 94 |         x = torch.cat([des_tensor, tweets_tensor, num_prop, category_prop], 1)
 95 | 
 96 | 
 97 |         m0 = edge_index[0, :] > 11826
 98 |         m1 = edge_index[1, :] > 11826
 99 |         m = m0 + m1
100 |         x = x[:11826, :]
101 | 
102 |         data = Data(x=x, y=labels, edge_index=edge_index)
103 |         data.edge_index = edge_index[:, ~m]
104 |         data.edge_type = edge_type[~m]
105 |         sample_number = len(data.x)
106 | 
107 |         train_idx = range(8278)
108 |         val_idx = range(8278, 8278 + 2365)
109 |         test_idx = range(8278 + 2365, 8278 + 2365 + 1183)
110 | 
111 |         data.train_mask = sample_mask(train_idx, sample_number)
112 |         data.val_mask = sample_mask(val_idx, sample_number)
113 |         data.test_mask = sample_mask(test_idx, sample_number)
114 | 
115 |         data_list = [data]
116 | 
117 |         if self.pre_filter is not None:
118 |             data_list = [data for data in data_list if self.pre_filter(data)]
119 | 
120 |         if self.pre_transform is not None:
121 |             data_list = [self.pre_transform(data) for data in data_list]
122 | 
123 |         data, slices = self.collate(data_list)
124 |         torch.save((data, slices), self.processed_paths[0])
125 | 
126 | 
class MGTAB(InMemoryDataset):
    """In-memory PyG dataset assembled from pre-extracted tensors under ``root``.

    The graph carries two label sets: ``y1`` (stance) and ``y2`` (bot).
    Nodes are split by position into 70% train, 20% validation and 10% test.
    """

    def __init__(self, root, transform=None, pre_transform=None):
        super().__init__(root, transform, pre_transform)
        self.data, self.slices = torch.load(self.processed_paths[0])
        self.root = root

    @property
    def raw_file_names(self):
        # Placeholder names; this class defines no download step.
        return ['some_file_1', 'some_file_2', ...]

    @property
    def processed_file_names(self):
        return ['data.pt']

    def sample_mask(self, idx, l):
        """Create a boolean mask of length ``l`` that is True at ``idx``."""
        flags = torch.zeros(l)
        flags[idx] = 1
        return torch.as_tensor(flags, dtype=torch.bool)

    def process(self):
        """Build the graph from the tensor files in ``root`` and cache it."""
        base = self.root

        edge_index = torch.tensor(torch.load(base + "/edge_index.pt"), dtype=torch.int64)
        edge_type = torch.load(base + "/edge_type.pt")
        edge_weight = torch.load(base + "/edge_weight.pt")
        stance_label = torch.load(base + "/labels_stance.pt")
        bot_label = torch.load(base + "/labels_bot.pt")
        features = torch.load(base + "/features.pt").to(torch.float32)

        graph = Data(x=features, edge_index=edge_index)
        graph.edge_type = edge_type
        graph.edge_weight = edge_weight
        graph.y1 = stance_label
        graph.y2 = bot_label
        sample_number = len(graph.y1)

        # Positional split boundaries: [0, 0.7n) / [0.7n, 0.9n) / [0.9n, n).
        train_end = int(0.7 * sample_number)
        val_end = int(0.9 * sample_number)

        graph.train_mask = self.sample_mask(range(train_end), sample_number)
        graph.val_mask = self.sample_mask(range(train_end, val_end), sample_number)
        graph.test_mask = self.sample_mask(
            range(val_end, int(sample_number)), sample_number)

        data_list = [graph]

        if self.pre_filter is not None:
            data_list = [item for item in data_list if self.pre_filter(item)]

        if self.pre_transform is not None:
            data_list = [self.pre_transform(item) for item in data_list]

        data, slices = self.collate(data_list)
        torch.save((data, slices), self.processed_paths[0])


--------------------------------------------------------------------------------
/GNN.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | os.environ["CUDA_VISIBLE_DEVICES"] = "7"
  3 | import torch
  4 | import torch.nn as nn
  5 | from models import RGCN, GAT, GCN, SAGE, SGC
  6 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
  7 | from Dataset import cresci15, Twibot20, MGTAB
  8 | from utils import sample_mask, init_weights
  9 | import numpy as np
 10 | import argparse
 11 | import time
 12 | import json
 13 | from config import Config
 14 | 
 15 | parser = argparse.ArgumentParser()
 16 | parser.add_argument("-dataset", type=str, default='Twibot20', help="dataset", choices=['Twibot20','MGTAB','Cresci15'])
 17 | parser.add_argument("-ensemble", type=bool, default=True, help="whether use ensemble")
 18 | parser.add_argument('-model', type=str, default='GCN', choices=['GCN', 'GAT', 'GraphSage', 'RGCN', 'SGC'])
 19 | parser.add_argument('--labelrate', type=float, default=0.1, help='labelrate')
 20 | args = parser.parse_args()
 21 | print(args)
 22 | 
 23 | 
 24 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 25 | config_file = "./config/" + str(args.dataset) + ".ini"
 26 | config = Config(config_file)
 27 | 
 28 | 
 29 | if args.dataset == 'Twibot20':
 30 |     dataset = Twibot20('Data/Twibot20')
 31 | elif args.dataset == 'MGTAB':
 32 |     dataset = MGTAB('Data/MGTAB')
 33 | elif args.dataset == 'Cresci15':
 34 |     dataset = cresci15('Data/Cresci15')
 35 | 
 36 | 
 37 | data = dataset[0]
 38 | if args.dataset == 'MGTAB':
 39 |     data.y = data.y2
 40 | 
 41 | out_dim = 2
 42 | data = data.to(device)
 43 | sample_number = len(data.y)
 44 | 
 45 | index_select_list = (data.edge_type == 100)
 46 | relation_dict = {
 47 |     0:'followers',
 48 |     1:'friends'
 49 | }
 50 | 
 51 | relation_select_list = json.loads(config.relation_select)
 52 | relation_num = len(relation_select_list)
 53 | print('relation used:', end=' ')
 54 | for features_index in relation_select_list:
 55 |         index_select_list = index_select_list + (features_index == data.edge_type)
 56 |         print('{}'.format(relation_dict[features_index]), end='  ')
 57 | edge_index = data.edge_index[:, index_select_list]
 58 | edge_type = data.edge_type[index_select_list]
 59 | 
 60 | 
 61 | def main(seed):
 62 | 
 63 |     np.random.seed(seed)
 64 |     torch.manual_seed(seed)
 65 | 
 66 |     node_id = np.arange(data.num_nodes)
 67 |     np.random.shuffle(node_id)
 68 |     data.n_id = torch.arange(data.num_nodes)
 69 |     data.train_id = node_id[:int(data.num_nodes * args.labelrate)]
 70 |     data.val_id = node_id[int(data.num_nodes * 0.1):int(data.num_nodes * 0.2)]
 71 |     data.test_id = node_id[int(data.num_nodes * 0.2):]
 72 | 
 73 |     data.train_mask = sample_mask(data.train_id, sample_number)
 74 |     data.val_mask = sample_mask(data.val_id, sample_number)
 75 |     data.test_mask = sample_mask(data.test_id, sample_number)
 76 | 
 77 |     test_mask = data.test_mask
 78 |     train_mask = data.train_mask
 79 |     val_mask = data.val_mask
 80 | 
 81 |     fdim = data.x.shape[1]
 82 |     embedding_size = fdim
 83 | 
 84 |     results = torch.zeros(data.x.shape[0], out_dim).to(device)
 85 |     if args.ensemble:
 86 |         model_num = config.model_num
 87 |     else:
 88 |         model_num = 1
 89 | 
 90 | 
 91 |     for num in range(model_num):
 92 |         print('traning {}th model'.format(num + 1))
 93 |         if args.model == 'RGCN':
 94 |             model = RGCN(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
 95 |         elif args.model == 'GCN':
 96 |             model = GCN(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
 97 |         elif args.model == 'GAT':
 98 |             model = GAT(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
 99 |         elif args.model == 'GraphSage':
100 |             model = SAGE(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
101 |         elif args.model == 'SGC':
102 |             model = SGC(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
103 |         loss = nn.CrossEntropyLoss()
104 |         optimizer = torch.optim.AdamW(model.parameters(),
105 |                                       lr=config.lr, weight_decay=config.weight_decay)
106 | 
107 |         model.apply(init_weights)
108 | 
109 | 
110 |         for epoch in range(config.epochs):
111 |             model.train()
112 |             output = model(data.x, edge_index, edge_type)
113 |             loss_train = loss(output[data.train_mask], data.y[data.train_mask])
114 |             out = output.max(1)[1].to('cpu').detach().numpy()
115 |             label = data.y.to('cpu').detach().numpy()
116 |             acc_train = accuracy_score(out[train_mask], label[train_mask])
117 |             acc_val = accuracy_score(out[val_mask], label[val_mask])
118 |             optimizer.zero_grad()
119 |             loss_train.backward()
120 |             optimizer.step()
121 |             if (epoch + 1)%100 == 0:
122 |                 print('Epoch: {:04d}'.format(epoch + 1),
123 |                       'loss_train: {:.4f}'.format(loss_train.item()),
124 |                       'acc_train: {:.4f}'.format(acc_train.item()),
125 |                       'acc_val: {:.4f}'.format(acc_val.item()))
126 | 
127 |         model.eval()
128 |         output = model(data.x, edge_index, edge_type)
129 |         label = data.y.to('cpu').detach().numpy()
130 |         out = output.max(1)[1].to('cpu').detach().numpy()
131 |         acc_test = accuracy_score(out[test_mask], label[test_mask])
132 |         f1 = f1_score(out[test_mask], label[test_mask], average='macro')
133 |         precision = precision_score(out[test_mask], label[test_mask], average='macro')
134 |         recall = recall_score(out[test_mask], label[test_mask], average='macro')
135 |         print('acc_test {:.4f}'.format(acc_test),
136 |               'f1_test: {:.4f}'.format(f1.item()),
137 |               'precision_test: {:.4f}'.format(precision.item()),
138 |               'recall_test: {:.4f}'.format(recall.item()))
139 |         results = results + output
140 |     results_out = results.max(1)[1].to('cpu').detach().numpy()
141 |     acc_test = accuracy_score(results_out[test_mask], label[test_mask])
142 |     f1 = f1_score(results_out[test_mask], label[test_mask], average='macro')
143 |     precision = precision_score(results_out[test_mask], label[test_mask], average='macro')
144 |     recall = recall_score(results_out[test_mask], label[test_mask], average='macro')
145 | 
146 |     return acc_test, precision, recall, f1
147 | 
148 | 
149 | 
150 | 
if __name__ == "__main__":

    # Run one training round per configured seed, then report the mean and
    # standard deviation (in percent) of each test metric.
    start_time = time.time()
    acc_list, precision_list, recall_list, f1_list = [], [], [], []

    seeds = json.loads(config.random_seed)
    for round_no, seed in enumerate(seeds, start=1):
        print('traning {}th round'.format(round_no))
        acc, precision, recall, f1 = main(seed)
        for history, value in ((acc_list, acc),
                               (precision_list, precision),
                               (recall_list, recall),
                               (f1_list, f1)):
            history.append(value * 100)
        print('Round:{:04d}'.format(round_no),
              'acc_test {:.4f}'.format(acc),
              'f1_test: {:.4f}'.format(f1),
              'precision_test: {:.4f}'.format(precision),
              'recall_test: {:.4f}'.format(recall))

    print('acc:       {:.2f} + {:.2f}'.format(np.mean(acc_list), np.std(acc_list)))
    print('precision: {:.2f} + {:.2f}'.format(np.mean(precision_list), np.std(precision_list)))
    print('recall:    {:.2f} + {:.2f}'.format(np.mean(recall_list), np.std(recall_list)))
    print('f1:        {:.2f} + {:.2f}'.format(np.mean(f1_list), np.std(f1_list)))
    print('total time:', time.time() - start_time)


--------------------------------------------------------------------------------
/RFGNNmodels.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn
  3 | from torch_geometric.nn import RGCNConv,GCNConv,GATConv,SAGEConv,SGConv
  4 | import torch.nn.functional as F
  5 | 
  6 | 
  7 | 
  8 | class RGCN(nn.Module):
  9 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
 10 |         super(RGCN, self).__init__()
 11 |         self.dropout = dropout
 12 |         self.out_dim = out_dim
 13 | 
 14 |         self.linear_relu_input = nn.Sequential(
 15 |             nn.Linear(embedding_dimension, hidden_dimension),
 16 |             nn.LeakyReLU()
 17 |         )
 18 |         self.linear_relu_input2 = nn.Sequential(
 19 |             nn.Linear(embedding_dimension, hidden_dimension),
 20 |             nn.LeakyReLU()
 21 |         )
 22 |         self.rgcn1 = RGCNConv(hidden_dimension, hidden_dimension, num_relations=relation_num)
 23 |         self.rgcn2 = RGCNConv(hidden_dimension, hidden_dimension, num_relations=relation_num)
 24 | 
 25 |         self.linear_relu_output1 = nn.Sequential(
 26 |             nn.Linear(hidden_dimension, hidden_dimension),
 27 |             nn.LeakyReLU()
 28 |         )
 29 |         self.linear_output = nn.Linear(hidden_dimension, out_dim)
 30 |         self.linear_output22 = nn.Linear(hidden_dimension, out_dim)
 31 | 
 32 |     def forward(self, mask_feature, feature, edge_index, edge_type):
 33 |         x = self.linear_relu_input(mask_feature.to(torch.float32))
 34 |         x = self.rgcn1(x, edge_index, edge_type)
 35 |         x = F.dropout(x, p=self.dropout, training=self.training)
 36 |         x = self.rgcn2(x, edge_index, edge_type)
 37 |         # x = self.linear_relu_output1(x)
 38 |         x = self.linear_output(x)
 39 |         mask = self.linear_relu_input(feature-mask_feature)
 40 |         mask = self.rgcn1(mask, edge_index, edge_type)
 41 |         mask = self.rgcn2(mask, edge_index, edge_type)
 42 |         mask = self.linear_output(mask)
 43 |         aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim,1)
 44 |         # return torch.mul(aplha.T, mask)
 45 |         return torch.mul(x, mask)
 46 | 
 47 | 
 48 | 
 49 | class GAT(nn.Module):
 50 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=2, relation_num=2, dropout=0.3):
 51 |         super(GAT, self).__init__()
 52 |         self.dropout = dropout
 53 |         self.out_dim = out_dim
 54 | 
 55 |         self.linear_relu_input = nn.Sequential(
 56 |             nn.Linear(embedding_dimension, hidden_dimension),
 57 |             nn.LeakyReLU()
 58 |         )
 59 | 
 60 |         self.gat1 = GATConv(hidden_dimension, int(hidden_dimension / 4), heads=4)
 61 |         self.gat2 = GATConv(hidden_dimension, hidden_dimension)
 62 | 
 63 |         self.linear_relu_output1 = nn.Sequential(
 64 |             nn.Linear(hidden_dimension, hidden_dimension),
 65 |             nn.LeakyReLU()
 66 |         )
 67 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
 68 | 
 69 |     def forward(self, mask_feature, feature, edge_index, edge_type):
 70 |         x = self.linear_relu_input(mask_feature.to(torch.float32))
 71 |         x = self.gat1(x, edge_index)
 72 |         x = F.dropout(x, p=self.dropout, training=self.training)
 73 |         x = self.gat2(x, edge_index)
 74 |         # x = self.linear_relu_output1(x)
 75 |         x = self.linear_output2(x)
 76 |         mask = self.linear_relu_input(feature - mask_feature)
 77 |         mask = self.linear_output2(mask)
 78 |         aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim, 1)
 79 |         # return torch.mul(aplha.T, mask)
 80 |         return torch.mul(x, mask)
 81 | 
 82 | 
 83 | 
 84 | class GCN(nn.Module):
 85 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=2, relation_num=2, dropout=0.3):
 86 |         super(GCN, self).__init__()
 87 |         self.dropout = dropout
 88 |         self.out_dim = out_dim
 89 | 
 90 |         self.linear_relu_input = nn.Sequential(
 91 |             nn.Linear(embedding_dimension, hidden_dimension),
 92 |             nn.LeakyReLU()
 93 |         )
 94 | 
 95 |         self.gcn1 = GCNConv(hidden_dimension, hidden_dimension)
 96 |         self.gcn2 = GCNConv(hidden_dimension, hidden_dimension)
 97 | 
 98 |         self.linear_relu_output1 = nn.Sequential(
 99 |             nn.Linear(hidden_dimension, hidden_dimension),
100 |             nn.LeakyReLU()
101 |         )
102 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
103 |         self.linear_relu_mask = nn.Linear(embedding_dimension, 1)
104 | 
105 |     def forward(self, mask_feature, feature, edge_index, edge_type):
106 | 
107 |         x = self.linear_relu_input(mask_feature.to(torch.float32))
108 |         x = self.gcn1(x, edge_index)
109 |         x = F.dropout(x, p=self.dropout, training=self.training)
110 |         x = self.gcn2(x, edge_index)
111 |         x = self.linear_output2(x)
112 |         mask = self.linear_relu_input(feature - mask_feature)
113 |         mask = self.linear_output2(mask)
114 |         aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim,1)
115 |         # return torch.mul(aplha.T, mask)
116 |         return torch.mul(x, mask)
117 | 
118 | 
119 | 
120 | class SGC(nn.Module):
121 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=2, relation_num=2, dropout=0.3):
122 |         super(SGC, self).__init__()
123 |         self.dropout = dropout
124 |         self.out_dim = out_dim
125 | 
126 |         self.linear_relu_input = nn.Sequential(
127 |             nn.Linear(embedding_dimension, hidden_dimension),
128 |             nn.LeakyReLU()
129 |         )
130 | 
131 |         self.gcn1 = SGConv(hidden_dimension, hidden_dimension)
132 |         self.gcn2 = SGConv(hidden_dimension, hidden_dimension)
133 | 
134 |         self.linear_relu_output1 = nn.Sequential(
135 |             nn.Linear(hidden_dimension, hidden_dimension),
136 |             nn.LeakyReLU()
137 |         )
138 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
139 |         self.linear_relu_mask = nn.Linear(embedding_dimension, 1)
140 | 
141 |     def forward(self, mask_feature, feature, edge_index, edge_type):
142 | 
143 |         x = self.linear_relu_input(mask_feature.to(torch.float32))
144 |         x = self.gcn1(x, edge_index)
145 |         x = F.dropout(x, p=self.dropout, training=self.training)
146 |         x = self.gcn2(x, edge_index)
147 |         x = self.linear_output2(x)
148 |         mask = self.linear_relu_input(feature - mask_feature)
149 |         mask = self.linear_output2(mask)
150 |         aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim,1)
151 |         # return torch.mul(aplha.T, mask)
152 |         return torch.mul(x, mask)
153 | 
154 | 
155 | 
156 | class SAGE(nn.Module):
157 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
158 |         super(SAGE, self).__init__()
159 |         self.dropout = dropout
160 |         self.out_dim = out_dim
161 |         self.linear_relu_input = nn.Sequential(
162 |             nn.Linear(embedding_dimension, hidden_dimension),
163 |             nn.LeakyReLU()
164 |         )
165 | 
166 |         self.sage1 = SAGEConv(hidden_dimension, hidden_dimension)
167 |         self.sage2 = SAGEConv(hidden_dimension, hidden_dimension)
168 | 
169 |         self.linear_relu_output1 = nn.Sequential(
170 |             nn.Linear(hidden_dimension, hidden_dimension),
171 |             nn.LeakyReLU()
172 |         )
173 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
174 | 
175 |     def forward(self, mask_feature, feature, edge_index, edge_type):
176 |         x = self.linear_relu_input(mask_feature.to(torch.float32))
177 |         x = self.sage1(x, edge_index)
178 |         x = F.dropout(x, p=self.dropout, training=self.training)
179 |         x = self.sage2(x, edge_index)
180 |         # x = self.linear_relu_output1(x)
181 |         x = self.linear_output2(x)
182 |         mask = self.linear_relu_input(feature - mask_feature)
183 |         mask = self.linear_output2(mask)
184 |         aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim,1)
185 |         # return torch.mul(aplha.T, mask)
186 |         return torch.mul(x, mask)


--------------------------------------------------------------------------------
/RF-GNN.py:
--------------------------------------------------------------------------------
  1 | import os
  2 | os.environ["CUDA_VISIBLE_DEVICES"] = "5"
  3 | import torch
  4 | import torch.nn as nn
  5 | from RFGNNmodels import RGCN, GAT, GCN, SAGE, SGC
  6 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score
  7 | from Dataset import cresci15, Twibot20, MGTAB
  8 | from utils import sample_mask, init_weights
  9 | import numpy as np
 10 | import argparse
 11 | import time
 12 | import json
 13 | from config import Config
 14 | 
 15 | 
 16 | parser = argparse.ArgumentParser()
 17 | parser.add_argument("-dataset", type=str, help="dataset", choices=['Twibot20','MGTAB','Cresci15'])
 18 | parser.add_argument('-model', type=str, help="model", choices=['GCN', 'GAT', 'GraphSage', 'RGCN', 'SGC'])
 19 | parser.add_argument('--labelrate', type=float, default=0.1, help='labelrate')
 20 | args = parser.parse_args()
 21 | print(args)
 22 | 
 23 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
 24 | config_file = "./config/" + str(args.dataset) + ".ini"
 25 | config = Config(config_file)
 26 | 
 27 | 
 28 | if args.dataset == 'Twibot20':
 29 |     dataset = Twibot20('Data/Twibot20')
 30 | elif args.dataset == 'MGTAB':
 31 |     dataset = MGTAB('Data/MGTAB')
 32 | elif args.dataset == 'Cresci15':
 33 |     dataset = cresci15('Data/Cresci15')
 34 | 
 35 | 
 36 | data = dataset[0]
 37 | if args.dataset == 'MGTAB':
 38 |     data.y = data.y2
 39 | 
 40 | out_dim = 2
 41 | data = data.to(device)
 42 | sample_number = len(data.y)
 43 | 
 44 | index_select_list = (data.edge_type == 100)
 45 | relation_dict = {
 46 |     0:'followers',
 47 |     1:'friends'
 48 | }
 49 | 
 50 | relation_select_list = json.loads(config.relation_select)
 51 | relation_num = len(relation_select_list)
 52 | print('relation used:', end=' ')
 53 | for features_index in relation_select_list:
 54 |         index_select_list = index_select_list + (features_index == data.edge_type)
 55 |         print('{}'.format(relation_dict[features_index]), end='  ')
 56 | edge_index = data.edge_index[:, index_select_list]
 57 | edge_type = data.edge_type[index_select_list]
 58 | 
 59 | 
 60 | def main(seed):
 61 | 
 62 |     np.random.seed(seed)
 63 |     torch.manual_seed(seed)
 64 | 
 65 |     node_id = np.arange(data.num_nodes)
 66 |     np.random.shuffle(node_id)
 67 |     data.n_id = torch.arange(data.num_nodes)
 68 |     data.train_id = node_id[:int(data.num_nodes * args.labelrate)]
 69 |     data.val_id = node_id[int(data.num_nodes * 0.1):int(data.num_nodes * 0.2)]
 70 |     data.test_id = node_id[int(data.num_nodes * 0.2):]
 71 | 
 72 |     data.train_mask = sample_mask(data.train_id, sample_number)
 73 |     data.val_mask = sample_mask(data.val_id, sample_number)
 74 |     data.test_mask = sample_mask(data.test_id, sample_number)
 75 | 
 76 |     test_mask = data.test_mask
 77 |     train_mask = data.train_mask
 78 |     val_mask = data.val_mask
 79 | 
 80 |     fdim = data.x.shape[1]
 81 |     embedding_size = fdim
 82 |     sample_size = int(sum(data.train_mask)*config.beta)
 83 |     edge_count = int(edge_index.shape[1]*config.gamma)
 84 |     results = torch.zeros(data.x.shape[0], out_dim).to(device)
 85 |     for num in range(1, config.model_num+1):
 86 | 
 87 |         np.random.seed(seed + num)
 88 |         sub_findex = np.random.randint(0, data.x.shape[1], size = embedding_size)
 89 |         sub_fmask = sub_findex > config.alpha*data.x.shape[1]
 90 |         sub_sindex = np.random.randint(0, sum(data.train_mask), sample_size)
 91 |         sub_eindex = np.random.randint(0, edge_index.shape[1], edge_count)
 92 | 
 93 |         print('traning {}th model'.format(num))
 94 |         if args.model == 'RGCN':
 95 |             model = RGCN(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
 96 |         elif args.model == 'GCN':
 97 |             model = GCN(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
 98 |         elif args.model == 'GAT':
 99 |             model = GAT(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
100 |         elif args.model == 'SAGE':
101 |             model = SAGE(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
102 |         elif args.model == 'SGC':
103 |             model = SGC(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device)
104 |         loss = nn.CrossEntropyLoss()
105 |         optimizer = torch.optim.AdamW(model.parameters(),
106 |                                       lr=config.lr, weight_decay=config.weight_decay)
107 | 
108 |         model.apply(init_weights)
109 | 
110 |         fdata = torch.zeros(data.x.shape).cuda()
111 |         fdata = fdata + data.x
112 |         fdata[:, sub_fmask] = 0
113 |         max_val_acc = 0
114 |         for epoch in range(config.epochs):
115 |             model.train()
116 |             output = model(fdata, data.x, edge_index[:, sub_eindex], edge_type[sub_eindex])
117 |             loss_train = loss(output[data.train_mask][sub_sindex], data.y[data.train_mask][sub_sindex])
118 |             out = output.max(1)[1].to('cpu').detach().numpy()
119 |             label = data.y.to('cpu').detach().numpy()
120 |             acc_train = accuracy_score(out[train_mask], label[train_mask])
121 |             acc_val = accuracy_score(out[val_mask], label[val_mask])
122 |             if acc_val > max_val_acc:
123 |                 max_val_acc = acc_val
124 |                 best_output = output
125 | 
126 |             optimizer.zero_grad()
127 |             loss_train.backward(retain_graph=True)
128 |             optimizer.step()
129 |             if (epoch + 1)%100 == 0:
130 |                 print('Epoch: {:04d}'.format(epoch + 1),
131 |                       'loss_train: {:.4f}'.format(loss_train.item()),
132 |                       'acc_train: {:.4f}'.format(acc_train.item()),
133 |                       'acc_val: {:.4f}'.format(acc_val.item()))
134 | 
135 | 
136 |         label = data.y.to('cpu').detach().numpy()
137 |         out = best_output.max(1)[1].to('cpu').detach().numpy()
138 |         acc_test = accuracy_score(out[test_mask], label[test_mask])
139 |         f1 = f1_score(out[test_mask], label[test_mask], average='macro')
140 |         precision = precision_score(out[test_mask], label[test_mask], average='macro')
141 |         recall = recall_score(out[test_mask], label[test_mask], average='macro')
142 |         print('acc_test {:.4f}'.format(acc_test),
143 |               'f1_test: {:.4f}'.format(f1.item()),
144 |               'precision_test: {:.4f}'.format(precision.item()),
145 |               'recall_test: {:.4f}'.format(recall.item()))
146 |         results = results + best_output
147 |     results_out = results.max(1)[1].to('cpu').detach().numpy()
148 |     acc_test = accuracy_score(results_out[test_mask], label[test_mask])
149 |     f1 = f1_score(results_out[test_mask], label[test_mask], average='macro')
150 |     precision = precision_score(results_out[test_mask], label[test_mask], average='macro')
151 |     recall = recall_score(results_out[test_mask], label[test_mask], average='macro')
152 | 
153 |     return acc_test, precision, recall, f1
154 | 
155 | 
156 | 
157 | 
158 | if __name__ == "__main__":
159 | 
160 |     t = time.time()
161 |     acc_list = []
162 |     precision_list = []
163 |     recall_list = []
164 |     f1_list = []
165 | 
166 |     for i, seed in enumerate(json.loads(config.random_seed)):
167 |         print('traning {}th round'.format(i + 1))
168 |         acc, precision, recall, f1 = main(seed)
169 |         acc_list.append(acc * 100)
170 |         precision_list.append(precision * 100)
171 |         recall_list.append(recall * 100)
172 |         f1_list.append(f1 * 100)
173 |         print('Round:{:04d}'.format(i + 1),
174 |               'acc_test {:.4f}'.format(acc),
175 |               'f1_test: {:.4f}'.format(f1),
176 |               'precision_test: {:.4f}'.format(precision),
177 |               'recall_test: {:.4f}'.format(recall))
178 |     print('acc:       {:.2f} + {:.2f}'.format(np.array(acc_list).mean(), np.std(acc_list)))
179 |     print('precision: {:.2f} + {:.2f}'.format(np.array(precision_list).mean(), np.std(precision_list)))
180 |     print('recall:    {:.2f} + {:.2f}'.format(np.array(recall_list).mean(), np.std(recall_list)))
181 |     print('f1:        {:.2f} + {:.2f}'.format(np.array(f1_list).mean(), np.std(f1_list)))
182 |     print('total time:', time.time() - t)


--------------------------------------------------------------------------------
/models.py:
--------------------------------------------------------------------------------
  1 | import torch
  2 | from torch import nn
  3 | from torch_geometric.nn import RGCNConv,GCN2Conv,GCNConv,GATConv,SAGEConv,SGConv,RGATConv,GINConv
  4 | import torch.nn.functional as F
  5 | 
  6 | 
  7 | 
  8 | class RGCN(nn.Module):
  9 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
 10 |         super(RGCN, self).__init__()
 11 |         self.dropout = dropout
 12 |         self.linear_relu_input = nn.Sequential(
 13 |             nn.Linear(embedding_dimension, hidden_dimension),
 14 |             nn.LeakyReLU()
 15 |         )
 16 |         self.rgcn1 = RGCNConv(hidden_dimension, hidden_dimension, num_relations=relation_num)
 17 |         self.rgcn2 = RGCNConv(hidden_dimension, hidden_dimension, num_relations=relation_num)
 18 | 
 19 |         self.linear_relu_output1 = nn.Sequential(
 20 |             nn.Linear(hidden_dimension, hidden_dimension),
 21 |             nn.LeakyReLU()
 22 |         )
 23 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
 24 | 
 25 |     def forward(self, feature, edge_index, edge_type):
 26 |         x = self.linear_relu_input(feature.to(torch.float32))
 27 |         x = self.rgcn1(x, edge_index, edge_type)
 28 |         x = F.dropout(x, p=self.dropout, training=self.training)
 29 |         x = self.rgcn1(x, edge_index, edge_type)
 30 |         # x = self.linear_relu_output1(x)
 31 |         x = self.linear_output2(x)
 32 | 
 33 |         return x
 34 | 
 35 | 
 36 | 
 37 | class GAT(nn.Module):
 38 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
 39 |         super(GAT, self).__init__()
 40 |         self.dropout = dropout
 41 | 
 42 |         self.linear_relu_input = nn.Sequential(
 43 |             nn.Linear(embedding_dimension, hidden_dimension),
 44 |             nn.LeakyReLU()
 45 |         )
 46 | 
 47 |         self.gat1 = GATConv(hidden_dimension, int(hidden_dimension / 4), heads=4)
 48 |         self.gat2 = GATConv(hidden_dimension, hidden_dimension)
 49 | 
 50 |         self.linear_relu_output1 = nn.Sequential(
 51 |             nn.Linear(hidden_dimension, hidden_dimension),
 52 |             nn.LeakyReLU()
 53 |         )
 54 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
 55 | 
 56 |     def forward(self, feature, edge_index, edge_type):
 57 |         x = self.linear_relu_input(feature.to(torch.float32))
 58 |         x = self.gat1(x, edge_index)
 59 |         x = F.dropout(x, p=self.dropout, training=self.training)
 60 |         x = self.gat2(x, edge_index)
 61 |         # x = self.linear_relu_output1(x)
 62 |         x = self.linear_output2(x)
 63 | 
 64 |         return x
 65 | 
 66 | 
 67 | 
 68 | class SGC(nn.Module):
 69 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
 70 |         super(SGC, self).__init__()
 71 |         self.dropout = dropout
 72 | 
 73 |         self.linear_relu_input = nn.Sequential(
 74 |             nn.Linear(embedding_dimension, hidden_dimension),
 75 |             nn.LeakyReLU()
 76 |         )
 77 | 
 78 |         self.gcn1 = SGConv(hidden_dimension, hidden_dimension)
 79 |         self.gcn2 = SGConv(hidden_dimension, hidden_dimension)
 80 | 
 81 |         self.linear_relu_output1 = nn.Sequential(
 82 |             nn.Linear(hidden_dimension, hidden_dimension),
 83 |             nn.LeakyReLU()
 84 |         )
 85 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
 86 | 
 87 |     def forward(self, feature, edge_index, edge_type):
 88 |         x = self.linear_relu_input(feature.to(torch.float32))
 89 |         x = self.gcn1(x, edge_index)
 90 |         x = F.dropout(x, p=self.dropout, training=self.training)
 91 |         x = self.gcn2(x, edge_index)
 92 |         # x = self.linear_relu_output1(x)
 93 |         x = self.linear_output2(x)
 94 | 
 95 |         return x
 96 | 
 97 | 
 98 | 
 99 | class GCN(nn.Module):
100 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
101 |         super(GCN, self).__init__()
102 |         self.dropout = dropout
103 | 
104 |         self.linear_relu_input = nn.Sequential(
105 |             nn.Linear(embedding_dimension, hidden_dimension),
106 |             nn.LeakyReLU()
107 |         )
108 | 
109 |         self.gcn1 = GCNConv(hidden_dimension, hidden_dimension)
110 |         self.gcn2 = GCNConv(hidden_dimension, hidden_dimension)
111 | 
112 |         self.linear_relu_output1 = nn.Sequential(
113 |             nn.Linear(hidden_dimension, hidden_dimension),
114 |             nn.LeakyReLU()
115 |         )
116 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
117 | 
118 |     def forward(self, feature, edge_index, edge_type):
119 |         x = self.linear_relu_input(feature.to(torch.float32))
120 |         x = self.gcn1(x, edge_index)
121 |         x = F.dropout(x, p=self.dropout, training=self.training)
122 |         x = self.gcn2(x, edge_index)
123 |         # x = self.linear_relu_output1(x)
124 |         x = self.linear_output2(x)
125 | 
126 |         return x
127 | 
128 | 
129 | 
130 | class SAGE(nn.Module):
131 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
132 |         super(SAGE, self).__init__()
133 |         self.dropout = dropout
134 | 
135 |         self.linear_relu_input = nn.Sequential(
136 |             nn.Linear(embedding_dimension, hidden_dimension),
137 |             nn.LeakyReLU()
138 |         )
139 | 
140 |         self.sage1 = SAGEConv(hidden_dimension, hidden_dimension)
141 |         self.sage2 = SAGEConv(hidden_dimension, hidden_dimension)
142 | 
143 |         self.linear_relu_output1 = nn.Sequential(
144 |             nn.Linear(hidden_dimension, hidden_dimension),
145 |             nn.LeakyReLU()
146 |         )
147 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
148 | 
149 |     def forward(self, feature, edge_index, edge_type):
150 |         x = self.linear_relu_input(feature.to(torch.float32))
151 |         x = self.sage1(x, edge_index)
152 |         x = F.dropout(x, p=self.dropout, training=self.training)
153 |         x = self.sage2(x, edge_index)
154 |         # x = self.linear_relu_output1(x)
155 |         x = self.linear_output2(x)
156 | 
157 |         return x
158 | 
159 | 
160 | class GIN(nn.Module):
161 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
162 |         super(GIN, self).__init__()
163 |         self.dropout = dropout
164 | 
165 |         self.linear_relu_input = nn.Sequential(
166 |             nn.Linear(embedding_dimension, hidden_dimension),
167 |             nn.LeakyReLU()
168 |         )
169 | 
170 |         self.gin1 = GINConv(nn=nn.Linear(hidden_dimension, hidden_dimension), eps=1e-9)
171 |         self.gin2 = GINConv(nn=nn.Linear(hidden_dimension, hidden_dimension), eps=1e-13)
172 | 
173 |         self.linear_relu_output1 = nn.Sequential(
174 |             nn.Linear(hidden_dimension, hidden_dimension),
175 |             nn.LeakyReLU()
176 |         )
177 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
178 | 
179 |     def forward(self, feature, edge_index, edge_type):
180 |         x = self.linear_relu_input(feature.to(torch.float32))
181 |         x = self.gin1(x, edge_index)
182 |         x = F.dropout(x, p=self.dropout, training=self.training)
183 |         x = self.gin2(x, edge_index)
184 |         # x = self.linear_relu_output1(x)
185 |         x = self.linear_output2(x)
186 | 
187 |         return x
188 | 
189 | 
190 | 
191 | class GCN2(nn.Module):
192 |     def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3):
193 |         super(GCN2, self).__init__()
194 |         self.dropout = dropout
195 | 
196 |         self.linear_relu_input = nn.Sequential(
197 |             nn.Linear(embedding_dimension, hidden_dimension),
198 |             nn.LeakyReLU()
199 |         )
200 | 
201 |         self.gcn1 = GCN2Conv(hidden_dimension, 0.2, add_self_loops=False)
202 |         self.gcn2 = GCN2Conv(hidden_dimension, 0.2, add_self_loops=False)
203 | 
204 |         self.linear_relu_output1 = nn.Sequential(
205 |             nn.Linear(hidden_dimension, hidden_dimension),
206 |             nn.LeakyReLU()
207 |         )
208 |         self.linear_output2 = nn.Linear(hidden_dimension, out_dim)
209 | 
210 |     def forward(self, feature, edge_index, edge_type):
211 |         x = self.linear_relu_input(feature.to(torch.float32))
212 |         h = self.gcn1(x, x, edge_index)
213 |         h = F.dropout(h, p=self.dropout, training=self.training)
214 |         h = self.gcn2(h, x, edge_index)
215 |         # x = self.linear_relu_output1(x)
216 |         h = self.linear_output2(h)
217 | 
218 |         return h


--------------------------------------------------------------------------------