├── Data ├── MGTAB │ └── MGTAB.txt ├── Cresci15 │ └── Cresci15.txt └── Twibot20 │ └── Twibot20.txt ├── config ├── MGTAB.ini ├── Cresci15.ini └── Twibot20.ini ├── config.py ├── LICENSE ├── README.md ├── utils.py ├── Dataset.py ├── GNN.py ├── RFGNNmodels.py ├── RF-GNN.py └── models.py /Data/MGTAB/MGTAB.txt: -------------------------------------------------------------------------------- 1 | https://drive.google.com/uc?export=download&id=1XfLYIz4M3KPnVpsEUwRMddSs548y29a5 2 | -------------------------------------------------------------------------------- /Data/Cresci15/Cresci15.txt: -------------------------------------------------------------------------------- 1 | https://drive.google.com/uc?export=download&id=13J-UkHZ6tuZedOI0RUgEoHiMIJRGAdNC 2 | -------------------------------------------------------------------------------- /Data/Twibot20/Twibot20.txt: -------------------------------------------------------------------------------- 1 | https://drive.google.com/uc?export=download&id=1VtpWZzzRyze_5xIy2f1T6jV5lzyj1Oc9 2 | -------------------------------------------------------------------------------- /config/MGTAB.ini: -------------------------------------------------------------------------------- 1 | [Model_Setup] 2 | model_num = 10 3 | relation_select = [0,1] 4 | random_seed = [0,1,2,3,4] 5 | epochs = 200 6 | lr = 1e-3 7 | weight_decay = 5e-4 8 | hidden_dimension = 128 9 | dropout = 0.3 10 | alpha = 0.6 11 | beta = 0.9 12 | gamma = 0.8 13 | -------------------------------------------------------------------------------- /config/Cresci15.ini: -------------------------------------------------------------------------------- 1 | [Model_Setup] 2 | model_num = 10 3 | relation_select = [0,1] 4 | random_seed = [0,1,2,3,4] 5 | epochs = 200 6 | lr = 1e-3 7 | weight_decay = 5e-4 8 | hidden_dimension = 128 9 | dropout = 0.3 10 | alpha = 0.95 11 | beta = 0.95 12 | gamma = 0.95 13 | -------------------------------------------------------------------------------- /config/Twibot20.ini: -------------------------------------------------------------------------------- 1 | [Model_Setup] 2 | model_num = 10 3 | relation_select = [0,1] 4 | random_seed = [0,1,2,3,4] 5 | epochs = 200 6 | lr = 1e-3 7 | weight_decay = 5e-4 8 | hidden_dimension = 128 9 | dropout = 0.3 10 | alpha = 0.8 11 | beta = 0.8 12 | gamma = 0.9 13 | -------------------------------------------------------------------------------- /config.py: -------------------------------------------------------------------------------- 1 | import configparser 2 | 3 | class Config(object): 4 | def __init__(self, config_file): 5 | conf = configparser.ConfigParser() 6 | try: 7 | conf.read(config_file) 8 | except: 9 | print("loading config: %s failed" % (config_file)) 10 | 11 | #Hyper-parameter 12 | self.model_num = conf.getint("Model_Setup", "model_num") 13 | self.relation_select = conf.get("Model_Setup", "relation_select") 14 | self.random_seed = conf.get("Model_Setup", "random_seed") 15 | self.epochs = conf.getint("Model_Setup", "epochs") 16 | self.lr = conf.getfloat("Model_Setup", "lr") 17 | self.weight_decay = conf.getfloat("Model_Setup", "weight_decay") 18 | self.hidden_dimension = conf.getint("Model_Setup", "hidden_dimension") 19 | self.dropout = conf.getfloat("Model_Setup", "dropout") 20 | self.alpha = conf.getfloat("Model_Setup", "alpha") 21 | self.beta = conf.getfloat("Model_Setup", "beta") 22 | self.gamma = conf.getfloat("Model_Setup", "gamma") 23 | -------------------------------------------------------------------------------- /LICENSE: -------------------------------------------------------------------------------- 1 | MIT License 2 | 3 | Copyright (c) 2023 GraphDetec 4 | 5 | Permission is hereby granted, free of charge, to any person obtaining a copy 6 | of this software and associated documentation files (the "Software"), to deal 7 | in the Software without restriction, including without limitation the rights 8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 | copies of the Software, and to permit persons to whom the Software is 10 | furnished to do so, subject to the following conditions: 11 | 12 | The above copyright notice and this permission notice shall be included in all 13 | copies or substantial portions of the Software. 14 | 15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 | SOFTWARE. 22 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # RF-GNN 2 | RF-GNN: Random Forest Boosted Graph Neural Network for Social Bot Detection 3 | 4 | # Environment Settings 5 | * python == 3.7 6 | * torch == 1.8.1+cu102 7 | * numpy == 1.21.6 8 | * scipy == 1.7.2 9 | * pandas == 1.3.5 10 | * scikit-learn == 1.0.2 11 | * torch-cluster == 1.5.9 12 | * torch-geometric == 2.0.4 13 | * torch-scatter == 2.0.8 14 | * torch-sparse == 0.6.12 15 | * torch-spline-conv == 1.2.1 16 | 17 | 18 | # Usage 19 | 20 | ### RF-GNN 21 | 22 | * **dataset**: including \[MGTAB, Twibot20, Cresci15\]. 23 | * **model**: including \['GCN', 'GAT', 'SAGE', 'RGCN', 'SGC'\]. 24 | * **labelrate**: parameter for labelrate. (default = 0.1) 25 | 26 | e.g. 27 | ```` 28 | #run RF-GCN on MGTAB (label rate 0.05) 29 | python RF-GNN.py -dataset MGTAB -model GCN --labelrate 0.05 30 | #run RF-GAR on Twibot-20 31 | python RF-GNN.py -dataset Twibot20 -model GAT -smote True 32 | ```` 33 | 34 | 35 | ### RF-GNN-E and GNN 36 | 37 | * **dataset**: including \[MGTAB, Twibot20, Cresci15\]. 38 | * **model**: including \['GCN', 'GAT', 'SAGE', 'RGCN', 'SGC'\]. 39 | * **ensemble**: including \[True, False\]. 40 | * **labelrate**: parameter for labelrate. (default = 0.1) 41 | 42 | e.g. 43 | ```` 44 | #run RF-GCN-E on MGTAB 45 | python GNN.py -dataset MGTAB -model GCN -ensemble True 46 | #run GCN on MGTAB 47 | python GNN.py -dataset Cresci15 -model GCN -ensemble False 48 | ```` 49 | 50 | 51 | # Dataset 52 | 53 | For TwiBot-20, please visit the [Twibot-20 github repository](https://github.com/BunsenFeng/TwiBot-20). 54 | For MGTAB please visit the [MGTAB github repository](https://github.com/GraphDetec/MGTAB). 55 | For Cresci-15 please visit the [Twibot-20 github repository](https://github.com/GraphDetec/MGTAB). 56 | 57 | 58 | We also offer the processed data set: [Cresci-15](https://drive.google.com/uc?export=download&id=13J-UkHZ6tuZedOI0RUgEoHiMIJRGAdNC), [MGTAB](https://drive.google.com/uc?export=download&id=1XfLYIz4M3KPnVpsEUwRMddSs548y29a5), [Twibot-20](https://drive.google.com/uc?export=download&id=1VtpWZzzRyze_5xIy2f1T6jV5lzyj1Oc9). 59 | 60 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import scipy.sparse as sp 4 | import numpy as np 5 | import pandas as pd 6 | import random 7 | from sklearn.neighbors import NearestNeighbors 8 | 9 | 10 | def sample_mask(idx, l): 11 | """Create mask.""" 12 | mask = torch.zeros(l) 13 | mask[idx] = 1 14 | return torch.as_tensor(mask, dtype=torch.bool) 15 | 16 | def init_weights(m): 17 | if type(m) == nn.Linear: 18 | nn.init.kaiming_uniform_(m.weight) 19 | 20 | 21 | def normalize(mx): 22 | """Row-normalize sparse matrix""" 23 | rowsum = np.array(mx.sum(1)) 24 | r_inv = np.power(rowsum, -1).flatten() 25 | r_inv[np.isinf(r_inv)] = 0 26 | r_mat_inv = sp.diags(r_inv) 27 | mx = r_mat_inv.dot(mx) 28 | 29 | return mx 30 | 31 | 32 | def accuracy(output, labels): 33 | preds = output.max(1)[1].type_as(labels) 34 | 35 | correct = preds.eq(labels).double() 36 | correct = correct.sum() 37 | return correct / len(labels) 38 | 39 | 40 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 41 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 42 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 43 | 44 | indices = torch.from_numpy( 45 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 46 | 47 | values = torch.from_numpy(sparse_mx.data) 48 | shape = torch.Size(sparse_mx.shape) 49 | return torch.sparse.FloatTensor(indices, values, shape) 50 | 51 | 52 | def mixup_data(X, Y, alpha=0.1, mul_factor=2): 53 | 54 | rs = np.random.RandomState(39) 55 | n = X.shape[0] 56 | 57 | mixed_X = torch.tensor(np.empty((n*(mul_factor-1), X.shape[1]))).cuda() 58 | mixed_Y = torch.tensor(np.empty(n*(mul_factor-1))).cuda() 59 | 60 | for i in range(mul_factor-1): 61 | 62 | # sample more than needed as some will be filtered out 63 | lam = np.random.beta(alpha, alpha, size=round(n*2)) 64 | 65 | # original data vectors will be concatenated later 66 | lam = lam[(lam!=0.0) & (lam!=1.0)][:n][:, None] # shape nx1 67 | 68 | shuffle_idx = rs.choice(np.arange(n), n, replace=False) 69 | 70 | mixed_X[i*n : (i+1)*n] = torch.tensor(lam).cuda() * X + (1 - torch.tensor(lam).cuda()) * X[shuffle_idx, :] 71 | mixed_Y[i*n : (i+1)*n] = torch.mul(torch.tensor(np.squeeze(lam)).cuda(), Y) + torch.mul((1 - torch.tensor(np.squeeze(lam)).cuda()), Y[shuffle_idx]) 72 | 73 | # concatenate original data vectors 74 | # mixed_X = np.append(mixed_X, X, axis=0) 75 | # mixed_Y = np.append(mixed_Y, Y, axis=0) 76 | 77 | return mixed_X, mixed_Y 78 | 79 | 80 | def get_tail_label(df): 81 | """ 82 | Give tail label colums of the given target dataframe 83 | 84 | args 85 | df: pandas.DataFrame, target label df whose tail label has to identified 86 | 87 | return 88 | tail_label: list, a list containing column name of all the tail label 89 | """ 90 | columns = df.columns 91 | n = len(columns) 92 | irpl = np.zeros(n) 93 | for column in range(n): 94 | irpl[column] = df[columns[column]].value_counts()[1] 95 | irpl = max(irpl) / irpl 96 | mir = np.average(irpl) 97 | tail_label = [] 98 | for i in range(n): 99 | if irpl[i] > mir: 100 | tail_label.append(columns[i]) 101 | return tail_label 102 | 103 | 104 | 105 | def nearest_neighbour(X): 106 | """ 107 | Give index of 5 nearest neighbor of all the instance 108 | 109 | args 110 | X: np.array, array whose nearest neighbor has to find 111 | 112 | return 113 | indices: list of list, index of 5 NN of each element in X 114 | """ 115 | nbs = NearestNeighbors(n_neighbors=5, metric='euclidean', algorithm='kd_tree').fit(X) 116 | euclidean, indices = nbs.kneighbors(X) 117 | return indices 118 | 119 | 120 | def MLSMOTE(X, y, n_sample): 121 | """ 122 | Give the augmented data using MLSMOTE algorithm 123 | 124 | args 125 | X: pandas.DataFrame, input vector DataFrame 126 | y: pandas.DataFrame, feature vector dataframe 127 | n_sample: int, number of newly generated sample 128 | 129 | return 130 | new_X: pandas.DataFrame, augmented feature vector data 131 | target: pandas.DataFrame, augmented target vector data 132 | """ 133 | if not isinstance(X, pd.DataFrame): 134 | X = pd.DataFrame(X) 135 | if not isinstance(y, pd.DataFrame): 136 | y = pd.get_dummies(np.array(y)) 137 | 138 | indices2 = nearest_neighbour(X) 139 | n = len(indices2) 140 | new_X = np.zeros((n_sample, X.shape[1])) 141 | target = np.zeros((n_sample, y.shape[1])) 142 | for i in range(n_sample): 143 | reference = random.randint(0, n - 1) 144 | neighbour = random.choice(indices2[reference, 1:]) 145 | all_point = indices2[reference] 146 | nn_df = y[y.index.isin(all_point)] 147 | ser = nn_df.sum(axis=0, skipna=True) 148 | target[i] = np.array([1 if val > 2 else 0 for val in ser]) 149 | ratio = random.random() 150 | gap = X.loc[reference, :] - X.loc[neighbour, :] 151 | new_X[i] = np.array(X.loc[reference, :] + ratio * gap) 152 | new_X = pd.DataFrame(new_X, columns=X.columns) 153 | target = pd.DataFrame(target, columns=y.columns) 154 | new_X = pd.concat([X, new_X], axis=0) 155 | target = pd.concat([y, target], axis=0) 156 | return new_X.values, np.argmax(target.values,axis=1) 157 | 158 | 159 | def balance_MLSMOTE(labeled_X, labeled_y, n_sample): 160 | 161 | X_list = [] 162 | y_list = [] 163 | for i in range(max(labeled_y) + 1): 164 | X_list.append(labeled_X[labeled_y == i, :]) 165 | y_list.append(labeled_y[labeled_y == i]) 166 | 167 | num_classes = max(labeled_y) + 1 168 | one_hot_codes = np.eye(num_classes) 169 | 170 | df_y_list = [] 171 | for i in range(len(y_list)): 172 | one_hot_labels = [] 173 | for label in y_list[i]: 174 | one_hot_label = one_hot_codes[label] 175 | one_hot_labels.append(one_hot_label) 176 | df_y = pd.DataFrame(np.array(one_hot_labels)) 177 | df_y_list.append(df_y) 178 | 179 | if n_sample == None: 180 | smote_num = 0 181 | for i in range(len(y_list)): 182 | if len(y_list[i]) > smote_num: 183 | smote_num = len(y_list[i]) 184 | majority_class = i 185 | else: 186 | smote_num = n_sample 187 | 188 | for i in range(len(y_list)): 189 | if smote_num - len(y_list[i]) > 0: 190 | X_res, y_res = MLSMOTE(X_list[i], df_y_list[i], smote_num - len(y_list[i])) 191 | else: 192 | X_res, y_res = X_list[i], y_list[i] 193 | if i == 0: 194 | X_smo = X_res 195 | y_smo = y_res 196 | else: 197 | X_smo = np.concatenate([X_smo, X_res], axis=0) 198 | y_smo = np.concatenate([y_smo, y_res], axis=0) 199 | return X_smo, np.squeeze(y_smo) -------------------------------------------------------------------------------- /Dataset.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch_geometric.data import InMemoryDataset 3 | from torch_geometric.data import Data 4 | from utils import sample_mask 5 | 6 | class cresci15(InMemoryDataset): 7 | def __init__(self, root, transform=None, pre_transform=None): 8 | super().__init__(root, transform, pre_transform) 9 | self.data, self.slices = torch.load(self.processed_paths[0]) 10 | self.root = root 11 | 12 | @property 13 | def raw_file_names(self): 14 | return ['some_file_1', 'some_file_2', ...] 15 | 16 | @property 17 | def processed_file_names(self): 18 | return ['data.pt'] 19 | 20 | 21 | def sample_mask(self, idx, l): 22 | """Create mask.""" 23 | mask = torch.zeros(l) 24 | mask[idx] = 1 25 | return torch.as_tensor(mask, dtype=torch.bool) 26 | 27 | 28 | def process(self): 29 | # Read data into huge `Data` list. 30 | 31 | edge_index = torch.load(self.root + "/edge_index.pt") 32 | edge_type = torch.load(self.root + "/edge_type.pt") 33 | label = torch.load(self.root + "/label.pt") 34 | cat_prop = torch.load(self.root + "/cat_properties_tensor.pt") 35 | num_prop = torch.load(self.root + "/num_properties_tensor.pt") 36 | des_tensor = torch.load(self.root + "/des_tensor.pt") 37 | tweets_tensor = torch.load(self.root + "/tweets_tensor.pt") 38 | 39 | features = torch.cat([cat_prop, num_prop, des_tensor, tweets_tensor], axis=1) 40 | data = Data(x=features, y =label, edge_index=edge_index) 41 | data.edge_type = edge_type 42 | 43 | 44 | sample_number = len(data.y) 45 | 46 | train_idx = torch.load(self.root + "/train_idx.pt") 47 | val_idx = torch.load(self.root + "/test_idx.pt") 48 | test_idx = torch.load(self.root + "/val_idx.pt") 49 | 50 | data.train_mask = self.sample_mask(train_idx, sample_number) 51 | data.val_mask = self.sample_mask(val_idx, sample_number) 52 | data.test_mask = self.sample_mask(test_idx, sample_number) 53 | 54 | data_list = [data] 55 | 56 | if self.pre_filter is not None: 57 | data_list = [data for data in data_list if self.pre_filter(data)] 58 | 59 | if self.pre_transform is not None: 60 | data_list = [self.pre_transform(data) for data in data_list] 61 | 62 | data, slices = self.collate(data_list) 63 | torch.save((data, slices), self.processed_paths[0]) 64 | 65 | 66 | class Twibot20(InMemoryDataset): 67 | def __init__(self, root, transform=None, pre_transform=None): 68 | super().__init__(root, transform, pre_transform) 69 | self.data, self.slices = torch.load(self.processed_paths[0]) 70 | self.root = root 71 | 72 | 73 | @property 74 | def raw_file_names(self): 75 | return ['some_file_1', 'some_file_2', ...] 76 | 77 | 78 | @property 79 | def processed_file_names(self): 80 | return ['data.pt'] 81 | 82 | 83 | 84 | def process(self): 85 | labels = torch.load(self.root + "/label.pt") 86 | des_tensor = torch.load(self.root + "/des_tensor.pt") 87 | tweets_tensor1 = torch.load(self.root + "/tweets_tensor_p1.pt") 88 | tweets_tensor2 = torch.load(self.root + "/tweets_tensor_p2.pt") 89 | tweets_tensor = torch.cat([tweets_tensor1, tweets_tensor2], 0) 90 | num_prop = torch.load(self.root + "/num_prop.pt") 91 | category_prop = torch.load(self.root + "/category_prop.pt") 92 | edge_index = torch.load(self.root + "/edge_index.pt") 93 | edge_type = torch.load(self.root + "/edge_type.pt") 94 | x = torch.cat([des_tensor, tweets_tensor, num_prop, category_prop], 1) 95 | 96 | 97 | m0 = edge_index[0, :] > 11826 98 | m1 = edge_index[1, :] > 11826 99 | m = m0 + m1 100 | x = x[:11826, :] 101 | 102 | data = Data(x=x, y=labels, edge_index=edge_index) 103 | data.edge_index = edge_index[:, ~m] 104 | data.edge_type = edge_type[~m] 105 | sample_number = len(data.x) 106 | 107 | train_idx = range(8278) 108 | val_idx = range(8278, 8278 + 2365) 109 | test_idx = range(8278 + 2365, 8278 + 2365 + 1183) 110 | 111 | data.train_mask = sample_mask(train_idx, sample_number) 112 | data.val_mask = sample_mask(val_idx, sample_number) 113 | data.test_mask = sample_mask(test_idx, sample_number) 114 | 115 | data_list = [data] 116 | 117 | if self.pre_filter is not None: 118 | data_list = [data for data in data_list if self.pre_filter(data)] 119 | 120 | if self.pre_transform is not None: 121 | data_list = [self.pre_transform(data) for data in data_list] 122 | 123 | data, slices = self.collate(data_list) 124 | torch.save((data, slices), self.processed_paths[0]) 125 | 126 | 127 | class MGTAB(InMemoryDataset): 128 | def __init__(self, root, transform=None, pre_transform=None): 129 | super().__init__(root, transform, pre_transform) 130 | self.data, self.slices = torch.load(self.processed_paths[0]) 131 | self.root = root 132 | 133 | @property 134 | def raw_file_names(self): 135 | return ['some_file_1', 'some_file_2', ...] 136 | 137 | @property 138 | def processed_file_names(self): 139 | return ['data.pt'] 140 | 141 | def sample_mask(self, idx, l): 142 | """Create mask.""" 143 | mask = torch.zeros(l) 144 | mask[idx] = 1 145 | return torch.as_tensor(mask, dtype=torch.bool) 146 | 147 | 148 | def process(self): 149 | # Read data into huge `Data` list. 150 | 151 | edge_index = torch.load(self.root + "/edge_index.pt") 152 | edge_index = torch.tensor(edge_index, dtype = torch.int64) 153 | edge_type = torch.load(self.root + "/edge_type.pt") 154 | edge_weight = torch.load(self.root + "/edge_weight.pt") 155 | stance_label = torch.load(self.root + "/labels_stance.pt") 156 | bot_label = torch.load(self.root + "/labels_bot.pt") 157 | 158 | features = torch.load(self.root + "/features.pt") 159 | features = features.to(torch.float32) 160 | 161 | 162 | data = Data(x=features, edge_index=edge_index) 163 | data.edge_type = edge_type 164 | data.edge_weight = edge_weight 165 | data.y1 = stance_label 166 | data.y2 = bot_label 167 | sample_number = len(data.y1) 168 | 169 | train_idx = range(int(0.7*sample_number)) 170 | val_idx = range(int(0.7*sample_number), int(0.9*sample_number)) 171 | test_idx = range(int(0.9*sample_number), int(sample_number)) 172 | 173 | data.train_mask = self.sample_mask(train_idx, sample_number) 174 | data.val_mask = self.sample_mask(val_idx, sample_number) 175 | data.test_mask = self.sample_mask(test_idx, sample_number) 176 | 177 | data_list = [data] 178 | 179 | if self.pre_filter is not None: 180 | data_list = [data for data in data_list if self.pre_filter(data)] 181 | 182 | if self.pre_transform is not None: 183 | data_list = [self.pre_transform(data) for data in data_list] 184 | 185 | data, slices = self.collate(data_list) 186 | torch.save((data, slices), self.processed_paths[0]) -------------------------------------------------------------------------------- /GNN.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_VISIBLE_DEVICES"] = "7" 3 | import torch 4 | import torch.nn as nn 5 | from models import RGCN, GAT, GCN, SAGE, SGC 6 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score 7 | from Dataset import cresci15, Twibot20, MGTAB 8 | from utils import sample_mask, init_weights 9 | import numpy as np 10 | import argparse 11 | import time 12 | import json 13 | from config import Config 14 | 15 | parser = argparse.ArgumentParser() 16 | parser.add_argument("-dataset", type=str, default='Twibot20', help="dataset", choices=['Twibot20','MGTAB','Cresci15']) 17 | parser.add_argument("-ensemble", type=bool, default=True, help="whether use ensemble") 18 | parser.add_argument('-model', type=str, default='GCN', choices=['GCN', 'GAT', 'GraphSage', 'RGCN', 'SGC']) 19 | parser.add_argument('--labelrate', type=float, default=0.1, help='labelrate') 20 | args = parser.parse_args() 21 | print(args) 22 | 23 | 24 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 25 | config_file = "./config/" + str(args.dataset) + ".ini" 26 | config = Config(config_file) 27 | 28 | 29 | if args.dataset == 'Twibot20': 30 | dataset = Twibot20('Data/Twibot20') 31 | elif args.dataset == 'MGTAB': 32 | dataset = MGTAB('Data/MGTAB') 33 | elif args.dataset == 'Cresci15': 34 | dataset = cresci15('Data/Cresci15') 35 | 36 | 37 | data = dataset[0] 38 | if args.dataset == 'MGTAB': 39 | data.y = data.y2 40 | 41 | out_dim = 2 42 | data = data.to(device) 43 | sample_number = len(data.y) 44 | 45 | index_select_list = (data.edge_type == 100) 46 | relation_dict = { 47 | 0:'followers', 48 | 1:'friends' 49 | } 50 | 51 | relation_select_list = json.loads(config.relation_select) 52 | relation_num = len(relation_select_list) 53 | print('relation used:', end=' ') 54 | for features_index in relation_select_list: 55 | index_select_list = index_select_list + (features_index == data.edge_type) 56 | print('{}'.format(relation_dict[features_index]), end=' ') 57 | edge_index = data.edge_index[:, index_select_list] 58 | edge_type = data.edge_type[index_select_list] 59 | 60 | 61 | def main(seed): 62 | 63 | np.random.seed(seed) 64 | torch.manual_seed(seed) 65 | 66 | node_id = np.arange(data.num_nodes) 67 | np.random.shuffle(node_id) 68 | data.n_id = torch.arange(data.num_nodes) 69 | data.train_id = node_id[:int(data.num_nodes * args.labelrate)] 70 | data.val_id = node_id[int(data.num_nodes * 0.1):int(data.num_nodes * 0.2)] 71 | data.test_id = node_id[int(data.num_nodes * 0.2):] 72 | 73 | data.train_mask = sample_mask(data.train_id, sample_number) 74 | data.val_mask = sample_mask(data.val_id, sample_number) 75 | data.test_mask = sample_mask(data.test_id, sample_number) 76 | 77 | test_mask = data.test_mask 78 | train_mask = data.train_mask 79 | val_mask = data.val_mask 80 | 81 | fdim = data.x.shape[1] 82 | embedding_size = fdim 83 | 84 | results = torch.zeros(data.x.shape[0], out_dim).to(device) 85 | if args.ensemble: 86 | model_num = config.model_num 87 | else: 88 | model_num = 1 89 | 90 | 91 | for num in range(model_num): 92 | print('traning {}th model'.format(num + 1)) 93 | if args.model == 'RGCN': 94 | model = RGCN(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 95 | elif args.model == 'GCN': 96 | model = GCN(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 97 | elif args.model == 'GAT': 98 | model = GAT(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 99 | elif args.model == 'GraphSage': 100 | model = SAGE(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 101 | elif args.model == 'SGC': 102 | model = SGC(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 103 | loss = nn.CrossEntropyLoss() 104 | optimizer = torch.optim.AdamW(model.parameters(), 105 | lr=config.lr, weight_decay=config.weight_decay) 106 | 107 | model.apply(init_weights) 108 | 109 | 110 | for epoch in range(config.epochs): 111 | model.train() 112 | output = model(data.x, edge_index, edge_type) 113 | loss_train = loss(output[data.train_mask], data.y[data.train_mask]) 114 | out = output.max(1)[1].to('cpu').detach().numpy() 115 | label = data.y.to('cpu').detach().numpy() 116 | acc_train = accuracy_score(out[train_mask], label[train_mask]) 117 | acc_val = accuracy_score(out[val_mask], label[val_mask]) 118 | optimizer.zero_grad() 119 | loss_train.backward() 120 | optimizer.step() 121 | if (epoch + 1)%100 == 0: 122 | print('Epoch: {:04d}'.format(epoch + 1), 123 | 'loss_train: {:.4f}'.format(loss_train.item()), 124 | 'acc_train: {:.4f}'.format(acc_train.item()), 125 | 'acc_val: {:.4f}'.format(acc_val.item())) 126 | 127 | model.eval() 128 | output = model(data.x, edge_index, edge_type) 129 | label = data.y.to('cpu').detach().numpy() 130 | out = output.max(1)[1].to('cpu').detach().numpy() 131 | acc_test = accuracy_score(out[test_mask], label[test_mask]) 132 | f1 = f1_score(out[test_mask], label[test_mask], average='macro') 133 | precision = precision_score(out[test_mask], label[test_mask], average='macro') 134 | recall = recall_score(out[test_mask], label[test_mask], average='macro') 135 | print('acc_test {:.4f}'.format(acc_test), 136 | 'f1_test: {:.4f}'.format(f1.item()), 137 | 'precision_test: {:.4f}'.format(precision.item()), 138 | 'recall_test: {:.4f}'.format(recall.item())) 139 | results = results + output 140 | results_out = results.max(1)[1].to('cpu').detach().numpy() 141 | acc_test = accuracy_score(results_out[test_mask], label[test_mask]) 142 | f1 = f1_score(results_out[test_mask], label[test_mask], average='macro') 143 | precision = precision_score(results_out[test_mask], label[test_mask], average='macro') 144 | recall = recall_score(results_out[test_mask], label[test_mask], average='macro') 145 | 146 | return acc_test, precision, recall, f1 147 | 148 | 149 | 150 | 151 | if __name__ == "__main__": 152 | 153 | t = time.time() 154 | acc_list = [] 155 | precision_list = [] 156 | recall_list = [] 157 | f1_list = [] 158 | 159 | for i, seed in enumerate(json.loads(config.random_seed)): 160 | print('traning {}th round'.format(i + 1)) 161 | acc, precision, recall, f1 = main(seed) 162 | acc_list.append(acc * 100) 163 | precision_list.append(precision * 100) 164 | recall_list.append(recall * 100) 165 | f1_list.append(f1 * 100) 166 | print('Round:{:04d}'.format(i + 1), 167 | 'acc_test {:.4f}'.format(acc), 168 | 'f1_test: {:.4f}'.format(f1), 169 | 'precision_test: {:.4f}'.format(precision), 170 | 'recall_test: {:.4f}'.format(recall)) 171 | print('acc: {:.2f} + {:.2f}'.format(np.array(acc_list).mean(), np.std(acc_list))) 172 | print('precision: {:.2f} + {:.2f}'.format(np.array(precision_list).mean(), np.std(precision_list))) 173 | print('recall: {:.2f} + {:.2f}'.format(np.array(recall_list).mean(), np.std(recall_list))) 174 | print('f1: {:.2f} + {:.2f}'.format(np.array(f1_list).mean(), np.std(f1_list))) 175 | print('total time:', time.time() - t) -------------------------------------------------------------------------------- /RFGNNmodels.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.nn import RGCNConv,GCNConv,GATConv,SAGEConv,SGConv 4 | import torch.nn.functional as F 5 | 6 | 7 | 8 | class RGCN(nn.Module): 9 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 10 | super(RGCN, self).__init__() 11 | self.dropout = dropout 12 | self.out_dim = out_dim 13 | 14 | self.linear_relu_input = nn.Sequential( 15 | nn.Linear(embedding_dimension, hidden_dimension), 16 | nn.LeakyReLU() 17 | ) 18 | self.linear_relu_input2 = nn.Sequential( 19 | nn.Linear(embedding_dimension, hidden_dimension), 20 | nn.LeakyReLU() 21 | ) 22 | self.rgcn1 = RGCNConv(hidden_dimension, hidden_dimension, num_relations=relation_num) 23 | self.rgcn2 = RGCNConv(hidden_dimension, hidden_dimension, num_relations=relation_num) 24 | 25 | self.linear_relu_output1 = nn.Sequential( 26 | nn.Linear(hidden_dimension, hidden_dimension), 27 | nn.LeakyReLU() 28 | ) 29 | self.linear_output = nn.Linear(hidden_dimension, out_dim) 30 | self.linear_output22 = nn.Linear(hidden_dimension, out_dim) 31 | 32 | def forward(self, mask_feature, feature, edge_index, edge_type): 33 | x = self.linear_relu_input(mask_feature.to(torch.float32)) 34 | x = self.rgcn1(x, edge_index, edge_type) 35 | x = F.dropout(x, p=self.dropout, training=self.training) 36 | x = self.rgcn2(x, edge_index, edge_type) 37 | # x = self.linear_relu_output1(x) 38 | x = self.linear_output(x) 39 | mask = self.linear_relu_input(feature-mask_feature) 40 | mask = self.rgcn1(mask, edge_index, edge_type) 41 | mask = self.rgcn2(mask, edge_index, edge_type) 42 | mask = self.linear_output(mask) 43 | aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim,1) 44 | # return torch.mul(aplha.T, mask) 45 | return torch.mul(x, mask) 46 | 47 | 48 | 49 | class GAT(nn.Module): 50 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=2, relation_num=2, dropout=0.3): 51 | super(GAT, self).__init__() 52 | self.dropout = dropout 53 | self.out_dim = out_dim 54 | 55 | self.linear_relu_input = nn.Sequential( 56 | nn.Linear(embedding_dimension, hidden_dimension), 57 | nn.LeakyReLU() 58 | ) 59 | 60 | self.gat1 = GATConv(hidden_dimension, int(hidden_dimension / 4), heads=4) 61 | self.gat2 = GATConv(hidden_dimension, hidden_dimension) 62 | 63 | self.linear_relu_output1 = nn.Sequential( 64 | nn.Linear(hidden_dimension, hidden_dimension), 65 | nn.LeakyReLU() 66 | ) 67 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 68 | 69 | def forward(self, mask_feature, feature, edge_index, edge_type): 70 | x = self.linear_relu_input(mask_feature.to(torch.float32)) 71 | x = self.gat1(x, edge_index) 72 | x = F.dropout(x, p=self.dropout, training=self.training) 73 | x = self.gat2(x, edge_index) 74 | # x = self.linear_relu_output1(x) 75 | x = self.linear_output2(x) 76 | mask = self.linear_relu_input(feature - mask_feature) 77 | mask = self.linear_output2(mask) 78 | aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim, 1) 79 | # return torch.mul(aplha.T, mask) 80 | return torch.mul(x, mask) 81 | 82 | 83 | 84 | class GCN(nn.Module): 85 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=2, relation_num=2, dropout=0.3): 86 | super(GCN, self).__init__() 87 | self.dropout = dropout 88 | self.out_dim = out_dim 89 | 90 | self.linear_relu_input = nn.Sequential( 91 | nn.Linear(embedding_dimension, hidden_dimension), 92 | nn.LeakyReLU() 93 | ) 94 | 95 | self.gcn1 = GCNConv(hidden_dimension, hidden_dimension) 96 | self.gcn2 = GCNConv(hidden_dimension, hidden_dimension) 97 | 98 | self.linear_relu_output1 = nn.Sequential( 99 | nn.Linear(hidden_dimension, hidden_dimension), 100 | nn.LeakyReLU() 101 | ) 102 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 103 | self.linear_relu_mask = nn.Linear(embedding_dimension, 1) 104 | 105 | def forward(self, mask_feature, feature, edge_index, edge_type): 106 | 107 | x = self.linear_relu_input(mask_feature.to(torch.float32)) 108 | x = self.gcn1(x, edge_index) 109 | x = F.dropout(x, p=self.dropout, training=self.training) 110 | x = self.gcn2(x, edge_index) 111 | x = self.linear_output2(x) 112 | mask = self.linear_relu_input(feature - mask_feature) 113 | mask = self.linear_output2(mask) 114 | aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim,1) 115 | # return torch.mul(aplha.T, mask) 116 | return torch.mul(x, mask) 117 | 118 | 119 | 120 | class SGC(nn.Module): 121 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=2, relation_num=2, dropout=0.3): 122 | super(SGC, self).__init__() 123 | self.dropout = dropout 124 | self.out_dim = out_dim 125 | 126 | self.linear_relu_input = nn.Sequential( 127 | nn.Linear(embedding_dimension, hidden_dimension), 128 | nn.LeakyReLU() 129 | ) 130 | 131 | self.gcn1 = SGConv(hidden_dimension, hidden_dimension) 132 | self.gcn2 = SGConv(hidden_dimension, hidden_dimension) 133 | 134 | self.linear_relu_output1 = nn.Sequential( 135 | nn.Linear(hidden_dimension, hidden_dimension), 136 | nn.LeakyReLU() 137 | ) 138 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 139 | self.linear_relu_mask = nn.Linear(embedding_dimension, 1) 140 | 141 | def forward(self, mask_feature, feature, edge_index, edge_type): 142 | 143 | x = self.linear_relu_input(mask_feature.to(torch.float32)) 144 | x = self.gcn1(x, edge_index) 145 | x = F.dropout(x, p=self.dropout, training=self.training) 146 | x = self.gcn2(x, edge_index) 147 | x = self.linear_output2(x) 148 | mask = self.linear_relu_input(feature - mask_feature) 149 | mask = self.linear_output2(mask) 150 | aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim,1) 151 | # return torch.mul(aplha.T, mask) 152 | return torch.mul(x, mask) 153 | 154 | 155 | 156 | class SAGE(nn.Module): 157 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 158 | super(SAGE, self).__init__() 159 | self.dropout = dropout 160 | self.out_dim = out_dim 161 | self.linear_relu_input = nn.Sequential( 162 | nn.Linear(embedding_dimension, hidden_dimension), 163 | nn.LeakyReLU() 164 | ) 165 | 166 | self.sage1 = SAGEConv(hidden_dimension, hidden_dimension) 167 | self.sage2 = SAGEConv(hidden_dimension, hidden_dimension) 168 | 169 | self.linear_relu_output1 = nn.Sequential( 170 | nn.Linear(hidden_dimension, hidden_dimension), 171 | nn.LeakyReLU() 172 | ) 173 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 174 | 175 | def forward(self, mask_feature, feature, edge_index, edge_type): 176 | x = self.linear_relu_input(mask_feature.to(torch.float32)) 177 | x = self.sage1(x, edge_index) 178 | x = F.dropout(x, p=self.dropout, training=self.training) 179 | x = self.sage2(x, edge_index) 180 | # x = self.linear_relu_output1(x) 181 | x = self.linear_output2(x) 182 | mask = self.linear_relu_input(feature - mask_feature) 183 | mask = self.linear_output2(mask) 184 | aplha = torch.mul(x, mask).sum(1).repeat(self.out_dim,1) 185 | # return torch.mul(aplha.T, mask) 186 | return torch.mul(x, mask) -------------------------------------------------------------------------------- /RF-GNN.py: -------------------------------------------------------------------------------- 1 | import os 2 | os.environ["CUDA_VISIBLE_DEVICES"] = "5" 3 | import torch 4 | import torch.nn as nn 5 | from RFGNNmodels import RGCN, GAT, GCN, SAGE, SGC 6 | from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score 7 | from Dataset import cresci15, Twibot20, MGTAB 8 | from utils import sample_mask, init_weights 9 | import numpy as np 10 | import argparse 11 | import time 12 | import json 13 | from config import Config 14 | 15 | 16 | parser = argparse.ArgumentParser() 17 | parser.add_argument("-dataset", type=str, help="dataset", choices=['Twibot20','MGTAB','Cresci15']) 18 | parser.add_argument('-model', type=str, help="model", choices=['GCN', 'GAT', 'GraphSage', 'RGCN', 'SGC']) 19 | parser.add_argument('--labelrate', type=float, default=0.1, help='labelrate') 20 | args = parser.parse_args() 21 | print(args) 22 | 23 | device = torch.device('cuda' if torch.cuda.is_available() else 'cpu') 24 | config_file = "./config/" + str(args.dataset) + ".ini" 25 | config = Config(config_file) 26 | 27 | 28 | if args.dataset == 'Twibot20': 29 | dataset = Twibot20('Data/Twibot20') 30 | elif args.dataset == 'MGTAB': 31 | dataset = MGTAB('Data/MGTAB') 32 | elif args.dataset == 'Cresci15': 33 | dataset = cresci15('Data/Cresci15') 34 | 35 | 36 | data = dataset[0] 37 | if args.dataset == 'MGTAB': 38 | data.y = data.y2 39 | 40 | out_dim = 2 41 | data = data.to(device) 42 | sample_number = len(data.y) 43 | 44 | index_select_list = (data.edge_type == 100) 45 | relation_dict = { 46 | 0:'followers', 47 | 1:'friends' 48 | } 49 | 50 | relation_select_list = json.loads(config.relation_select) 51 | relation_num = len(relation_select_list) 52 | print('relation used:', end=' ') 53 | for features_index in relation_select_list: 54 | index_select_list = index_select_list + (features_index == data.edge_type) 55 | print('{}'.format(relation_dict[features_index]), end=' ') 56 | edge_index = data.edge_index[:, index_select_list] 57 | edge_type = data.edge_type[index_select_list] 58 | 59 | 60 | def main(seed): 61 | 62 | np.random.seed(seed) 63 | torch.manual_seed(seed) 64 | 65 | node_id = np.arange(data.num_nodes) 66 | np.random.shuffle(node_id) 67 | data.n_id = torch.arange(data.num_nodes) 68 | data.train_id = node_id[:int(data.num_nodes * args.labelrate)] 69 | data.val_id = node_id[int(data.num_nodes * 0.1):int(data.num_nodes * 0.2)] 70 | data.test_id = node_id[int(data.num_nodes * 0.2):] 71 | 72 | data.train_mask = sample_mask(data.train_id, sample_number) 73 | data.val_mask = sample_mask(data.val_id, sample_number) 74 | data.test_mask = sample_mask(data.test_id, sample_number) 75 | 76 | test_mask = data.test_mask 77 | train_mask = data.train_mask 78 | val_mask = data.val_mask 79 | 80 | fdim = data.x.shape[1] 81 | embedding_size = fdim 82 | sample_size = int(sum(data.train_mask)*config.beta) 83 | edge_count = int(edge_index.shape[1]*config.gamma) 84 | results = torch.zeros(data.x.shape[0], out_dim).to(device) 85 | for num in range(1, config.model_num+1): 86 | 87 | np.random.seed(seed + num) 88 | sub_findex = np.random.randint(0, data.x.shape[1], size = embedding_size) 89 | sub_fmask = sub_findex > config.alpha*data.x.shape[1] 90 | sub_sindex = np.random.randint(0, sum(data.train_mask), sample_size) 91 | sub_eindex = np.random.randint(0, edge_index.shape[1], edge_count) 92 | 93 | print('traning {}th model'.format(num)) 94 | if args.model == 'RGCN': 95 | model = RGCN(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 96 | elif args.model == 'GCN': 97 | model = GCN(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 98 | elif args.model == 'GAT': 99 | model = GAT(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 100 | elif args.model == 'SAGE': 101 | model = SAGE(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 102 | elif args.model == 'SGC': 103 | model = SGC(embedding_size, config.hidden_dimension, out_dim, relation_num, config.dropout).to(device) 104 | loss = nn.CrossEntropyLoss() 105 | optimizer = torch.optim.AdamW(model.parameters(), 106 | lr=config.lr, weight_decay=config.weight_decay) 107 | 108 | model.apply(init_weights) 109 | 110 | fdata = torch.zeros(data.x.shape).cuda() 111 | fdata = fdata + data.x 112 | fdata[:, sub_fmask] = 0 113 | max_val_acc = 0 114 | for epoch in range(config.epochs): 115 | model.train() 116 | output = model(fdata, data.x, edge_index[:, sub_eindex], edge_type[sub_eindex]) 117 | loss_train = loss(output[data.train_mask][sub_sindex], data.y[data.train_mask][sub_sindex]) 118 | out = output.max(1)[1].to('cpu').detach().numpy() 119 | label = data.y.to('cpu').detach().numpy() 120 | acc_train = accuracy_score(out[train_mask], label[train_mask]) 121 | acc_val = accuracy_score(out[val_mask], label[val_mask]) 122 | if acc_val > max_val_acc: 123 | max_val_acc = acc_val 124 | best_output = output 125 | 126 | optimizer.zero_grad() 127 | loss_train.backward(retain_graph=True) 128 | optimizer.step() 129 | if (epoch + 1)%100 == 0: 130 | print('Epoch: {:04d}'.format(epoch + 1), 131 | 'loss_train: {:.4f}'.format(loss_train.item()), 132 | 'acc_train: {:.4f}'.format(acc_train.item()), 133 | 'acc_val: {:.4f}'.format(acc_val.item())) 134 | 135 | 136 | label = data.y.to('cpu').detach().numpy() 137 | out = best_output.max(1)[1].to('cpu').detach().numpy() 138 | acc_test = accuracy_score(out[test_mask], label[test_mask]) 139 | f1 = f1_score(out[test_mask], label[test_mask], average='macro') 140 | precision = precision_score(out[test_mask], label[test_mask], average='macro') 141 | recall = recall_score(out[test_mask], label[test_mask], average='macro') 142 | print('acc_test {:.4f}'.format(acc_test), 143 | 'f1_test: {:.4f}'.format(f1.item()), 144 | 'precision_test: {:.4f}'.format(precision.item()), 145 | 'recall_test: {:.4f}'.format(recall.item())) 146 | results = results + best_output 147 | results_out = results.max(1)[1].to('cpu').detach().numpy() 148 | acc_test = accuracy_score(results_out[test_mask], label[test_mask]) 149 | f1 = f1_score(results_out[test_mask], label[test_mask], average='macro') 150 | precision = precision_score(results_out[test_mask], label[test_mask], average='macro') 151 | recall = recall_score(results_out[test_mask], label[test_mask], average='macro') 152 | 153 | return acc_test, precision, recall, f1 154 | 155 | 156 | 157 | 158 | if __name__ == "__main__": 159 | 160 | t = time.time() 161 | acc_list = [] 162 | precision_list = [] 163 | recall_list = [] 164 | f1_list = [] 165 | 166 | for i, seed in enumerate(json.loads(config.random_seed)): 167 | print('traning {}th round'.format(i + 1)) 168 | acc, precision, recall, f1 = main(seed) 169 | acc_list.append(acc * 100) 170 | precision_list.append(precision * 100) 171 | recall_list.append(recall * 100) 172 | f1_list.append(f1 * 100) 173 | print('Round:{:04d}'.format(i + 1), 174 | 'acc_test {:.4f}'.format(acc), 175 | 'f1_test: {:.4f}'.format(f1), 176 | 'precision_test: {:.4f}'.format(precision), 177 | 'recall_test: {:.4f}'.format(recall)) 178 | print('acc: {:.2f} + {:.2f}'.format(np.array(acc_list).mean(), np.std(acc_list))) 179 | print('precision: {:.2f} + {:.2f}'.format(np.array(precision_list).mean(), np.std(precision_list))) 180 | print('recall: {:.2f} + {:.2f}'.format(np.array(recall_list).mean(), np.std(recall_list))) 181 | print('f1: {:.2f} + {:.2f}'.format(np.array(f1_list).mean(), np.std(f1_list))) 182 | print('total time:', time.time() - t) -------------------------------------------------------------------------------- /models.py: -------------------------------------------------------------------------------- 1 | import torch 2 | from torch import nn 3 | from torch_geometric.nn import RGCNConv,GCN2Conv,GCNConv,GATConv,SAGEConv,SGConv,RGATConv,GINConv 4 | import torch.nn.functional as F 5 | 6 | 7 | 8 | class RGCN(nn.Module): 9 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 10 | super(RGCN, self).__init__() 11 | self.dropout = dropout 12 | self.linear_relu_input = nn.Sequential( 13 | nn.Linear(embedding_dimension, hidden_dimension), 14 | nn.LeakyReLU() 15 | ) 16 | self.rgcn1 = RGCNConv(hidden_dimension, hidden_dimension, num_relations=relation_num) 17 | self.rgcn2 = RGCNConv(hidden_dimension, hidden_dimension, num_relations=relation_num) 18 | 19 | self.linear_relu_output1 = nn.Sequential( 20 | nn.Linear(hidden_dimension, hidden_dimension), 21 | nn.LeakyReLU() 22 | ) 23 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 24 | 25 | def forward(self, feature, edge_index, edge_type): 26 | x = self.linear_relu_input(feature.to(torch.float32)) 27 | x = self.rgcn1(x, edge_index, edge_type) 28 | x = F.dropout(x, p=self.dropout, training=self.training) 29 | x = self.rgcn1(x, edge_index, edge_type) 30 | # x = self.linear_relu_output1(x) 31 | x = self.linear_output2(x) 32 | 33 | return x 34 | 35 | 36 | 37 | class GAT(nn.Module): 38 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 39 | super(GAT, self).__init__() 40 | self.dropout = dropout 41 | 42 | self.linear_relu_input = nn.Sequential( 43 | nn.Linear(embedding_dimension, hidden_dimension), 44 | nn.LeakyReLU() 45 | ) 46 | 47 | self.gat1 = GATConv(hidden_dimension, int(hidden_dimension / 4), heads=4) 48 | self.gat2 = GATConv(hidden_dimension, hidden_dimension) 49 | 50 | self.linear_relu_output1 = nn.Sequential( 51 | nn.Linear(hidden_dimension, hidden_dimension), 52 | nn.LeakyReLU() 53 | ) 54 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 55 | 56 | def forward(self, feature, edge_index, edge_type): 57 | x = self.linear_relu_input(feature.to(torch.float32)) 58 | x = self.gat1(x, edge_index) 59 | x = F.dropout(x, p=self.dropout, training=self.training) 60 | x = self.gat2(x, edge_index) 61 | # x = self.linear_relu_output1(x) 62 | x = self.linear_output2(x) 63 | 64 | return x 65 | 66 | 67 | 68 | class SGC(nn.Module): 69 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 70 | super(SGC, self).__init__() 71 | self.dropout = dropout 72 | 73 | self.linear_relu_input = nn.Sequential( 74 | nn.Linear(embedding_dimension, hidden_dimension), 75 | nn.LeakyReLU() 76 | ) 77 | 78 | self.gcn1 = SGConv(hidden_dimension, hidden_dimension) 79 | self.gcn2 = SGConv(hidden_dimension, hidden_dimension) 80 | 81 | self.linear_relu_output1 = nn.Sequential( 82 | nn.Linear(hidden_dimension, hidden_dimension), 83 | nn.LeakyReLU() 84 | ) 85 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 86 | 87 | def forward(self, feature, edge_index, edge_type): 88 | x = self.linear_relu_input(feature.to(torch.float32)) 89 | x = self.gcn1(x, edge_index) 90 | x = F.dropout(x, p=self.dropout, training=self.training) 91 | x = self.gcn2(x, edge_index) 92 | # x = self.linear_relu_output1(x) 93 | x = self.linear_output2(x) 94 | 95 | return x 96 | 97 | 98 | 99 | class GCN(nn.Module): 100 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 101 | super(GCN, self).__init__() 102 | self.dropout = dropout 103 | 104 | self.linear_relu_input = nn.Sequential( 105 | nn.Linear(embedding_dimension, hidden_dimension), 106 | nn.LeakyReLU() 107 | ) 108 | 109 | self.gcn1 = GCNConv(hidden_dimension, hidden_dimension) 110 | self.gcn2 = GCNConv(hidden_dimension, hidden_dimension) 111 | 112 | self.linear_relu_output1 = nn.Sequential( 113 | nn.Linear(hidden_dimension, hidden_dimension), 114 | nn.LeakyReLU() 115 | ) 116 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 117 | 118 | def forward(self, feature, edge_index, edge_type): 119 | x = self.linear_relu_input(feature.to(torch.float32)) 120 | x = self.gcn1(x, edge_index) 121 | x = F.dropout(x, p=self.dropout, training=self.training) 122 | x = self.gcn2(x, edge_index) 123 | # x = self.linear_relu_output1(x) 124 | x = self.linear_output2(x) 125 | 126 | return x 127 | 128 | 129 | 130 | class SAGE(nn.Module): 131 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 132 | super(SAGE, self).__init__() 133 | self.dropout = dropout 134 | 135 | self.linear_relu_input = nn.Sequential( 136 | nn.Linear(embedding_dimension, hidden_dimension), 137 | nn.LeakyReLU() 138 | ) 139 | 140 | self.sage1 = SAGEConv(hidden_dimension, hidden_dimension) 141 | self.sage2 = SAGEConv(hidden_dimension, hidden_dimension) 142 | 143 | self.linear_relu_output1 = nn.Sequential( 144 | nn.Linear(hidden_dimension, hidden_dimension), 145 | nn.LeakyReLU() 146 | ) 147 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 148 | 149 | def forward(self, feature, edge_index, edge_type): 150 | x = self.linear_relu_input(feature.to(torch.float32)) 151 | x = self.sage1(x, edge_index) 152 | x = F.dropout(x, p=self.dropout, training=self.training) 153 | x = self.sage2(x, edge_index) 154 | # x = self.linear_relu_output1(x) 155 | x = self.linear_output2(x) 156 | 157 | return x 158 | 159 | 160 | class GIN(nn.Module): 161 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 162 | super(GIN, self).__init__() 163 | self.dropout = dropout 164 | 165 | self.linear_relu_input = nn.Sequential( 166 | nn.Linear(embedding_dimension, hidden_dimension), 167 | nn.LeakyReLU() 168 | ) 169 | 170 | self.gin1 = GINConv(nn=nn.Linear(hidden_dimension, hidden_dimension), eps=1e-9) 171 | self.gin2 = GINConv(nn=nn.Linear(hidden_dimension, hidden_dimension), eps=1e-13) 172 | 173 | self.linear_relu_output1 = nn.Sequential( 174 | nn.Linear(hidden_dimension, hidden_dimension), 175 | nn.LeakyReLU() 176 | ) 177 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 178 | 179 | def forward(self, feature, edge_index, edge_type): 180 | x = self.linear_relu_input(feature.to(torch.float32)) 181 | x = self.gin1(x, edge_index) 182 | x = F.dropout(x, p=self.dropout, training=self.training) 183 | x = self.gin2(x, edge_index) 184 | # x = self.linear_relu_output1(x) 185 | x = self.linear_output2(x) 186 | 187 | return x 188 | 189 | 190 | 191 | class GCN2(nn.Module): 192 | def __init__(self, embedding_dimension=16, hidden_dimension=128, out_dim=3, relation_num=2, dropout=0.3): 193 | super(GCN2, self).__init__() 194 | self.dropout = dropout 195 | 196 | self.linear_relu_input = nn.Sequential( 197 | nn.Linear(embedding_dimension, hidden_dimension), 198 | nn.LeakyReLU() 199 | ) 200 | 201 | self.gcn1 = GCN2Conv(hidden_dimension, 0.2, add_self_loops=False) 202 | self.gcn2 = GCN2Conv(hidden_dimension, 0.2, add_self_loops=False) 203 | 204 | self.linear_relu_output1 = nn.Sequential( 205 | nn.Linear(hidden_dimension, hidden_dimension), 206 | nn.LeakyReLU() 207 | ) 208 | self.linear_output2 = nn.Linear(hidden_dimension, out_dim) 209 | 210 | def forward(self, feature, edge_index, edge_type): 211 | x = self.linear_relu_input(feature.to(torch.float32)) 212 | h = self.gcn1(x, x, edge_index) 213 | h = F.dropout(h, p=self.dropout, training=self.training) 214 | h = self.gcn2(h, x, edge_index) 215 | # x = self.linear_relu_output1(x) 216 | h = self.linear_output2(h) 217 | 218 | return h --------------------------------------------------------------------------------