├── .gitignore
├── README.md
├── deep_walk
│   └── model.py
├── dgl
│   └── DGL.py
├── gat
│   └── gat.py
├── gcn
│   └── gcn.py
└── node_2_vec
    └── node_2_vec.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Created by .ignore support plugin (hsz.mobi)
### Example user template

# IntelliJ project files
.idea
*.iml
out
gen
*.model
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Graph Networks
--------------------------------------------------------------------------------
/deep_walk/model.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    model.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/8 11:27
Description : DeepWalk https://arxiv.org/pdf/1403.6652.pdf
              https://github.com/sjyttkl/recbyhand
==================================================
"""

import networkx as nx
import numpy as np
from tqdm import tqdm
from gensim.models import word2vec


# Perform a single random walk given the graph g, a start node, and the walk length
def walkOneTime(g, start_node, walk_length):
    walk = [str(start_node)]  # initialize the walk sequence
    for _ in range(walk_length):  # sample up to the maximum walk length
        current_node = int(walk[-1])
        successors = list(g.successors(current_node))  # g.successors: out-neighbors of the current node
        if len(successors) > 0:
            next_node = np.random.choice(successors)
            walk.append(str(next_node))
        else:  # dead end: stop this walk early
            break
    return walk


# Run num_walks walks of length walk_length over graph g and return the collected sequences
def getDeepwalkSeqs(g, walk_length, num_walks):
    seqs = []
    for _ in tqdm(range(num_walks)):
        start_node = np.random.choice(list(g.nodes))
        w = walkOneTime(g, start_node, walk_length)
        seqs.append(w)
    return seqs


def deepwalk(g, dimensions=10, walk_length=80, num_walks=10, min_count=3):
    seqs = getDeepwalkSeqs(g, walk_length=walk_length, num_walks=num_walks)
    print(seqs)  # debug: dump the sampled walks
    model = word2vec.Word2Vec(seqs, vector_size=dimensions, min_count=min_count)  # gensim >= 4.0 renamed size to vector_size
    return model


if __name__ == '__main__':
    g = nx.fast_gnp_random_graph(n=100, p=0.5, directed=True)  # quickly generate a random directed graph
    model = deepwalk(g, dimensions=10, walk_length=20, num_walks=100, min_count=3)
    print(model.wv.most_similar('2', topn=3))  # inspect the three nodes most similar to node '2'
    model.wv.save_word2vec_format("embedding.wv")  # save the embeddings for downstream tasks
    model.save('m.model')
--------------------------------------------------------------------------------
/dgl/DGL.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    DGL.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/9 23:57
Description : Graph learning framework https://github.com/dmlc/dgl
              https://docs.dgl.ai/guide_cn/index.html
==================================================
"""
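# The file is otherwise an empty placeholder, so below is a minimal, hedged
# sketch of basic DGL usage: build a small directed graph, attach node
# features, and run one round of message passing. The edge list and the
# 5-dimensional features are illustrative assumptions, not part of the
# original file.
import dgl
import dgl.function as fn
import torch

g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))  # 4 nodes, 3 edges
g.ndata['x'] = torch.randn(g.num_nodes(), 5)  # random 5-dim node features
# send each source node's feature along its out-edges, then sum incoming messages
g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'h'))
print(g.ndata['h'].shape)  # torch.Size([4, 5])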
--------------------------------------------------------------------------------
/gat/gat.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    gat.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/9 17:58
Description : Graph Attention Networks (GAT): a graph neural network whose
              message-passing weights are computed by an attention mechanism
==================================================
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import CoraGraphDataset
from dgl.nn import GATConv


class GAT(nn.Module):
    def __init__(self,
                 g,               # DGL graph object
                 n_layers,        # number of layers
                 in_feats,        # input feature dimension
                 n_hidden,        # hidden feature dimension
                 n_classes,       # number of classes
                 heads,           # number of attention heads per layer
                 activation,      # activation function
                 in_drop,         # dropout rate on input features
                 at_drop,         # dropout rate on attention weights
                 negative_slope,  # negative slope of the LeakyReLU used in attention
                 ):
        super(GAT, self).__init__()
        self.g = g
        self.num_layers = n_layers
        self.activation = activation

        self.gat_layers = nn.ModuleList()

        # input layer
        self.gat_layers.append(GATConv(
            in_feats, n_hidden, heads[0],
            in_drop, at_drop, negative_slope, activation=self.activation))

        # hidden layers
        for l in range(1, n_layers):
            self.gat_layers.append(GATConv(
                n_hidden * heads[l - 1], n_hidden, heads[l],
                in_drop, at_drop, negative_slope, activation=self.activation))

        # output layer
        self.gat_layers.append(GATConv(
            n_hidden * heads[-2], n_classes, heads[-1],
            in_drop, at_drop, negative_slope, activation=None))

    def forward(self, inputs):
        h = inputs
        for l in range(self.num_layers):
            h = self.gat_layers[l](self.g, h).flatten(1)  # concatenate the heads
        logits = self.gat_layers[-1](self.g, h).mean(1)   # average the output heads
        return logits


def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


def train(n_epochs=100,
          lr=5e-3,
          weight_decay=5e-4,
          n_hidden=16,
          n_layers=1,
          activation=F.elu,
          n_heads=3,      # attention heads in the hidden layers
          n_out_heads=1,  # attention heads in the output layer
          feat_drop=0.6,
          attn_drop=0.6,
          negative_slope=0.2):
    data = CoraGraphDataset()
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes
    heads = ([n_heads] * n_layers) + [n_out_heads]
    model = GAT(g,
                n_layers,
                in_feats,
                n_hidden,
                n_classes,
                heads,
                activation,
                feat_drop,
                attn_drop,
                negative_slope)

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    for epoch in range(n_epochs):
        model.train()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {} | Loss {:.4f} | Accuracy {:.4f}"
              .format(epoch, loss.item(), acc))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
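# A hedged, standalone sketch (not used by the model above) of how a single GAT
# head scores one edge i -> j: project both endpoint features with a shared
# weight matrix W, concatenate, apply the learned attention vector a, then
# LeakyReLU; a per-node softmax over these logits yields the message-passing
# weights. The shapes noted below are illustrative assumptions.
def gat_edge_score(h_i, h_j, W, a, negative_slope=0.2):
    # h_i, h_j: (in_feats,) node features; W: (in_feats, out_feats); a: (2 * out_feats,)
    z = torch.cat([h_i @ W, h_j @ W])           # concatenated projected endpoint features
    return F.leaky_relu(a @ z, negative_slope)  # unnormalized attention logit e_ij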
if __name__ == '__main__':
    train()
--------------------------------------------------------------------------------
/gcn/gcn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    gcn.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/8 15:44
Description :
    GCN (Graph Convolutional Networks), proposed in 2017. The appearance of GCN
    marked the emergence of graph neural networks. The most commonly used deep
    learning architectures are CNNs and RNNs; GCN is not only similar to CNN in
    name but also in concept: both are feature extractors. The difference is
    that a CNN extracts features from tensor (grid) data, while a GCN extracts
    features from graph data.
==================================================
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

from dgl.nn.pytorch import GraphConv  # graph convolution layer from the DGL library
from dgl.data import CoraGraphDataset


class GCN(nn.Module):
    def __init__(self, g, in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.g = g
        self.layers = nn.ModuleList()
        # input layer
        self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
        # output layer
        self.layers.append(GraphConv(n_hidden, n_classes))
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features):
        h = features
        for i, layer in enumerate(self.layers):
            if i != 0:
                h = self.dropout(h)
            h = layer(self.g, h)
        return h


def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


def train(n_epochs=100, lr=1e-2,
          weight_decay=5e-4, n_hidden=16,
          n_layers=1, activation=F.relu,
          dropout=0.5):
    data = CoraGraphDataset()
    print(data)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']  # boolean masks selecting the train/val/test node splits
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes

    model = GCN(g, in_feats, n_hidden, n_classes, n_layers, activation, dropout)

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    for epoch in range(n_epochs):
        model.train()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {} | Loss {:.4f} | Accuracy {:.4f}".format(epoch, loss.item(), acc))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
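# A hedged, standalone sketch (not used by the model above) of the propagation
# rule a GCN layer implements: H' = D^(-1/2) (A + I) D^(-1/2) H W, i.e.
# symmetrically degree-normalized neighborhood aggregation followed by a linear
# projection. The dense tensors here are purely illustrative; DGL's GraphConv
# does the equivalent with sparse message passing.
def gcn_propagate(A, H, W):
    # A: (N, N) adjacency matrix, H: (N, in_feats) features, W: (in_feats, out_feats) weights
    A_hat = A + torch.eye(A.shape[0])        # add self-loops
    d_inv_sqrt = A_hat.sum(dim=1).pow(-0.5)  # D^(-1/2) of the self-loop graph
    A_norm = d_inv_sqrt.unsqueeze(1) * A_hat * d_inv_sqrt.unsqueeze(0)
    return A_norm @ H @ W                    # aggregate neighbors, then project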
if __name__ == "__main__":
    train()
--------------------------------------------------------------------------------
/node_2_vec/node_2_vec.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    node2vec.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/8 13:51
Description : https://arxiv.org/abs/1607.00653
    node2vec was published in 2016. Its difference from DeepWalk is that it adds
    parameters controlling the direction of the random walk. Under DeepWalk,
    every neighbor of the current node is visited with equal probability,
    whereas node2vec uses the return parameter p and the in-out parameter q to
    bias the walk toward breadth-first or depth-first exploration.
    https://www.bilibili.com/video/BV15o4y1R7nC?from=search&seid=6653090908098469268
    https://github.com/dmlc/dgl/blob/master/examples/tensorflow/gat/gat.py
==================================================
"""

import networkx as nx
from node2vec import Node2Vec  # a simple node2vec API built on networkx and gensim

graph = nx.fast_gnp_random_graph(n=100, p=0.5)  # quickly generate a random undirected graph
# initialize the model; workers sets the number of parallel walk threads
node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=100, p=0.3, q=0.7, workers=4)

model = node2vec.fit()  # train the model (fits word2vec on the generated walks)
print(model.wv.most_similar("2", topn=3))  # inspect the three nodes most similar to node "2"
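# Optional follow-up, not in the original script: persist the embeddings the
# same way the deep_walk example above does, so downstream tasks can reload
# them. The file names below are arbitrary placeholders.
model.wv.save_word2vec_format("n2v_embedding.wv")  # embeddings in word2vec text format
model.save("n2v.model")  # full gensim model, reloadable with Word2Vec.load()
--------------------------------------------------------------------------------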