├── .gitignore
├── README.md
├── deep_walk
│   └── model.py
├── dgl
│   └── DGL.py
├── gat
│   └── gat.py
├── gcn
│   └── gcn.py
└── node_2_vec
    └── node_2_vec.py

--------------------------------------------------------------------------------
/.gitignore:
--------------------------------------------------------------------------------
# Created by .ignore support plugin (hsz.mobi)
### Example user template

# IntelliJ project files
.idea
*.iml
out
gen
*.model
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
## Graph Networks
--------------------------------------------------------------------------------
/deep_walk/model.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    model.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/8 11:27
Description : DeepWalk https://arxiv.org/pdf/1403.6652.pdf
              https://github.com/sjyttkl/recbyhand
==================================================
"""

import networkx as nx
import numpy as np
from tqdm import tqdm
from gensim.models import word2vec


# Perform a single random walk given the graph g, a start node, and the walk length
def walkOneTime(g, start_node, walk_length):
    walk = [str(start_node)]  # initialize the walk sequence
    for _ in range(walk_length):  # sample up to the maximum walk length
        current_node = int(walk[-1])
        successors = list(g.successors(current_node))  # g.successors: out-neighbors of the current node
        if len(successors) > 0:
            next_node = np.random.choice(successors)
            walk.append(str(next_node))
        else:  # dead end: stop this walk early
            break
    return walk


# Run num_walks walks of length walk_length over graph g and return the collected sequences
def getDeepwalkSeqs(g, walk_length, num_walks):
    seqs = []
    for _ in tqdm(range(num_walks)):
        start_node = np.random.choice(list(g.nodes))
        w = walkOneTime(g, start_node, walk_length)
        seqs.append(w)
    return seqs


def deepwalk(g, dimensions=10, walk_length=80, num_walks=10, min_count=3):
    seqs = getDeepwalkSeqs(g, walk_length=walk_length, num_walks=num_walks)
    print(seqs)  # debug: dump the sampled walks
    model = word2vec.Word2Vec(seqs, vector_size=dimensions, min_count=min_count)  # gensim >= 4.0 renamed size to vector_size
    return model


if __name__ == '__main__':
    g = nx.fast_gnp_random_graph(n=100, p=0.5, directed=True)  # quickly generate a random directed graph
    model = deepwalk(g, dimensions=10, walk_length=20, num_walks=100, min_count=3)
    print(model.wv.most_similar('2', topn=3))  # inspect the three nodes most similar to node '2'
    model.wv.save_word2vec_format("embedding.wv")  # save the embeddings for downstream tasks
    model.save('m.model')
--------------------------------------------------------------------------------
/dgl/DGL.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    DGL.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/9 23:57
Description : Graph learning framework https://github.com/dmlc/dgl
              https://docs.dgl.ai/guide_cn/index.html
==================================================
"""
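# The file is otherwise an empty placeholder, so below is a minimal, hedged
# sketch of basic DGL usage: build a small directed graph, attach node
# features, and run one round of message passing. The edge list and the
# 5-dimensional features are illustrative assumptions, not part of the
# original file.
import dgl
import dgl.function as fn
import torch

g = dgl.graph((torch.tensor([0, 1, 2]), torch.tensor([1, 2, 3])))  # 4 nodes, 3 edges
g.ndata['x'] = torch.randn(g.num_nodes(), 5)  # random 5-dim node features
# send each source node's feature along its out-edges, then sum incoming messages
g.update_all(fn.copy_u('x', 'm'), fn.sum('m', 'h'))
print(g.ndata['h'].shape)  # torch.Size([4, 5])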
--------------------------------------------------------------------------------
/gat/gat.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    gat.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/9 17:58
Description : Graph Attention Networks (GAT): a graph neural network whose
              message-passing weights are computed by an attention mechanism
==================================================
"""
import torch
import torch.nn as nn
import torch.nn.functional as F
from dgl.data import CoraGraphDataset
from dgl.nn import GATConv


class GAT(nn.Module):
    def __init__(self,
                 g,               # DGL graph object
                 n_layers,        # number of layers
                 in_feats,        # input feature dimension
                 n_hidden,        # hidden feature dimension
                 n_classes,       # number of classes
                 heads,           # number of attention heads per layer
                 activation,      # activation function
                 in_drop,         # dropout rate on input features
                 at_drop,         # dropout rate on attention weights
                 negative_slope,  # negative slope of the LeakyReLU used in attention
                 ):
        super(GAT, self).__init__()
        self.g = g
        self.num_layers = n_layers
        self.activation = activation

        self.gat_layers = nn.ModuleList()

        # input layer
        self.gat_layers.append(GATConv(
            in_feats, n_hidden, heads[0],
            in_drop, at_drop, negative_slope, activation=self.activation))

        # hidden layers
        for l in range(1, n_layers):
            self.gat_layers.append(GATConv(
                n_hidden * heads[l - 1], n_hidden, heads[l],
                in_drop, at_drop, negative_slope, activation=self.activation))

        # output layer
        self.gat_layers.append(GATConv(
            n_hidden * heads[-2], n_classes, heads[-1],
            in_drop, at_drop, negative_slope, activation=None))

    def forward(self, inputs):
        h = inputs
        for l in range(self.num_layers):
            h = self.gat_layers[l](self.g, h).flatten(1)  # concatenate the heads
        logits = self.gat_layers[-1](self.g, h).mean(1)   # average the output heads
        return logits


def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


def train(n_epochs=100,
          lr=5e-3,
          weight_decay=5e-4,
          n_hidden=16,
          n_layers=1,
          activation=F.elu,
          n_heads=3,      # attention heads in the hidden layers
          n_out_heads=1,  # attention heads in the output layer
          feat_drop=0.6,
          attn_drop=0.6,
          negative_slope=0.2):
    data = CoraGraphDataset()
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes
    heads = ([n_heads] * n_layers) + [n_out_heads]
    model = GAT(g,
                n_layers,
                in_feats,
                n_hidden,
                n_classes,
                heads,
                activation,
                feat_drop,
                attn_drop,
                negative_slope)

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(),
                                 lr=lr,
                                 weight_decay=weight_decay)
    for epoch in range(n_epochs):
        model.train()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {} | Loss {:.4f} | Accuracy {:.4f}"
              .format(epoch, loss.item(), acc))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
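# A hedged, standalone sketch (not used by the model above) of how a single GAT
# head scores one edge i -> j: project both endpoint features with a shared
# weight matrix W, concatenate, apply the learned attention vector a, then
# LeakyReLU; a per-node softmax over these logits yields the message-passing
# weights. The shapes noted below are illustrative assumptions.
def gat_edge_score(h_i, h_j, W, a, negative_slope=0.2):
    # h_i, h_j: (in_feats,) node features; W: (in_feats, out_feats); a: (2 * out_feats,)
    z = torch.cat([h_i @ W, h_j @ W])           # concatenated projected endpoint features
    return F.leaky_relu(a @ z, negative_slope)  # unnormalized attention logit e_ij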
if __name__ == '__main__':
    train()
--------------------------------------------------------------------------------
/gcn/gcn.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    gcn.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/8 15:44
Description :
    GCN (Graph Convolutional Networks), proposed in 2017. The appearance of GCN
    marked the emergence of graph neural networks. The most commonly used deep
    learning architectures are CNNs and RNNs; GCN is not only similar to CNN in
    name but also in concept: both are feature extractors. The difference is
    that a CNN extracts features from tensor (grid) data, while a GCN extracts
    features from graph data.
==================================================
"""

import torch
import torch.nn as nn
import torch.nn.functional as F

from dgl.nn.pytorch import GraphConv  # graph convolution layer from the DGL library
from dgl.data import CoraGraphDataset


class GCN(nn.Module):
    def __init__(self, g, in_feats,
                 n_hidden,
                 n_classes,
                 n_layers,
                 activation,
                 dropout):
        super(GCN, self).__init__()
        self.g = g
        self.layers = nn.ModuleList()
        # input layer
        self.layers.append(GraphConv(in_feats, n_hidden, activation=activation))
        # hidden layers
        for i in range(n_layers - 1):
            self.layers.append(GraphConv(n_hidden, n_hidden, activation=activation))
        # output layer
        self.layers.append(GraphConv(n_hidden, n_classes))
        self.dropout = nn.Dropout(p=dropout)

    def forward(self, features):
        h = features
        for i, layer in enumerate(self.layers):
            if i != 0:
                h = self.dropout(h)
            h = layer(self.g, h)
        return h


def evaluate(model, features, labels, mask):
    model.eval()
    with torch.no_grad():
        logits = model(features)
        logits = logits[mask]
        labels = labels[mask]
        _, indices = torch.max(logits, dim=1)
        correct = torch.sum(indices == labels)
        return correct.item() * 1.0 / len(labels)


def train(n_epochs=100, lr=1e-2,
          weight_decay=5e-4, n_hidden=16,
          n_layers=1, activation=F.relu,
          dropout=0.5):
    data = CoraGraphDataset()
    print(data)
    g = data[0]
    features = g.ndata['feat']
    labels = g.ndata['label']
    train_mask = g.ndata['train_mask']  # boolean masks selecting the train/val/test node splits
    val_mask = g.ndata['val_mask']
    test_mask = g.ndata['test_mask']
    in_feats = features.shape[1]
    n_classes = data.num_classes

    model = GCN(g, in_feats, n_hidden, n_classes, n_layers, activation, dropout)

    loss_fcn = torch.nn.CrossEntropyLoss()
    optimizer = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=weight_decay)

    for epoch in range(n_epochs):
        model.train()
        logits = model(features)
        loss = loss_fcn(logits[train_mask], labels[train_mask])
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()
        acc = evaluate(model, features, labels, val_mask)
        print("Epoch {} | Loss {:.4f} | Accuracy {:.4f}".format(epoch, loss.item(), acc))
    print()
    acc = evaluate(model, features, labels, test_mask)
    print("Test accuracy {:.2%}".format(acc))
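# A hedged, standalone sketch (not used by the model above) of the propagation
# rule a GCN layer implements: H' = D^(-1/2) (A + I) D^(-1/2) H W, i.e.
# symmetrically degree-normalized neighborhood aggregation followed by a linear
# projection. The dense tensors here are purely illustrative; DGL's GraphConv
# does the equivalent with sparse message passing.
def gcn_propagate(A, H, W):
    # A: (N, N) adjacency matrix, H: (N, in_feats) features, W: (in_feats, out_feats) weights
    A_hat = A + torch.eye(A.shape[0])        # add self-loops
    d_inv_sqrt = A_hat.sum(dim=1).pow(-0.5)  # D^(-1/2) of the self-loop graph
    A_norm = d_inv_sqrt.unsqueeze(1) * A_hat * d_inv_sqrt.unsqueeze(0)
    return A_norm @ H @ W                    # aggregate neighbors, then project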
if __name__ == "__main__":
    train()
--------------------------------------------------------------------------------
/node_2_vec/node_2_vec.py:
--------------------------------------------------------------------------------
# -*- coding: utf-8 -*-

"""
==================================================
File Name:    node2vec.py
email:        songdongdong@weidian.com
Author :      songdongdong
date:         2021/3/8 13:51
Description : https://arxiv.org/abs/1607.00653
    node2vec was published in 2016. Its difference from DeepWalk is that it adds
    parameters controlling the direction of the random walk. Under DeepWalk,
    every neighbor of the current node is visited with equal probability,
    whereas node2vec uses the return parameter p and the in-out parameter q to
    bias the walk toward breadth-first or depth-first exploration.
    https://www.bilibili.com/video/BV15o4y1R7nC?from=search&seid=6653090908098469268
    https://github.com/dmlc/dgl/blob/master/examples/tensorflow/gat/gat.py
==================================================
"""

import networkx as nx
from node2vec import Node2Vec  # a simple node2vec API built on networkx and gensim

graph = nx.fast_gnp_random_graph(n=100, p=0.5)  # quickly generate a random undirected graph
# initialize the model; workers sets the number of parallel walk threads
node2vec = Node2Vec(graph, dimensions=64, walk_length=30, num_walks=100, p=0.3, q=0.7, workers=4)

model = node2vec.fit()  # train the model (fits word2vec on the generated walks)
print(model.wv.most_similar("2", topn=3))  # inspect the three nodes most similar to node "2"
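# Optional follow-up, not in the original script: persist the embeddings the
# same way the deep_walk example above does, so downstream tasks can reload
# them. The file names below are arbitrary placeholders.
model.wv.save_word2vec_format("n2v_embedding.wv")  # embeddings in word2vec text format
model.save("n2v.model")  # full gensim model, reloadable with Word2Vec.load()
--------------------------------------------------------------------------------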