├── GAE ├── __init__.py ├── __pycache__ │ ├── layers.cpython-36.pyc │ ├── model.cpython-36.pyc │ ├── __init__.cpython-36.pyc │ ├── optimizer.cpython-36.pyc │ ├── train_model.cpython-36.pyc │ ├── preprocessing.cpython-36.pyc │ └── initialization.cpython-36.pyc ├── initialization.py ├── optimizer.py ├── layers.py ├── model.py ├── preprocessing.py └── train_model.py ├── OpenNE ├── __init__.py ├── __pycache__ │ ├── RWR.cpython-36.pyc │ ├── gf.cpython-36.pyc │ ├── lap.cpython-36.pyc │ ├── graph.cpython-36.pyc │ ├── hope.cpython-36.pyc │ ├── line.cpython-36.pyc │ ├── sdne.cpython-36.pyc │ ├── __init__.cpython-36.pyc │ ├── classify.cpython-36.pyc │ ├── grarep.cpython-36.pyc │ ├── node2vec.cpython-36.pyc │ └── walker.cpython-36.pyc ├── node2vec.py ├── lap.py ├── gf.py ├── L3Hope.py ├── grarep.py ├── classify.py ├── RWR.py ├── graph.py ├── hope.py ├── walker.py ├── line.py └── sdne.py ├── DGI ├── models │ ├── __init__.py │ ├── __pycache__ │ │ ├── dgi.cpython-36.pyc │ │ ├── logreg.cpython-36.pyc │ │ └── __init__.cpython-36.pyc │ ├── logreg.py │ └── dgi.py ├── layers │ ├── __init__.py │ ├── __pycache__ │ │ ├── gcn.cpython-36.pyc │ │ ├── __init__.cpython-36.pyc │ │ ├── readout.cpython-36.pyc │ │ └── discriminator.cpython-36.pyc │ ├── readout.py │ ├── discriminator.py │ └── gcn.py └── utils │ ├── Laplacian.py │ └── process.py ├── README.md ├── evaluation.py ├── utils.py ├── main.py └── embed_train.py /GAE/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /OpenNE/__init__.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | -------------------------------------------------------------------------------- /DGI/models/__init__.py: -------------------------------------------------------------------------------- 1 | from .dgi import DGI 2 | from .logreg import LogReg 3 | -------------------------------------------------------------------------------- /DGI/layers/__init__.py: -------------------------------------------------------------------------------- 1 | from .gcn import GCN 2 | from .readout import AvgReadout 3 | from .discriminator import Discriminator 4 | -------------------------------------------------------------------------------- /GAE/__pycache__/layers.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mustafaCoskunAgu/SiGraC/HEAD/GAE/__pycache__/layers.cpython-36.pyc -------------------------------------------------------------------------------- /GAE/__pycache__/model.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mustafaCoskunAgu/SiGraC/HEAD/GAE/__pycache__/model.cpython-36.pyc -------------------------------------------------------------------------------- /OpenNE/__pycache__/RWR.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mustafaCoskunAgu/SiGraC/HEAD/OpenNE/__pycache__/RWR.cpython-36.pyc -------------------------------------------------------------------------------- /OpenNE/__pycache__/gf.cpython-36.pyc: -------------------------------------------------------------------------------- https://raw.githubusercontent.com/mustafaCoskunAgu/SiGraC/HEAD/OpenNE/__pycache__/gf.cpython-36.pyc 
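The two __init__.py files above for DGI/models and DGI/layers expose the package's public classes (DGI, LogReg, GCN, AvgReadout, Discriminator). A minimal usage sketch, not part of the repository, assuming the repository root is on PYTHONPATH; the toy sizes, random features and identity adjacency are placeholders, and tensors follow the (batch, nodes, features) convention documented in DGI/layers/gcn.py:

import torch
from DGI.models import DGI, LogReg
from DGI.layers import GCN, AvgReadout, Discriminator   # building blocks composed inside DGI

n_nodes, n_feat, n_hid = 4, 8, 16                        # toy sizes (placeholders)
features = torch.rand(1, n_nodes, n_feat)                # (batch, nodes, features)
shuffled = features[:, torch.randperm(n_nodes)]          # corrupted features for negative samples
adj = torch.eye(n_nodes).unsqueeze(0)                    # stand-in for a normalized adjacency, shape (1, n, n)

model = DGI(n_feat, n_hid, 'prelu')
logits = model(features, shuffled, adj, False, None, None, None)   # discriminator scores for positive/negative pairs
emb, summary = model.embed(features, adj, False, None)             # detached node embeddings and graph summary
classifier = LogReg(n_hid, nb_classes=2)                           # downstream head trained on emb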
-------------------------------------------------------------------------------- /DGI/layers/readout.py: -------------------------------------------------------------------------------- 1 | 
import torch 2 | import torch.nn as nn 3 | 4 | # Applies an average on seq, of shape (batch, nodes, features) 5 | # While taking into account the masking of msk 6 | class AvgReadout(nn.Module): 7 | def __init__(self): 8 | super(AvgReadout, self).__init__() 9 | 10 | def forward(self, seq, msk): 11 | if msk is None: 12 | return torch.mean(seq, 1) 13 | else: 14 | msk = torch.unsqueeze(msk, -1) 15 | return torch.sum(seq * msk, 1) / torch.sum(msk) 16 | 17 | -------------------------------------------------------------------------------- /GAE/initialization.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | 7 | def weight_variable_glorot(input_dim, output_dim, name=""): 8 | """Create a weight variable with Glorot & Bengio (AISTATS 2010) 9 | initialization. 10 | """ 11 | init_range = np.sqrt(6.0 / (input_dim + output_dim)) 12 | initial = tf.random_uniform([input_dim, output_dim], minval=-init_range, 13 | maxval=init_range, dtype=tf.float32) 14 | return tf.Variable(initial, name=name) 15 | -------------------------------------------------------------------------------- /DGI/models/logreg.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | import torch.nn.functional as F 4 | 5 | class LogReg(nn.Module): 6 | def __init__(self, ft_in, nb_classes): 7 | super(LogReg, self).__init__() 8 | self.fc = nn.Linear(ft_in, nb_classes) 9 | 10 | for m in self.modules(): 11 | self.weights_init(m) 12 | 13 | def weights_init(self, m): 14 | if isinstance(m, nn.Linear): 15 | torch.nn.init.xavier_uniform_(m.weight.data) 16 | if m.bias is not None: 17 | m.bias.data.fill_(0.0) 18 | 19 | def forward(self, seq): 20 | ret = self.fc(seq) 21 | return ret 22 | 23 | -------------------------------------------------------------------------------- /DGI/utils/Laplacian.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Wed May 20 19:31:11 2020 4 | 5 | @author: Secil 6 | """ 7 | 8 | import numpy as np 9 | import scipy.sparse as sp 10 | 11 | def normalize_adj(adj): 12 | adj = sp.coo_matrix(adj) 13 | rowsum = np.array(adj.sum(1)) 14 | d_inv_sqrt = np.power(rowsum, -0.5).flatten() 15 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 
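# rowsum holds the node degrees; the two lines below build D^{-1/2} as a sparse diagonal and
# return the symmetrically normalized adjacency D^{-1/2} A D^{-1/2} (A is treated as symmetric,
# so the transpose inside the chained dot products does not change the result).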
16 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 17 | return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() 18 | 19 | A = [[0, 1, 2], 20 | [1, 0, 4], 21 | [2, 4, 0]] 22 | 23 | A2 = [[1, 1, 2], 24 | [1, 1, 4], 25 | [2, 4, 1]] 26 | 27 | L = normalize_adj(A) 28 | 29 | LE = normalize_adj(A2) -------------------------------------------------------------------------------- /DGI/models/dgi.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | from DGI.layers import GCN, AvgReadout, Discriminator 4 | 5 | class DGI(nn.Module): 6 | def __init__(self, n_in, n_h, activation): 7 | super(DGI, self).__init__() 8 | self.gcn = GCN(n_in, n_h, activation) 9 | self.read = AvgReadout() 10 | 11 | self.sigm = nn.Sigmoid() 12 | 13 | self.disc = Discriminator(n_h) 14 | 15 | def forward(self, seq1, seq2, adj, sparse, msk, samp_bias1, samp_bias2): 16 | h_1 = self.gcn(seq1, adj, sparse) 17 | 18 | c = self.read(h_1, msk) 19 | c = self.sigm(c) 20 | 21 | h_2 = self.gcn(seq2, adj, sparse) 22 | 23 | ret = self.disc(c, h_1, h_2, samp_bias1, samp_bias2) 24 | 25 | return ret 26 | 27 | # Detach the return variables 28 | def embed(self, seq, adj, sparse, msk): 29 | h_1 = self.gcn(seq, adj, sparse) 30 | c = self.read(h_1, msk) 31 | 32 | return h_1.detach(), c.detach() 33 | 34 | -------------------------------------------------------------------------------- /DGI/layers/discriminator.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class Discriminator(nn.Module): 5 | def __init__(self, n_h): 6 | super(Discriminator, self).__init__() 7 | self.f_k = nn.Bilinear(n_h, n_h, 1) 8 | 9 | for m in self.modules(): 10 | self.weights_init(m) 11 | 12 | def weights_init(self, m): 13 | if isinstance(m, nn.Bilinear): 14 | torch.nn.init.xavier_uniform_(m.weight.data) 15 | if m.bias is not None: 16 | m.bias.data.fill_(0.0) 17 | 18 | def forward(self, c, h_pl, h_mi, s_bias1=None, s_bias2=None): 19 | c_x = torch.unsqueeze(c, 1) 20 | c_x = c_x.expand_as(h_pl) 21 | 22 | sc_1 = torch.squeeze(self.f_k(h_pl, c_x), 2) 23 | sc_2 = torch.squeeze(self.f_k(h_mi, c_x), 2) 24 | 25 | if s_bias1 is not None: 26 | sc_1 += s_bias1 27 | if s_bias2 is not None: 28 | sc_2 += s_bias2 29 | 30 | logits = torch.cat((sc_1, sc_2), 1) 31 | 32 | return logits 33 | 34 | -------------------------------------------------------------------------------- /README.md: -------------------------------------------------------------------------------- 1 | # SiGraC 2 | 3 | Requirements 4 | 5 | Python 3.6 6 | 7 | pytorch 8 | 9 | networkx 10 | 11 | pandas 12 | 13 | scipy 14 | 15 | scikit-learn 16 | 17 | numpy 18 | 19 | ... 20 | 21 | PS: I got a few questions about the torch version. Pyton 3.6.10 torch: 1.9.0 + cpu 22 | 23 | Run 24 | 25 | main.py 26 | 27 | Change parser.add_argument('--embTech', choices=[ 'DGI', 'CN', 'AA', 28 | 29 | ], default='HDI', help='The embedding learning method') 30 | For various convolution matrix option change HDI to RA, HPI,etc. 
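For example, the change described above would look roughly like this in main.py (a sketch only: the exact choices list shipped with the repository may differ; 'RA' and 'HPI' are the option names mentioned here):

parser.add_argument('--embTech', choices=['DGI', 'CN', 'AA', 'RA', 'HPI', 'HDI'],
                    default='RA', help='The embedding learning method')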
31 | 32 | For any question email me via coskunmustafa@ankara.edu.tr 33 | 34 | If you find this code useful, please cite: 35 | 36 | 37 | @article{cocskun2021node, 38 | title={Node similarity-based graph convolution for link prediction in biological networks}, 39 | author={Co{\c{s}}kun, Mustafa and Koyut{\"u}rk, Mehmet}, 40 | journal={Bioinformatics}, 41 | volume={37}, 42 | number={23}, 43 | pages={4501--4508}, 44 | year={2021}, 45 | publisher={Oxford University Press} 46 | } 47 | 48 | 49 | PS: I got a few questions about the torch version. 50 | Pyton 3.6.10 51 | torch: 1.9.0 + cpu 52 | -------------------------------------------------------------------------------- /DGI/layers/gcn.py: -------------------------------------------------------------------------------- 1 | import torch 2 | import torch.nn as nn 3 | 4 | class GCN(nn.Module): 5 | def __init__(self, in_ft, out_ft, act, bias=True): 6 | super(GCN, self).__init__() 7 | self.fc = nn.Linear(in_ft, out_ft, bias=False) 8 | self.act = nn.PReLU() if act == 'prelu' else act 9 | 10 | if bias: 11 | self.bias = nn.Parameter(torch.FloatTensor(out_ft)) 12 | self.bias.data.fill_(0.0) 13 | else: 14 | self.register_parameter('bias', None) 15 | 16 | for m in self.modules(): 17 | self.weights_init(m) 18 | 19 | def weights_init(self, m): 20 | if isinstance(m, nn.Linear): 21 | torch.nn.init.xavier_uniform_(m.weight.data) 22 | if m.bias is not None: 23 | m.bias.data.fill_(0.0) 24 | 25 | # Shape of seq: (batch, nodes, features) 26 | def forward(self, seq, adj, sparse=False): 27 | seq_fts = self.fc(seq) 28 | if sparse: 29 | out = torch.unsqueeze(torch.spmm(adj, torch.squeeze(seq_fts, 0)), 0) 30 | else: 31 | out = torch.bmm(adj, seq_fts) 32 | if self.bias is not None: 33 | out += self.bias 34 | 35 | return self.act(out) 36 | 37 | -------------------------------------------------------------------------------- /OpenNE/node2vec.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from gensim.models import Word2Vec 4 | 5 | from OpenNE import walker 6 | 7 | 8 | class Node2vec(object): 9 | 10 | def __init__(self, graph, path_length, num_paths, dim, p=1.0, q=1.0, dw=False, **kwargs): 11 | 12 | kwargs["workers"] = kwargs.get("workers", 1) 13 | if dw: 14 | kwargs["hs"] = 1 15 | p = 1.0 16 | q = 1.0 17 | 18 | self.graph = graph 19 | if dw: 20 | self.walker = walker.BasicWalker(graph, workers=kwargs["workers"]) 21 | else: 22 | self.walker = walker.Walker( 23 | graph, p=p, q=q, workers=kwargs["workers"]) 24 | print("Preprocess transition probs...") 25 | self.walker.preprocess_transition_probs() 26 | sentences = self.walker.simulate_walks( 27 | num_walks=num_paths, walk_length=path_length) 28 | kwargs["sentences"] = sentences 29 | kwargs["min_count"] = kwargs.get("min_count", 0) 30 | kwargs["size"] = kwargs.get("size", dim) 31 | kwargs["sg"] = 1 32 | 33 | self.size = kwargs["size"] 34 | print("Learning representation...") 35 | word2vec = Word2Vec(**kwargs) 36 | self.vectors = {} 37 | for word in graph.G.nodes(): 38 | self.vectors[word] = word2vec.wv[word] 39 | del word2vec 40 | 41 | def save_embeddings(self, filename): 42 | fout = open(filename, 'w') 43 | node_num = len(self.vectors.keys()) 44 | fout.write("{} {}\n".format(node_num, self.size)) 45 | for node, vec in self.vectors.items(): 46 | fout.write("{} {}\n".format(node, 47 | ' '.join([str(x) for x in vec]))) 48 | fout.close() 49 | -------------------------------------------------------------------------------- /GAE/optimizer.py: 
-------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | 6 | class OptimizerAE(object): 7 | def __init__(self, preds, labels, pos_weight, norm, learning_rate): 8 | preds_sub = preds 9 | labels_sub = labels 10 | 11 | self.cost = norm * tf.reduce_mean( 12 | tf.nn.weighted_cross_entropy_with_logits(logits=preds_sub, targets=labels_sub, pos_weight=pos_weight)) 13 | self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # Adam Optimizer 14 | 15 | self.opt_op = self.optimizer.minimize(self.cost) 16 | self.grads_vars = self.optimizer.compute_gradients(self.cost) 17 | 18 | self.correct_prediction = tf.equal(tf.cast(tf.greater_equal(tf.sigmoid(preds_sub), 0.5), tf.int32), 19 | tf.cast(labels_sub, tf.int32)) 20 | self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32)) 21 | 22 | 23 | class OptimizerVAE(object): 24 | def __init__(self, preds, labels, model, num_nodes, pos_weight, norm, learning_rate): 25 | preds_sub = preds 26 | labels_sub = labels 27 | 28 | self.cost = norm * tf.reduce_mean( 29 | tf.nn.weighted_cross_entropy_with_logits(logits=preds_sub, targets=labels_sub, pos_weight=pos_weight)) 30 | self.optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) # Adam Optimizer 31 | 32 | # Latent loss 33 | self.log_lik = self.cost 34 | self.kl = (0.5 / num_nodes) * tf.reduce_mean(tf.reduce_sum(1 + 2 * model.z_log_std - tf.square(model.z_mean) - 35 | tf.square(tf.exp(model.z_log_std)), 1)) 36 | self.cost -= self.kl 37 | 38 | self.opt_op = self.optimizer.minimize(self.cost) 39 | self.grads_vars = self.optimizer.compute_gradients(self.cost) 40 | 41 | self.correct_prediction = tf.equal(tf.cast(tf.greater_equal(tf.sigmoid(preds_sub), 0.5), tf.int32), 42 | tf.cast(labels_sub, tf.int32)) 43 | self.accuracy = tf.reduce_mean(tf.cast(self.correct_prediction, tf.float32)) 44 | -------------------------------------------------------------------------------- /OpenNE/lap.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import networkx as nx 4 | import numpy as np 5 | from scipy.sparse.linalg import eigsh 6 | 7 | __author__ = "Wang Binlu" 8 | __email__ = "wblmail@whu.edu.cn" 9 | 10 | 11 | class LaplacianEigenmaps(object): 12 | def __init__(self, graph, rep_size=128): 13 | self.g = graph 14 | self.node_size = self.g.G.number_of_nodes() 15 | self.rep_size = rep_size 16 | self.adj_mat = nx.to_numpy_array(self.g.G) 17 | self.vectors = {} 18 | self.embeddings = self.get_train() 19 | look_back = self.g.look_back_list 20 | 21 | for i, embedding in enumerate(self.embeddings): 22 | self.vectors[look_back[i]] = embedding 23 | 24 | def getAdj(self): 25 | node_size = self.g.node_size 26 | look_up = self.g.look_up_dict 27 | adj = np.zeros((node_size, node_size)) 28 | for edge in self.g.G.edges(): 29 | adj[look_up[edge[0]]][look_up[edge[1]]] = self.g.G[edge[0]][edge[1]]['weight'] 30 | return adj 31 | 32 | def getLap(self): 33 | # degree_mat = np.diagflat(np.sum(self.adj_mat, axis=1)) 34 | # print('np.diagflat(np.sum(self.adj_mat, axis=1))') 35 | # deg_trans = np.diagflat(np.reciprocal(np.sqrt(np.sum(self.adj_mat, axis=1)))) 36 | # print('np.diagflat(np.reciprocal(np.sqrt(np.sum(self.adj_mat, axis=1))))') 37 | # deg_trans = np.nan_to_num(deg_trans) 38 | # L = degree_mat-self.adj_mat 39 | # print('begin norm_lap_mat') 40 | # # eye = np.eye(self.node_size) 41 | # 42 | # norm_lap_mat = np.matmul(np.matmul(deg_trans, L), 
deg_trans) 43 | G = self.g.G.to_undirected() 44 | print('begin norm_lap_mat') 45 | norm_lap_mat = nx.normalized_laplacian_matrix(G) 46 | print('finish norm_lap_mat') 47 | return norm_lap_mat 48 | 49 | def get_train(self): 50 | lap_mat = self.getLap() 51 | print('finish getLap...') 52 | w, vec = eigsh(lap_mat, k=self.rep_size) 53 | print('finish eigh(lap_mat)...') 54 | # start = 0 55 | # for i in range(self.node_size): 56 | # if w[i] > 1e-10: 57 | # start = i 58 | # break 59 | # vec = vec[:, start:start+self.rep_size] 60 | 61 | return vec 62 | 63 | def save_embeddings(self, filename): 64 | fout = open(filename, 'w') 65 | node_num = len(self.vectors) 66 | fout.write("{} {}\n".format(node_num, self.rep_size)) 67 | for node, vec in self.vectors.items(): 68 | fout.write("{} {}\n".format(node, ' '.join([str(x) for x in vec]))) 69 | fout.close() 70 | -------------------------------------------------------------------------------- /OpenNE/gf.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | __author__ = "Wang Binlu" 7 | __email__ = "wblmail@whu.edu.cn" 8 | 9 | 10 | class GraphFactorization(object): 11 | def __init__(self, graph, rep_size=128, epoch=120, learning_rate=0.003, weight_decay=1.): 12 | self.g = graph 13 | 14 | self.node_size = graph.G.number_of_nodes() 15 | self.rep_size = rep_size 16 | self.max_iter = epoch 17 | self.lr = learning_rate 18 | self.lamb = weight_decay 19 | self.sess = tf.Session() 20 | self.adj_mat = self.getAdj() 21 | self.vectors = {} 22 | 23 | self.embeddings = self.get_train() 24 | 25 | look_back = self.g.look_back_list 26 | 27 | for i, embedding in enumerate(self.embeddings): 28 | self.vectors[look_back[i]] = embedding 29 | 30 | def getAdj(self): 31 | node_size = self.g.node_size 32 | look_up = self.g.look_up_dict 33 | adj = np.zeros((node_size, node_size)) 34 | for edge in self.g.G.edges(): 35 | adj[look_up[edge[0]]][look_up[edge[1]]] = self.g.G[edge[0]][edge[1]]['weight'] 36 | return adj 37 | 38 | def get_train(self): 39 | 40 | adj_mat = self.adj_mat 41 | 42 | mat_mask = 1. 
* (adj_mat > 0) 43 | 44 | _embeddings = tf.Variable(tf.contrib.layers.xavier_initializer()([self.node_size, self.rep_size]), 45 | dtype=tf.float32, name='embeddings') 46 | 47 | Adj = tf.placeholder(tf.float32, [self.node_size, self.node_size], name='adj_mat') 48 | AdjMask = tf.placeholder(tf.float32, [self.node_size, self.node_size], name='adj_mask') 49 | 50 | cost = tf.reduce_sum( 51 | tf.square(Adj - tf.matmul(_embeddings, tf.transpose(_embeddings)) * AdjMask)) + \ 52 | self.lamb * tf.reduce_sum(tf.square(_embeddings)) 53 | 54 | optimizer = tf.train.AdamOptimizer(self.lr) 55 | train_op = optimizer.minimize(cost) 56 | 57 | init = tf.global_variables_initializer() 58 | self.sess.run(init) 59 | 60 | print("total iter: %i" % self.max_iter) 61 | for step in range(self.max_iter): 62 | self.sess.run(train_op, feed_dict={Adj: adj_mat, AdjMask: mat_mask}) 63 | if step % 50 == 0: 64 | print("step %i: cost: %g" % (step, self.sess.run(cost, feed_dict={Adj: adj_mat, AdjMask: mat_mask}))) 65 | return self.sess.run(_embeddings) 66 | 67 | def save_embeddings(self, filename): 68 | fout = open(filename, 'w') 69 | node_num = len(self.vectors) 70 | fout.write("{} {}\n".format(node_num, self.rep_size)) 71 | for node, vec in self.vectors.items(): 72 | fout.write("{} {}\n".format(node, ' '.join([str(x) for x in vec]))) 73 | fout.close() 74 | -------------------------------------------------------------------------------- /OpenNE/L3Hope.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Thu Apr 9 20:26:32 2020 4 | 5 | @author: Secil 6 | """ 7 | 8 | # -*- coding: utf-8 -*- 9 | 10 | import networkx as nx 11 | import numpy as np 12 | import scipy.sparse.linalg as lg 13 | 14 | __author__ = "Alan WANG" 15 | __email__ = "alan1995wang@outlook.com" 16 | 17 | import scipy.sparse as sp 18 | class HOPE(object): 19 | def __init__(self, graph, d): 20 | ''' 21 | d: representation vector dimension 22 | ''' 23 | self._d = d 24 | self._graph = graph.G 25 | self.g = graph 26 | self._node_num = graph.node_size 27 | self.learn_embedding() 28 | 29 | def calc_A_hat(adj_matrix): 30 | nnodes = adj_matrix.shape[0] 31 | mu = 0.95 32 | eta = 1e-6 33 | A = adj_matrix# + sp.eye(nnodes) 34 | D_vec = np.sum(A, axis=1) 35 | D_vec_invsqrt_corr = 1 / np.sqrt(D_vec) 36 | D_invsqrt_corr = sp.diags(D_vec_invsqrt_corr) 37 | return mu*D_invsqrt_corr @ A @ D_invsqrt_corr + (1-mu)*sp.eye(nnodes) + eta*sp.eye(nnodes) 38 | 39 | 40 | def learn_embedding(self): 41 | 42 | #graph = self.g.G 43 | graph = self.g.G.to_undirected() 44 | A = nx.to_numpy_matrix(graph) 45 | mu = 0.1; 46 | eta = 1e-6 47 | 48 | norm_lap_mat = nx.laplacian_matrix(graph) 49 | 50 | A = mu*norm_lap_mat + (1-mu)*np.eye(graph.number_of_nodes()) + eta*np.eye(graph.number_of_nodes()) 51 | #A = norm_lap_mat 52 | # self._beta = 0.0728 53 | 54 | # M_g = np.eye(graph.number_of_nodes()) - self._beta * A 55 | # M_l = self._beta * A 56 | print("dimension = ", self._d) 57 | print("PPR") 58 | M_g = np.eye(graph.number_of_nodes()) 59 | M_l = np.dot(A, A) 60 | 61 | S = np.dot(np.linalg.inv(M_g), M_l) 62 | # s: \sigma_k 63 | u, s, vt = lg.svds(S, k=self._d // 2) 64 | sigma = np.diagflat(np.sqrt(s)) 65 | X1 = np.dot(u, sigma) 66 | X2 = np.dot(vt.T, sigma) 67 | # self._X = X2 68 | self._X = np.concatenate((X1, X2), axis=1) 69 | 70 | @property 71 | def vectors(self): 72 | vectors = {} 73 | look_back = self.g.look_back_list 74 | for i, embedding in enumerate(self._X): 75 | vectors[look_back[i]] = embedding 76 | return 
vectors 77 | 78 | def save_embeddings(self, filename): 79 | fout = open(filename, 'w') 80 | node_num = len(self.vectors.keys()) 81 | fout.write("{} {}\n".format(node_num, self._d)) 82 | for node, vec in self.vectors.items(): 83 | fout.write("{} {}\n".format(node, 84 | ' '.join([str(x) for x in vec]))) 85 | fout.close() 86 | -------------------------------------------------------------------------------- /OpenNE/grarep.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | from scipy.sparse.linalg import svds 5 | from sklearn.preprocessing import normalize 6 | 7 | 8 | class GraRep(object): 9 | 10 | def __init__(self, graph, Kstep, dim): 11 | self.g = graph 12 | self.Kstep = Kstep 13 | assert dim % Kstep == 0 14 | self.dim = int(dim / Kstep) 15 | self.train() 16 | 17 | def getAdjMat(self): 18 | graph = self.g.G 19 | node_size = self.g.node_size 20 | look_up = self.g.look_up_dict 21 | adj = np.zeros((node_size, node_size)) 22 | for edge in self.g.G.edges(): 23 | adj[look_up[edge[0]]][look_up[edge[1]]] = 1.0 24 | adj[look_up[edge[1]]][look_up[edge[0]]] = 1.0 25 | # ScaleSimMat 26 | # print('finish getAdjMat') 27 | return np.matrix(adj) 28 | 29 | def GetProbTranMat(self, Ak): 30 | # print(np.sum(Ak, axis=0)) 31 | tileMat = np.tile(np.sum(Ak, axis=0), (self.node_size, 1)) 32 | # print(np.min(tileMat)) 33 | probTranMat = np.log(Ak / tileMat) - np.log(1.0 / self.node_size) 34 | probTranMat[probTranMat < 0] = 0 35 | probTranMat[probTranMat == np.nan] = 0 36 | return probTranMat 37 | 38 | def GetRepUseSVD(self, probTranMat, alpha): 39 | # U, S, VT = la.svd(probTranMat) 40 | 41 | U, Sigma, VT = svds(probTranMat, self.dim) 42 | # print("finish svd..") 43 | Sigma = np.diag(Sigma) 44 | W = np.matmul(U, np.power(Sigma, alpha)) 45 | C = np.matmul(VT.T, np.power(Sigma, alpha)) 46 | # print(np.sum(U)) 47 | embeddings = W + C 48 | return embeddings 49 | # Ud = U[:, 0:self.dim] 50 | # Sd = S[0:self.dim] 51 | # return np.array(Ud)*np.power(Sd, alpha).reshape((self.dim)) 52 | 53 | def save_embeddings(self, filename): 54 | fout = open(filename, 'w') 55 | node_num = len(self.vectors.keys()) 56 | fout.write("{} {}\n".format(node_num, self.Kstep * self.dim)) 57 | for node, vec in self.vectors.items(): 58 | fout.write("{} {}\n".format(node, ' '.join([str(x) for x in vec]))) 59 | fout.close() 60 | 61 | def train(self): 62 | self.adj = self.getAdjMat() 63 | self.node_size = self.adj.shape[0] 64 | self.Ak = np.matrix(np.identity(self.node_size)) 65 | self.RepMat = np.zeros((self.node_size, int(self.dim * self.Kstep))) 66 | for i in range(self.Kstep): 67 | print('Kstep =', i) 68 | self.Ak = np.dot(self.Ak, self.adj) 69 | # print('finish np.dot(self.Ak, self.adj)') 70 | probTranMat = self.GetProbTranMat(self.Ak) 71 | # print('finish GetProbTranMat') 72 | Rk = self.GetRepUseSVD(probTranMat, 0.5) 73 | # print('finish GetRepUseSVD') 74 | Rk = normalize(Rk, axis=1, norm='l2') 75 | # print('finish normalize') 76 | self.RepMat[:, self.dim * i:self.dim * (i + 1)] = Rk[:, :] 77 | # print('finish RepMat[:, self.dim*i:self.dim*(i+1)] = Rk[:, :]') 78 | # get embeddings 79 | self.vectors = {} 80 | look_back = self.g.look_back_list 81 | for i, embedding in enumerate(self.RepMat): 82 | self.vectors[look_back[i]] = embedding 83 | -------------------------------------------------------------------------------- /OpenNE/classify.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | 
import numpy 4 | from sklearn.metrics import f1_score 5 | from sklearn.multiclass import OneVsRestClassifier 6 | from sklearn.preprocessing import MultiLabelBinarizer 7 | 8 | 9 | class TopKRanker(OneVsRestClassifier): 10 | def predict(self, X, top_k_list): 11 | probs = numpy.asarray(super(TopKRanker, self).predict_proba(X)) 12 | all_labels = [] 13 | for i, k in enumerate(top_k_list): 14 | probs_ = probs[i, :] 15 | labels = self.classes_[probs_.argsort()[-k:]].tolist() 16 | probs_[:] = 0 17 | probs_[labels] = 1 18 | all_labels.append(probs_) 19 | return numpy.asarray(all_labels) 20 | 21 | 22 | class Classifier(object): 23 | 24 | def __init__(self, vectors, clf): 25 | self.embeddings = vectors 26 | self.clf = TopKRanker(clf) 27 | self.binarizer = MultiLabelBinarizer(sparse_output=True) 28 | 29 | def train(self, X, Y, Y_all): 30 | self.binarizer.fit(Y_all) 31 | X_train = [self.embeddings[x] for x in X] 32 | Y = self.binarizer.transform(Y) 33 | self.clf.fit(X_train, Y) 34 | 35 | def evaluate(self, X, Y): 36 | top_k_list = [len(l) for l in Y] 37 | Y_ = self.predict(X, top_k_list) 38 | Y = self.binarizer.transform(Y) 39 | averages = ["micro", "macro", "samples", "weighted"] 40 | results = {} 41 | for average in averages: 42 | results[average] = f1_score(Y, Y_, average=average) 43 | # print('Results, using embeddings of dimensionality', len(self.embeddings[X[0]])) 44 | # print('-------------------') 45 | print(results) 46 | return results 47 | # print('-------------------') 48 | 49 | def predict(self, X, top_k_list): 50 | X_ = numpy.asarray([self.embeddings[x] for x in X]) 51 | Y = self.clf.predict(X_, top_k_list=top_k_list) 52 | return Y 53 | 54 | def split_train_evaluate(self, X, Y, train_precent, seed=0): 55 | state = numpy.random.get_state() 56 | 57 | training_size = int(train_precent * len(X)) 58 | numpy.random.seed(seed) 59 | shuffle_indices = numpy.random.permutation(numpy.arange(len(X))) 60 | X_train = [X[shuffle_indices[i]] for i in range(training_size)] 61 | Y_train = [Y[shuffle_indices[i]] for i in range(training_size)] 62 | X_test = [X[shuffle_indices[i]] for i in range(training_size, len(X))] 63 | Y_test = [Y[shuffle_indices[i]] for i in range(training_size, len(X))] 64 | 65 | self.train(X_train, Y_train, Y) 66 | numpy.random.set_state(state) 67 | return self.evaluate(X_test, Y_test) 68 | 69 | 70 | def load_embeddings(filename): 71 | fin = open(filename, 'r') 72 | node_num, size = [int(x) for x in fin.readline().strip().split()] 73 | vectors = {} 74 | while 1: 75 | l = fin.readline() 76 | if l == '': 77 | break 78 | vec = l.strip().split(' ') 79 | assert len(vec) == size + 1 80 | vectors[vec[0]] = [float(x) for x in vec[1:]] 81 | fin.close() 82 | assert len(vectors) == node_num 83 | return vectors 84 | 85 | 86 | def read_node_label(filename): 87 | fin = open(filename, 'r') 88 | X = [] 89 | Y = [] 90 | while 1: 91 | l = fin.readline() 92 | if l == '': 93 | break 94 | vec = l.strip().split(' ') 95 | X.append(vec[0]) 96 | Y.append(vec[1:]) 97 | fin.close() 98 | return X, Y 99 | -------------------------------------------------------------------------------- /OpenNE/RWR.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | """ 3 | Created on Tue Apr 7 22:36:32 2020 4 | 5 | @author: Secil 6 | """ 7 | 8 | # -*- coding: utf-8 -*- 9 | 10 | import networkx as nx 11 | import numpy as np 12 | from scipy.sparse.linalg import eigsh 13 | import scipy.sparse as sp 14 | import scipy.io as sio 15 | 16 | import scipy.sparse.linalg as 
slinalg 17 | import scipy.linalg as linalg 18 | import scipy.sparse as sp 19 | 20 | __author__ = "Mustafa Coskun" 21 | __email__ = "mxc522@case.edu" 22 | 23 | 24 | class RWR(object): 25 | def __init__(self, graph, rep_size=100): 26 | self.g = graph 27 | self.node_size = self.g.G.number_of_nodes() 28 | self.rep_size = rep_size 29 | adj_mat = nx.to_numpy_array(self.g.G) 30 | self.adj_mat = adj_mat 31 | self.vectors = {} 32 | self.embeddings = self.get_train() 33 | look_back = self.g.look_back_list 34 | 35 | for i, embedding in enumerate(self.embeddings): 36 | self.vectors[look_back[i]] = embedding 37 | 38 | def getAdj(self): 39 | node_size = self.g.node_size 40 | look_up = self.g.look_up_dict 41 | adj = np.zeros((node_size, node_size)) 42 | for edge in self.g.G.edges(): 43 | adj[look_up[edge[0]]][look_up[edge[1]]] = self.g.G[edge[0]][edge[1]]['weight'] 44 | return adj 45 | 46 | def getLap(self): 47 | # degree_mat = np.diagflat(np.sum(self.adj_mat, axis=1)) 48 | # print('np.diagflat(np.sum(self.adj_mat, axis=1))') 49 | # deg_trans = np.diagflat(np.reciprocal(np.sqrt(np.sum(self.adj_mat, axis=1)))) 50 | # print('np.diagflat(np.reciprocal(np.sqrt(np.sum(self.adj_mat, axis=1))))') 51 | # deg_trans = np.nan_to_num(deg_trans) 52 | # L = degree_mat-self.adj_mat 53 | # print('begin norm_lap_mat') 54 | # # eye = np.eye(self.node_size) 55 | # 56 | # norm_lap_mat = np.matmul(np.matmul(deg_trans, L), deg_trans) 57 | G = self.g.G.to_undirected() 58 | print('begin norm_lap_mat') 59 | norm_lap_mat = nx.normalized_laplacian_matrix(G) 60 | print('finish norm_lap_mat') 61 | return norm_lap_mat 62 | def calc_A_hat(self): 63 | #nnodes = adj_matrix.shape[0] 64 | A = self.adj_mat 65 | D_vec = np.sum(A, axis=1) 66 | D_vec_invsqrt_corr = 1 / np.sqrt(D_vec) 67 | D_invsqrt_corr = sp.diags(D_vec_invsqrt_corr) 68 | return D_invsqrt_corr @ A @ D_invsqrt_corr 69 | 70 | 71 | def calc_ppr_exact(self): 72 | adj_matrix = self.adj_mat 73 | nnodes = adj_matrix.shape[0] 74 | M = self.calc_A_hat() 75 | A_inner = sp.eye(nnodes) - (1 - 0.85) * M 76 | return 0.85 * np.linalg.inv(A_inner) 77 | 78 | def get_train(self): 79 | #lap_mat = self.calc_ppr_exact() 80 | 81 | mat = sio.loadmat('DDIEmb.mat') 82 | 83 | #index = mat['index2'] 84 | print('finish getLap...') 85 | vec = mat['vec'] 86 | print('finish eigh(lap_mat)...') 87 | # start = 0 88 | # for i in range(self.node_size): 89 | # if w[i] > 1e-10: 90 | # start = i 91 | # break 92 | # vec = vec[:, start:start+self.rep_size] 93 | 94 | return vec 95 | 96 | def save_embeddings(self, filename): 97 | fout = open(filename, 'w') 98 | node_num = len(self.vectors) 99 | fout.write("{} {}\n".format(node_num, self.rep_size)) 100 | for node, vec in self.vectors.items(): 101 | fout.write("{} {}\n".format(node, ' '.join([str(x) for x in vec]))) 102 | fout.close() 103 | -------------------------------------------------------------------------------- /OpenNE/graph.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | """Graph utilities.""" 4 | 5 | import networkx as nx 6 | 7 | import numpy as np 8 | from scipy.io import loadmat 9 | 10 | __author__ = "Zhang Zhengyan" 11 | __email__ = "zhangzhengyan14@mails.tsinghua.edu.cn" 12 | 13 | 14 | class Graph(object): 15 | def __init__(self): 16 | self.G = None 17 | self.look_up_dict = {} 18 | self.look_back_list = [] 19 | self.node_size = 0 20 | 21 | def encode_node(self): 22 | look_up = self.look_up_dict 23 | look_back = self.look_back_list 24 | for node in self.G.nodes(): 25 | look_up[node] = 
self.node_size 26 | look_back.append(node) 27 | self.node_size += 1 28 | self.G.nodes[node]['status'] = '' 29 | 30 | def read_g(self, g): 31 | self.G = g 32 | self.encode_node() 33 | 34 | def read_adjlist(self, filename): 35 | """ Read graph from adjacency file in which the edge must be unweighted 36 | the format of each line: v1 n1 n2 n3 ... nk 37 | :param filename: the filename of input file 38 | """ 39 | self.G = nx.read_adjlist(filename, create_using=nx.DiGraph()) 40 | for i, j in self.G.edges(): 41 | self.G[i][j]['weight'] = 1.0 42 | self.encode_node() 43 | 44 | def read_edgelist(self, filename, weighted=False, directed=False): 45 | self.G = nx.DiGraph() 46 | 47 | if directed: 48 | def read_unweighted(l): 49 | src, dst = l.split() 50 | self.G.add_edge(src, dst) 51 | self.G[src][dst]['weight'] = 1.0 52 | 53 | def read_weighted(l): 54 | src, dst, w = l.split() 55 | self.G.add_edge(src, dst) 56 | self.G[src][dst]['weight'] = float(w) 57 | else: 58 | def read_unweighted(l): 59 | src, dst = l.split() 60 | self.G.add_edge(src, dst) 61 | self.G.add_edge(dst, src) 62 | self.G[src][dst]['weight'] = 1.0 63 | self.G[dst][src]['weight'] = 1.0 64 | 65 | def read_weighted(l): 66 | src, dst, w = l.split() 67 | # print(src, dst, float(w)) 68 | self.G.add_edge(src, dst) 69 | self.G.add_edge(dst, src) 70 | self.G[src][dst]['weight'] = float(w) 71 | self.G[dst][src]['weight'] = float(w) 72 | fin = open(filename, 'r') 73 | func = read_unweighted 74 | if weighted: 75 | func = read_weighted 76 | while 1: 77 | l = fin.readline() 78 | if l == '': 79 | break 80 | func(l) 81 | fin.close() 82 | self.encode_node() 83 | 84 | def readMatFile(self,filename): 85 | ne = loadmat(filename) 86 | ne = ne['adj'] 87 | G=nx.from_numpy_matrix(ne) 88 | self.G =G 89 | self.encode_node() 90 | 91 | def read_node_label(self, filename): 92 | fin = open(filename, 'r') 93 | while 1: 94 | l = fin.readline() 95 | if l == '': 96 | break 97 | vec = l.split() 98 | self.G.nodes[vec[0]]['label'] = vec[1:] 99 | fin.close() 100 | 101 | def read_node_features(self, filename): 102 | fin = open(filename, 'r') 103 | for l in fin.readlines(): 104 | vec = l.split() 105 | self.G.nodes[vec[0]]['feature'] = np.array( 106 | [float(x) for x in vec[1:]]) 107 | fin.close() 108 | 109 | def read_node_status(self, filename): 110 | fin = open(filename, 'r') 111 | while 1: 112 | l = fin.readline() 113 | if l == '': 114 | break 115 | vec = l.split() 116 | self.G.nodes[vec[0]]['status'] = vec[1] # train test valid 117 | fin.close() 118 | 119 | def read_edge_label(self, filename): 120 | fin = open(filename, 'r') 121 | while 1: 122 | l = fin.readline() 123 | if l == '': 124 | break 125 | vec = l.split() 126 | self.G[vec[0]][vec[1]]['label'] = vec[2:] 127 | fin.close() 128 | -------------------------------------------------------------------------------- /OpenNE/hope.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import networkx as nx 4 | import numpy as np 5 | import scipy.sparse.linalg as lg 6 | import scipy.io as sio 7 | #import hdf5storage as hd 8 | from scipy.sparse.linalg import svds 9 | import scipy.sparse as sp 10 | 11 | __author__ = "Alan WANG" 12 | __email__ = "alan1995wang@outlook.com" 13 | 14 | 15 | class HOPE(object): 16 | def __init__(self, graph, d): 17 | ''' 18 | d: representation vector dimension 19 | ''' 20 | self._d = d 21 | self._graph = graph.G 22 | self.g = graph 23 | self._node_num = graph.node_size 24 | self.learn_embedding() 25 | 26 | def learn_embedding(self): 27 | 
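# The variants in this method all follow the same HOPE-style recipe: build a similarity matrix
# S = M_g^{-1} M_l (the active block uses M_g = I and M_l = A.A, i.e. common neighbours; the
# commented-out blocks switch M_g / M_l to RWR, L3, Katz, or load a precomputed S), then take a
# truncated SVD of S and concatenate [U sqrt(Sigma), V sqrt(Sigma)] as the embedding self._X.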
28 | graph = self.g.G.to_undirected() 29 | A = nx.to_numpy_matrix(graph) 30 | # idSave={} 31 | # idSave['Net']=A 32 | # sio.savemat('Node2VecPPIAdj.mat',idSave) 33 | 34 | 35 | #--------------------Open for RWR --------------- 36 | # print("Page Rank") 37 | # norm_lap_mat = nx.laplacian_matrix(graph) 38 | # alpha = 0.1 39 | # 40 | # M_g = np.eye(graph.number_of_nodes())- alpha*norm_lap_mat 41 | # M_l = (1-alpha)*np.eye(graph.number_of_nodes()) 42 | 43 | #---------------------------Open this L3---------------------- 44 | # print("L3G") 45 | # norm_lap_mat = nx.laplacian_matrix(graph) 46 | # mu = 0.1; 47 | # eta = 1e-6; 48 | # M_g = mu*norm_lap_mat + (1-mu)*np.eye(graph.number_of_nodes()) + eta*np.eye(graph.number_of_nodes()) 49 | # M_l = np.eye(graph.number_of_nodes()) 50 | 51 | #----------------------Open for Katz-------------------------- 52 | # print("Katz Measure") 53 | # self._beta = 0.0728 54 | # M_g = np.eye(graph.number_of_nodes()) - self._beta * A 55 | # M_l = self._beta * A 56 | #------------------------------------------------------------- 57 | 58 | 59 | #----------------------------Open this part for CN --------------------- 60 | # 61 | M_g = np.eye(graph.number_of_nodes()) 62 | 63 | M_l = np.dot(A, A) 64 | # # ------------------------------------- 65 | S = np.dot(np.linalg.inv(M_g), M_l) 66 | # s: \sigma_k 67 | u, s, vt = lg.svds(S, k=self._d // 2) 68 | sigma = np.diagflat(np.sqrt(s)) 69 | X1 = np.dot(u, sigma) 70 | X2 = np.dot(vt.T, sigma) 71 | # self._X = X2 72 | self._X = np.concatenate((X1, X2), axis=1) 73 | #--------------------LoadTopKEmbeddings-------------------------- 74 | # print("Load Top-k Embedding") 75 | # mydata = sio.loadmat('TopKEmbedding50.mat') 76 | # self._X = mydata['Embedding'] 77 | #---------------------------------------------------------------- 78 | 79 | 80 | ###################Correlation based S matrix-------------------- 81 | # mat = hd.loadmat('S50.mat') 82 | # S = mat['S'] 83 | # u, s, vt = lg.svds(S, k=self._d // 2) 84 | # sigma = np.diagflat(np.sqrt(s)) 85 | # X1 = np.dot(u, sigma) 86 | # X2 = np.dot(vt.T, sigma) 87 | # # self._X = X2 88 | # self._X = np.concatenate((X1, X2), axis=1) 89 | 90 | ####################Direct SVD------------------------------------- 91 | # 92 | # print("LP3D SVD") 93 | # norm_lap_mat = nx.laplacian_matrix(graph) 94 | # mu = 0.9; 95 | # eta = 1e-6; 96 | # M_g = mu*norm_lap_mat + (1-mu)*np.eye(graph.number_of_nodes()) + eta*np.eye(graph.number_of_nodes()) 97 | # #M_l = np.eye(graph.number_of_nodes()) 98 | # U, Sigma, VT = svds(M_g, k=self._d) 99 | # Sigma = np.diag(Sigma) 100 | # W = np.matmul(U, np.sqrt(Sigma)) 101 | # C = np.matmul(VT.T, np.sqrt(Sigma)) 102 | # # print(np.sum(U)) 103 | # embeddings = W + C 104 | # self._X = embeddings 105 | @property 106 | def vectors(self): 107 | vectors = {} 108 | look_back = self.g.look_back_list 109 | for i, embedding in enumerate(self._X): 110 | vectors[look_back[i]] = embedding 111 | return vectors 112 | 113 | def save_embeddings(self, filename): 114 | fout = open(filename, 'w') 115 | node_num = len(self.vectors.keys()) 116 | fout.write("{} {}\n".format(node_num, self._d)) 117 | for node, vec in self.vectors.items(): 118 | fout.write("{} {}\n".format(node, 119 | ' '.join([str(x) for x in vec]))) 120 | fout.close() -------------------------------------------------------------------------------- /GAE/layers.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | from GAE.initialization 
import * 6 | 7 | # global unique layer ID dictionary for layer name assignment 8 | _LAYER_UIDS = {} 9 | 10 | 11 | def get_layer_uid(layer_name=''): 12 | """Helper function, assigns unique layer IDs 13 | """ 14 | if layer_name not in _LAYER_UIDS: 15 | _LAYER_UIDS[layer_name] = 1 16 | return 1 17 | else: 18 | _LAYER_UIDS[layer_name] += 1 19 | return _LAYER_UIDS[layer_name] 20 | 21 | 22 | def dropout_sparse(x, keep_prob, num_nonzero_elems): 23 | """Dropout for sparse tensors. Currently fails for very large sparse tensors (>1M elements) 24 | """ 25 | noise_shape = [num_nonzero_elems] 26 | random_tensor = keep_prob 27 | random_tensor += tf.random_uniform(noise_shape) 28 | dropout_mask = tf.cast(tf.floor(random_tensor), dtype=tf.bool) 29 | pre_out = tf.sparse_retain(x, dropout_mask) 30 | return pre_out * (1. / keep_prob) 31 | 32 | 33 | class Layer(object): 34 | """Base layer class. Defines basic API for all layer objects. 35 | 36 | # Properties 37 | name: String, defines the variable scope of the layer. 38 | 39 | # Methods 40 | _call(inputs): Defines computation graph of layer 41 | (i.e. takes input, returns output) 42 | __call__(inputs): Wrapper for _call() 43 | """ 44 | 45 | def __init__(self, **kwargs): 46 | allowed_kwargs = {'name', 'logging'} 47 | for kwarg in kwargs.keys(): 48 | assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg 49 | name = kwargs.get('name') 50 | if not name: 51 | layer = self.__class__.__name__.lower() 52 | name = layer + '_' + str(get_layer_uid(layer)) 53 | self.name = name 54 | self.vars = {} 55 | logging = kwargs.get('logging', False) 56 | self.logging = logging 57 | self.issparse = False 58 | 59 | def _call(self, inputs): 60 | return inputs 61 | 62 | def __call__(self, inputs): 63 | with tf.name_scope(self.name): 64 | outputs = self._call(inputs) 65 | return outputs 66 | 67 | 68 | class GraphConvolution(Layer): 69 | """Basic graph convolution layer for undirected graph without edge labels.""" 70 | 71 | def __init__(self, input_dim, output_dim, adj, dropout=0., act=tf.nn.relu, **kwargs): 72 | super(GraphConvolution, self).__init__(**kwargs) 73 | with tf.variable_scope(self.name + '_vars'): 74 | self.vars['weights'] = weight_variable_glorot(input_dim, output_dim, name="weights") 75 | self.dropout = dropout 76 | self.adj = adj 77 | self.act = act 78 | 79 | def _call(self, inputs): 80 | x = inputs 81 | x = tf.nn.dropout(x, 1 - self.dropout) 82 | x = tf.matmul(x, self.vars['weights']) 83 | x = tf.sparse_tensor_dense_matmul(self.adj, x) 84 | outputs = self.act(x) 85 | return outputs 86 | 87 | 88 | class GraphConvolutionSparse(Layer): 89 | """Graph convolution layer for sparse inputs.""" 90 | 91 | def __init__(self, input_dim, output_dim, adj, features_nonzero, dropout=0., act=tf.nn.relu, **kwargs): 92 | super(GraphConvolutionSparse, self).__init__(**kwargs) 93 | with tf.variable_scope(self.name + '_vars'): 94 | self.vars['weights'] = weight_variable_glorot(input_dim, output_dim, name="weights") 95 | self.dropout = dropout 96 | self.adj = adj 97 | self.act = act 98 | self.issparse = True 99 | self.features_nonzero = features_nonzero 100 | 101 | def _call(self, inputs): 102 | x = inputs 103 | x = dropout_sparse(x, 1 - self.dropout, self.features_nonzero) 104 | x = tf.sparse_tensor_dense_matmul(x, self.vars['weights']) 105 | x = tf.sparse_tensor_dense_matmul(self.adj, x) 106 | outputs = self.act(x) 107 | return outputs 108 | 109 | 110 | class InnerProductDecoder(Layer): 111 | """Decoder model layer for link prediction.""" 112 | 113 | def __init__(self, 
input_dim, dropout=0., act=tf.nn.sigmoid, **kwargs): 114 | super(InnerProductDecoder, self).__init__(**kwargs) 115 | self.dropout = dropout 116 | self.act = act 117 | 118 | def _call(self, inputs): 119 | inputs = tf.nn.dropout(inputs, 1 - self.dropout) 120 | x = tf.transpose(inputs) 121 | x = tf.matmul(inputs, x) 122 | x = tf.reshape(x, [-1]) 123 | outputs = self.act(x) 124 | return outputs 125 | -------------------------------------------------------------------------------- /evaluation.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | from sklearn.linear_model import LogisticRegression 4 | from sklearn.metrics import accuracy_score, average_precision_score, f1_score, roc_auc_score 5 | from sklearn.multiclass import OneVsRestClassifier 6 | from sklearn.preprocessing import MultiLabelBinarizer 7 | 8 | from utils import * 9 | 10 | 11 | def LinkPrediction(embedding_look_up, original_graph, train_graph, test_pos_edges, seed): 12 | random.seed(seed) 13 | 14 | train_neg_edges = generate_neg_edges(original_graph, len(train_graph.edges()), seed) 15 | 16 | # create a auxiliary graph to ensure that testing negative edges will not used in training 17 | G_aux = copy.deepcopy(original_graph) 18 | G_aux.add_edges_from(train_neg_edges) 19 | test_neg_edges = generate_neg_edges(G_aux, len(test_pos_edges), seed) 20 | 21 | # construct X_train, y_train, X_test, y_test 22 | X_train = [] 23 | y_train = [] 24 | for edge in train_graph.edges(): 25 | node_u_emb = embedding_look_up[edge[0]] 26 | node_v_emb = embedding_look_up[edge[1]] 27 | #feature_vector = np.append(node_v_emb, node_u_emb) 28 | ####################### Perform Hadamard Product################# 29 | feature_vector = np.multiply(node_u_emb,node_v_emb) 30 | X_train.append(feature_vector) 31 | y_train.append(1) 32 | for edge in train_neg_edges: 33 | node_u_emb = embedding_look_up[edge[0]] 34 | node_v_emb = embedding_look_up[edge[1]] 35 | #feature_vector = np.append(node_v_emb, node_u_emb) 36 | feature_vector = np.multiply(node_u_emb,node_v_emb) 37 | X_train.append(feature_vector) 38 | y_train.append(0) 39 | 40 | X_test = [] 41 | y_test = [] 42 | for edge in test_pos_edges: 43 | node_u_emb = embedding_look_up[edge[0]] 44 | node_v_emb = embedding_look_up[edge[1]] 45 | #feature_vector = np.append(node_v_emb, node_u_emb) 46 | feature_vector = np.multiply(node_u_emb,node_v_emb) 47 | X_test.append(feature_vector) 48 | y_test.append(1) 49 | for edge in test_neg_edges: 50 | node_u_emb = embedding_look_up[edge[0]] 51 | node_v_emb = embedding_look_up[edge[1]] 52 | #feature_vector = np.append(node_v_emb, node_u_emb) 53 | feature_vector = np.multiply(node_u_emb,node_v_emb) 54 | X_test.append(feature_vector) 55 | y_test.append(0) 56 | 57 | # shuffle for training and testing 58 | c = list(zip(X_train, y_train)) 59 | random.shuffle(c) 60 | X_train, y_train = zip(*c) 61 | 62 | c = list(zip(X_test, y_test)) 63 | random.shuffle(c) 64 | X_test, y_test = zip(*c) 65 | 66 | X_train = np.array(X_train) 67 | y_train = np.array(y_train) 68 | 69 | X_test = np.array(X_test) 70 | y_test = np.array(y_test) 71 | 72 | clf1 = LogisticRegression(random_state=seed, solver='lbfgs') 73 | clf1.fit(X_train, y_train) 74 | y_pred_proba = clf1.predict_proba(X_test)[:, 1] 75 | y_pred = clf1.predict(X_test) 76 | auc_roc = roc_auc_score(y_test, y_pred_proba) 77 | auc_pr = average_precision_score(y_test, y_pred_proba) 78 | accuracy = accuracy_score(y_test, y_pred) 79 | f1 = f1_score(y_test, y_pred) 80 | print('#' * 9 + ' Link 
Prediction Performance ' + '#' * 9) 81 | print(f'AUC-ROC: {auc_roc:.3f}, AUC-PR: {auc_pr:.3f}, Accuracy: {accuracy:.3f}, F1: {f1:.3f}') 82 | print('#' * 50) 83 | return auc_roc, auc_pr, accuracy, f1 84 | 85 | 86 | def NodeClassification(embedding_look_up, node_list, labels, testing_ratio, seed): 87 | 88 | X_train, y_train, X_test, y_test = split_train_test_classify(embedding_look_up, node_list, labels, 89 | testing_ratio=testing_ratio,seed=seed) 90 | binarizer = MultiLabelBinarizer(sparse_output=True) 91 | y_all = np.append(y_train, y_test) 92 | binarizer.fit(y_all) 93 | y_train = binarizer.transform(y_train).todense() 94 | y_test = binarizer.transform(y_test).todense() 95 | model = OneVsRestClassifier(LogisticRegression(random_state=seed, solver='lbfgs')) 96 | model.fit(X_train, y_train) 97 | y_pred_prob = model.predict_proba(X_test) 98 | 99 | ## small trick : we assume that we know how many label to predict 100 | y_pred = get_y_pred(y_test, y_pred_prob) 101 | 102 | accuracy = accuracy_score(y_test, y_pred) 103 | micro_f1 = f1_score(y_test, y_pred, average="micro") 104 | macro_f1 = f1_score(y_test, y_pred, average="macro") 105 | 106 | print('#' * 9 + ' Node Classification Performance ' + '#' * 9) 107 | print(f'Accuracy: {accuracy:.3f}, Micro-F1: {micro_f1:.3f}, Macro-F1: {macro_f1:.3f}') 108 | print('#' * 50) 109 | return accuracy, micro_f1, macro_f1 110 | -------------------------------------------------------------------------------- /GAE/model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import tensorflow as tf 4 | 5 | from GAE.layers import GraphConvolution, GraphConvolutionSparse, InnerProductDecoder 6 | 7 | flags = tf.app.flags 8 | FLAGS = flags.FLAGS 9 | 10 | 11 | class Model(object): 12 | def __init__(self, **kwargs): 13 | allowed_kwargs = {'name', 'logging'} 14 | for kwarg in kwargs.keys(): 15 | assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg 16 | 17 | for kwarg in kwargs.keys(): 18 | assert kwarg in allowed_kwargs, 'Invalid keyword argument: ' + kwarg 19 | name = kwargs.get('name') 20 | if not name: 21 | name = self.__class__.__name__.lower() 22 | self.name = name 23 | 24 | logging = kwargs.get('logging', False) 25 | self.logging = logging 26 | 27 | self.vars = {} 28 | 29 | def _build(self): 30 | raise NotImplementedError 31 | 32 | def build(self): 33 | """ Wrapper for _build() """ 34 | with tf.variable_scope(self.name): 35 | self._build() 36 | variables = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope=self.name) 37 | self.vars = {var.name: var for var in variables} 38 | 39 | def fit(self): 40 | pass 41 | 42 | def predict(self): 43 | pass 44 | 45 | 46 | class GCNModelAE(Model): 47 | def __init__(self, placeholders, num_features, features_nonzero, hidden1, hidden2, **kwargs): 48 | super(GCNModelAE, self).__init__(**kwargs) 49 | 50 | self.inputs = placeholders['features'] 51 | self.input_dim = num_features 52 | self.features_nonzero = features_nonzero 53 | self.adj = placeholders['adj'] 54 | self.dropout = placeholders['dropout'] 55 | self.hidden_dim_1 = hidden1 56 | self.hidden_dim_2 = hidden2 57 | self.build() 58 | 59 | def _build(self): 60 | self.hidden1 = GraphConvolutionSparse(input_dim=self.input_dim, 61 | output_dim=self.hidden_dim_1, 62 | adj=self.adj, 63 | features_nonzero=self.features_nonzero, 64 | act=tf.nn.relu, 65 | dropout=self.dropout, 66 | logging=self.logging)(self.inputs) 67 | 68 | self.embeddings = GraphConvolution(input_dim=self.hidden_dim_1, 69 | 
output_dim=self.hidden_dim_2, 70 | adj=self.adj, 71 | act=lambda x: x, 72 | dropout=self.dropout, 73 | logging=self.logging)(self.hidden1) 74 | 75 | self.z_mean = self.embeddings 76 | 77 | self.reconstructions = InnerProductDecoder(input_dim=self.hidden_dim_2, 78 | act=lambda x: x, 79 | logging=self.logging)(self.embeddings) 80 | 81 | 82 | class GCNModelVAE(Model): 83 | def __init__(self, placeholders, num_features, num_nodes, features_nonzero, hidden1, hidden2, **kwargs): 84 | super(GCNModelVAE, self).__init__(**kwargs) 85 | 86 | self.inputs = placeholders['features'] 87 | self.input_dim = num_features 88 | self.features_nonzero = features_nonzero 89 | self.n_samples = num_nodes 90 | self.adj = placeholders['adj'] 91 | self.dropout = placeholders['dropout'] 92 | self.hidden_dim_1 = hidden1 93 | self.hidden_dim_2 = hidden2 94 | self.build() 95 | 96 | def _build(self): 97 | self.hidden1 = GraphConvolutionSparse(input_dim=self.input_dim, 98 | output_dim=self.hidden_dim_1, 99 | adj=self.adj, 100 | features_nonzero=self.features_nonzero, 101 | act=tf.nn.relu, 102 | dropout=self.dropout, 103 | logging=self.logging)(self.inputs) 104 | 105 | self.z_mean = GraphConvolution(input_dim=self.hidden_dim_1, 106 | output_dim=self.hidden_dim_2, 107 | adj=self.adj, 108 | act=lambda x: x, 109 | dropout=self.dropout, 110 | logging=self.logging)(self.hidden1) 111 | 112 | self.z_log_std = GraphConvolution(input_dim=self.hidden_dim_1, 113 | output_dim=self.hidden_dim_2, 114 | adj=self.adj, 115 | act=lambda x: x, 116 | dropout=self.dropout, 117 | logging=self.logging)(self.hidden1) 118 | 119 | self.z = self.z_mean + tf.random_normal([self.n_samples, self.hidden_dim_2]) * tf.exp(self.z_log_std) 120 | 121 | self.reconstructions = InnerProductDecoder(input_dim=self.hidden_dim_2, 122 | act=lambda x: x, 123 | logging=self.logging)(self.z) 124 | -------------------------------------------------------------------------------- /GAE/preprocessing.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import scipy.sparse as sp 5 | 6 | 7 | def sparse_to_tuple(sparse_mx): 8 | if not sp.isspmatrix_coo(sparse_mx): 9 | sparse_mx = sparse_mx.tocoo() 10 | coords = np.vstack((sparse_mx.row, sparse_mx.col)).transpose() 11 | values = sparse_mx.data 12 | shape = sparse_mx.shape 13 | return coords, values, shape 14 | #####################################Original Code######################### 15 | # alpha = 0.1 16 | # print('@' * 70) 17 | # print('Alpha: %f' % (alpha)) 18 | # print('@' * 70) 19 | # adj = sp.coo_matrix(adj) 20 | # adj_ = adj + sp.eye(adj.shape[0]) 21 | # rowsum = np.array(adj_.sum(1)) 22 | # degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten()) 23 | # adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo() 24 | # return sparse_to_tuple(adj_normalized) 25 | ########################################################################### 26 | 27 | def preprocess_graph(adj): 28 | # alpha = 0.1 29 | # print('@' * 70) 30 | # print('Alpha with inverse: %f' % (alpha)) 31 | # print('@' * 70) 32 | # adj = sp.coo_matrix(adj) 33 | # adj_ = adj + sp.eye(adj.shape[0]) 34 | # rowsum = np.array(adj_.sum(1)) 35 | # degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten()) 36 | # adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo() 37 | # A_inner = sp.eye(adj.shape[0]) - (1 - alpha) * adj_normalized 38 | # adj_normalized = alpha * sp.linalg.inv(A_inner) 39 | 
alpha = 0.1 40 | print('@' * 70) 41 | print('Alpha: %f' % (alpha)) 42 | print('@' * 70) 43 | adj = sp.coo_matrix(adj) 44 | adj_ = adj + sp.eye(adj.shape[0]) 45 | rowsum = np.array(adj_.sum(1)) 46 | degree_mat_inv_sqrt = sp.diags(np.power(rowsum, -0.5).flatten()) 47 | adj_normalized = adj_.dot(degree_mat_inv_sqrt).transpose().dot(degree_mat_inv_sqrt).tocoo() 48 | return sparse_to_tuple(adj_normalized) 49 | 50 | 51 | def construct_feed_dict(adj_normalized, adj, features, placeholders): 52 | # construct feed dictionary 53 | feed_dict = dict() 54 | feed_dict.update({placeholders['features']: features}) 55 | feed_dict.update({placeholders['adj']: adj_normalized}) 56 | feed_dict.update({placeholders['adj_orig']: adj}) 57 | return feed_dict 58 | 59 | 60 | def mask_test_edges(adj): 61 | # Function to build test set with 10% positive links 62 | # NOTE: Splits are randomized and results might slightly deviate from reported numbers in the paper. 63 | 64 | # Remove diagonal elements 65 | adj = adj - sp.dia_matrix((adj.diagonal()[np.newaxis, :], [0]), shape=adj.shape) 66 | adj.eliminate_zeros() 67 | # Check that diag is zero: 68 | assert np.diag(adj.todense()).sum() == 0 69 | 70 | adj_triu = sp.triu(adj) 71 | adj_tuple = sparse_to_tuple(adj_triu) 72 | edges = adj_tuple[0] 73 | edges_all = sparse_to_tuple(adj)[0] 74 | num_test = int(np.floor(edges.shape[0] / 10.)) 75 | num_val = int(np.floor(edges.shape[0] / 20.)) 76 | 77 | all_edge_idx = list(range(edges.shape[0])) 78 | np.random.shuffle(all_edge_idx) 79 | val_edge_idx = all_edge_idx[:num_val] 80 | test_edge_idx = all_edge_idx[num_val:(num_val + num_test)] 81 | test_edges = edges[test_edge_idx] 82 | val_edges = edges[val_edge_idx] 83 | train_edges = np.delete(edges, np.hstack([test_edge_idx, val_edge_idx]), axis=0) 84 | 85 | def ismember(a, b, tol=5): 86 | rows_close = np.all(np.round(a - b[:, None], tol) == 0, axis=-1) 87 | return np.any(rows_close) 88 | 89 | test_edges_false = [] 90 | while len(test_edges_false) < len(test_edges): 91 | idx_i = np.random.randint(0, adj.shape[0]) 92 | idx_j = np.random.randint(0, adj.shape[0]) 93 | if idx_i == idx_j: 94 | continue 95 | if ismember([idx_i, idx_j], edges_all): 96 | continue 97 | if test_edges_false: 98 | if ismember([idx_j, idx_i], np.array(test_edges_false)): 99 | continue 100 | if ismember([idx_i, idx_j], np.array(test_edges_false)): 101 | continue 102 | test_edges_false.append([idx_i, idx_j]) 103 | 104 | val_edges_false = [] 105 | while len(val_edges_false) < len(val_edges): 106 | idx_i = np.random.randint(0, adj.shape[0]) 107 | idx_j = np.random.randint(0, adj.shape[0]) 108 | if idx_i == idx_j: 109 | continue 110 | if ismember([idx_i, idx_j], train_edges): 111 | continue 112 | if ismember([idx_j, idx_i], train_edges): 113 | continue 114 | if ismember([idx_i, idx_j], val_edges): 115 | continue 116 | if ismember([idx_j, idx_i], val_edges): 117 | continue 118 | if val_edges_false: 119 | if ismember([idx_j, idx_i], np.array(val_edges_false)): 120 | continue 121 | if ismember([idx_i, idx_j], np.array(val_edges_false)): 122 | continue 123 | val_edges_false.append([idx_i, idx_j]) 124 | 125 | assert ~ismember(test_edges_false, edges_all) 126 | assert ~ismember(val_edges_false, edges_all) 127 | assert ~ismember(val_edges, train_edges) 128 | assert ~ismember(test_edges, train_edges) 129 | assert ~ismember(val_edges, test_edges) 130 | 131 | data = np.ones(train_edges.shape[0]) 132 | 133 | # Re-build adj matrix 134 | adj_train = sp.csr_matrix((data, (train_edges[:, 0], train_edges[:, 1])), shape=adj.shape) 135 | 
adj_train = adj_train + adj_train.T 136 | 137 | # NOTE: these edge lists only contain single direction of edge! 138 | return adj_train, train_edges, val_edges, val_edges_false, test_edges, test_edges_false 139 | -------------------------------------------------------------------------------- /GAE/train_model.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import time 4 | 5 | import numpy as np 6 | import scipy.sparse as sp 7 | import tensorflow as tf 8 | 9 | from GAE.model import GCNModelAE, GCNModelVAE 10 | from GAE.optimizer import OptimizerAE, OptimizerVAE 11 | from GAE.preprocessing import construct_feed_dict, preprocess_graph, sparse_to_tuple 12 | #from fast_pagerank import pagerank 13 | 14 | # # Train on CPU (hide GPU) due to memory constraints 15 | # os.environ['CUDA_VISIBLE_DEVICES'] = "" 16 | 17 | 18 | class gae_model(object): 19 | def __init__(self, args): 20 | super(gae_model, self).__init__() 21 | self.learning_rate = args.lr 22 | self.epochs = args.epochs 23 | self.hidden1 = args.hidden 24 | self.hidden2 = args.dimensions 25 | self.weight_decay = args.weight_decay 26 | self.dropout = args.dropout 27 | self.model_selection = args.gae_model_selection 28 | self.model = None 29 | 30 | def save_embeddings(self, output, node_list): 31 | self.feed_dict.update({self.placeholders['dropout']: 0}) 32 | emb = self.sess.run(self.model.z_mean, feed_dict=self.feed_dict) 33 | print(emb.shape[0]) 34 | print(emb.shape[1]) 35 | fout = open(output, 'w') 36 | fout.write("{} {}\n".format(emb.shape[0], emb.shape[1])) 37 | for idx in range(emb.shape[0]): 38 | fout.write("{} {}\n".format(node_list[idx], ' '.join([str(x) for x in emb[idx, :]]))) 39 | fout.close() 40 | 41 | def train(self, adj): 42 | # Store original adjacency matrix (without diagonal entries) for later 43 | adj_orig = adj 44 | adj_orig = adj_orig - sp.dia_matrix((adj_orig.diagonal()[np.newaxis, :], [0]), shape=adj_orig.shape) 45 | adj_orig.eliminate_zeros() 46 | 47 | adj_train = adj 48 | features = sp.identity(adj.shape[0]) # featureless 49 | #pr=pagerank(adj, p=0.85) 50 | 51 | 52 | # What happends if I use PageRank scores as features 53 | #features = sp.diags(pr) 54 | # Some preprocessing 55 | print("You calling this function") 56 | adj_norm = preprocess_graph(adj) 57 | print("Yes") 58 | # Define placeholders 59 | self.placeholders = { 60 | 'features': tf.sparse_placeholder(tf.float32), 61 | 'adj': tf.sparse_placeholder(tf.float32), 62 | 'adj_orig': tf.sparse_placeholder(tf.float32), 63 | 'dropout': tf.placeholder_with_default(0., shape=()) 64 | } 65 | 66 | num_nodes = adj.shape[0] 67 | features = sparse_to_tuple(features.tocoo()) 68 | num_features = features[2][1] 69 | features_nonzero = features[1].shape[0] 70 | 71 | # Create model 72 | if self.model_selection == 'gcn_ae': 73 | self.model = GCNModelAE(self.placeholders, num_features, features_nonzero, self.hidden1, self.hidden2) 74 | elif self.model_selection == 'gcn_vae': 75 | self.model = GCNModelVAE(self.placeholders, num_features, num_nodes, features_nonzero, self.hidden1, 76 | self.hidden2) 77 | 78 | pos_weight = float(adj.shape[0] * adj.shape[0] - adj.sum()) / adj.sum() 79 | norm = adj.shape[0] * adj.shape[0] / float((adj.shape[0] * adj.shape[0] - adj.sum()) * 2) 80 | 81 | # Optimizer 82 | with tf.name_scope('optimizer'): 83 | if self.model_selection == 'gcn_ae': 84 | opt = OptimizerAE(preds=self.model.reconstructions, 85 | labels=tf.reshape(tf.sparse_tensor_to_dense(self.placeholders['adj_orig'], 86 | 
validate_indices=False), [-1]), 87 | pos_weight=pos_weight, 88 | norm=norm, 89 | learning_rate=self.learning_rate 90 | ) 91 | elif self.model_selection == 'gcn_vae': 92 | opt = OptimizerVAE(preds=self.model.reconstructions, 93 | labels=tf.reshape(tf.sparse_tensor_to_dense(self.placeholders['adj_orig'], 94 | validate_indices=False), [-1]), 95 | model=self.model, 96 | num_nodes=num_nodes, 97 | pos_weight=pos_weight, 98 | norm=norm, 99 | learning_rate=self.learning_rate 100 | ) 101 | 102 | # Initialize session 103 | self.sess = tf.Session() 104 | self.sess.run(tf.global_variables_initializer()) 105 | 106 | adj_label = adj_train + sp.eye(adj_train.shape[0]) 107 | adj_label = sparse_to_tuple(adj_label) 108 | 109 | # Train model 110 | for epoch in range(self.epochs): 111 | t = time.time() 112 | # Construct feed dictionary 113 | self.feed_dict = construct_feed_dict(adj_norm, adj_label, features, self.placeholders) 114 | self.feed_dict.update({self.placeholders['dropout']: self.dropout}) 115 | # Run single weight update 116 | outs = self.sess.run([opt.opt_op, opt.cost, opt.accuracy], feed_dict=self.feed_dict) 117 | 118 | # Compute average loss 119 | avg_cost = outs[1] 120 | avg_accuracy = outs[2] 121 | 122 | print("Epoch:", '%04d' % (epoch + 1), "train_loss=", "{:.5f}".format(avg_cost), 123 | "train_acc=", "{:.5f}".format(avg_accuracy), 124 | "time=", "{:.5f}".format(time.time() - t)) 125 | 126 | print("Optimization Finished!") 127 | -------------------------------------------------------------------------------- /OpenNE/walker.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import random 4 | 5 | import numpy as np 6 | 7 | 8 | def deepwalk_walk_wrapper(class_instance, walk_length, start_node): 9 | class_instance.deepwalk_walk(walk_length, start_node) 10 | 11 | 12 | class BasicWalker: 13 | def __init__(self, G, workers): 14 | self.G = G.G 15 | self.node_size = G.node_size 16 | self.look_up_dict = G.look_up_dict 17 | 18 | def deepwalk_walk(self, walk_length, start_node): 19 | ''' 20 | Simulate a random walk starting from start node. 21 | ''' 22 | G = self.G 23 | look_up_dict = self.look_up_dict 24 | node_size = self.node_size 25 | 26 | walk = [start_node] 27 | 28 | while len(walk) < walk_length: 29 | cur = walk[-1] 30 | cur_nbrs = list(G.neighbors(cur)) 31 | if len(cur_nbrs) > 0: 32 | walk.append(random.choice(cur_nbrs)) 33 | else: 34 | break 35 | return walk 36 | 37 | def simulate_walks(self, num_walks, walk_length): 38 | ''' 39 | Repeatedly simulate random walks from each node. 40 | ''' 41 | G = self.G 42 | walks = [] 43 | nodes = list(G.nodes()) 44 | print('Begin random walks...') 45 | for walk_iter in range(num_walks): 46 | # pool = multiprocessing.Pool(processes = 4) 47 | # print(str(walk_iter+1), '/', str(num_walks)) 48 | random.shuffle(nodes) 49 | for node in nodes: 50 | # walks.append(pool.apply_async(deepwalk_walk_wrapper, (self, walk_length, node, ))) 51 | walks.append(self.deepwalk_walk( 52 | walk_length=walk_length, start_node=node)) 53 | # pool.close() 54 | # pool.join() 55 | # print(len(walks)) 56 | print('Walk finished...') 57 | return walks 58 | 59 | 60 | class Walker: 61 | def __init__(self, G, p, q, workers): 62 | self.G = G.G 63 | self.p = p 64 | self.q = q 65 | self.node_size = G.node_size 66 | self.look_up_dict = G.look_up_dict 67 | 68 | def node2vec_walk(self, walk_length, start_node): 69 | ''' 70 | Simulate a random walk starting from start node. 
71 | ''' 72 | G = self.G 73 | alias_nodes = self.alias_nodes 74 | alias_edges = self.alias_edges 75 | look_up_dict = self.look_up_dict 76 | node_size = self.node_size 77 | 78 | walk = [start_node] 79 | 80 | while len(walk) < walk_length: 81 | cur = walk[-1] 82 | cur_nbrs = list(G.neighbors(cur)) 83 | if len(cur_nbrs) > 0: 84 | if len(walk) == 1: 85 | walk.append( 86 | cur_nbrs[alias_draw(alias_nodes[cur][0], alias_nodes[cur][1])]) 87 | else: 88 | prev = walk[-2] 89 | pos = (prev, cur) 90 | next = cur_nbrs[alias_draw(alias_edges[pos][0], 91 | alias_edges[pos][1])] 92 | walk.append(next) 93 | else: 94 | break 95 | 96 | return walk 97 | 98 | def simulate_walks(self, num_walks, walk_length): 99 | ''' 100 | Repeatedly simulate random walks from each node. 101 | ''' 102 | G = self.G 103 | walks = [] 104 | nodes = list(G.nodes()) 105 | print('Begin random walk...') 106 | for walk_iter in range(num_walks): 107 | # print(str(walk_iter+1), '/', str(num_walks)) 108 | random.shuffle(nodes) 109 | for node in nodes: 110 | walks.append(self.node2vec_walk( 111 | walk_length=walk_length, start_node=node)) 112 | print('Walk finished...') 113 | return walks 114 | 115 | def get_alias_edge(self, src, dst): 116 | ''' 117 | Get the alias edge setup lists for a given edge. 118 | ''' 119 | G = self.G 120 | p = self.p 121 | q = self.q 122 | 123 | unnormalized_probs = [] 124 | for dst_nbr in G.neighbors(dst): 125 | if dst_nbr == src: 126 | unnormalized_probs.append(G[dst][dst_nbr]['weight'] / p) 127 | elif G.has_edge(dst_nbr, src): 128 | unnormalized_probs.append(G[dst][dst_nbr]['weight']) 129 | else: 130 | unnormalized_probs.append(G[dst][dst_nbr]['weight'] / q) 131 | norm_const = sum(unnormalized_probs) 132 | normalized_probs = [ 133 | float(u_prob) / norm_const for u_prob in unnormalized_probs] 134 | 135 | return alias_setup(normalized_probs) 136 | 137 | def preprocess_transition_probs(self): 138 | ''' 139 | Preprocessing of transition probabilities for guiding the random walks. 140 | ''' 141 | G = self.G 142 | 143 | alias_nodes = {} 144 | for node in G.nodes(): 145 | unnormalized_probs = [G[node][nbr]['weight'] 146 | for nbr in G.neighbors(node)] 147 | norm_const = sum(unnormalized_probs) 148 | normalized_probs = [ 149 | float(u_prob) / norm_const for u_prob in unnormalized_probs] 150 | alias_nodes[node] = alias_setup(normalized_probs) 151 | 152 | alias_edges = {} 153 | triads = {} 154 | 155 | look_up_dict = self.look_up_dict 156 | node_size = self.node_size 157 | for edge in G.edges(): 158 | alias_edges[edge] = self.get_alias_edge(edge[0], edge[1]) 159 | 160 | self.alias_nodes = alias_nodes 161 | self.alias_edges = alias_edges 162 | 163 | return 164 | 165 | 166 | def alias_setup(probs): 167 | ''' 168 | Compute utility lists for non-uniform sampling from discrete distributions. 
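    For example, probs = [0.5, 0.3, 0.2] yields J = [0, 0, 0] and q = [1.0, 0.9, 0.6];
    alias_draw then picks a column kk uniformly, keeps it with probability q[kk] and
    otherwise returns the alias J[kk], which reproduces the original distribution with
    O(1) work per sample after the O(K) setup done here.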
169 | Refer to https://hips.seas.harvard.edu/blog/2013/03/03/the-alias-method-efficient-sampling-with-many-discrete-outcomes/ 170 | for details 171 | ''' 172 | K = len(probs) 173 | q = np.zeros(K, dtype=np.float32) 174 | J = np.zeros(K, dtype=np.int32) 175 | 176 | smaller = [] 177 | larger = [] 178 | for kk, prob in enumerate(probs): 179 | q[kk] = K * prob 180 | if q[kk] < 1.0: 181 | smaller.append(kk) 182 | else: 183 | larger.append(kk) 184 | 185 | while len(smaller) > 0 and len(larger) > 0: 186 | small = smaller.pop() 187 | large = larger.pop() 188 | 189 | J[small] = large 190 | q[large] = q[large] + q[small] - 1.0 191 | if q[large] < 1.0: 192 | smaller.append(large) 193 | else: 194 | larger.append(large) 195 | 196 | return J, q 197 | 198 | 199 | def alias_draw(J, q): 200 | ''' 201 | Draw sample from a non-uniform discrete distribution using alias sampling. 202 | ''' 203 | K = len(J) 204 | 205 | kk = int(np.floor(np.random.rand() * K)) 206 | if np.random.rand() < q[kk]: 207 | return kk 208 | else: 209 | return J[kk] 210 | -------------------------------------------------------------------------------- /utils.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import copy 4 | import itertools 5 | import random 6 | 7 | import networkx as nx 8 | import numpy as np 9 | 10 | import OpenNE.graph as og 11 | #import struc2vec.graph as sg 12 | 13 | 14 | def read_for_OpenNE(filename, weighted=False): 15 | G = og.Graph() 16 | print("Loading training graph for learning embedding...") 17 | G.read_edgelist(filename=filename, weighted=weighted) 18 | print("Graph Loaded...") 19 | return G 20 | 21 | def read_for_OpenNE_from_mat(filename): 22 | 23 | G = og.Graph() 24 | 25 | print("Loading mat file for classification only") 26 | G.readMatFile(filename) 27 | print("Mat Graph loaded") 28 | return G 29 | 30 | 31 | def read_for_gae(filename, weighted=False): 32 | print("Loading training graph for learning embedding...") 33 | edgelist = np.loadtxt(filename, dtype='float') 34 | if weighted: 35 | edgelist = [(int(edgelist[idx, 0]), int(edgelist[idx, 1])) for idx in range(edgelist.shape[0]) if 36 | edgelist[idx, 2] > 0] 37 | else: 38 | edgelist = [(int(edgelist[idx, 0]), int(edgelist[idx, 1])) for idx in range(edgelist.shape[0])] 39 | G=nx.from_edgelist(edgelist) 40 | node_list=list(G.nodes) 41 | adj = nx.adjacency_matrix(G, nodelist=node_list) 42 | print("Graph Loaded...") 43 | return (adj,node_list) 44 | 45 | 46 | def read_for_SVD(filename, weighted=False): 47 | if weighted: 48 | G = nx.read_weighted_edgelist(filename) 49 | else: 50 | G = nx.read_edgelist(filename) 51 | return G 52 | 53 | 54 | def split_train_test_graph(input_edgelist, seed, testing_ratio, weighted=False): 55 | 56 | if (weighted): 57 | G = nx.read_weighted_edgelist(input_edgelist) 58 | else: 59 | G = nx.read_edgelist(input_edgelist) 60 | node_num1, edge_num1 = len(G.nodes), len(G.edges) 61 | print('Original Graph: nodes:', node_num1, 'edges:', edge_num1) 62 | testing_edges_num = int(len(G.edges) * testing_ratio) 63 | random.seed(seed) 64 | testing_pos_edges = random.sample(G.edges, testing_edges_num) 65 | G_train = copy.deepcopy(G) 66 | for edge in testing_pos_edges: 67 | node_u, node_v = edge 68 | if (G_train.degree(node_u) > 1 and G_train.degree(node_v) > 1): 69 | G_train.remove_edge(node_u, node_v) 70 | 71 | G_train.remove_nodes_from(nx.isolates(G_train)) 72 | node_num2, edge_num2 = len(G_train.nodes), len(G_train.edges) 73 | assert node_num1 == node_num2 74 | 
train_graph_filename = 'graph_train.edgelist' 75 | if weighted: 76 | nx.write_edgelist(G_train, train_graph_filename, data=['weight']) 77 | else: 78 | nx.write_edgelist(G_train, train_graph_filename, data=False) 79 | 80 | node_num1, edge_num1 = len(G_train.nodes), len(G_train.edges) 81 | print('Training Graph: nodes:', node_num1, 'edges:', edge_num1) 82 | # idSave={} 83 | # idSave['G']=G 84 | # import scipy.io as sio 85 | # #idSave['Label'] = labels 86 | # #idSave['Attributes'] = features 87 | # 88 | # sio.savemat('DrugBankAdj.mat',idSave) 89 | return G, G_train, testing_pos_edges, train_graph_filename 90 | 91 | 92 | def split_train_test_graphReal(input_edgelist1, input_edgelist2, seed, testing_ratio, weighted=False): 93 | 94 | if (weighted): 95 | G1 = nx.read_weighted_edgelist(input_edgelist1) 96 | G2 = nx.read_weighted_edgelist(input_edgelist2) 97 | else: 98 | G1 = nx.read_edgelist(input_edgelist1) 99 | G2 = nx.read_edgelist(input_edgelist2) 100 | node_num1, edge_num1 = len(G1.nodes), len(G1.edges) 101 | node_num2, edge_num2 = len(G2.nodes), len(G2.edges) 102 | 103 | print('Original CoExp Graph: nodes:', node_num1, 'edges:', edge_num1) 104 | print('Original Exper Graph: nodes:', node_num2, 'edges:', edge_num2) 105 | testing_edges_num = int(len(G2.edges)) 106 | random.seed(seed) 107 | testing_pos_edges = G2.edges 108 | G_train = copy.deepcopy(G1) 109 | overlapCount = 0 110 | for edge in testing_pos_edges: 111 | node_u, node_v = edge 112 | if(G_train.has_edge(node_u,node_v)): 113 | overlapCount = overlapCount+1 114 | print("Number of edge Overlap: ", overlapCount) 115 | 116 | 117 | def generate_neg_edges(original_graph, testing_edges_num, seed): 118 | L = list(original_graph.nodes()) 119 | 120 | # create a complete graph 121 | G = nx.Graph() 122 | G.add_nodes_from(L) 123 | G.add_edges_from(itertools.combinations(L, 2)) 124 | # remove original edges 125 | G.remove_edges_from(original_graph.edges()) 126 | random.seed(seed) 127 | neg_edges = random.sample(G.edges, testing_edges_num) 128 | return neg_edges 129 | 130 | 131 | def load_embedding(embedding_file_name, node_list=None): 132 | with open(embedding_file_name) as f: 133 | node_num, emb_size = f.readline().split() 134 | print('Nodes with embedding: %s'%node_num) 135 | embedding_look_up = {} 136 | if node_list: 137 | for line in f: 138 | vec = line.strip().split() 139 | node_id = vec[0] 140 | if (node_id in node_list): 141 | emb = [float(x) for x in vec[1:]] 142 | emb = emb / np.linalg.norm(emb) 143 | emb[np.isnan(emb)] = 0 144 | embedding_look_up[node_id] = np.array(emb) 145 | 146 | # if len(node_list) != len(embedding_look_up): 147 | # diff_nodes=set(node_list).difference(set(embedding_look_up.keys())) 148 | # for node in diff_nodes: 149 | # emb = np.random.random((int(emb_size))) 150 | # emb = emb / np.linalg.norm(emb) 151 | # emb[np.isnan(emb)] = 0 152 | # embedding_look_up[node] = np.array(emb) 153 | 154 | assert len(node_list) == len(embedding_look_up) 155 | else: 156 | for line in f: 157 | vec = line.strip().split() 158 | node_id = vec[0] 159 | embeddings = vec[1:] 160 | emb = [float(x) for x in embeddings] 161 | emb = emb / np.linalg.norm(emb) 162 | emb[np.isnan(emb)] = 0 163 | embedding_look_up[node_id] = list(emb) 164 | assert int(node_num) == len(embedding_look_up) 165 | f.close() 166 | return embedding_look_up 167 | 168 | 169 | def read_node_labels(filename): 170 | fin = open(filename, 'r') 171 | node_list = [] 172 | labels = [] 173 | while 1: 174 | l = fin.readline() 175 | if l == '': 176 | break 177 | vec = l.strip().split() 
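        # Each line of the label file is "<node_id> <label_1> [<label_2> ...]":
        # the first token is kept as the node id and the remaining tokens as that
        # node's (possibly multi-label) label list, which is binarized later for
        # node classification.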
178 | node_list.append(vec[0]) 179 | labels.append(vec[1:]) 180 | fin.close() 181 | print('Nodes with labels: %s'%len(node_list)) 182 | return node_list, labels 183 | 184 | 185 | def split_train_test_classify(embedding_look_up, X, Y, seed, testing_ratio=0.5): 186 | state = np.random.get_state() 187 | training_ratio = 1 - testing_ratio 188 | training_size = int(training_ratio * len(X)) 189 | np.random.seed(seed) 190 | shuffle_indices = np.random.permutation(np.arange(len(X))) 191 | X_train = [embedding_look_up[X[shuffle_indices[i]]] for i in range(training_size)] 192 | Y_train = [Y[shuffle_indices[i]] for i in range(training_size)] 193 | X_test = [embedding_look_up[X[shuffle_indices[i]]] for i in range(training_size, len(X))] 194 | Y_test = [Y[shuffle_indices[i]] for i in range(training_size, len(X))] 195 | 196 | X_train = np.array(X_train) 197 | Y_train = np.array(Y_train) 198 | X_test = np.array(X_test) 199 | Y_test = np.array(Y_test) 200 | 201 | np.random.set_state(state) 202 | return X_train, Y_train, X_test, Y_test 203 | 204 | 205 | def get_y_pred(y_test, y_pred_prob): 206 | y_pred = np.zeros(y_pred_prob.shape) 207 | sort_index = np.flip(np.argsort(y_pred_prob, axis=1), 1) 208 | for i in range(y_test.shape[0]): 209 | num = np.sum(y_test[i]) 210 | for j in range(num): 211 | y_pred[i][sort_index[i][j]] = 1 212 | return y_pred 213 | -------------------------------------------------------------------------------- /main.py: -------------------------------------------------------------------------------- 1 | 2 | import datetime 3 | import getpass 4 | import json 5 | import os 6 | import random 7 | import time 8 | import scipy.io as sio 9 | from argparse import ArgumentDefaultsHelpFormatter, ArgumentParser 10 | 11 | import numpy as np 12 | import scipy.sparse as sp 13 | 14 | from embed_train import embedding_training, load_embedding, read_node_labels, split_train_test_graph 15 | from evaluation import LinkPrediction, NodeClassification 16 | 17 | 18 | 19 | 20 | def parse_args(): 21 | parser = ArgumentParser(formatter_class=ArgumentDefaultsHelpFormatter, 22 | conflict_handler='resolve') 23 | 24 | parser.add_argument('--input', choices=[ 25 | 'DrugBank_DDI.edgelist', 26 | 'NDFRT_DDA.edgelist', 27 | 'CTD_DDA.edgelist'], default='DDI5.edgelist', 28 | help='Input Graph file' 29 | 'None represents no evaluation, and only run for training embedding.') 30 | parser.add_argument('--output', choices=[ 31 | 'DGI_RA_1_DrugBank_DDI.txt', 32 | 'out2', 33 | 'out'], default='Default.txt', 34 | help='Yada yada' 35 | 'None represents no evaluation, and only run for training embedding.') 36 | 37 | parser.add_argument('--embTech', choices=[ 38 | 'DGI', 39 | 'CN', 40 | 'AA', 41 | 42 | ], default='CN', help='The embedding learning method') 43 | 44 | parser.add_argument('--method', choices=[ 45 | 'Laplacian', 46 | 'SVD', 47 | ], default='DGI', help='The embedding learning method') 48 | parser.add_argument('--task', choices=[ 49 | 'link-prediction', 50 | 'node-classification'], default='link-prediction', 51 | help='Choose to evaluate the embedding quality based on a specific prediction task. ' 52 | 'None represents no evaluation, and only run for training embedding.') 53 | parser.add_argument('--testingratio', default=0.1, type=float, 54 | help='Testing set ratio for prediction tasks.' 
55 | 'In link prediction, it splits all the known edges; ' 56 | 'in node classification, it splits all the labeled nodes.') 57 | parser.add_argument('--number-walks', default=32, type=int, 58 | help='Number of random walks to start at each node. ' 59 | 'Only for random walk-based methods: DeepWalk, node2vec, struc2vec') 60 | parser.add_argument('--walk-length', default=64, type=int, 61 | help='Length of the random walk started at each node. ' 62 | 'Only for random walk-based methods: DeepWalk, node2vec, struc2vec') 63 | parser.add_argument('--workers', default=8, type=int, 64 | help='Number of parallel processes. ' 65 | 'Only for random walk-based methods: DeepWalk, node2vec, struc2vec') 66 | parser.add_argument('--dimensions', default=100, type=int, 67 | help='the dimensions of embedding for each node.') 68 | parser.add_argument('--window-size', default=10, type=int, 69 | help='Window size of word2vec model. ' 70 | 'Only for random walk-based methods: DeepWalk, node2vec, struc2vec') 71 | parser.add_argument('--epochs', default=100, type=int, 72 | help='The training epochs of LINE, SDNE and GAE') 73 | parser.add_argument('--p', default=1.0, type=float, 74 | help='p is a hyper-parameter for node2vec, ' 75 | 'and it controls how fast the walk explores.') 76 | parser.add_argument('--q', default=1.0, type=float, 77 | help='q is a hyper-parameter for node2vec, ' 78 | 'and it controls how fast the walk leaves the neighborhood of starting node.') 79 | 80 | 81 | 82 | 83 | parser.add_argument('--label-file', default='node2vec_PPI_labels.txt', 84 | help='The label file for node classification') 85 | parser.add_argument('--negative-ratio', default=5, type=int, 86 | help='the negative ratio of LINE') 87 | parser.add_argument('--weighted', type=bool, default=False, 88 | help='Treat graph as weighted') 89 | parser.add_argument('--directed', type=bool, default=False, 90 | help='Treat graph as directed') 91 | parser.add_argument('--order', default=2, type=int, 92 | help='Choose the order of LINE, 1 means first order, 2 means second order, 3 means first order + second order') 93 | parser.add_argument('--weight-decay', type=float, default=5e-4, 94 | help='coefficient for L2 regularization for Graph Factorization.') 95 | parser.add_argument('--kstep', default=4, type=int, 96 | help='Use k-step transition probability matrix for GraRep.') 97 | parser.add_argument('--lr', default=0.01, type=float, 98 | help='learning rate') 99 | parser.add_argument('--alpha', default=0.3, type=float, 100 | help='alhpa is a hyperparameter in SDNE') 101 | parser.add_argument('--beta', default=0, type=float, 102 | help='beta is a hyperparameter in SDNE') 103 | parser.add_argument('--nu1', default=1e-5, type=float, 104 | help='nu1 is a hyperparameter in SDNE') 105 | parser.add_argument('--nu2', default=1e-4, type=float, 106 | help='nu2 is a hyperparameter in SDNE') 107 | parser.add_argument('--bs', default=200, type=int, 108 | help='batch size of SDNE') 109 | parser.add_argument('--encoder-list', default='[1000, 128]', type=str, 110 | help='a list of numbers of the neuron at each encoder layer, the last number is the ' 111 | 'dimension of the output node representation') 112 | parser.add_argument('--OPT1', default=True, type=bool, 113 | help='optimization 1 for struc2vec') 114 | parser.add_argument('--OPT2', default=True, type=bool, 115 | help='optimization 2 for struc2vec') 116 | parser.add_argument('--OPT3', default=True, type=bool, 117 | help='optimization 3 for struc2vec') 118 | parser.add_argument('--until-layer', type=int, 
default=6, 119 | help='Calculation until the layer. A hyper-parameter for struc2vec.') 120 | parser.add_argument('--dropout', default=0, type=float, help='Dropout rate (1 - keep probability).') 121 | parser.add_argument('--hidden', default=32, type=int, help='Number of units in hidden layer.') 122 | parser.add_argument('--gae_model_selection', default='gcn_ae', type=str, 123 | help='gae model selection: gcn_ae or gcn_vae') 124 | parser.add_argument('--eval-result-file', help='save evaluation performance') 125 | parser.add_argument('--seed',default=0, type=int, help='seed value') 126 | args = parser.parse_args() 127 | 128 | return args 129 | 130 | 131 | 132 | def main(args): 133 | print('#' * 70) 134 | print('Embedding Method: %s, Evaluation Task: %s' % (args.method, args.task)) 135 | print('#' * 70) 136 | 137 | if args.task == 'link-prediction': 138 | partitiondata = ['DDI1.edgelist'] 139 | techniques = ['DGI'] 140 | 141 | for d in partitiondata: 142 | print(d) 143 | args.input = d 144 | for x in techniques: 145 | print(x) 146 | args.method = x 147 | for i in range(3): 148 | G, G_train, testing_pos_edges, train_graph_filename = split_train_test_graph(args.input, args.seed, args.testingratio,weighted=args.weighted) 149 | # time2 = time.time() 150 | # print('Compute RWR ') 151 | # calc_ppr_exact(G[0], 0.1) 152 | # time2 = time.time() 153 | # print('Exact PPR took ', time2) 154 | # 155 | time1 = time.time() 156 | #idSave={} 157 | #idSave['G']=G 158 | #idSave['Label'] = labels 159 | #idSave['Attributes'] = features 160 | 161 | #sio.savemat('DrugBankAdj.mat',idSave) 162 | 163 | embedding_training(args, train_graph_filename) 164 | embed_train_time = time.time() - time1 165 | print('Embedding Learning Time: %.2f s' % embed_train_time) 166 | embedding_look_up = load_embedding(args.output) 167 | time1 = time.time() 168 | print('Begin evaluation...') 169 | result = LinkPrediction(embedding_look_up, G, G_train, testing_pos_edges,args.seed) 170 | eval_time = time.time() - time1 171 | print('Prediction Task Time: %.2f s' % eval_time) 172 | os.remove(train_graph_filename) 173 | elif args.task == 'node-classification': 174 | if not args.label_file: 175 | raise ValueError("No input label file. 
Exit.") 176 | node_list, labels = read_node_labels(args.label_file) 177 | idSave={} 178 | idSave['labels'] = labels 179 | sio.savemat('LabelNode2VecPPI.mat',idSave) 180 | train_graph_filename = args.input 181 | time1 = time.time() 182 | embedding_training(args, train_graph_filename) 183 | embed_train_time = time.time() - time1 184 | print('Embedding Learning Time: %.2f s' % embed_train_time) 185 | embedding_look_up = load_embedding('N2V_DW_Emb.txt', node_list) 186 | time1 = time.time() 187 | print('Begin evaluation...') 188 | result = NodeClassification(embedding_look_up, node_list, labels, args.testingratio, args.seed) 189 | eval_time = time.time() - time1 190 | print('Prediction Task Time: %.2f s' % eval_time) 191 | else: 192 | train_graph_filename = args.input 193 | time1 = time.time() 194 | embedding_training(args, train_graph_filename) 195 | embed_train_time = time.time() - time1 196 | print('Embedding Learning Time: %.2f s' % embed_train_time) 197 | os.remove(train_graph_filename) 198 | 199 | if args.eval_result_file and result: 200 | _results = dict( 201 | input=args.input, 202 | task=args.task, 203 | method=args.method, 204 | dimension=args.dimensions, 205 | user=getpass.getuser(), 206 | date=datetime.datetime.now().strftime('%Y-%m-%d-%H%M%S'), 207 | seed=args.seed, 208 | ) 209 | 210 | if args.task == 'link-prediction': 211 | auc_roc, auc_pr, accuracy, f1 = result 212 | _results['results'] = dict( 213 | auc_roc=auc_roc, 214 | auc_pr=auc_pr, 215 | accuracy=accuracy, 216 | f1=f1, 217 | ) 218 | else: 219 | accuracy, f1_micro, f1_macro = result 220 | _results['results'] = dict( 221 | accuracy=accuracy, 222 | f1_micro=f1_micro, 223 | f1_macro=f1_macro, 224 | ) 225 | 226 | with open(args.eval_result_file, 'a+') as wf: 227 | print(json.dumps(_results, sort_keys=True), file=wf) 228 | 229 | 230 | def more_main(): 231 | args = parse_args() 232 | seed = args.seed 233 | random.seed(seed) 234 | np.random.seed(seed) 235 | main(parse_args()) 236 | 237 | 238 | if __name__ == "__main__": 239 | more_main() 240 | -------------------------------------------------------------------------------- /OpenNE/line.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import math 4 | import random 5 | 6 | import numpy as np 7 | import tensorflow as tf 8 | from sklearn.linear_model import LogisticRegression 9 | 10 | from OpenNE.classify import Classifier, read_node_label 11 | 12 | 13 | class _LINE(object): 14 | 15 | def __init__(self, graph, rep_size=128, batch_size=1000, negative_ratio=5, order=3): 16 | self.cur_epoch = 0 17 | self.order = order 18 | self.g = graph 19 | self.node_size = graph.G.number_of_nodes() 20 | self.rep_size = rep_size 21 | self.batch_size = batch_size 22 | self.negative_ratio = negative_ratio 23 | 24 | self.gen_sampling_table() 25 | self.sess = tf.Session() 26 | cur_seed = random.getrandbits(32) 27 | initializer = tf.contrib.layers.xavier_initializer( 28 | uniform=False, seed=cur_seed) 29 | with tf.variable_scope("model", reuse=None, initializer=initializer): 30 | self.build_graph() 31 | self.sess.run(tf.global_variables_initializer()) 32 | 33 | def build_graph(self): 34 | self.h = tf.placeholder(tf.int32, [None]) 35 | self.t = tf.placeholder(tf.int32, [None]) 36 | self.sign = tf.placeholder(tf.float32, [None]) 37 | 38 | cur_seed = random.getrandbits(32) 39 | self.embeddings = tf.get_variable(name="embeddings" + str(self.order), shape=[ 40 | self.node_size, self.rep_size], 
initializer=tf.contrib.layers.xavier_initializer(uniform=False, 41 | seed=cur_seed)) 42 | self.context_embeddings = tf.get_variable(name="context_embeddings" + str(self.order), shape=[ 43 | self.node_size, self.rep_size], initializer=tf.contrib.layers.xavier_initializer(uniform=False, 44 | seed=cur_seed)) 45 | # self.h_e = tf.nn.l2_normalize(tf.nn.embedding_lookup(self.embeddings, self.h), 1) 46 | # self.t_e = tf.nn.l2_normalize(tf.nn.embedding_lookup(self.embeddings, self.t), 1) 47 | # self.t_e_context = tf.nn.l2_normalize(tf.nn.embedding_lookup(self.context_embeddings, self.t), 1) 48 | self.h_e = tf.nn.embedding_lookup(self.embeddings, self.h) 49 | self.t_e = tf.nn.embedding_lookup(self.embeddings, self.t) 50 | self.t_e_context = tf.nn.embedding_lookup( 51 | self.context_embeddings, self.t) 52 | self.second_loss = -tf.reduce_mean(tf.log_sigmoid( 53 | self.sign * tf.reduce_sum(tf.multiply(self.h_e, self.t_e_context), axis=1))) 54 | self.first_loss = -tf.reduce_mean(tf.log_sigmoid( 55 | self.sign * tf.reduce_sum(tf.multiply(self.h_e, self.t_e), axis=1))) 56 | if self.order == 1: 57 | self.loss = self.first_loss 58 | else: 59 | self.loss = self.second_loss 60 | optimizer = tf.train.AdamOptimizer(0.001) 61 | self.train_op = optimizer.minimize(self.loss) 62 | 63 | def train_one_epoch(self): 64 | sum_loss = 0.0 65 | batches = self.batch_iter() 66 | batch_id = 0 67 | for batch in batches: 68 | h, t, sign = batch 69 | feed_dict = { 70 | self.h: h, 71 | self.t: t, 72 | self.sign: sign, 73 | } 74 | _, cur_loss = self.sess.run([self.train_op, self.loss], feed_dict) 75 | sum_loss += cur_loss 76 | batch_id += 1 77 | print('epoch:{} sum of loss:{!s}'.format(self.cur_epoch, sum_loss)) 78 | self.cur_epoch += 1 79 | 80 | def batch_iter(self): 81 | look_up = self.g.look_up_dict 82 | 83 | table_size = 1e8 84 | numNodes = self.node_size 85 | 86 | edges = [(look_up[x[0]], look_up[x[1]]) for x in self.g.G.edges()] 87 | 88 | data_size = self.g.G.number_of_edges() 89 | edge_set = set([x[0] * numNodes + x[1] for x in edges]) 90 | shuffle_indices = np.random.permutation(np.arange(data_size)) 91 | 92 | # positive or negative mod 93 | mod = 0 94 | mod_size = 1 + self.negative_ratio 95 | h = [] 96 | t = [] 97 | sign = 0 98 | 99 | start_index = 0 100 | end_index = min(start_index + self.batch_size, data_size) 101 | while start_index < data_size: 102 | if mod == 0: 103 | sign = 1. 104 | h = [] 105 | t = [] 106 | for i in range(start_index, end_index): 107 | if not random.random() < self.edge_prob[shuffle_indices[i]]: 108 | shuffle_indices[i] = self.edge_alias[shuffle_indices[i]] 109 | cur_h = edges[shuffle_indices[i]][0] 110 | cur_t = edges[shuffle_indices[i]][1] 111 | h.append(cur_h) 112 | t.append(cur_t) 113 | else: 114 | sign = -1. 
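                # Negative phase: the heads h from the current positive batch are reused,
                # the tails are redrawn from the degree^0.75 table built in
                # gen_sampling_table(), and sign = -1 flips the log-sigmoid loss so these
                # sampled pairs are pushed apart.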
115 | t = [] 116 | for i in range(len(h)): 117 | t.append( 118 | self.sampling_table[random.randint(0, table_size - 1)]) 119 | 120 | yield h, t, [sign] 121 | mod += 1 122 | mod %= mod_size 123 | if mod == 0: 124 | start_index = end_index 125 | end_index = min(start_index + self.batch_size, data_size) 126 | 127 | def gen_sampling_table(self): 128 | table_size = 1e8 129 | power = 0.75 130 | numNodes = self.node_size 131 | 132 | print("Pre-procesing for non-uniform negative sampling!") 133 | node_degree = np.zeros(numNodes) # out degree 134 | 135 | look_up = self.g.look_up_dict 136 | for edge in self.g.G.edges(): 137 | node_degree[look_up[edge[0]] 138 | ] += self.g.G[edge[0]][edge[1]]["weight"] 139 | 140 | norm = sum([math.pow(node_degree[i], power) for i in range(numNodes)]) 141 | 142 | self.sampling_table = np.zeros(int(table_size), dtype=np.uint32) 143 | 144 | p = 0 145 | i = 0 146 | for j in range(numNodes): 147 | p += float(math.pow(node_degree[j], power)) / norm 148 | while i < table_size and float(i) / table_size < p: 149 | self.sampling_table[i] = j 150 | i += 1 151 | 152 | data_size = self.g.G.number_of_edges() 153 | self.edge_alias = np.zeros(data_size, dtype=np.int32) 154 | self.edge_prob = np.zeros(data_size, dtype=np.float32) 155 | large_block = np.zeros(data_size, dtype=np.int32) 156 | small_block = np.zeros(data_size, dtype=np.int32) 157 | 158 | total_sum = sum([self.g.G[edge[0]][edge[1]]["weight"] 159 | for edge in self.g.G.edges()]) 160 | norm_prob = [self.g.G[edge[0]][edge[1]]["weight"] * 161 | data_size / total_sum for edge in self.g.G.edges()] 162 | num_small_block = 0 163 | num_large_block = 0 164 | cur_small_block = 0 165 | cur_large_block = 0 166 | for k in range(data_size - 1, -1, -1): 167 | if norm_prob[k] < 1: 168 | small_block[num_small_block] = k 169 | num_small_block += 1 170 | else: 171 | large_block[num_large_block] = k 172 | num_large_block += 1 173 | while num_small_block and num_large_block: 174 | num_small_block -= 1 175 | cur_small_block = small_block[num_small_block] 176 | num_large_block -= 1 177 | cur_large_block = large_block[num_large_block] 178 | self.edge_prob[cur_small_block] = norm_prob[cur_small_block] 179 | self.edge_alias[cur_small_block] = cur_large_block 180 | norm_prob[cur_large_block] = norm_prob[cur_large_block] + \ 181 | norm_prob[cur_small_block] - 1 182 | if norm_prob[cur_large_block] < 1: 183 | small_block[num_small_block] = cur_large_block 184 | num_small_block += 1 185 | else: 186 | large_block[num_large_block] = cur_large_block 187 | num_large_block += 1 188 | 189 | while num_large_block: 190 | num_large_block -= 1 191 | self.edge_prob[large_block[num_large_block]] = 1 192 | while num_small_block: 193 | num_small_block -= 1 194 | self.edge_prob[small_block[num_small_block]] = 1 195 | 196 | def get_embeddings(self): 197 | vectors = {} 198 | embeddings = self.embeddings.eval(session=self.sess) 199 | # embeddings = self.sess.run(tf.nn.l2_normalize(self.embeddings.eval(session=self.sess), 1)) 200 | look_back = self.g.look_back_list 201 | for i, embedding in enumerate(embeddings): 202 | vectors[look_back[i]] = embedding 203 | return vectors 204 | 205 | 206 | class LINE(object): 207 | 208 | def __init__(self, graph, rep_size=128, batch_size=1000, epoch=10, negative_ratio=5, order=3, label_file=None, 209 | clf_ratio=0.5, auto_save=True): 210 | self.rep_size = rep_size 211 | self.order = order 212 | self.best_result = 0 213 | self.vectors = {} 214 | if order == 3: 215 | self.model1 = _LINE(graph, rep_size / 2, batch_size, 216 | 
negative_ratio, order=1) 217 | self.model2 = _LINE(graph, rep_size / 2, batch_size, 218 | negative_ratio, order=2) 219 | for i in range(epoch): 220 | self.model1.train_one_epoch() 221 | self.model2.train_one_epoch() 222 | if label_file: 223 | self.get_embeddings() 224 | X, Y = read_node_label(label_file) 225 | print("Training classifier using {:.2f}% nodes...".format( 226 | clf_ratio * 100)) 227 | clf = Classifier(vectors=self.vectors, 228 | clf=LogisticRegression()) 229 | result = clf.split_train_evaluate(X, Y, clf_ratio) 230 | 231 | if result['macro'] > self.best_result: 232 | self.best_result = result['macro'] 233 | if auto_save: 234 | self.best_vector = self.vectors 235 | 236 | else: 237 | self.model = _LINE(graph, rep_size, batch_size, 238 | negative_ratio, order=self.order) 239 | for i in range(epoch): 240 | self.model.train_one_epoch() 241 | if label_file: 242 | self.get_embeddings() 243 | X, Y = read_node_label(label_file) 244 | print("Training classifier using {:.2f}% nodes...".format( 245 | clf_ratio * 100)) 246 | clf = Classifier(vectors=self.vectors, 247 | clf=LogisticRegression()) 248 | result = clf.split_train_evaluate(X, Y, clf_ratio) 249 | 250 | if result['macro'] > self.best_result: 251 | self.best_result = result['macro'] 252 | if auto_save: 253 | self.best_vector = self.vectors 254 | 255 | self.get_embeddings() 256 | if auto_save and label_file: 257 | self.vectors = self.best_vector 258 | 259 | def get_embeddings(self): 260 | self.last_vectors = self.vectors 261 | self.vectors = {} 262 | if self.order == 3: 263 | vectors1 = self.model1.get_embeddings() 264 | vectors2 = self.model2.get_embeddings() 265 | for node in vectors1.keys(): 266 | self.vectors[node] = np.append(vectors1[node], vectors2[node]) 267 | else: 268 | self.vectors = self.model.get_embeddings() 269 | 270 | def save_embeddings(self, filename): 271 | fout = open(filename, 'w') 272 | node_num = len(self.vectors.keys()) 273 | fout.write("{} {}\n".format(node_num, self.rep_size)) 274 | for node, vec in self.vectors.items(): 275 | fout.write("{} {}\n".format(node, 276 | ' '.join([str(x) for x in vec]))) 277 | fout.close() 278 | -------------------------------------------------------------------------------- /OpenNE/sdne.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import numpy as np 4 | import tensorflow as tf 5 | 6 | __author__ = "Wang Binlu" 7 | __email__ = "wblmail@whu.edu.cn" 8 | 9 | 10 | def fc_op(input_op, name, n_out, layer_collector, act_func=tf.nn.leaky_relu): 11 | n_in = input_op.get_shape()[-1].value 12 | with tf.name_scope(name) as scope: 13 | kernel = tf.Variable(tf.contrib.layers.xavier_initializer()([n_in, n_out]), dtype=tf.float32, name=scope + "w") 14 | 15 | # kernel = tf.Variable(tf.random_normal([n_in, n_out])) 16 | biases = tf.Variable(tf.constant(0, shape=[1, n_out], dtype=tf.float32), name=scope + 'b') 17 | 18 | fc = tf.add(tf.matmul(input_op, kernel), biases) 19 | activation = act_func(fc, name=scope + 'act') 20 | layer_collector.append([kernel, biases]) 21 | return activation 22 | 23 | 24 | class SDNE(object): 25 | def __init__(self, graph, encoder_layer_list, alpha=1e-6, beta=5., nu1=1e-5, nu2=1e-4, 26 | batch_size=200, epoch=100, learning_rate=None): 27 | """ 28 | encoder_layer_list: a list of numbers of the neuron at each ecdoer layer, the last number is the 29 | dimension of the output node representation 30 | Eg: 31 | if node size is 2000, encoder_layer_list=[1000, 128], then the whole neural network would be 
32 | 2000(input)->1000->128->1000->2000, SDNE extract the middle layer as the node representation 33 | """ 34 | self.g = graph 35 | 36 | self.node_size = self.g.G.number_of_nodes() 37 | self.dim = encoder_layer_list[-1] 38 | 39 | self.encoder_layer_list = [self.node_size] 40 | self.encoder_layer_list.extend(encoder_layer_list) 41 | self.encoder_layer_num = len(encoder_layer_list) + 1 42 | 43 | self.alpha = alpha 44 | self.beta = beta 45 | self.nu1 = nu1 46 | self.nu2 = nu2 47 | self.bs = batch_size 48 | self.epoch = epoch 49 | self.max_iter = (epoch * self.node_size) // batch_size 50 | 51 | self.lr = learning_rate 52 | if self.lr is None: 53 | self.lr = tf.train.inverse_time_decay(0.03, self.max_iter, decay_steps=1, decay_rate=0.9999) 54 | 55 | self.sess = tf.Session() 56 | self.vectors = {} 57 | 58 | self.adj_mat = self.getAdj() 59 | self.embeddings = self.train() 60 | 61 | look_back = self.g.look_back_list 62 | 63 | for i, embedding in enumerate(self.embeddings): 64 | self.vectors[look_back[i]] = embedding 65 | 66 | def getAdj(self): 67 | node_size = self.g.node_size 68 | look_up = self.g.look_up_dict 69 | adj = np.zeros((node_size, node_size)) 70 | for edge in self.g.G.edges(): 71 | adj[look_up[edge[0]]][look_up[edge[1]]] = self.g.G[edge[0]][edge[1]]['weight'] 72 | return adj 73 | 74 | def train(self): 75 | adj_mat = self.adj_mat 76 | 77 | AdjBatch = tf.placeholder(tf.float32, [None, self.node_size], name='adj_batch') 78 | Adj = tf.placeholder(tf.float32, [None, None], name='adj_mat') 79 | B = tf.placeholder(tf.float32, [None, self.node_size], name='b_mat') 80 | 81 | fc = AdjBatch 82 | scope_name = 'encoder' 83 | layer_collector = [] 84 | 85 | with tf.name_scope(scope_name): 86 | for i in range(1, self.encoder_layer_num): 87 | fc = fc_op(fc, 88 | name=scope_name + str(i), 89 | n_out=self.encoder_layer_list[i], 90 | layer_collector=layer_collector) 91 | 92 | _embeddings = fc 93 | 94 | scope_name = 'decoder' 95 | with tf.name_scope(scope_name): 96 | for i in range(self.encoder_layer_num - 2, 0, -1): 97 | fc = fc_op(fc, 98 | name=scope_name + str(i), 99 | n_out=self.encoder_layer_list[i], 100 | layer_collector=layer_collector) 101 | fc = fc_op(fc, 102 | name=scope_name + str(0), 103 | n_out=self.encoder_layer_list[0], 104 | layer_collector=layer_collector, ) 105 | 106 | _embeddings_norm = tf.reduce_sum(tf.square(_embeddings), 1, keepdims=True) 107 | 108 | L_1st = tf.reduce_sum( 109 | Adj * ( 110 | _embeddings_norm - 2 * tf.matmul( 111 | _embeddings, tf.transpose(_embeddings) 112 | ) + tf.transpose(_embeddings_norm) 113 | ) 114 | ) 115 | 116 | L_2nd = tf.reduce_sum(tf.square((AdjBatch - fc) * B)) 117 | 118 | L = L_2nd + self.alpha * L_1st 119 | 120 | for param in layer_collector: 121 | L += self.nu1 * tf.reduce_sum(tf.abs(param[0])) + self.nu2 * tf.reduce_sum(tf.square(param[0])) 122 | 123 | optimizer = tf.train.AdamOptimizer(self.lr) 124 | 125 | train_op = optimizer.minimize(L) 126 | 127 | init = tf.global_variables_initializer() 128 | self.sess.run(init) 129 | 130 | print("total iter: %i" % self.max_iter) 131 | for step in range(self.max_iter): 132 | index = np.random.randint(self.node_size, size=self.bs) 133 | adj_batch_train = adj_mat[index, :] 134 | adj_mat_train = adj_batch_train[:, index] 135 | b_mat_train = np.ones_like(adj_batch_train) 136 | b_mat_train[adj_batch_train != 0] = self.beta 137 | 138 | self.sess.run(train_op, feed_dict={AdjBatch: adj_batch_train, 139 | Adj: adj_mat_train, 140 | B: b_mat_train}) 141 | if step % 50 == 0: 142 | l, l1, l2 = self.sess.run((L, L_1st, L_2nd), 
143 | feed_dict={AdjBatch: adj_batch_train, 144 | Adj: adj_mat_train, 145 | B: b_mat_train}) 146 | print("step %i: total loss: %s, l1 loss: %s, l2 loss: %s" % (step, l, l1, l2)) 147 | 148 | return self.sess.run(_embeddings, feed_dict={AdjBatch: adj_mat}) 149 | 150 | def save_embeddings(self, filename): 151 | fout = open(filename, 'w') 152 | node_num = len(self.vectors) 153 | fout.write("{} {}\n".format(node_num, self.dim)) 154 | for node, vec in self.vectors.items(): 155 | fout.write("{} {}\n".format(node, ' '.join([str(x) for x in vec]))) 156 | fout.close() 157 | 158 | 159 | class SDNE2(object): 160 | def __init__(self, graph, encoder_layer_list, alpha=1e-6, beta=5., nu1=1e-5, nu2=1e-5, 161 | batch_size=100, max_iter=2000, learning_rate=None): 162 | 163 | self.g = graph 164 | 165 | self.node_size = self.g.G.number_of_nodes() 166 | self.rep_size = encoder_layer_list[-1] 167 | 168 | self.encoder_layer_list = [self.node_size] + encoder_layer_list 169 | self.encoder_layer_num = len(encoder_layer_list) + 1 170 | 171 | self.alpha = alpha 172 | self.beta = beta 173 | self.nu1 = nu1 174 | self.nu2 = nu2 175 | self.bs = batch_size 176 | self.max_iter = max_iter 177 | self.lr = learning_rate 178 | if self.lr is None: 179 | self.lr = tf.train.inverse_time_decay(0.1, self.max_iter, decay_steps=1, decay_rate=0.9999) 180 | 181 | self.sess = tf.Session() 182 | self.vectors = {} 183 | 184 | self.adj_mat = self.getAdj() 185 | self.deg_vec = np.sum(self.adj_mat, axis=1) 186 | self.embeddings = self.get_train() 187 | 188 | look_back = self.g.look_back_list 189 | 190 | for i, embedding in enumerate(self.embeddings): 191 | self.vectors[look_back[i]] = embedding 192 | 193 | def getAdj(self): 194 | node_size = self.g.node_size 195 | look_up = self.g.look_up_dict 196 | adj = np.zeros((node_size, node_size)) 197 | for edge in self.g.G.edges(): 198 | adj[look_up[edge[0]]][look_up[edge[1]]] = self.g.G[edge[0]][edge[1]]['weight'] 199 | return adj 200 | 201 | def model(self, node, layer_collector, scope_name): 202 | fc = node 203 | with tf.name_scope(scope_name + 'encoder'): 204 | for i in range(1, self.encoder_layer_num): 205 | fc = fc_op(fc, 206 | name=scope_name + str(i), 207 | n_out=self.encoder_layer_list[i], 208 | layer_collector=layer_collector) 209 | 210 | _embeddings = fc 211 | 212 | with tf.name_scope(scope_name + 'decoder'): 213 | for i in range(self.encoder_layer_num - 2, -1, -1): 214 | fc = fc_op(fc, 215 | name=scope_name + str(i), 216 | n_out=self.encoder_layer_list[i], 217 | layer_collector=layer_collector) 218 | 219 | return _embeddings, fc 220 | 221 | def generate_batch(self, shuffle=True): 222 | adj = self.adj_mat 223 | 224 | row_indices, col_indices = adj.nonzero() 225 | sample_index = np.arange(row_indices.shape[0]) 226 | num_of_batches = row_indices.shape[0] // self.bs 227 | counter = 0 228 | if shuffle: 229 | np.random.shuffle(sample_index) 230 | 231 | while True: 232 | batch_index = sample_index[self.bs * counter:self.bs * (counter + 1)] 233 | 234 | nodes_a = adj[row_indices[batch_index], :] 235 | nodes_b = adj[col_indices[batch_index], :] 236 | weights = adj[row_indices[batch_index], col_indices[batch_index]] 237 | weights = np.reshape(weights, [-1, 1]) 238 | 239 | beta_mask_a = np.ones_like(nodes_a) 240 | beta_mask_a[nodes_a != 0] = self.beta 241 | beta_mask_b = np.ones_like(nodes_b) 242 | beta_mask_b[nodes_b != 0] = self.beta 243 | 244 | if counter == num_of_batches: 245 | counter = 0 246 | np.random.shuffle(sample_index) 247 | else: 248 | counter += 1 249 | 250 | yield (nodes_a, nodes_b, 
beta_mask_a, beta_mask_b, weights) 251 | 252 | def get_train(self): 253 | 254 | NodeA = tf.placeholder(tf.float32, [None, self.node_size], name='node_a') 255 | BmaskA = tf.placeholder(tf.float32, [None, self.node_size], name='beta_mask_a') 256 | NodeB = tf.placeholder(tf.float32, [None, self.node_size], name='node_b') 257 | BmaskB = tf.placeholder(tf.float32, [None, self.node_size], name='beta_mask_b') 258 | Weights = tf.placeholder(tf.float32, [None, 1], name='adj_weights') 259 | 260 | layer_collector = [] 261 | nodes = tf.concat([NodeA, NodeB], axis=0) 262 | bmasks = tf.concat([BmaskA, BmaskB], axis=0) 263 | emb, recons = self.model(nodes, layer_collector, 'reconstructor') 264 | embs = tf.split(emb, num_or_size_splits=2, axis=0) 265 | 266 | L_1st = tf.reduce_sum(Weights * (tf.reduce_sum(tf.square(embs[0] - embs[1]), axis=1))) 267 | 268 | L_2nd = tf.reduce_sum(tf.square((nodes - recons) * bmasks)) 269 | 270 | L = L_2nd + self.alpha * L_1st 271 | 272 | for param in layer_collector: 273 | L += self.nu1 * tf.reduce_sum(tf.abs(param[0])) + self.nu2 * tf.reduce_sum(tf.square(param[0])) 274 | 275 | # lr = tf.train.exponential_decay(1e-6, self.max_iter, decay_steps=1, decay_rate=0.9999) 276 | # optimizer = tf.train.MomentumOptimizer(lr, 0.99, use_nesterov=True) 277 | 278 | optimizer = tf.train.AdamOptimizer(self.lr) 279 | train_op = optimizer.minimize(L) 280 | 281 | init = tf.global_variables_initializer() 282 | self.sess.run(init) 283 | 284 | generator = self.generate_batch() 285 | 286 | for step in range(self.max_iter + 1): 287 | nodes_a, nodes_b, beta_mask_a, beta_mask_b, weights = generator.__next__() 288 | 289 | feed_dict = {NodeA: nodes_a, 290 | NodeB: nodes_b, 291 | BmaskA: beta_mask_a, 292 | BmaskB: beta_mask_b, 293 | Weights: weights} 294 | 295 | self.sess.run(train_op, feed_dict=feed_dict) 296 | if step % 50 == 0: 297 | print("step %i: %s" % (step, self.sess.run([L, L_1st, L_2nd], feed_dict=feed_dict))) 298 | 299 | return self.sess.run(emb, feed_dict={NodeA: self.adj_mat[0:1, :], NodeB: self.adj_mat[1:, :]}) 300 | 301 | def save_embeddings(self, filename): 302 | fout = open(filename, 'w') 303 | node_num = len(self.vectors) 304 | fout.write("{} {}\n".format(node_num, self.rep_size)) 305 | for node, vec in self.vectors.items(): 306 | fout.write("{} {}\n".format(node, ' '.join([str(x) for x in vec]))) 307 | fout.close() 308 | -------------------------------------------------------------------------------- /DGI/utils/process.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import pickle as pkl 3 | import networkx as nx 4 | import scipy.sparse as sp 5 | from scipy.sparse.linalg.eigen.arpack import eigsh 6 | import sys 7 | import torch 8 | import torch.nn as nn 9 | 10 | def parse_skipgram(fname): 11 | with open(fname) as f: 12 | toks = list(f.read().split()) 13 | nb_nodes = int(toks[0]) 14 | nb_features = int(toks[1]) 15 | ret = np.empty((nb_nodes, nb_features)) 16 | it = 2 17 | for i in range(nb_nodes): 18 | cur_nd = int(toks[it]) - 1 19 | it += 1 20 | for j in range(nb_features): 21 | cur_ft = float(toks[it]) 22 | ret[cur_nd][j] = cur_ft 23 | it += 1 24 | return ret 25 | 26 | # Process a (subset of) a TU dataset into standard form 27 | def process_tu(data, nb_nodes): 28 | nb_graphs = len(data) 29 | ft_size = data.num_features 30 | 31 | features = np.zeros((nb_graphs, nb_nodes, ft_size)) 32 | adjacency = np.zeros((nb_graphs, nb_nodes, nb_nodes)) 33 | labels = np.zeros(nb_graphs) 34 | sizes = np.zeros(nb_graphs, dtype=np.int32) 
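    # Each graph is padded to nb_nodes rows: `sizes` stores the true node count per
    # graph and `masks` (filled in the loop below) flags which padded rows are real nodes.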
35 | masks = np.zeros((nb_graphs, nb_nodes)) 36 | 37 | for g in range(nb_graphs): 38 | sizes[g] = data[g].x.shape[0] 39 | features[g, :sizes[g]] = data[g].x 40 | labels[g] = data[g].y[0] 41 | masks[g, :sizes[g]] = 1.0 42 | e_ind = data[g].edge_index 43 | coo = sp.coo_matrix((np.ones(e_ind.shape[1]), (e_ind[0, :], e_ind[1, :])), shape=(nb_nodes, nb_nodes)) 44 | adjacency[g] = coo.todense() 45 | 46 | return features, adjacency, labels, sizes, masks 47 | 48 | def micro_f1(logits, labels): 49 | # Compute predictions 50 | preds = torch.round(nn.Sigmoid()(logits)) 51 | 52 | # Cast to avoid trouble 53 | preds = preds.long() 54 | labels = labels.long() 55 | 56 | # Count true positives, true negatives, false positives, false negatives 57 | tp = torch.nonzero(preds * labels).shape[0] * 1.0 58 | tn = torch.nonzero((preds - 1) * (labels - 1)).shape[0] * 1.0 59 | fp = torch.nonzero(preds * (labels - 1)).shape[0] * 1.0 60 | fn = torch.nonzero((preds - 1) * labels).shape[0] * 1.0 61 | 62 | # Compute micro-f1 score 63 | prec = tp / (tp + fp) 64 | rec = tp / (tp + fn) 65 | f1 = (2 * prec * rec) / (prec + rec) 66 | return f1 67 | 68 | """ 69 | Prepare adjacency matrix by expanding up to a given neighbourhood. 70 | This will insert loops on every node. 71 | Finally, the matrix is converted to bias vectors. 72 | Expected shape: [graph, nodes, nodes] 73 | """ 74 | def adj_to_bias(adj, sizes, nhood=1): 75 | nb_graphs = adj.shape[0] 76 | mt = np.empty(adj.shape) 77 | for g in range(nb_graphs): 78 | mt[g] = np.eye(adj.shape[1]) 79 | for _ in range(nhood): 80 | mt[g] = np.matmul(mt[g], (adj[g] + np.eye(adj.shape[1]))) 81 | for i in range(sizes[g]): 82 | for j in range(sizes[g]): 83 | if mt[g][i][j] > 0.0: 84 | mt[g][i][j] = 1.0 85 | return -1e9 * (1.0 - mt) 86 | 87 | 88 | ############################################### 89 | # This section of code adapted from tkipf/gcn # 90 | ############################################### 91 | 92 | def parse_index_file(filename): 93 | """Parse index file.""" 94 | index = [] 95 | for line in open(filename): 96 | index.append(int(line.strip())) 97 | return index 98 | 99 | def sample_mask(idx, l): 100 | """Create mask.""" 101 | mask = np.zeros(l) 102 | mask[idx] = 1 103 | return np.array(mask, dtype=np.bool) 104 | 105 | def load_data(dataset_str): # {'pubmed', 'citeseer', 'cora'} 106 | """Load data.""" 107 | names = ['x', 'y', 'tx', 'ty', 'allx', 'ally', 'graph'] 108 | objects = [] 109 | for i in range(len(names)): 110 | with open("data/ind.{}.{}".format(dataset_str, names[i]), 'rb') as f: 111 | if sys.version_info > (3, 0): 112 | objects.append(pkl.load(f, encoding='latin1')) 113 | else: 114 | objects.append(pkl.load(f)) 115 | 116 | x, y, tx, ty, allx, ally, graph = tuple(objects) 117 | test_idx_reorder = parse_index_file("data/ind.{}.test.index".format(dataset_str)) 118 | test_idx_range = np.sort(test_idx_reorder) 119 | 120 | if dataset_str == 'citeseer': 121 | # Fix citeseer dataset (there are some isolated nodes in the graph) 122 | # Find isolated nodes, add them as zero-vecs into the right position 123 | test_idx_range_full = range(min(test_idx_reorder), max(test_idx_reorder)+1) 124 | tx_extended = sp.lil_matrix((len(test_idx_range_full), x.shape[1])) 125 | tx_extended[test_idx_range-min(test_idx_range), :] = tx 126 | tx = tx_extended 127 | ty_extended = np.zeros((len(test_idx_range_full), y.shape[1])) 128 | ty_extended[test_idx_range-min(test_idx_range), :] = ty 129 | ty = ty_extended 130 | 131 | features = sp.vstack((allx, tx)).tolil() 132 | features[test_idx_reorder, :] = 
features[test_idx_range, :] 133 | adj = nx.adjacency_matrix(nx.from_dict_of_lists(graph)) 134 | 135 | labels = np.vstack((ally, ty)) 136 | labels[test_idx_reorder, :] = labels[test_idx_range, :] 137 | 138 | idx_test = test_idx_range.tolist() 139 | idx_train = range(len(y)) 140 | idx_val = range(len(y), len(y)+500) 141 | 142 | return adj, features, labels, idx_train, idx_val, idx_test 143 | 144 | def sparse_to_tuple(sparse_mx, insert_batch=False): 145 | """Convert sparse matrix to tuple representation.""" 146 | """Set insert_batch=True if you want to insert a batch dimension.""" 147 | def to_tuple(mx): 148 | if not sp.isspmatrix_coo(mx): 149 | mx = mx.tocoo() 150 | if insert_batch: 151 | coords = np.vstack((np.zeros(mx.row.shape[0]), mx.row, mx.col)).transpose() 152 | values = mx.data 153 | shape = (1,) + mx.shape 154 | else: 155 | coords = np.vstack((mx.row, mx.col)).transpose() 156 | values = mx.data 157 | shape = mx.shape 158 | return coords, values, shape 159 | 160 | if isinstance(sparse_mx, list): 161 | for i in range(len(sparse_mx)): 162 | sparse_mx[i] = to_tuple(sparse_mx[i]) 163 | else: 164 | sparse_mx = to_tuple(sparse_mx) 165 | 166 | return sparse_mx 167 | 168 | def standardize_data(f, train_mask): 169 | """Standardize feature matrix and convert to tuple representation""" 170 | # standardize data 171 | f = f.todense() 172 | mu = f[train_mask == True, :].mean(axis=0) 173 | sigma = f[train_mask == True, :].std(axis=0) 174 | f = f[:, np.squeeze(np.array(sigma > 0))] 175 | mu = f[train_mask == True, :].mean(axis=0) 176 | sigma = f[train_mask == True, :].std(axis=0) 177 | f = (f - mu) / sigma 178 | return f 179 | 180 | def preprocess_features(features): 181 | """Row-normalize feature matrix and convert to tuple representation""" 182 | rowsum = np.array(features.sum(1)) 183 | r_inv = np.power(rowsum, -1).flatten() 184 | r_inv[np.isinf(r_inv)] = 0. 185 | r_mat_inv = sp.diags(r_inv) 186 | features = r_mat_inv.dot(features) 187 | return features.todense(), sparse_to_tuple(features) 188 | 189 | def normalize_adjRA(adj): 190 | """Symmetrically normalize adjacency matrix.""" 191 | adj = sp.coo_matrix(adj) 192 | rowsum = np.array(adj.sum(1)) 193 | d_inv_sqrt = np.power(rowsum, -1).flatten() 194 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 195 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 196 | DA = d_mat_inv_sqrt.dot(adj); 197 | 198 | return adj.dot(DA).tocoo() 199 | def normalize_adj(adj): 200 | adj = sp.coo_matrix(adj) 201 | rowsum = np.array(adj.sum(1)) 202 | d_inv_sqrt = np.power(rowsum, -0.5).flatten() 203 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 204 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 205 | return adj.dot(d_mat_inv_sqrt).transpose().dot(d_mat_inv_sqrt).tocoo() 206 | 207 | def normalize_adjCN(adj): 208 | adj = sp.coo_matrix(adj) 209 | rowsum = np.array(adj.sum(1)) 210 | d_inv_sqrt = np.power(rowsum, -0.5).flatten() 211 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 212 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 213 | return adj.dot(adj).tocoo() 214 | 215 | def normalize_adjAA(adj): 216 | adj = sp.coo_matrix(adj) 217 | rowsum = np.array(adj.sum(1)) 218 | d_inv_sqrt = np.power(np.log(rowsum), -1).flatten() 219 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 220 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 221 | DA = d_mat_inv_sqrt.dot(adj); 222 | return adj.dot(DA).tocoo() 223 | 224 | def normalize_adjSalton (adj): 225 | adj = sp.coo_matrix(adj) 226 | rowsum = np.array(adj.sum(1)) 227 | d_inv_sqrt = np.power(rowsum, -0.5).flatten() 228 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 
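# --- Illustrative aside (not part of process.py): what the normalizers above compute ---
# normalize_adj is the symmetric GCN normalization D^{-1/2} A D^{-1/2};
# normalize_adjCN is the common-neighbour count A @ A;
# normalize_adjRA weights each shared neighbour by 1/deg, i.e. A @ D^{-1} @ A;
# normalize_adjAA (Adamic-Adar) uses 1/log(deg) in place of 1/deg.
# A dense NumPy check on a toy triangle graph, assuming an unweighted adjacency:
import numpy as np

A = np.array([[0., 1., 1.],
              [1., 0., 1.],
              [1., 1., 0.]])
deg = A.sum(axis=1)
d_inv_sqrt = np.diag(deg ** -0.5)
gcn_norm = d_inv_sqrt @ A @ d_inv_sqrt            # dense analogue of normalize_adj(A)
common_neighbours = A @ A                         # dense analogue of normalize_adjCN(A)
resource_allocation = A @ np.diag(1.0 / deg) @ A  # dense analogue of normalize_adjRA(A)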
229 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 230 | 231 | Dtemp = d_mat_inv_sqrt @ d_mat_inv_sqrt.T 232 | 233 | CNmat = adj @ adj 234 | result = CNmat @ Dtemp 235 | return result 236 | 237 | def mymaximum (A, B): 238 | BisBigger = A-B 239 | BisBigger.data = np.where(BisBigger.data <= 0, 1, 0) 240 | return A - A.multiply(BisBigger) + B.multiply(BisBigger) 241 | 242 | def myminimum(A,B): 243 | BisBigger = A-B 244 | BisBigger.data = np.where(BisBigger.data >= 0, 1, 0) 245 | return A - A.multiply(BisBigger) + B.multiply(BisBigger) 246 | 247 | 248 | def normalize_adjHDI(adj): 249 | adj = sp.coo_matrix(adj) 250 | 251 | rowsum = np.array(adj.sum(1)) 252 | 253 | deg_row = np.tile(rowsum, (1,adj.shape[0])) 254 | 255 | #deg_row = deg_row.T 256 | deg_row = sp.coo_matrix(deg_row) 257 | 258 | sim = adj.dot(adj) 259 | 260 | #y = sim.copy().tocsr() 261 | #y.data.fill(1) 262 | X = sim.astype(bool).astype(int) 263 | deg_row = deg_row.multiply(X) 264 | 265 | deg_row = mymaximum(deg_row, deg_row.T) 266 | 267 | sim = sim/deg_row 268 | #sim = sp.coo_matrix(sim) 269 | whereAreNan = np.isnan(sim) 270 | whereAreInf = np.isinf(sim) 271 | sim[whereAreNan] = 0 272 | sim[whereAreInf] = 0 273 | 274 | sim = sp.coo_matrix(sim) 275 | #print(sim[0]) 276 | return sim 277 | 278 | def normalize_adjHPI(adj): 279 | adj = sp.coo_matrix(adj) 280 | 281 | rowsum = np.array(adj.sum(1)) 282 | 283 | deg_row = np.tile(rowsum, (1,adj.shape[0])) 284 | 285 | #deg_row = deg_row.T 286 | deg_row = sp.coo_matrix(deg_row) 287 | 288 | sim = adj.dot(adj) 289 | 290 | #y = sim.copy().tocsr() 291 | #y.data.fill(1) 292 | X = sim.astype(bool).astype(int) 293 | deg_row = deg_row.multiply(X) 294 | 295 | deg_row = myminimum(deg_row, deg_row.T) 296 | 297 | sim = sim/deg_row 298 | #sim = sp.coo_matrix(sim) 299 | whereAreNan = np.isnan(sim) 300 | whereAreInf = np.isinf(sim) 301 | sim[whereAreNan] = 0 302 | sim[whereAreInf] = 0 303 | 304 | sim = sp.coo_matrix(sim) 305 | #print(sim[0]) 306 | return sim 307 | 308 | def normalize_adjJaccard(adj): 309 | adj = sp.coo_matrix(adj) 310 | rowsum = np.array(adj.sum(1)) 311 | deg_row = np.tile(rowsum, (1,adj.shape[0])) 312 | deg_row = sp.coo_matrix(deg_row) 313 | 314 | sim = adj.dot(adj) 315 | X = sim.astype(bool).astype(int) 316 | deg_row = deg_row.multiply(X) 317 | deg_row = sp.triu(deg_row, k=0) + sp.triu(deg_row.T,k=0) 318 | 319 | sim = sim/(deg_row.multiply(X)-sim) 320 | whereAreNan = np.isnan(sim) 321 | whereAreInf = np.isinf(sim) 322 | sim[whereAreNan] = 0 323 | sim[whereAreInf] = 0 324 | 325 | sim = sp.coo_matrix(sim) 326 | return sim 327 | 328 | def calc_A_hat(adj_matrix: sp.spmatrix) -> sp.spmatrix: 329 | nnodes = adj_matrix.shape[0] 330 | A = adj_matrix + sp.eye(nnodes) 331 | D_vec = np.sum(A, axis=1).A1 332 | D_vec_invsqrt_corr = 1 / np.sqrt(D_vec) 333 | D_invsqrt_corr = sp.diags(D_vec_invsqrt_corr) 334 | return D_invsqrt_corr @ A @ D_invsqrt_corr 335 | def calc_ppr_exact(adj_matrix: sp.spmatrix, alpha: float) -> np.ndarray: 336 | nnodes = adj_matrix.shape[0] 337 | M = calc_A_hat(adj_matrix) 338 | A_inner = sp.eye(nnodes) - (1 - alpha) * M 339 | return alpha * np.linalg.inv(A_inner.toarray()) 340 | 341 | def normalize_adjSorenson(adj): 342 | """Symmetrically normalize adjacency matrix.""" 343 | adj = sp.coo_matrix(adj) 344 | rowsum = np.array(adj.sum(1)) 345 | d_inv_sqrt = np.power(rowsum, -1).flatten() 346 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 
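# --- Illustrative aside (not part of process.py): calc_ppr_exact above in closed form ---
# calc_A_hat builds the self-loop-augmented symmetric normalization
#     A_hat = D^{-1/2} (A + I) D^{-1/2},
# and calc_ppr_exact returns the exact personalized-PageRank diffusion
#     Pi = alpha * (I - (1 - alpha) * A_hat)^{-1}.
# A toy dense check, assuming a small unweighted path graph:
import numpy as np
import scipy.sparse as sp

A = sp.csr_matrix(np.array([[0., 1., 0.],
                            [1., 0., 1.],
                            [0., 1., 0.]]))
alpha = 0.2
A_loop = (A + sp.eye(A.shape[0])).toarray()
d_inv_sqrt = np.diag(A_loop.sum(axis=1) ** -0.5)
A_hat = d_inv_sqrt @ A_loop @ d_inv_sqrt                               # dense calc_A_hat(A)
ppr = alpha * np.linalg.inv(np.eye(A.shape[0]) - (1 - alpha) * A_hat)  # dense calc_ppr_exact(A, alpha)
# compute_pprAdj in embed_train.py applies the same formula when building the 'diff' view for SDGI.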
347 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 348 | sim = adj @ adj 349 | sim = sp.triu(sim, k=1) 350 | Dtemp = d_mat_inv_sqrt + d_mat_inv_sqrt.T 351 | 352 | Dtemp = sp.triu(Dtemp) 353 | 354 | return 2*sim.dot(Dtemp) 355 | 356 | def linCCALap( H1, H2, outdim_size,adj,gamma): 357 | """ 358 | An implementation of linear CCA 359 | # Arguments: 360 | H1 and H2: the matrices containing the data for view 1 and view 2. Each row is a sample. 361 | outdim_size: specifies the number of new features 362 | # Returns 363 | A and B: the linear transformation matrices 364 | mean1 and mean2: the means of data for both views 365 | """ 366 | L = normalize_adj(adj) 367 | r1 = 1e-4 368 | r2 = 1e-4 369 | 370 | m = H1.shape[0] 371 | o1 = H1.shape[1] 372 | o2 = H2.shape[1] 373 | 374 | m1 = np.mean(H1, axis=0) 375 | m2 = np.mean(H2, axis=0) 376 | H1bar = H1 - np.tile(m1, (m, 1)) 377 | H2bar = H2 - np.tile(m2, (m, 1)) 378 | 379 | SigmaHat12 = (1.0 / (m - 1)) * np.dot(H1bar.T, H2bar) 380 | SigmaHat11 = (1.0 / (m - 1)) * np.dot(H1bar.T, 381 | H1bar) + r1 * np.identity(o1) 382 | SigmaHat22 = (1.0 / (m - 1)) * np.dot(H2bar.T, 383 | H2bar) + r2 * np.identity(o2) 384 | 385 | [D1, V1] = np.linalg.eigh(SigmaHat11) 386 | [D2, V2] = np.linalg.eigh(SigmaHat22) 387 | SigmaHat11RootInv = np.dot( 388 | np.dot(V1, np.diag(D1 ** -0.5)), V1.T) 389 | SigmaHat22RootInv = np.dot( 390 | np.dot(V2, np.diag(D2 ** -0.5)), V2.T) 391 | 392 | T1 = np.dot(np.dot(SigmaHat11RootInv, 393 | SigmaHat12), SigmaHat22RootInv) 394 | regulTerm = np.dot(np.dot(H1bar.T, 395 | L), H2bar) 396 | regulTerm = gamma*regulTerm 397 | T2 = np.dot(np.dot(SigmaHat11RootInv, 398 | regulTerm), SigmaHat22RootInv) 399 | 400 | # Tval = np.dot(np.dot(SigmaHat11RootInv, 401 | # SigmaHat12), SigmaHat22RootInv) 402 | Tval = T1-T2 403 | 404 | [U, D, V] = np.linalg.svd(Tval) 405 | V = V.T 406 | w1 = np.dot(SigmaHat11RootInv, U[:, 0:outdim_size]) 407 | w2 = np.dot(SigmaHat22RootInv, V[:, 0:outdim_size]) 408 | D = D[0:outdim_size] 409 | return w1,w2,m1,m2,D 410 | 411 | 412 | def preprocess_adj(adj): 413 | """Preprocessing of adjacency matrix for simple GCN model and conversion to tuple representation.""" 414 | adj_normalized = normalize_adj(adj + sp.eye(adj.shape[0])) 415 | return sparse_to_tuple(adj_normalized) 416 | 417 | def sparse_mx_to_torch_sparse_tensor(sparse_mx): 418 | """Convert a scipy sparse matrix to a torch sparse tensor.""" 419 | sparse_mx = sparse_mx.tocoo().astype(np.float32) 420 | indices = torch.from_numpy( 421 | np.vstack((sparse_mx.row, sparse_mx.col)).astype(np.int64)) 422 | values = torch.from_numpy(sparse_mx.data) 423 | shape = torch.Size(sparse_mx.shape) 424 | return torch.sparse.FloatTensor(indices, values, shape) 425 | -------------------------------------------------------------------------------- /embed_train.py: -------------------------------------------------------------------------------- 1 | # -*- coding: utf-8 -*- 2 | 3 | import ast 4 | import logging 5 | import os 6 | 7 | #from gensim.models import Word2Vec 8 | #from gensim.models.word2vec import LineSentence 9 | from sklearn.preprocessing import scale 10 | from GAE.train_model import gae_model 11 | from OpenNE import gf, grarep, hope, lap, line, node2vec, sdne,RWR 12 | #from SVD.model import SVD_embedding 13 | #from struc2vec import struc2vec 14 | from utils import * 15 | from scipy.linalg import fractional_matrix_power, inv 16 | import numpy as np 17 | import scipy.sparse as sp 18 | #import hdf5storage as hd 19 | import torch 20 | import torch.nn as nn 21 | import networkx as nx 22 | import 
pandas as pd 23 | from DGI.models import DGI, LogReg 24 | from DGI.utils import process 25 | from scipy.io import loadmat 26 | 27 | 28 | #from utils import sparse_mx_to_torch_sparse_tensor 29 | #from dataset import load 30 | 31 | 32 | # Borrowed from https://github.com/PetarV-/DGI 33 | class GCN(nn.Module): 34 | def __init__(self, in_ft, out_ft, bias=True): 35 | super(GCN, self).__init__() 36 | self.fc = nn.Linear(in_ft, out_ft, bias=False) 37 | self.act = nn.PReLU() 38 | 39 | if bias: 40 | self.bias = nn.Parameter(torch.FloatTensor(out_ft)) 41 | self.bias.data.fill_(0.0) 42 | else: 43 | self.register_parameter('bias', None) 44 | 45 | for m in self.modules(): 46 | self.weights_init(m) 47 | 48 | def weights_init(self, m): 49 | if isinstance(m, nn.Linear): 50 | torch.nn.init.xavier_uniform_(m.weight.data) 51 | if m.bias is not None: 52 | m.bias.data.fill_(0.0) 53 | 54 | # Shape of seq: (batch, nodes, features) 55 | def forward(self, seq, adj, sparse=True): 56 | seq_fts = self.fc(seq) 57 | if sparse: 58 | out = torch.unsqueeze(torch.spmm(adj, torch.squeeze(seq_fts, 0)), 0) 59 | else: 60 | out = torch.bmm(adj, seq_fts) 61 | if self.bias is not None: 62 | out += self.bias 63 | return self.act(out) 64 | 65 | 66 | # Borrowed from https://github.com/PetarV-/DGI 67 | class Readout(nn.Module): 68 | def __init__(self): 69 | super(Readout, self).__init__() 70 | 71 | def forward(self, seq, msk): 72 | if msk is None: 73 | return torch.mean(seq, 1) 74 | else: 75 | msk = torch.unsqueeze(msk, -1) 76 | return torch.mean(seq * msk, 1) / torch.sum(msk) 77 | 78 | 79 | # Borrowed from https://github.com/PetarV-/DGI 80 | class Discriminator(nn.Module): 81 | def __init__(self, n_h): 82 | super(Discriminator, self).__init__() 83 | self.f_k = nn.Bilinear(n_h, n_h, 1) 84 | 85 | for m in self.modules(): 86 | self.weights_init(m) 87 | 88 | def weights_init(self, m): 89 | if isinstance(m, nn.Bilinear): 90 | torch.nn.init.xavier_uniform_(m.weight.data) 91 | if m.bias is not None: 92 | m.bias.data.fill_(0.0) 93 | 94 | def forward(self, c1, c2, h1, h2, h3, h4, s_bias1=None, s_bias2=None): 95 | c_x1 = torch.unsqueeze(c1, 1) 96 | c_x1 = c_x1.expand_as(h1).contiguous() 97 | c_x2 = torch.unsqueeze(c2, 1) 98 | c_x2 = c_x2.expand_as(h2).contiguous() 99 | 100 | # positive 101 | sc_1 = torch.squeeze(self.f_k(h2, c_x1), 2) 102 | sc_2 = torch.squeeze(self.f_k(h1, c_x2), 2) 103 | 104 | # negetive 105 | sc_3 = torch.squeeze(self.f_k(h4, c_x1), 2) 106 | sc_4 = torch.squeeze(self.f_k(h3, c_x2), 2) 107 | 108 | logits = torch.cat((sc_1, sc_2, sc_3, sc_4), 1) 109 | return logits 110 | 111 | 112 | class Model(nn.Module): 113 | def __init__(self, n_in, n_h): 114 | super(Model, self).__init__() 115 | self.gcn1 = GCN(n_in, n_h) 116 | self.gcn2 = GCN(n_in, n_h) 117 | self.read = Readout() 118 | 119 | self.sigm = nn.Sigmoid() 120 | 121 | self.disc = Discriminator(n_h) 122 | 123 | def forward(self, seq1, seq2, adj, diff, sparse, msk, samp_bias1, samp_bias2): 124 | h_1 = self.gcn1(seq1, adj, sparse) 125 | c_1 = self.read(h_1, msk) 126 | c_1 = self.sigm(c_1) 127 | 128 | h_2 = self.gcn2(seq1, diff, sparse) 129 | c_2 = self.read(h_2, msk) 130 | c_2 = self.sigm(c_2) 131 | 132 | h_3 = self.gcn1(seq2, adj, sparse) 133 | h_4 = self.gcn2(seq2, diff, sparse) 134 | 135 | ret = self.disc(c_1, c_2, h_1, h_2, h_3, h_4, samp_bias1, samp_bias2) 136 | 137 | return ret, h_1, h_2 138 | 139 | def embed(self, seq, adj, diff, sparse, msk): 140 | h_1 = self.gcn1(seq, adj, sparse) 141 | c = self.read(h_1, msk) 142 | 143 | h_2 = self.gcn2(seq, diff, sparse) 144 | return 
(h_1 + h_2).detach(), c.detach() 145 | 146 | 147 | class LogReg(nn.Module): 148 | def __init__(self, ft_in, nb_classes): 149 | super(LogReg, self).__init__() 150 | self.fc = nn.Linear(ft_in, nb_classes) 151 | self.sigm = nn.Sigmoid() 152 | 153 | for m in self.modules(): 154 | self.weights_init(m) 155 | 156 | def weights_init(self, m): 157 | if isinstance(m, nn.Linear): 158 | torch.nn.init.xavier_uniform_(m.weight.data) 159 | if m.bias is not None: 160 | m.bias.data.fill_(0.0) 161 | 162 | def forward(self, seq): 163 | ret = torch.log_softmax(self.fc(seq), dim=-1) 164 | return ret 165 | 166 | 167 | def compute_pprAdj(adj, alpha=0.2, self_loop=True): 168 | if self_loop: 169 | adj = adj + sp.eye(adj.shape[0]) 170 | adj = sp.coo_matrix(adj) 171 | rowsum = np.array(adj.sum(1)) 172 | d_inv_sqrt = np.power(rowsum, -0.5).flatten() 173 | d_inv_sqrt[np.isinf(d_inv_sqrt)] = 0. 174 | d_mat_inv_sqrt = sp.diags(d_inv_sqrt) 175 | at = d_mat_inv_sqrt @ adj @ d_mat_inv_sqrt 176 | return alpha * inv((np.eye(adj.shape[0]) - (1 - alpha) * at)) # a(I_n-(1-a)A~)^-1 177 | 178 | def _scaleSimMat(A): 179 | """Scale rows of similarity matrix""" 180 | A = A - np.diag(np.diag(A)) 181 | A = A + np.diag(A.sum(axis=0) == 0) 182 | col = A.sum(axis=0) 183 | A = A.astype(np.float)/col[:, None] 184 | 185 | return A 186 | 187 | def PPMI_matrix(M): 188 | """ Compute Positive Pointwise Mutual Information Matrix""" 189 | M = _scaleSimMat(M) 190 | n = M.shape[0] 191 | col = np.asarray(M.sum(axis=0), dtype=float) 192 | col = col.reshape((1, n)) 193 | row = np.asarray(M.sum(axis=1), dtype=float) 194 | row = row.reshape((n, 1)) 195 | D = np.sum(col) 196 | 197 | np.seterr(all='ignore') 198 | PPMI = np.log(np.divide(D*M, np.dot(row, col))) 199 | PPMI[np.isnan(PPMI)] = 0 200 | PPMI[PPMI < 0] = 0 201 | 202 | def embedding_training(args, train_graph_filename): 203 | if args.method == 'struc2vec': 204 | g = read_for_struc2vec(train_graph_filename) 205 | elif args.method == 'GAE': 206 | if args.input == 'YeastAdj.mat': 207 | g = load_mat_data() 208 | else: 209 | g = read_for_gae(train_graph_filename) 210 | elif args.method == 'DGI': 211 | if args.input == 'YeastAdj.mat': 212 | g = load_mat_data() 213 | else: 214 | g = read_for_gae(train_graph_filename) 215 | elif args.method == 'SDGI': 216 | if args.input == 'YeastAdj.mat': 217 | g = load_mat_data() 218 | else: 219 | g = read_for_gae(train_graph_filename) 220 | elif args.method == 'SVD': 221 | g = read_for_SVD(train_graph_filename, weighted=args.weighted) 222 | else: 223 | if args.input == 'YeastAdj.mat': 224 | g = read_for_OpenNE_from_mat(args.input) 225 | else: 226 | g = read_for_OpenNE(train_graph_filename, weighted=args.weighted) 227 | 228 | _embedding_training(args, G_=g) 229 | 230 | return 231 | 232 | 233 | def load_mat_data(): 234 | ne = loadmat('YeastAdj.mat') 235 | ne = ne['adj'] 236 | G=nx.from_numpy_matrix(ne) 237 | node_list=list(G.nodes) 238 | adj = nx.adjacency_matrix(G, nodelist=node_list) 239 | print("Graph Loaded...") 240 | return (adj,node_list) 241 | 242 | 243 | def _embedding_training(args, G_=None): 244 | seed=args.seed 245 | 246 | if args.method == 'struc2vec': 247 | logging.basicConfig(filename='./src/bionev/struc2vec/struc2vec.log', filemode='w', level=logging.DEBUG, 248 | format='%(asctime)s %(message)s') 249 | if (args.OPT3): 250 | until_layer = args.until_layer 251 | else: 252 | until_layer = None 253 | 254 | G = struc2vec.Graph(G_, args.workers, untilLayer=until_layer) 255 | 256 | if (args.OPT1): 257 | G.preprocess_neighbors_with_bfs_compact() 258 | else: 259 | 
G.preprocess_neighbors_with_bfs() 260 | 261 | if (args.OPT2): 262 | G.create_vectors() 263 | G.calc_distances(compactDegree=args.OPT1) 264 | else: 265 | G.calc_distances_all_vertices(compactDegree=args.OPT1) 266 | 267 | print('create distances network..') 268 | G.create_distances_network() 269 | print('begin random walk...') 270 | G.preprocess_parameters_random_walk() 271 | 272 | G.simulate_walks(args.number_walks, args.walk_length) 273 | print('walk finished..\nLearning embeddings...') 274 | walks = LineSentence('random_walks.txt') 275 | model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, hs=1, sg=1, 276 | workers=args.workers, seed=seed) 277 | os.remove("random_walks.txt") 278 | model.wv.save_word2vec_format(args.output) 279 | elif args.method == 'GAE': 280 | if args.input == 'STRING-EXP.mat': 281 | model = gae_model(args) 282 | G, node_list = load_mat_data() 283 | model.train(G) 284 | # save embeddings 285 | model.save_embeddings(args.output, node_list) 286 | else: 287 | 288 | model = gae_model(args) 289 | G = G_[0] 290 | node_list = G_[1] 291 | model.train(G) 292 | # save embeddings 293 | model.save_embeddings(args.output, node_list) 294 | elif args.method == 'SDGI': 295 | nb_epochs = 200 296 | patience = 20 297 | lr = 0.001 298 | l2_coef = 0.0 299 | hid_units = 100 300 | sparse = False 301 | verbose=True 302 | alpha = 0.2 303 | 304 | adj = G_[0] 305 | #diff = alpha * inv((np.eye(adj.shape[0]) - (1 - alpha) * (adj + sp.eye(adj.shape[0])))) 306 | #diff = process.normalize_adj(adj + sp.eye(adj.shape[0])) 307 | #diff = diff.todense() 308 | 309 | 310 | #diff = compute_pprAdj(adj,alpha) 311 | node_list = G_[1] 312 | # datafile = 'expression_data.tsv' 313 | # normalize = True 314 | # df = pd.read_csv(datafile, sep='\t', header=0) 315 | # df.columns = [int(x[1:]) - 1 for x in df.columns] 316 | # if normalize==True: 317 | # df = df[node_list] 318 | # df = pd.DataFrame(scale(df, axis=0)) 319 | # t_data = df.T 320 | # features = t_data.to_numpy() 321 | 322 | features = sp.identity(adj.shape[0]) 323 | features, _ = process.preprocess_features(features) 324 | 325 | 326 | if args.embTech == 'DGI': 327 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 328 | elif args.embTech == 'CN': 329 | adj = process.normalize_adjCN(adj + sp.eye(adj.shape[0])) 330 | elif args.embTech == 'AA': 331 | adj = process.normalize_adjAA(adj + sp.eye(adj.shape[0])) 332 | elif args.embTech == 'Jaccard': 333 | adj = process.normalize_adjJaccard(adj + sp.eye(adj.shape[0])) 334 | elif args.embTech == 'RA': 335 | adj = process.normalize_adjRA(adj + sp.eye(adj.shape[0])) 336 | elif args.embTech == 'Adj-HDI': 337 | diff = process.normalize_adjHDI(adj + sp.eye(adj.shape[0])) 338 | diff = diff.todense() 339 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 340 | adj = adj.todense() 341 | elif args.embTech =='Adj-Adj': 342 | diff = process.normalize_adj(adj + sp.eye(adj.shape[0])) 343 | diff = diff.todense() 344 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 345 | adj = adj.todense() 346 | elif args.embTech == 'Salton-Salton': 347 | diff = process.normalize_adjSalton(adj + sp.eye(adj.shape[0])) 348 | diff = diff.todense() 349 | adj = process.normalize_adjSalton(adj + sp.eye(adj.shape[0])) 350 | adj = adj.todense() 351 | elif args.embTech == 'HDI-RA': 352 | diff = process.normalize_adjHDI(adj + sp.eye(adj.shape[0])) 353 | diff = diff.todense() 354 | adj = process.normalize_adjRA(adj + sp.eye(adj.shape[0])) 355 | adj = adj.todense() 356 | elif args.embTech == 'HDI-Rwr': 357 | 
diff = compute_pprAdj(adj,alpha) 358 | #diff = process.normalize_adjHDI(adj + sp.eye(adj.shape[0])) 359 | #diff = diff.todense() 360 | adj = process.normalize_adjHDI(adj + sp.eye(adj.shape[0])) 361 | adj = adj.todense() 362 | elif args.embTech == 'HPI': 363 | adj = process.normalize_adjHPI(adj + sp.eye(adj.shape[0])) 364 | elif args.embTech == 'Sorenson': 365 | adj = process.normalize_adjSorenson(adj + sp.eye(adj.shape[0])) 366 | elif args.embTech == 'Salton': 367 | adj = process.normalize_adjSalton(adj + sp.eye(adj.shape[0])) 368 | elif args.embTech == 'Adj-Rwr': 369 | diff = compute_pprAdj(adj,alpha) 370 | #diff = process.normalize_adjHDI(adj + sp.eye(adj.shape[0])) 371 | #diff = diff.todense() 372 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 373 | adj = adj.todense() 374 | elif args.embTech == 'Adj-Salton': 375 | #diff = compute_pprAdj(adj,alpha) 376 | diff = process.normalize_adjSalton(adj + sp.eye(adj.shape[0])) 377 | diff = diff.todense() 378 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 379 | adj = adj.todense() 380 | elif args.embTech == 'Adj-RA': 381 | #diff = compute_pprAdj(adj,alpha) 382 | diff = process.normalize_adjRA(adj + sp.eye(adj.shape[0])) 383 | diff = diff.todense() 384 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 385 | adj = adj.todense() 386 | elif args.embTech == 'Salton-Rwr': 387 | diff = compute_pprAdj(adj,alpha) 388 | 389 | adj = process.normalize_adjSalton(adj + sp.eye(adj.shape[0])) 390 | adj = adj.todense() 391 | elif args.embTech == 'Adj-HPI': 392 | diff = process.normalize_adjHPI(adj + sp.eye(adj.shape[0])) 393 | diff = diff.todense() 394 | 395 | adj = process.normalize_adjSalton(adj + sp.eye(adj.shape[0])) 396 | adj = adj.todense() 397 | else: 398 | print("No such embedding technique \n We are calling default DGI", args.embTech) 399 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 400 | #adj = adj.todense() 401 | ft_size = features.shape[1] 402 | print("Size of features", ft_size) 403 | #features.tocsr() 404 | #nb_classes = np.unique(labels).shape[0] 405 | #sparse = True 406 | sample_size = 2000 407 | batch_size = 4 408 | 409 | 410 | lbl_1 = torch.ones(batch_size, sample_size * 2) 411 | lbl_2 = torch.zeros(batch_size, sample_size * 2) 412 | lbl = torch.cat((lbl_1, lbl_2), 1) 413 | 414 | model = Model(ft_size, hid_units) 415 | optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef) 416 | 417 | 418 | b_xent = nn.BCEWithLogitsLoss() 419 | xent = nn.CrossEntropyLoss() 420 | cnt_wait = 0 421 | best = 1e9 422 | best_t = 0 423 | 424 | for epoch in range(nb_epochs): 425 | 426 | idx = np.random.randint(0, adj.shape[-1] - sample_size + 1, batch_size) 427 | ba, bd, bf = [], [], [] 428 | for i in idx: 429 | ba.append(adj[i: i + sample_size, i: i + sample_size]) 430 | bd.append(diff[i: i + sample_size, i: i + sample_size]) 431 | bf.append(features[i: i + sample_size]) 432 | 433 | ba = np.array(ba).reshape(batch_size, sample_size, sample_size) 434 | bd = np.array(bd).reshape(batch_size, sample_size, sample_size) 435 | bf = np.array(bf).reshape(batch_size, sample_size, ft_size) 436 | 437 | if sparse: 438 | ba = sparse_mx_to_torch_sparse_tensor(sp.coo_matrix(ba)) 439 | bd = sparse_mx_to_torch_sparse_tensor(sp.coo_matrix(bd)) 440 | else: 441 | ba = torch.FloatTensor(ba) 442 | bd = torch.FloatTensor(bd) 443 | 444 | bf = torch.FloatTensor(bf) 445 | idx = np.random.permutation(sample_size) 446 | shuf_fts = bf[:, idx, :] 447 | 448 | if torch.cuda.is_available(): 449 | bf = bf.cuda() 450 | ba = ba.cuda() 451 | 
bd = bd.cuda() 452 | shuf_fts = shuf_fts.cuda() 453 | 454 | model.train() 455 | optimiser.zero_grad() 456 | 457 | logits, __, __ = model(bf, shuf_fts, ba, bd, sparse, None, None, None) 458 | 459 | loss = b_xent(logits, lbl) 460 | 461 | loss.backward() 462 | optimiser.step() 463 | 464 | if verbose: 465 | print('Epoch: {0}, Loss: {1:0.4f}'.format(epoch, loss.item())) 466 | 467 | if loss < best: 468 | best = loss 469 | best_t = epoch 470 | cnt_wait = 0 471 | torch.save(model.state_dict(), 'model.pkl') 472 | else: 473 | cnt_wait += 1 474 | 475 | if cnt_wait == patience: 476 | if verbose: 477 | print('Early stopping!') 478 | break 479 | 480 | if verbose: 481 | print('Loading {}th epoch'.format(best_t)) 482 | model.load_state_dict(torch.load('model.pkl')) 483 | 484 | if sparse: 485 | adj = sparse_mx_to_torch_sparse_tensor(sp.coo_matrix(adj)) 486 | diff = sparse_mx_to_torch_sparse_tensor(sp.coo_matrix(diff)) 487 | 488 | features = torch.FloatTensor(features[np.newaxis]) 489 | adj = torch.FloatTensor(adj[np.newaxis]) 490 | diff = torch.FloatTensor(diff[np.newaxis]) 491 | #features = features.cuda() 492 | #adj = adj.cuda() 493 | #diff = diff.cuda() 494 | 495 | embeds, _ = model.embed(features, adj, diff, sparse, None) 496 | output = args.output 497 | TenToNum = embeds.numpy() 498 | newembeds = TenToNum[0] 499 | 500 | fout = open(output, 'w') 501 | fout.write("{} {}\n".format(newembeds.shape[0], newembeds.shape[1])) 502 | for idx in range(newembeds.shape[0]): 503 | fout.write("{} {}\n".format(node_list[idx], ' '.join([str(x) for x in newembeds[idx, :]]))) 504 | fout.close() 505 | 506 | elif args.method == 'DGI': 507 | # training params for DGI 508 | batch_size = 1 509 | nb_epochs = args.epochs 510 | patience = 20 511 | lr = 0.001 512 | l2_coef = 0.0 513 | drop_prob = 0.0 514 | hid_units = 100 515 | sparse = True #Small datasets make it True 516 | nonlinearity = 'prelu' # special name to separate parameters 517 | adj = G_[0] 518 | node_list = G_[1] 519 | features = sp.identity(adj.shape[0]) 520 | # datafile = 'expression_data.tsv' 521 | # normalize = True 522 | # df = pd.read_csv(datafile, sep='\t', header=0) 523 | # df.columns = [int(x[1:]) - 1 for x in df.columns] 524 | # if normalize==True: 525 | # df = pd.DataFrame(scale(df, axis=0)) 526 | # t_data = df.T 527 | # features = t_data.to_numpy() 528 | # features = features[[node_list],:] 529 | # #features = features.T 530 | # #features = sp.diags(pr) 531 | 532 | """ RWR features 3 steps """ 533 | #features = myrwr(adj, 0.15,3) 534 | 535 | features, _ = process.preprocess_features(features) 536 | 537 | nb_nodes = features.shape[0] 538 | ft_size = features.shape[1] 539 | 540 | #matlabData = hd.loadmat('CTD_DDA_HDI.mat') 541 | #adj = matlabData['adj'] 542 | 543 | 544 | """ For large file use implementation in Python""" 545 | #adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 546 | #adj = process.calc_ppr_exact(adj, 0.15) 547 | #adj = myrwr(adj + sp.eye(adj.shape[0]), 0.15, 10) 548 | if args.embTech == 'DGI': 549 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 550 | elif args.embTech == 'CN': 551 | adj = process.normalize_adjCN(adj + sp.eye(adj.shape[0])) 552 | elif args.embTech == 'AA': 553 | adj = process.normalize_adjAA(adj + sp.eye(adj.shape[0])) 554 | elif args.embTech == 'Jaccard': 555 | adj = process.normalize_adjJaccard(adj + sp.eye(adj.shape[0])) 556 | elif args.embTech == 'RA': 557 | adj = process.normalize_adjRA(adj + sp.eye(adj.shape[0])) 558 | elif args.embTech == 'HDI': 559 | adj = process.normalize_adjHDI(adj + 
sp.eye(adj.shape[0])) 560 | elif args.embTech == 'HPI': 561 | adj = process.normalize_adjHPI(adj + sp.eye(adj.shape[0])) 562 | elif args.embTech == 'Sorenson': 563 | adj = process.normalize_adjSorenson(adj + sp.eye(adj.shape[0])) 564 | elif args.embTech == 'Salton': 565 | adj = process.normalize_adjSalton(adj + sp.eye(adj.shape[0])) 566 | else: 567 | print("No such embedding technique \n We are calling default DGI", args.embTech) 568 | adj = process.normalize_adj(adj + sp.eye(adj.shape[0])) 569 | 570 | 571 | if sparse: 572 | sp_adj = process.sparse_mx_to_torch_sparse_tensor(adj) 573 | else: 574 | adj = (adj + sp.eye(adj.shape[0])).todense() 575 | 576 | features = torch.FloatTensor(features[np.newaxis]) 577 | if not sparse: 578 | adj = torch.FloatTensor(adj[np.newaxis]) 579 | 580 | 581 | 582 | 583 | model = DGI(ft_size, hid_units, nonlinearity) 584 | optimiser = torch.optim.Adam(model.parameters(), lr=lr, weight_decay=l2_coef) 585 | 586 | if torch.cuda.is_available(): 587 | print('Using CUDA') 588 | model.cuda() 589 | features = features.cuda() 590 | if sparse: 591 | sp_adj = sp_adj.cuda() 592 | else: 593 | adj = adj.cuda() 594 | labels = labels.cuda() 595 | idx_train = idx_train.cuda() 596 | idx_val = idx_val.cuda() 597 | idx_test = idx_test.cuda() 598 | 599 | b_xent = nn.BCEWithLogitsLoss() 600 | xent = nn.CrossEntropyLoss() 601 | cnt_wait = 0 602 | best = 1e9 603 | best_t = 0 604 | 605 | for epoch in range(nb_epochs): 606 | model.train() 607 | optimiser.zero_grad() 608 | 609 | idx = np.random.permutation(nb_nodes) 610 | shuf_fts = features[:, idx, :] 611 | 612 | lbl_1 = torch.ones(batch_size, nb_nodes) 613 | lbl_2 = torch.zeros(batch_size, nb_nodes) 614 | lbl = torch.cat((lbl_1, lbl_2), 1) 615 | 616 | if torch.cuda.is_available(): 617 | shuf_fts = shuf_fts.cuda() 618 | lbl = lbl.cuda() 619 | 620 | logits = model(features, shuf_fts, sp_adj if sparse else adj, sparse, None, None, None) 621 | 622 | loss = b_xent(logits, lbl) 623 | 624 | #print('Loss:', loss) 625 | 626 | if loss < best: 627 | best = loss 628 | best_t = epoch 629 | cnt_wait = 0 630 | torch.save(model.state_dict(), 'best_dgi.pkl') 631 | else: 632 | cnt_wait += 1 633 | 634 | if cnt_wait == patience: 635 | print('Early stopping!') 636 | break 637 | 638 | loss.backward() 639 | optimiser.step() 640 | 641 | print('Loading {}th epoch'.format(best_t)) 642 | model.load_state_dict(torch.load('best_dgi.pkl')) 643 | 644 | embeds, _ = model.embed(features, sp_adj if sparse else adj, sparse, None) 645 | 646 | output = args.output 647 | TenToNum = embeds.numpy() 648 | newembeds = TenToNum[0] 649 | 650 | fout = open(output, 'w') 651 | fout.write("{} {}\n".format(newembeds.shape[0], newembeds.shape[1])) 652 | for idx in range(newembeds.shape[0]): 653 | fout.write("{} {}\n".format(node_list[idx], ' '.join([str(x) for x in newembeds[idx, :]]))) 654 | fout.close() 655 | 656 | elif args.method == 'SVD': 657 | SVD_embedding(G_, args.output, size=args.dimensions) 658 | else: 659 | if args.method == 'Laplacian': 660 | model = lap.LaplacianEigenmaps(G_, rep_size=args.dimensions) 661 | elif args.method == 'RWR': 662 | model = RWR.RWR(G_, rep_size=100) 663 | 664 | elif args.method == 'GF': 665 | model = gf.GraphFactorization(G_, rep_size=args.dimensions, 666 | epoch=args.epochs, learning_rate=args.lr, weight_decay=args.weight_decay) 667 | 668 | elif args.method == 'HOPE': 669 | model = hope.HOPE(graph=G_, d=args.dimensions) 670 | 671 | elif args.method == 'GraRep': 672 | model = grarep.GraRep(graph=G_, Kstep=args.kstep, dim=args.dimensions) 673 | 674 | 
elif args.method == 'DeepWalk': 675 | model = node2vec.Node2vec(graph=G_, path_length=args.walk_length, 676 | num_paths=args.number_walks, dim=args.dimensions, 677 | workers=args.workers, window=args.window_size, dw=True) 678 | 679 | elif args.method == 'node2vec': 680 | model = node2vec.Node2vec(graph=G_, path_length=args.walk_length, 681 | num_paths=args.number_walks, dim=args.dimensions, 682 | workers=args.workers, p=args.p, q=args.q, window=args.window_size) 683 | 684 | elif args.method == 'LINE': 685 | model = line.LINE(G_, epoch=args.epochs, 686 | rep_size=args.dimensions, order=args.order) 687 | 688 | elif args.method == 'SDNE': 689 | encoder_layer_list = ast.literal_eval(args.encoder_list) 690 | model = sdne.SDNE(G_, encoder_layer_list=encoder_layer_list, 691 | alpha=args.alpha, beta=args.beta, nu1=args.nu1, nu2=args.nu2, 692 | batch_size=args.bs, epoch=args.epochs, learning_rate=args.lr) 693 | else: 694 | raise ValueError(f'Invalid method: {args.method}') 695 | 696 | print("Saving embeddings...") 697 | model.save_embeddings(args.output) 698 | 699 | return 700 | --------------------------------------------------------------------------------
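Every training branch above writes embeddings in the same plain-text layout: a header line with the node count and dimensionality, followed by one whitespace-separated row per node ("node_id v1 ... vd"), as seen in the save_embeddings methods and in the fout.write loops of the DGI/SDGI branches. A minimal reader for that layout is sketched below; load_embeddings is a hypothetical helper for illustration, not a function in this repository.

import numpy as np

def load_embeddings(path):
    """Read a 'node_count dim' header, then 'node_id v1 ... vd' rows, into a dict."""
    vectors = {}
    with open(path) as fin:
        node_num, dim = map(int, fin.readline().split())
        for line in fin:
            parts = line.strip().split()
            if not parts:
                continue
            vectors[parts[0]] = np.asarray(parts[1:], dtype=float)
    assert len(vectors) == node_num
    assert all(vec.shape[0] == dim for vec in vectors.values())
    return vectors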