├── preprocessing
│   ├── config.py
│   ├── caslaplacian.py
│   ├── preprocess_graph_signal.py
│   └── utils.py
├── model
│   ├── config.py
│   ├── run_graph_sequence.py
│   └── model_sparse_graph_signal.py
├── LICENSE
└── README.md

/preprocessing/config.py:
--------------------------------------------------------------------------------
1 | DATA_PATHA = "../data"
2 | 
3 | 
4 | cascades = DATA_PATHA+"/dataset_weibo.txt"
5 | 
6 | cascade_train = DATA_PATHA+"/cascade_train.txt"
7 | cascade_val = DATA_PATHA+"/cascade_val.txt"
8 | cascade_test = DATA_PATHA+"/cascade_test.txt"
9 | shortestpath_train = DATA_PATHA+"/shortestpath_train.txt"
10 | shortestpath_val = DATA_PATHA+"/shortestpath_val.txt"
11 | shortestpath_test = DATA_PATHA+"/shortestpath_test.txt"
12 | 
13 | observation = 3*60*60-1
14 | pre_times = [24 * 3600]
--------------------------------------------------------------------------------
/model/config.py:
--------------------------------------------------------------------------------
1 | import math
2 | DATA_PATHA = "../data"
3 | 
4 | 
5 | train_pkl = DATA_PATHA+"/data_train.pkl"
6 | val_pkl = DATA_PATHA+"/data_val.pkl"
7 | test_pkl = DATA_PATHA+"/data_test.pkl"
8 | information = DATA_PATHA+"/information.pkl"
9 | 
10 | 
11 | 
12 | #parameters
13 | observation = 3*60*60-1
14 | print ("observation time",observation)
15 | n_time_interval = 6
16 | print ("the number of time interval:",n_time_interval)
17 | time_interval = math.ceil((observation+1)*1.0/n_time_interval)  # round up
18 | print ("time interval:",time_interval)
19 | lmax = 2
20 | 
--------------------------------------------------------------------------------
/LICENSE:
--------------------------------------------------------------------------------
1 | MIT License
2 | 
3 | Copyright (c) 2017 ChenNed
4 | 
5 | Permission is hereby granted, free of charge, to any person obtaining a copy
6 | of this software and associated documentation files (the "Software"), to deal
7 | in the Software without restriction, including without limitation the rights
8 | to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9 | copies of the Software, and to permit persons to whom the Software is
10 | furnished to do so, subject to the following conditions:
11 | 
12 | The above copyright notice and this permission notice shall be included in all
13 | copies or substantial portions of the Software.
14 | 
15 | THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16 | IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17 | FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18 | AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19 | LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20 | OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21 | SOFTWARE.
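A note on the timing parameters in `preprocessing/config.py` and `model/config.py` above: `observation = 3*60*60-1` keeps only retweets that arrive strictly within the first 3 hours of a cascade, and `n_time_interval = 6` splits that window into equal buckets for the time-decay weights. A minimal sketch of the arithmetic (plain Python, nothing repository-specific beyond the two constants):

import math

observation = 3 * 60 * 60 - 1        # 10799 s: retweets strictly inside the first 3 hours
n_time_interval = 6                  # number of time-decay buckets
time_interval = math.ceil((observation + 1) * 1.0 / n_time_interval)
print(time_interval)                 # 1800 s, i.e. six 30-minute buckets per cascade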
22 | 
--------------------------------------------------------------------------------
/preprocessing/caslaplacian.py:
--------------------------------------------------------------------------------
1 | import pickle
2 | import numpy as np
3 | import scipy.sparse as sp
4 | import networkx as nx
5 | from scipy.sparse import identity, spdiags, linalg
6 | 
7 | def directed_laplacian_matrix(G, nodelist=None, weight='weight', alpha=0.95):
8 |     import scipy as sp
9 |     M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight,
10 |                                   dtype=float)
11 |     n, m = M.shape
12 |     if not (0 < alpha < 1):
13 |         raise nx.NetworkXError('alpha must be between 0 and 1')
14 |     # this is using a dense representation
15 |     M = M.todense()
16 |     # add constant to dangling nodes' row
17 |     dangling = sp.where(M.sum(axis=1) == 0)
18 |     for d in dangling[0]:
19 |         M[d] = 1.0 / n
20 |     # normalize
21 |     M = M / M.sum(axis=1)
22 | 
23 |     P = alpha * M + (1 - alpha) / n  # PageRank-style transition matrix with teleportation
24 |     evals, evecs = linalg.eigs(P.T, k=1, tol=1E-2)
25 |     v = evecs.flatten().real
26 |     p = v / v.sum()  # stationary distribution of the walk
27 |     sqrtp = sp.sqrt(p)
28 |     I = sp.identity(len(G))
29 |     Q = spdiags(sqrtp, [0], n, n) * (I - P) * spdiags(1.0 / sqrtp, [0], n, n)  # Phi^(1/2) (I - P) Phi^(-1/2)
30 |     return Q
31 | 
32 | def calculate_scaled_laplacian_dir(graph, lambda_max=2):
33 |     L = directed_laplacian_matrix(graph)
34 |     if lambda_max is None:
35 |         lambda_max, _ = linalg.eigsh(L, 1, which='LM', tol=1E-2)
36 |         lambda_max = lambda_max[0]
37 |     L = sp.csr_matrix(L)
38 |     M, _ = L.shape
39 |     I = sp.identity(M, format='csr', dtype=L.dtype)
40 |     L = (2 / lambda_max * L) - I  # rescale the spectrum into [-1, 1] for the Chebyshev basis
41 |     return L.astype(np.float32)
--------------------------------------------------------------------------------
/README.md:
--------------------------------------------------------------------------------
1 | # CasCN
2 | This is a TensorFlow implementation of recurrent cascades convolution for information cascade prediction. The paper "Information Diffusion Prediction via Recurrent Cascades Convolution" (accepted by ICDE 2019) will be available online soon.
3 | # Overview
4 | - `data/` put the downloaded dataset here;
5 | - `model/` contains the implementation of CasCN;
6 | - `preprocessing/` contains the preprocessing code:
7 |     * split the data into training, validation and test sets (`utils.py`);
8 |     * transform the datasets into `.pkl` files (`preprocess_graph_signal.py`);
9 |     * configure parameters and file paths (`config.py`).
10 | 
11 | # Dataset
12 | The datasets used in the paper are Sina Weibo and HEP-PH. The Sina Weibo dataset can be downloaded [here](https://github.com/CaoQi92/DeepHawkes) and the HEP-PH dataset is available [here](http://snap.stanford.edu/data/cit-HepPh.html).
13 | We also provide a pre-processed Weibo dataset (T = 3 hours) [here](https://pan.baidu.com/s/1_s3FvbEpj2piWcRqLqpb5A); the file password is `a7xu`.
14 | 
15 | Steps to run CasCN
16 | -----------------------------------
17 | 
18 | 1. Split the data into training, validation and test sets, then transform them into `.pkl` files.
19 | command:
20 | 
21 |     cd preprocessing
22 |     python utils.py
23 |     python preprocess_graph_signal.py
24 | 
25 | 2. Train the model.
26 | command:
27 | 
28 |     cd model
29 |     python run_graph_sequence.py
30 | # Notice
31 | If you want to run the experiments on the citation dataset "HEP-PH", you should first convert it to the same format as the Weibo dataset (the Weibo format is described [here](https://github.com/CaoQi92/DeepHawkes)).
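For reference, `preprocessing/utils.py` parses each line of `dataset_weibo.txt` as five tab-separated fields: the cascade id, an id field that the preprocessing code does not use, the publish timestamp, the number of participants (which must equal the number of paths), and a space-separated list of retweet paths of the form `user1/user2/.../userN:seconds`. A purely hypothetical line (all ids and times are made up; `\t` stands for a tab character) would look roughly like:

    <cascade_id>\t<unused_id>\t<publish_time>\t<num_participants>\t<path_1> <path_2> ...
    1234567\t42\t1464710400\t3\t42:0 42/17:35 42/17/93:128

The root user appears as a path of length one with time 0, and only retweets whose time is smaller than the observation window are kept.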
And the version of the Tensorflow we used is 1.0. 32 | 33 | -------------------------------------------------------------------------------- /preprocessing/preprocess_graph_signal.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import six.moves.cPickle as pickle 3 | from model import config 4 | import networkx as nx 5 | from preprocessing import caslaplacian 6 | import scipy.sparse 7 | import gc 8 | LABEL_NUM = 0 9 | 10 | # trans the original ids to 1~n 11 | class IndexDict: 12 | def __init__(self, original_ids): 13 | self.original_to_new = {} 14 | self.new_to_original = [] 15 | cnt = 0 16 | for i in original_ids: 17 | new = self.original_to_new.get(i, cnt) 18 | if new == cnt: 19 | self.original_to_new[i] = cnt 20 | cnt += 1 21 | self.new_to_original.append(i) 22 | 23 | def new(self, original): 24 | if type(original) is int: 25 | return self.original_to_new[original] 26 | else: 27 | if type(original[0]) is int: 28 | return [self.original_to_new[i] for i in original] 29 | else: 30 | return [[self.original_to_new[i] for i in l] for l in original] 31 | 32 | def original(self, new): 33 | if type(new) is int: 34 | return self.new_to_original[new] 35 | else: 36 | if type(new[0]) is int: 37 | return [self.new_to_original[i] for i in new] 38 | else: 39 | return [[self.new_to_original[i] for i in l] for l in new] 40 | 41 | def length(self): 42 | return len(self.new_to_original) 43 | 44 | #trainsform the sequence to list 45 | def sequence2list(flename): 46 | graphs = {} 47 | with open(flename, 'r') as f: 48 | for line in f: 49 | walks = line.strip().split('\t') 50 | graphs[walks[0]] = [] #walk[0] = cascadeID 51 | for i in range(1, len(walks)): 52 | s = walks[i].split(":")[0] #node 53 | t = walks[i].split(":")[1] #time 54 | graphs[walks[0]].append([[str(xx) for xx in s.split(",")],int(t)]) 55 | return graphs 56 | 57 | #read label and size from cascade file 58 | def read_labelANDsize(filename): 59 | labels = {} 60 | sizes = {} 61 | with open(filename, 'r') as f: 62 | for line in f: 63 | profile = line.split('\t') 64 | labels[profile[0]] = profile[-1] 65 | sizes[profile[0]] = int(profile[3]) 66 | return labels,sizes 67 | 68 | def get_original_ids(graphs): 69 | original_ids = set() 70 | for graph in graphs.keys(): 71 | for walk in graphs[graph]: 72 | for i in walk[0]: 73 | original_ids.add(i) 74 | print ("length of original isd:",len(original_ids)) 75 | return original_ids 76 | 77 | def get_nodes(graph): 78 | nodes = {} 79 | j = 0 80 | for walk in graph: 81 | for i in walk[0]: 82 | if i not in nodes.keys(): 83 | nodes[i] = j 84 | j = j+1 85 | return nodes 86 | 87 | def write_XYSIZE_data(graphs,labels,sizes,NUM_SEQUENCE,index,max_num, filename): 88 | #get the x,y,and size data 89 | id_data = [] 90 | x_data = [] 91 | y_data = [] 92 | sz_data = [] 93 | time_data = [] 94 | Laplacian_data = [] 95 | for key,graph in graphs.items(): 96 | id = key 97 | label = labels[key].split() 98 | y = int(label[LABEL_NUM]) #label 99 | temp = [] 100 | temp_time = [] #store time 101 | size_temp = len(graph) 102 | if size_temp != sizes[key]: 103 | print (size_temp,sizes[key]) 104 | nodes_items = get_nodes(graph) 105 | nodes_list = nodes_items.values() 106 | nx_G = nx.DiGraph() 107 | nx_G.add_nodes_from(nodes_list) 108 | for walk in graph: 109 | walk_time = walk[1] 110 | temp_time.append(walk_time) 111 | if walk_time == 0: 112 | nx_G.add_edge(nodes_items.get(walk[0][0]), nodes_items.get(walk[0][0])) 113 | for i in range(len(walk[0])-1): 114 | 
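# Each `walk[0]` is one retweet path (root -> ... -> current retweeter). The loop below links
# consecutive users on the path with directed edges, so the union of all paths seen so far forms
# the cascade graph snapshot for this step (the root's own entry, at time 0, adds a self-loop
# above, which keeps even the very first snapshot from being edgeless).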
nx_G.add_edge(nodes_items.get(walk[0][i]),nodes_items.get(walk[0][i+1])) 115 | temp_adj = nx.to_pandas_adjacency(nx_G) 116 | N = len(temp_adj) 117 | if N < max_num: 118 | col_padding = np.zeros(shape=(N, max_num - N)) 119 | A_col_padding = np.column_stack((temp_adj, col_padding)) 120 | row_padding = np.zeros(shape=(max_num - N, max_num)) 121 | A_col_row_padding = np.row_stack((A_col_padding, row_padding)) 122 | temp_adj = scipy.sparse.coo_matrix(A_col_row_padding, dtype=np.float32) 123 | else: 124 | temp_adj = scipy.sparse.coo_matrix(temp_adj,dtype=np.float32) 125 | temp.append(temp_adj) 126 | #caculate laplacian 127 | L = caslaplacian.calculate_scaled_laplacian_dir(nx_G, lambda_max=None) 128 | M, M = L.shape 129 | M = int(M) 130 | L = L.todense() 131 | if M < max_num: 132 | col_padding_L = np.zeros(shape=(M, max_num - M)) 133 | L_col_padding = np.column_stack((L, col_padding_L)) 134 | row_padding = np.zeros(shape=(max_num - M, max_num)) 135 | L_col_row_padding = np.row_stack((L_col_padding, row_padding)) 136 | Laplacian = scipy.sparse.coo_matrix(L_col_row_padding, dtype=np.float32) 137 | else: 138 | Laplacian = scipy.sparse.coo_matrix(L, dtype=np.float32) 139 | if len(temp)< NUM_SEQUENCE: 140 | zero_padding = np.zeros(shape = (max_num, max_num)) 141 | zero_padding = scipy.sparse.coo_matrix(zero_padding, dtype=np.float32) 142 | for i in range(NUM_SEQUENCE-len(temp)): 143 | temp.append(zero_padding) 144 | i = i+1 145 | time_data.append(temp_time) 146 | id_data.append(id) 147 | x_data.append(temp) 148 | y_data.append(np.log(y+1.0)/np.log(2.0)) 149 | Laplacian_data.append(Laplacian) 150 | sz_data.append(size_temp) 151 | gc.collect() 152 | pickle.dump((id_data,x_data,Laplacian_data,y_data, sz_data, time_data,index.length()), open(filename,'wb')) 153 | 154 | def get_maxsize(sizes): 155 | max_size = 0 156 | for cascadeID in sizes: 157 | max_size = max(max_size,sizes[cascadeID]) 158 | gc.collect() 159 | return max_size 160 | 161 | def get_max_length(graphs): 162 | len_sequence = 0 163 | max_num = 0 164 | for cascadeID in graphs: 165 | max_num = max(max_num,len(graphs[cascadeID])) 166 | for sequence in graphs[cascadeID]: 167 | len_sequence = max(len_sequence,len(sequence[0])) 168 | gc.collect() 169 | return len_sequence 170 | 171 | def get_max_node_num(graphs): 172 | max_num = 0 173 | for key,graph in graphs.items(): 174 | nodes = get_nodes(graph) 175 | max_num = max(max_num,len(nodes)) 176 | return max_num 177 | if __name__ == "__main__": 178 | 179 | ### data set ### 180 | graphs_train = sequence2list(config.shortestpath_train) 181 | graphs_val = sequence2list(config.shortestpath_val) 182 | graphs_test = sequence2list(config.shortestpath_test) 183 | 184 | ## get Laplacian ## 185 | cascade_train = config.cascade_train 186 | cascade_test = config.cascade_test 187 | cascade_val = config.cascade_val 188 | 189 | ### get labels ### 190 | labels_train, sizes_train = read_labelANDsize(config.cascade_train) 191 | labels_val, sizes_val = read_labelANDsize(config.cascade_val) 192 | labels_test, sizes_test = read_labelANDsize(config.cascade_test) 193 | NUM_SEQUENCE = max(get_maxsize(sizes_train),get_maxsize(sizes_val),get_maxsize(sizes_test)) 194 | 195 | #LEN_SEQUENCE_train = get_max_length(graphs_train) 196 | #LEN_SEQUENCE_val = get_max_length(graphs_val) 197 | #LEN_SEQUENCE_test = get_max_length(graphs_test) 198 | #LEN_SEQUENCE = max(LEN_SEQUENCE_train,LEN_SEQUENCE_val,LEN_SEQUENCE_test) 199 | 200 | max_num_train = get_max_node_num(graphs_train) 201 | max_num_test = get_max_node_num(graphs_test) 202 | 
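# max_num is the largest number of distinct users in any single cascade across the three splits;
# every adjacency snapshot and Laplacian is later zero-padded to max_num x max_num so that all
# cascades share one fixed graph size.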
max_num_val = get_max_node_num(graphs_val) 203 | max_num = max(max_num_train, max_num_test, max_num_val) 204 | 205 | # get the total original_ids and tranform the index from 0 ~n-1 206 | original_ids = get_original_ids(graphs_train)\ 207 | .union(get_original_ids(graphs_val))\ 208 | .union(get_original_ids(graphs_test)) 209 | 210 | original_ids.add(-1) 211 | ## index is new index 212 | index = IndexDict(original_ids) 213 | 214 | print("create train") 215 | write_XYSIZE_data(graphs_train, labels_train,sizes_train,NUM_SEQUENCE,index,max_num, config.train_pkl) 216 | print("create val an test") 217 | write_XYSIZE_data(graphs_val, labels_val, sizes_val,NUM_SEQUENCE,index,max_num, config.val_pkl) 218 | write_XYSIZE_data(graphs_test, labels_test, sizes_test,NUM_SEQUENCE,index,max_num,config.test_pkl) 219 | #pickle.dump((len(original_ids),NUM_SEQUENCE,LEN_SEQUENCE), open(config.information,'wb')) 220 | print("Finish!!!") 221 | 222 | -------------------------------------------------------------------------------- /model/run_graph_sequence.py: -------------------------------------------------------------------------------- 1 | import numpy as np 2 | import math 3 | import tensorflow as tf 4 | from model.model_sparse_graph_signal import Model 5 | import six.moves.cPickle as pickle 6 | 7 | tf.set_random_seed(0) 8 | import time 9 | from model import config as cf 10 | 11 | # DATA_PATH = "data" 12 | 13 | n_steps = 100 14 | tf.flags.DEFINE_integer("n_steps", n_steps, "num of step.") 15 | tf.flags.DEFINE_integer("time_interval", cf.time_interval, "the time interval") 16 | tf.flags.DEFINE_integer("n_time_interval", cf.n_time_interval, "the number of time interval") 17 | tf.flags.DEFINE_integer("num_rnn_layers", 2, "number of rnn layers .") 18 | tf.flags.DEFINE_integer("cl_decay_steps", 1000, "cl_decay_steps .") 19 | tf.flags.DEFINE_integer("num_kernel", 2, "chebyshev .") 20 | tf.flags.DEFINE_float("learning_rate", 0.005, "learning_rate.") 21 | tf.flags.DEFINE_integer("batch_size", 32, "batch size.") 22 | tf.flags.DEFINE_integer("num_hidden", 32, "hidden rnn size.") 23 | tf.flags.DEFINE_float("l1", 5e-5, "l1.") 24 | tf.flags.DEFINE_float("l2", 1e-3, "l2.") 25 | tf.flags.DEFINE_float("l1l2", 1.0, "l1l2.") 26 | tf.flags.DEFINE_string("activation", "relu", "activation function.") 27 | tf.flags.DEFINE_integer("training_iters", 200 * 3200 + 1, "max training iters.") 28 | tf.flags.DEFINE_integer("display_step", 100, "display step.") 29 | tf.flags.DEFINE_integer("n_hidden_dense1", 32, "dense1 size.") 30 | tf.flags.DEFINE_integer("n_hidden_dense2", 16, "dense2 size.") 31 | tf.flags.DEFINE_string("version", "v1", "data version.") 32 | tf.flags.DEFINE_integer("max_grad_norm", 5, "gradient clip.") 33 | tf.flags.DEFINE_float("stddev", 0.01, "initialization stddev.") 34 | tf.flags.DEFINE_integer("feat_in", 100, "num of feature in") 35 | tf.flags.DEFINE_integer("feat_out", 50, "num of feature out") 36 | tf.flags.DEFINE_integer("lmax", 2, "max L") 37 | tf.flags.DEFINE_integer("num_nodes", 100, "number of max nodes in cascade") 38 | config = tf.flags.FLAGS 39 | 40 | print("l2", config.l2) 41 | print("learning rate:", config.learning_rate) 42 | 43 | 44 | 45 | def get_batch(x, L, y, sz, time, n_time_interval, step, batch_size, num_step): 46 | batch_y = np.zeros(shape=(batch_size, 1)) 47 | batch_x = [] 48 | batch_L = [] 49 | batch_time_interval_index = [] 50 | batch_rnn_index = [] 51 | start = step * batch_size % len(x) 52 | for i in range(batch_size): 53 | id = (i + start) % len(x) 54 | batch_y[i, 0] = y[id] 55 | 
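# The Laplacian and the per-step adjacency snapshots are stored as scipy sparse matrices in the
# pickles; they are densified here because the model's placeholders expect dense float32 arrays
# ([batch_size, n_steps, num_nodes, num_nodes] for the snapshots, [batch_size, num_nodes, num_nodes]
# for the Laplacians).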
batch_L.append(L[id].todense()) 56 | temp_x = [] 57 | for m in range(len(x[id])): 58 | temp_x.append(x[id][m].todense()) 59 | batch_x.append(temp_x) 60 | batch_time_interval_index_sample = [] 61 | 62 | for j in range(sz[id]): 63 | temp_time = np.zeros(shape=(n_time_interval)) 64 | k = int(math.floor(time[id][j] / config.time_interval)) 65 | temp_time[k] = 1 66 | batch_time_interval_index_sample.append(temp_time) 67 | if len(batch_time_interval_index_sample) < num_step: 68 | for i in range(num_step - len(batch_time_interval_index_sample)): 69 | temp_time_padding = np.zeros(shape=(n_time_interval)) 70 | batch_time_interval_index_sample.append(temp_time_padding) 71 | i = i + 1 72 | batch_time_interval_index.append(batch_time_interval_index_sample) 73 | rnn_index_temp = np.zeros(shape=(config.n_steps)) 74 | rnn_index_temp[:sz[id]] = 1 75 | batch_rnn_index.append(rnn_index_temp) 76 | 77 | return batch_x, batch_L, batch_y, batch_time_interval_index, batch_rnn_index 78 | 79 | 80 | version = config.version 81 | id_train, x_train, L, y_train, sz_train, time_train, vocabulary_size = pickle.load( 82 | open(cf.train_pkl, 'rb')) 83 | id_test, x_test, L_test, y_test, sz_test, time_test, _ = pickle.load( 84 | open(cf.test_pkl, 'rb')) 85 | id_val, x_val, L_val, y_val, sz_val, time_val, _ = pickle.load(open(cf.val_pkl, 'rb')) 86 | 87 | training_iters = config.training_iters 88 | batch_size = config.batch_size 89 | display_step = min(config.display_step, len(sz_train) / batch_size) 90 | print("-----------------display step-------------------") 91 | print("display step" + str(display_step)) 92 | 93 | # determine the way floating point numbers,arrays and other numpy object are displayed 94 | np.set_printoptions(precision=2) 95 | 96 | sess = tf.Session(config=tf.ConfigProto(allow_soft_placement=True)) 97 | start = time.time() 98 | is_training = False 99 | model = Model(config, config.num_nodes, sess) 100 | sess.graph.finalize() 101 | step = 0 102 | best_val_loss = 1000 103 | best_test_loss = 1000 104 | train_writer = tf.summary.FileWriter("./train", sess.graph) 105 | 106 | # Keep training until reach max iterations or max_try 107 | train_loss = [] 108 | max_try = 10 109 | patience = max_try 110 | while step * batch_size < training_iters: 111 | batch_x, batch_L, batch_y, batch_time_interval, batch_rnn_index = get_batch( 112 | x_train, 113 | L, 114 | y_train, 115 | sz_train, 116 | time_train, 117 | config.n_time_interval, 118 | step, 119 | batch_size, 120 | n_steps) 121 | time_decay = model.train_batch(batch_x, batch_L, batch_y, batch_time_interval, batch_rnn_index) 122 | train_loss.append( 123 | model.get_error(batch_x, batch_L, batch_y, batch_time_interval, 124 | batch_rnn_index)) 125 | if step % display_step == 0: 126 | #print(time_decay) 127 | val_loss = [] 128 | for val_step in range(int(len(y_val) / batch_size)): 129 | val_x, val_L, val_y, val_time, val_rnn_index = get_batch( 130 | x_val, 131 | L_val, 132 | y_val, 133 | sz_val, 134 | time_val, 135 | config.n_time_interval, 136 | val_step, 137 | batch_size, 138 | n_steps) 139 | val_loss.append( 140 | model.get_error(val_x, val_L, val_y, val_time, val_rnn_index)) 141 | test_loss = [] 142 | for test_step in range(int(len(y_test) / batch_size)): 143 | test_x, test_L, test_y, test_time, test_rnn_index = get_batch( 144 | x_test, 145 | L_test, 146 | y_test, 147 | sz_test, 148 | time_test, 149 | config.n_time_interval, 150 | test_step, 151 | batch_size, 152 | n_steps) 153 | test_loss.append( 154 | model.get_error(test_x, test_L, test_y, test_time, test_rnn_index)) 
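# Early stopping: patience is reset to max_try whenever the mean validation loss improves (and the
# test-set predictions are re-computed and pickled); it is decremented once per display period, and
# training stops as soon as it reaches zero.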
155 | 156 | if np.mean(val_loss) < best_val_loss: 157 | best_val_loss = np.mean(val_loss) 158 | best_test_loss = np.mean(test_loss) 159 | patience = max_try 160 | predict_result = [] 161 | test_loss = [] 162 | for test_step in range(int(len(y_test) / batch_size + 1)): 163 | test_x, test_L, test_y, test_time, test_rnn_index = get_batch( 164 | x_test, 165 | L_test, 166 | y_test, 167 | sz_test, 168 | time_test, 169 | config.n_time_interval, 170 | test_step, 171 | batch_size, 172 | n_steps) 173 | predict_result.extend( 174 | model.predict(test_x, test_L, test_y, test_time, test_rnn_index)) 175 | test_loss.append( 176 | model.get_error(test_x, test_L, test_y, test_time, test_rnn_index)) 177 | print("last test error:", np.mean(test_loss)) 178 | pickle.dump((predict_result, y_test, test_loss), open( 179 | "prediction_result_" + str(config.learning_rate) + "_CasCN", 'wb')) 180 | print("#" + str(step / display_step) + 181 | ", Training Loss= " + "{:.6f}".format(np.mean(train_loss)) + 182 | ", Validation Loss= " + "{:.6f}".format(np.mean(val_loss)) + 183 | ", Test Loss= " + "{:.6f}".format(np.mean(test_loss)) + 184 | ", Best Valid Loss= " + "{:.6f}".format(best_val_loss) + 185 | ", Best Test Loss= " + "{:.6f}".format(best_test_loss) 186 | ) 187 | train_loss = [] 188 | patience -= 1 189 | if not patience: 190 | break 191 | step += 1 192 | 193 | print(len(predict_result), len(y_test)) 194 | print("Finished!\n----------------------------------------------------------------") 195 | print("Time:", time.time() - start) 196 | print("Valid Loss:", best_val_loss) 197 | print("Test Loss:", best_test_loss) 198 | -------------------------------------------------------------------------------- /preprocessing/utils.py: -------------------------------------------------------------------------------- 1 | import networkx as nx 2 | import time 3 | from functools import cmp_to_key 4 | from preprocessing import config 5 | import sys 6 | 7 | 8 | class IndexDict: 9 | def __init__(self, original_ids): 10 | self.original_to_new = {} 11 | self.new_to_original = [] 12 | cnt = 0 13 | for i in original_ids: 14 | new = self.original_to_new.get(i, cnt) 15 | if new == cnt: 16 | self.original_to_new[i] = cnt 17 | cnt += 1 18 | self.new_to_original.append(i) 19 | 20 | def new(self, original): 21 | if type(original) is int: 22 | return self.original_to_new[original] 23 | else: 24 | if type(original[0]) is int: 25 | return [self.original_to_new[i] for i in original] 26 | else: 27 | return [[self.original_to_new[i] for i in l] for l in original] 28 | 29 | def original(self, new): 30 | if type(new) is int: 31 | return self.new_to_original[new] 32 | else: 33 | if type(new[0]) is int: 34 | return [self.new_to_original[i] for i in new] 35 | else: 36 | return [[self.new_to_original[i] for i in l] for l in new] 37 | 38 | def length(self): 39 | return len(self.new_to_original) 40 | 41 | 42 | def gen_cascades_obser(observation_time,pre_times,filename): 43 | cascades_total = dict() 44 | cascades_type = dict() 45 | with open(filename) as f: 46 | for line in f: 47 | parts = line.split("\t") 48 | if len(parts) != 5: 49 | print('wrong format!') 50 | continue 51 | cascadeID = parts[0] 52 | n_nodes = int(parts[3]) 53 | path = parts[4].split(" ") 54 | if n_nodes != len(path): 55 | print('wrong number of nodes', n_nodes, len(path)) 56 | msg_pub_time = parts[2] 57 | 58 | observation_path = [] 59 | labels = [] 60 | edges = set() 61 | for i in range(len(pre_times)): 62 | labels.append(0) 63 | for p in path: 64 | nodes = p.split(":")[0].split("/") 65 | 
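# Each p has the form "user_1/user_2/.../user_k:t": the part before the colon is the retweet path
# from the root user to the current retweeter, and t is the retweet time in seconds after the
# original post; only retweets with t < observation_time are kept in the observed cascade.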
nodes_ok = True 66 | time_now = int(p.split(":")[1]) 67 | if time_now < observation_time: 68 | observation_path.append(",".join(nodes) + ":" + str(time_now)) 69 | for i in range(1, len(nodes)): 70 | edges.add(nodes[i - 1] + ":" + nodes[i] + ":1") 71 | for i in range(len(pre_times)): 72 | if time_now < pre_times[i]: 73 | labels[i] += 1 74 | cascades_total[cascadeID] = msg_pub_time 75 | 76 | n_total = len(cascades_total) 77 | print('total:', n_total) 78 | 79 | key = cmp_to_key(lambda x, y: int(x[1]) - int(y[1])) 80 | sorted_msg_time = sorted(cascades_total.items(), key=key) 81 | count = 0 82 | for (k, v) in sorted_msg_time: 83 | if count < n_total * 1.0 / 20 * 14: 84 | cascades_type[k] = 1 85 | elif count < n_total * 1.0 / 20 * 17: 86 | cascades_type[k] = 2 87 | else: 88 | cascades_type[k] = 3 89 | count += 1 90 | return cascades_total,cascades_type 91 | 92 | 93 | def discard_cascade(observation_time,pre_times,filename): 94 | discard_cascade_id=dict() 95 | with open(filename) as f: 96 | for line in f: 97 | parts = line.split("\t") 98 | if len(parts) != 5: 99 | print('wrong format!') 100 | continue 101 | cascadeID = parts[0] 102 | n_nodes = int(parts[3]) 103 | path = parts[4].split(" ") 104 | if n_nodes != len(path): 105 | print('wrong number of nodes', n_nodes, len(path)) 106 | msg_pub_time = parts[2] 107 | 108 | observation_path = [] 109 | edges = set() 110 | for p in path: 111 | nodes = p.split(":")[0].split("/") 112 | time_now = int(p.split(":")[1]) 113 | if time_now < observation_time: 114 | observation_path.append(",".join(nodes) + ":" + str(time_now)) 115 | for i in range(1, len(nodes)): 116 | edges.add(nodes[i - 1] + ":" + nodes[i] + ":1") 117 | 118 | if len(observation_path)>100: 119 | discard_cascade_id[cascadeID] = 1 120 | continue 121 | else: 122 | discard_cascade_id[cascadeID]=0 123 | nx_Cass = nx.DiGraph() 124 | for i in edges: 125 | part = i.split(":") 126 | source = part[0] 127 | target = part[1] 128 | weight = part[2] 129 | nx_Cass.add_edge(source, target, weight=weight) 130 | try: 131 | L = directed_laplacian_matrix(nx_Cass) 132 | except: 133 | discard_cascade_id[cascadeID]=1 134 | s = sys.exc_info() 135 | else: 136 | num = nx_Cass.number_of_nodes() 137 | 138 | 139 | return discard_cascade_id 140 | 141 | def directed_laplacian_matrix(G, nodelist=None, weight='weight', 142 | walk_type=None, alpha=0.95): 143 | import scipy as sp 144 | from scipy.sparse import identity, spdiags, linalg 145 | if walk_type is None: 146 | if nx.is_strongly_connected(G): 147 | if nx.is_aperiodic(G): 148 | walk_type = "random" 149 | else: 150 | walk_type = "lazy" 151 | else: 152 | walk_type = "pagerank" 153 | 154 | M = nx.to_scipy_sparse_matrix(G, nodelist=nodelist, weight=weight, 155 | dtype=float) 156 | n, m = M.shape 157 | if walk_type in ["random", "lazy"]: 158 | DI = spdiags(1.0 / sp.array(M.sum(axis=1).flat), [0], n, n) 159 | if walk_type == "random": 160 | P = DI * M 161 | else: 162 | I = identity(n) 163 | P = (I + DI * M) / 2.0 164 | 165 | elif walk_type == "pagerank": 166 | if not (0 < alpha < 1): 167 | raise nx.NetworkXError('alpha must be between 0 and 1') 168 | M = M.todense() 169 | dangling = sp.where(M.sum(axis=1) == 0) 170 | for d in dangling[0]: 171 | M[d] = 1.0 / n 172 | M = M / M.sum(axis=1) 173 | P = alpha * M + (1 - alpha) / n 174 | else: 175 | raise nx.NetworkXError("walk_type must be random, lazy, or pagerank") 176 | 177 | evals, evecs = linalg.eigs(P.T, k=1,tol=1E-2) 178 | v = evecs.flatten().real 179 | p = v / v.sum() 180 | sqrtp = sp.sqrt(p) 181 | Q = spdiags(sqrtp, [0], n, 
n) * P * spdiags(1.0 / sqrtp, [0], n, n) 182 | I = sp.identity(len(G)) 183 | return I - (Q + Q.T) / 2.0 184 | 185 | def gen_cascade(observation_time, pre_times, filename, filename_ctrain, filename_cval, 186 | filename_ctest, filename_strain, filename_sval, filename_stest, cascades_type, discard_cascade_id): 187 | file = open(filename,"r") 188 | file_ctrain = open(filename_ctrain, "w") 189 | file_cval = open(filename_cval, "w") 190 | file_ctest = open(filename_ctest, "w") 191 | file_strain = open(filename_strain, "w") 192 | file_sval = open(filename_sval, "w") 193 | file_stest = open(filename_stest, "w") 194 | for line in file: 195 | parts = line.split("\t") 196 | if len(parts) != 5: 197 | print ('wrong format!') 198 | continue 199 | cascadeID = parts[0] 200 | n_nodes = int(parts[3]) 201 | path = parts[4].split(" ") 202 | if n_nodes !=len(path): 203 | print ('wrong number of nodes',n_nodes,len(path)) 204 | msg_time = time.localtime(int(parts[2])) 205 | hour = time.strftime("%H",msg_time) 206 | hour = int(hour) 207 | if hour <= 7 or hour >= 19: 208 | continue 209 | observation_path = [] 210 | labels = [] 211 | edges = set() 212 | for i in range(len(pre_times)): 213 | labels.append(0) 214 | for p in path: 215 | nodes = p.split(":")[0].split("/") 216 | time_now = int(p.split(":")[1]) 217 | if time_now 1: 48 | x1 = tf.matmul(l, x0) 49 | x_ = concat(x_, x1) 50 | for k in range(2, K): 51 | x2 = 2 * tf.matmul(l, x1) - x0 52 | x_ = concat(x_, x2) 53 | x0, x1 = x1, x2 54 | x_l.append(x_) 55 | x_l = tf.reshape(x_l, [K, nNode, feat_in, nSample]) 56 | x_l = tf.transpose(x_l, perm=[3, 1, 2, 0]) 57 | x_l = tf.reshape(x_l, [nSample * nNode, feat_in * K]) 58 | x_l = tf.matmul(x_l, W) # No Bias term?? -> Yes[batch size*nNode,feat_out] [32*200,32] 59 | out = tf.reshape(x_l, [nSample, nNode, feat_out]) 60 | return out 61 | 62 | 63 | # gconvLSTM 64 | _LSTMStateTuple = collections.namedtuple("LSTMStateTuple", ('c', 'h')) 65 | 66 | 67 | class LSTMStateTuple(_LSTMStateTuple): 68 | __slots__ = () 69 | 70 | @property 71 | def dtype(self): 72 | (c, h) = self 73 | if not c.dtype == h.dtype: 74 | raise TypeError("Inconsistent internal state") 75 | return c.dtype 76 | 77 | 78 | class gcnRNNCell(RNNCell): 79 | def __init__(self, num_units, forget_bias=1.0, batch_size=None, 80 | state_is_tuple=True, activation=None, reuse=None, 81 | laplacian=None, lmax=None, K=None, feat_in=None, nNode=None): 82 | if tfversion == 'new': 83 | super(gcnRNNCell, self).__init__(_reuse=reuse) 84 | 85 | self._num_units = num_units 86 | self._forget_bias = forget_bias 87 | self._state_is_tuple = state_is_tuple 88 | self._activation = activation or tf.tanh 89 | self._laplacian = laplacian 90 | self._lmax = lmax 91 | self._K = K 92 | self._feat_in = feat_in 93 | self._nNode = nNode 94 | self._batch_size = batch_size 95 | 96 | @property 97 | def state_size(self): 98 | return (LSTMStateTuple((self._nNode, self._num_units), (self._nNode, self._num_units)) 99 | if self._state_is_tuple else 2 * self._num_units) 100 | 101 | @property 102 | def output_size(self): 103 | return self._num_units 104 | 105 | def zero_state(self, batch_size, dtype): 106 | with tf.name_scope(type(self).__name__ + "myZeroState"): 107 | zero_state_c = tf.zeros([self._batch_size, self._nNode, self._num_units], name='c') 108 | zero_state_h = tf.zeros([self._batch_size, self._nNode, self._num_units], name='h') 109 | return (zero_state_c, zero_state_h) 110 | 111 | def __call__(self, inputs, state, scope=None): 112 | with tf.variable_scope(scope or type(self).__name__): 113 | if 
self._state_is_tuple: 114 | c, h = state 115 | else: 116 | c, h = tf.split(value=state, num_or_size_splits=2, axis=1) 117 | 118 | laplacian = self._laplacian 119 | lmax = self._lmax 120 | K = self._K 121 | feat_in = self._feat_in 122 | nNode = self._nNode 123 | batch_size = self._batch_size 124 | 125 | if feat_in is None: 126 | # Take out the shape of input 127 | batch_size, nNode, feat_in = inputs.get_shape() 128 | 129 | feat_out = self._num_units 130 | 131 | if K is None: 132 | K = 2 133 | 134 | scope = tf.get_variable_scope() 135 | with tf.variable_scope(scope) as scope: 136 | try: 137 | # Need four diff Wconv weight + for Hidden weight 138 | Wzxt = tf.get_variable("Wzxt", [K * feat_in, feat_out], dtype=tf.float32, 139 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 140 | Wixt = tf.get_variable("Wixt", [K * feat_in, feat_out], dtype=tf.float32, 141 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 142 | Wfxt = tf.get_variable("Wfxt", [K * feat_in, feat_out], dtype=tf.float32, 143 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 144 | Woxt = tf.get_variable("Woxt", [K * feat_in, feat_out], dtype=tf.float32, 145 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 146 | 147 | Wzht = tf.get_variable("Wzht", [K * feat_out, feat_out], dtype=tf.float32, 148 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 149 | Wiht = tf.get_variable("Wiht", [K * feat_out, feat_out], dtype=tf.float32, 150 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 151 | Wfht = tf.get_variable("Wfht", [K * feat_out, feat_out], dtype=tf.float32, 152 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 153 | Woht = tf.get_variable("Woht", [K * feat_out, feat_out], dtype=tf.float32, 154 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 155 | except ValueError: 156 | scope.reuse_variables() 157 | Wzxt = tf.get_variable("Wzxt", [K * feat_in, feat_out], dtype=tf.float32, 158 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 159 | Wixt = tf.get_variable("Wixt", [K * feat_in, feat_out], dtype=tf.float32, 160 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 161 | Wfxt = tf.get_variable("Wfxt", [K * feat_in, feat_out], dtype=tf.float32, 162 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 163 | Woxt = tf.get_variable("Woxt", [K * feat_in, feat_out], dtype=tf.float32, 164 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 165 | 166 | Wzht = tf.get_variable("Wzht", [K * feat_out, feat_out], dtype=tf.float32, 167 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 168 | Wiht = tf.get_variable("Wiht", [K * feat_out, feat_out], dtype=tf.float32, 169 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 170 | Wfht = tf.get_variable("Wfht", [K * feat_out, feat_out], dtype=tf.float32, 171 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 172 | Woht = tf.get_variable("Woht", [K * feat_out, feat_out], dtype=tf.float32, 173 | initializer=tf.random_uniform_initializer(minval=-0.1, maxval=0.1)) 174 | 175 | bzt = tf.get_variable("bzt", [feat_out]) 176 | bit = tf.get_variable("bit", [feat_out]) 177 | bft = tf.get_variable("bft", [feat_out]) 178 | bot = tf.get_variable("bot", [feat_out]) 179 | 180 | # gconv Calculation 181 | zxt = cheby_conv(inputs, laplacian, lmax, batch_size, nNode, feat_out, K, Wzxt) 182 | 183 | zht = cheby_conv(h, laplacian, lmax, 
batch_size, nNode, feat_out, K, Wzht) 184 | zt = zxt + zht + bzt 185 | zt = tf.tanh(zt) 186 | 187 | ixt = cheby_conv(inputs, laplacian, lmax, batch_size, nNode, feat_out, K, Wixt) 188 | iht = cheby_conv(h, laplacian, lmax, batch_size, nNode, feat_out, K, Wiht) 189 | it = ixt + iht + bit 190 | it = tf.sigmoid(it) 191 | 192 | fxt = cheby_conv(inputs, laplacian, lmax, batch_size, nNode, feat_out, K, Wfxt) 193 | fht = cheby_conv(h, laplacian, lmax, batch_size, nNode, feat_out, K, Wfht) 194 | ft = fxt + fht + bft 195 | ft = tf.sigmoid(ft) 196 | 197 | oxt = cheby_conv(inputs, laplacian, lmax, batch_size, nNode, feat_out, K, Woxt) 198 | oht = cheby_conv(h, laplacian, lmax, batch_size, nNode, feat_out, K, Woht) 199 | ot = oxt + oht + bot 200 | ot = tf.sigmoid(ot) 201 | 202 | # c 203 | new_c = ft * c + it * zt 204 | 205 | # h 206 | new_h = ot * tf.tanh(new_c) 207 | 208 | if self._state_is_tuple: 209 | new_state = LSTMStateTuple(new_c, new_h) 210 | else: 211 | new_state = tf.concat([new_c, new_h], 1) 212 | return new_h, new_state 213 | 214 | 215 | class Model(object): 216 | """ 217 | Defined: 218 | Placeholder 219 | Model architecture 220 | Train / Test function 221 | """ 222 | 223 | def __init__(self, config, n_node, sess): 224 | self.batch_size = config.batch_size # bach size 225 | self.feat_in = config.feat_in # number of feature 226 | self.feat_out = config.feat_out # number of output feature 227 | self.num_nodes = config.num_nodes # each sampel has num_nodes 228 | self.lmax = config.lmax 229 | self.sess = sess 230 | if config.activation == "tanh": 231 | self.activation = tf.tanh 232 | else: 233 | self.activation = tf.nn.relu 234 | self.max_grad_norm = config.max_grad_norm 235 | self.num_hidden = config.num_hidden # rnn hidden layer 236 | self.num_kernel = config.num_kernel # chebshevy K 237 | self.learning_rate = config.learning_rate 238 | self.n_time_interval = config.n_time_interval 239 | self.n_steps = config.n_steps # number of steps 240 | self.n_hidden_dense1 = config.n_hidden_dense1 241 | self.n_hidden_dense2 = config.n_hidden_dense2 242 | self.scale1 = config.l1 243 | self.scale2 = config.l2 244 | self.scale = config.l1l2 245 | self.n_nodes = n_node 246 | self.initializer = tf.random_normal_initializer(stddev=config.stddev) 247 | self.initializer2 = tf.random_uniform_initializer(minval=0, maxval=1, dtype=tf.float32) 248 | self.regularizer = regularizers.l1_l2_regularizer(self.scale1, self.scale2) 249 | self.regularizer_1 = regularizers.l1_regularizer(self.scale1) 250 | self.regularizer_2 = regularizers.l2_regularizer(self.scale2) 251 | self.model_step = tf.Variable(0, name='model_step', trainable=False) 252 | self._build_placeholders() 253 | self._build_var() 254 | self.pred = self._build_model() 255 | truth = self.y # [32,1] 256 | 257 | # # Define loss and optimizer 258 | cost = tf.reduce_mean(tf.pow(self.pred - truth, 2)) + self.scale * tf.add_n( 259 | [self.regularizer(var) for var in tf.trainable_variables()]) 260 | 261 | error = tf.reduce_mean(tf.pow(self.pred - truth, 2)) 262 | tf.summary.scalar("error", error) 263 | 264 | var_list = tf.trainable_variables() 265 | 266 | opt1 = tf.train.AdamOptimizer(learning_rate=self.learning_rate) 267 | grads = tf.gradients(cost, var_list) 268 | grads_c = [tf.clip_by_norm(grad, self.max_grad_norm) for grad in grads] 269 | 270 | train_op = opt1.apply_gradients(zip(grads_c, var_list), global_step=self.model_step, name='train_op') 271 | 272 | self.loss = cost 273 | self.error = error 274 | 275 | self.train_op = train_op 276 | init_op = 
tf.global_variables_initializer() 277 | self.sess.run(init_op) 278 | 279 | def _build_placeholders(self): 280 | 281 | 282 | self.x = tf.placeholder(tf.float32, shape=[self.batch_size, self.n_steps, self.num_nodes, self.num_nodes], 283 | name="x") 284 | 285 | self.laplacian = tf.placeholder(tf.float32, shape=[self.batch_size, self.num_nodes, self.num_nodes], 286 | name="laplacian") 287 | 288 | self.y = tf.placeholder(tf.float32, shape=[self.batch_size, 1], name="y") 289 | self.time_interval_index = tf.placeholder(tf.float32, 290 | shape=[self.batch_size, self.n_steps, self.n_time_interval], 291 | name="time") 292 | self.rnn_index = tf.placeholder(tf.float32, shape=[self.batch_size, self.n_steps], 293 | name="rnn_index") 294 | 295 | def _build_var(self, reuse=None): 296 | with tf.variable_scope('dense'): 297 | self.weights = { 298 | 'dense1': tf.get_variable('dense1_weight', initializer=self.initializer([self.num_hidden, 299 | self.n_hidden_dense1])), 300 | 'dense2': tf.get_variable('dense2_weight', initializer=self.initializer([self.n_hidden_dense1, 301 | self.n_hidden_dense2])), 302 | 'out': tf.get_variable('out_weight', initializer=self.initializer([self.n_hidden_dense2, 1])) 303 | } 304 | self.biases = { 305 | 'dense1': tf.get_variable('dense1_bias', initializer=self.initializer([self.n_hidden_dense1])), 306 | 'dense2': tf.get_variable('dense2_bias', initializer=self.initializer([self.n_hidden_dense2])), 307 | 'out': tf.get_variable('out_bias', initializer=self.initializer([1])) 308 | } 309 | with tf.variable_scope('time_decay'): 310 | self.time_weight = tf.get_variable('time_weight', initializer=self.initializer([self.n_time_interval]), 311 | dtype=tf.float32) 312 | 313 | def _build_model(self, reuse=None): 314 | 315 | with tf.variable_scope('gconv_model', reuse=reuse) as sc: 316 | cell = gcnRNNCell(num_units=self.num_hidden, forget_bias=1.0, 317 | laplacian=self.laplacian, lmax=self.lmax, 318 | feat_in=self.feat_in, K=self.num_kernel, 319 | nNode=self.num_nodes, batch_size=self.batch_size) 320 | 321 | x_vector = tf.unstack(self.x, self.n_steps, 1) 322 | 323 | outputs, states = tf.contrib.rnn.static_rnn( 324 | cell, 325 | x_vector, 326 | dtype=tf.float32, 327 | ) 328 | 329 | hidden_states = tf.transpose(tf.stack(outputs), [1, 0, 2, 3]) 330 | 331 | hidden_states = tf.reduce_sum(hidden_states, axis=2) 332 | 333 | rnn_index = tf.reshape(self.rnn_index, [-1, 1]) 334 | 335 | hidden_states = tf.reshape(hidden_states, [-1, self.num_hidden]) 336 | hidden_states = tf.multiply(rnn_index, hidden_states) 337 | 338 | with tf.variable_scope('time_decay'): 339 | 340 | time_weight = tf.reshape(self.time_weight, [-1, 1]) 341 | 342 | time_interval_index = tf.reshape(self.time_interval_index, [-1, 6]) 343 | 344 | time_weight = tf.matmul(time_interval_index, time_weight) 345 | 346 | hidden_states = tf.multiply(time_weight, hidden_states) 347 | 348 | hidden_states = tf.reshape(hidden_states, [-1, self.n_steps, self.num_hidden]) 349 | 350 | hidden_graph = tf.reduce_sum(hidden_states, reduction_indices=[1]) 351 | 352 | self.hidden_graph = hidden_graph 353 | 354 | with tf.variable_scope('dense'): 355 | dense1 = self.activation(tf.add(tf.matmul(hidden_graph, self.weights['dense1']), self.biases['dense1'])) 356 | dense2 = self.activation(tf.add(tf.matmul(dense1, self.weights['dense2']), self.biases['dense2'])) 357 | pred = self.activation(tf.add(tf.matmul(dense2, self.weights['out']), self.biases['out'])) 358 | return pred 359 | 360 | def train_batch(self, x, L, y, time_interval_index, rnn_index): 361 | _, 
time_weight = self.sess.run([self.train_op, self.time_weight], 362 | feed_dict={ 363 | self.x: x, 364 | self.laplacian: L, 365 | self.y: y, 366 | self.time_interval_index: time_interval_index, 367 | self.rnn_index: rnn_index}) 368 | return time_weight 369 | 370 | def get_error(self, x, L, y, time_interval_index, rnn_index): 371 | return self.sess.run(self.error, feed_dict={ 372 | self.x: x, 373 | self.laplacian: L, 374 | self.y: y, 375 | self.time_interval_index: time_interval_index, 376 | self.rnn_index: rnn_index}) 377 | 378 | def predict(self, x, L, y, time_interval_index, rnn_index): 379 | return self.sess.run(self.pred, feed_dict={ 380 | self.x: x, 381 | self.laplacian: L, 382 | self.y: y, 383 | self.time_interval_index: time_interval_index, 384 | self.rnn_index: rnn_index}) 385 | --------------------------------------------------------------------------------
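To make the graph convolution used throughout `model_sparse_graph_signal.py` easier to follow, here is a minimal NumPy sketch of the same Chebyshev recursion that `cheby_conv` implements, for a single cascade snapshot and ignoring the exact feature ordering and batching of the TensorFlow version; all names and sizes below are illustrative, not taken from the repository.

import numpy as np

def cheby_conv_np(x, L_scaled, W, K):
    # x: [n_nodes, feat_in] node signal for one snapshot
    # L_scaled: [n_nodes, n_nodes] rescaled Laplacian
    # W: [K * feat_in, feat_out] filter weights, K: Chebyshev polynomial order
    terms = [x]                                              # T_0(L) x = x
    if K > 1:
        terms.append(L_scaled @ x)                           # T_1(L) x = L x
    for _ in range(2, K):
        terms.append(2 * L_scaled @ terms[-1] - terms[-2])   # T_k = 2 L T_{k-1} - T_{k-2}
    stacked = np.concatenate(terms, axis=1)                  # [n_nodes, K * feat_in]
    return stacked @ W                                       # [n_nodes, feat_out]

# Tiny usage example with made-up sizes.
rng = np.random.default_rng(0)
n_nodes, feat_in, feat_out, K = 4, 3, 2, 2
out = cheby_conv_np(rng.standard_normal((n_nodes, feat_in)),
                    rng.standard_normal((n_nodes, n_nodes)),
                    rng.standard_normal((K * feat_in, feat_out)),
                    K)
print(out.shape)                                             # (4, 2)

With K = 2 (the default `num_kernel` flag), each output feature mixes a node's own signal with its one-hop neighbourhood under the rescaled directed Laplacian.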