├── History
│   └── tem.his
├── Models
│   └── checkpoint
├── Datasets
│   ├── CHI_crime
│   │   ├── trn.pkl
│   │   ├── tst.pkl
│   │   └── val.pkl
│   └── NYC_crime
│       ├── trn.pkl
│       ├── tst.pkl
│       └── val.pkl
├── Utils
│   ├── TimeLogger.py
│   └── NNLayers.py
├── Params.py
├── DataHandler.py
└── HG_ST_labcode.py

/History/tem.his:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akaxlh/ST-SHN/HEAD/History/tem.his
--------------------------------------------------------------------------------

/Models/checkpoint:
--------------------------------------------------------------------------------
model_checkpoint_path: "tem"
all_model_checkpoint_paths: "tem"
--------------------------------------------------------------------------------

/Datasets/CHI_crime/trn.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akaxlh/ST-SHN/HEAD/Datasets/CHI_crime/trn.pkl
--------------------------------------------------------------------------------

/Datasets/CHI_crime/tst.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akaxlh/ST-SHN/HEAD/Datasets/CHI_crime/tst.pkl
--------------------------------------------------------------------------------

/Datasets/CHI_crime/val.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akaxlh/ST-SHN/HEAD/Datasets/CHI_crime/val.pkl
--------------------------------------------------------------------------------

/Datasets/NYC_crime/trn.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akaxlh/ST-SHN/HEAD/Datasets/NYC_crime/trn.pkl
--------------------------------------------------------------------------------

/Datasets/NYC_crime/tst.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akaxlh/ST-SHN/HEAD/Datasets/NYC_crime/tst.pkl
--------------------------------------------------------------------------------

/Datasets/NYC_crime/val.pkl:
--------------------------------------------------------------------------------
https://raw.githubusercontent.com/akaxlh/ST-SHN/HEAD/Datasets/NYC_crime/val.pkl
--------------------------------------------------------------------------------

/Utils/TimeLogger.py:
--------------------------------------------------------------------------------
import datetime

logmsg = ''
timemark = dict()
saveDefault = False

# timestamped console logger with an optional in-memory record (logmsg)
def log(msg, save=None, oneline=False):
    global logmsg
    global saveDefault
    time = datetime.datetime.now()
    tem = '%s: %s' % (time, msg)
    if save is not None:
        if save:
            logmsg += tem + '\n'
    elif saveDefault:
        logmsg += tem + '\n'
    if oneline:
        print(tem, end='\r')
    else:
        print(tem)

def marktime(marker):
    global timemark
    timemark[marker] = datetime.datetime.now()

def SpentTime(marker):
    global timemark
    if marker not in timemark:
        msg = 'LOGGER ERROR, marker %s not found' % marker
        tem = '%s: %s' % (datetime.datetime.now(), msg)
        print(tem)
        return False
    return datetime.datetime.now() - timemark[marker]

def SpentTooLong(marker, day=0, hour=0, minute=0, second=0):
    global timemark
    if marker not in timemark:
        msg = 'LOGGER ERROR, marker %s not found' % marker
        tem = '%s: %s' % (datetime.datetime.now(), msg)
        print(tem)
        return False
    return datetime.datetime.now() - timemark[marker] >= datetime.timedelta(days=day, hours=hour, minutes=minute, seconds=second)

if __name__ == '__main__':
    log('')
--------------------------------------------------------------------------------
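A minimal usage sketch of the logger above (not part of the repository; the marker name 'train' is illustrative):

import Utils.TimeLogger as logger
from Utils.TimeLogger import log, marktime, SpentTime

logger.saveDefault = True                # also accumulate messages in logger.logmsg
marktime('train')                        # start a named timer
log('training started')                  # prints a timestamped message
log('elapsed: %s' % SpentTime('train'))  # datetime.timedelta since marktime('train')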
/Params.py:
--------------------------------------------------------------------------------
import argparse

def parse_args():
    parser = argparse.ArgumentParser(description='Model Params')
    parser.add_argument('--lr', default=1e-3, type=float, help='learning rate')
    parser.add_argument('--batch', default=1, type=int, help='batch size')
    parser.add_argument('--reg', default=0, type=float, help='weight decay regularizer')
    parser.add_argument('--spreg', default=0, type=float, help='L1 sparsity regularizer for the hypergraph adjacency')
    parser.add_argument('--epoch', default=10, type=int, help='number of epochs')
    parser.add_argument('--decay', default=0.96, type=float, help='learning rate decay rate')
    parser.add_argument('--save_path', default='tem', help='file name to save model and training record')
    parser.add_argument('--load_model', default=None, help='model name to load')
    parser.add_argument('--latdim', default=16, type=int, help='embedding size')
    parser.add_argument('--spacialRange', default=2, type=int, help='number of hops for spatial message propagation')
    parser.add_argument('--temporalRange', default=30, type=int, help='number of days of temporal input features')
    parser.add_argument('--temporalGnnRange', default=7, type=int, help='number of GNN iterations for temporal message propagation')
    parser.add_argument('--data', default='NYC', type=str, help='name of dataset')
    parser.add_argument('--tstEpoch', default=1, type=int, help='number of epochs between tests while training')
    parser.add_argument('--head', default=4, type=int, help='number of attention heads')
    parser.add_argument('--negRate', default=4, type=int, help='ratio of negative to positive samples while training')
    parser.add_argument('--border', default=0.5, type=float, help='decision threshold between positive and negative predictions')
    parser.add_argument('--hyperNum', default=128, type=int, help='number of hyperedges')
    parser.add_argument('--dropRate', default=0.0, type=float, help='drop rate for dropout')
    parser.add_argument('--task', default='c', type=str, help='c for classification, r for regression')
    return parser.parse_args()
args = parse_args()
--------------------------------------------------------------------------------
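Illustrative invocations of the training script with these flags (example settings, not recommendations):

python HG_ST_labcode.py --data CHI --task c --save_path chi_cls
python HG_ST_labcode.py --data NYC --task r --load_model tem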
/DataHandler.py:
--------------------------------------------------------------------------------
import pickle
import numpy as np
from scipy.sparse import csr_matrix
from Params import args
import scipy.sparse as sp
from Utils.TimeLogger import log

class DataHandler:
    def __init__(self):
        if args.data == 'NYC':
            predir = 'Datasets/NYC_crime/'
        elif args.data == 'CHI':
            predir = 'Datasets/CHI_crime/'
        elif args.data == 'NYC_1kilo':
            predir = 'Datasets/NYC_crime/1kilo/'
        self.predir = predir
        with open(predir + 'trn.pkl', 'rb') as fs:
            trnT = pickle.load(fs)
        with open(predir + 'val.pkl', 'rb') as fs:
            valT = pickle.load(fs)
        with open(predir + 'tst.pkl', 'rb') as fs:
            tstT = pickle.load(fs)
        args.row, args.col, _, args.offNum = trnT.shape
        args.areaNum = args.row * args.col
        args.trnDays = trnT.shape[2]
        args.valDays = valT.shape[2]
        args.tstDays = tstT.shape[2]
        args.decay_step = args.trnDays // args.batch
        self.mean = np.mean(trnT)
        self.std = np.std(trnT)
        rspFunc = (lambda tensor: np.reshape(tensor, [args.areaNum, -1, args.offNum]))
        self.trnT = rspFunc(trnT)  # row*col, days, offNum
        self.valT = rspFunc(valT)
        self.tstT = rspFunc(tstT)

        self.constructGraph()
        self.getTestAreas()
        print('Row:', args.row, ', Col:', args.col)
        print('Sparsity:', np.sum(trnT != 0) / np.reshape(trnT, [-1]).shape[0])  # fraction of nonzero entries

    @classmethod
    def idEncode(cls, x, y):
        return x * args.col + y

    @classmethod
    def idDecode(cls, node):
        return node // args.col, node % args.col

    def zScore(self, data):
        # return np.log2(data + 1)
        return (data - self.mean) / self.std

    def zInverse(self, data):
        return data * self.std + self.mean

    def constructGraph(self):
        # offsets of the 8 grid neighbors plus the cell itself (self-loop)
        mx = [-1, 0, 1, 0, -1, -1, 1, 1, 0]
        my = [0, -1, 0, 1, -1, 1, -1, 1, 0]
        def illegal(x, y):
            return x < 0 or y < 0 or x >= args.row or y >= args.col
        edges = list()
        for i in range(args.row):
            for j in range(args.col):
                n1 = self.idEncode(i, j)
                for k in range(len(mx)):
                    temx = i + mx[k]
                    temy = j + my[k]
                    if illegal(temx, temy):
                        continue
                    n2 = self.idEncode(temx, temy)
                    edges.append([n1, n2])
        edges.sort(key=lambda x: x[0] * 1e5 + x[1])  # 1e5 must exceed the number of areas
        rowTot, colTot = [[0] * args.areaNum for i in range(2)]
        for e in range(len(edges)):
            rowTot[edges[e][0]] += 1
            colTot[edges[e][1]] += 1
        # symmetric normalization: each edge (r, c) is weighted 1/sqrt(deg(r)*deg(c))
        vals = np.ones(len(edges))
        for e in range(len(vals)):
            vals[e] /= np.sqrt(rowTot[edges[e][0]] * colTot[edges[e][1]])
        edges = np.array(edges)
        self.rows = edges[:, 0]
        self.cols = edges[:, 1]
        self.vals = vals

    def getTestAreas(self):
        posTimes = np.sum(1 * (self.trnT != 0), axis=1)
        percent = posTimes / args.trnDays
        # test only locations that are neither trivially positive nor trivially negative
        self.tstLocs = (percent > 0.2) * (percent < 0.8) * 1
        print('Negative/Positive Rate', args.negRate)
        print('Number of locations to test', np.sum(self.tstLocs), 'out of', self.trnT.shape[0])
        valRes = np.sum(np.sum(self.valT == 0, axis=1) * self.tstLocs) / (np.sum(self.tstLocs) * args.valDays)
        tstRes = np.sum(np.sum(self.tstT == 0, axis=1) * self.tstLocs) / (np.sum(self.tstLocs) * args.tstDays)
        print('Val Trivial Acc', valRes)
        print('Tst Trivial Acc', tstRes)
--------------------------------------------------------------------------------
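A small worked example of the grid encoding and edge normalization above (the grid width of 4 is illustrative):

col = 4                          # grid width, i.e. args.col
node = 1 * col + 2               # idEncode(1, 2) -> 6
x, y = node // col, node % col   # idDecode(6) -> (1, 2)
# An interior cell has degree 9 (8 neighbors plus the self-loop), so an edge
# between two interior cells receives weight 1/sqrt(9*9) = 1/9.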
/Utils/NNLayers.py:
--------------------------------------------------------------------------------
import tensorflow as tf
from tensorflow.contrib.layers import xavier_initializer  # requires TensorFlow 1.x
import numpy as np

paramId = 0
biasDefault = False
params = {}
regParams = {}
ita = 0.2
leaky = 0.1

def getParamId():
    global paramId
    paramId += 1
    return paramId

def setIta(ITA):
    global ita
    ita = ITA

def setBiasDefault(val):
    global biasDefault
    biasDefault = val

def getParam(name):
    return params[name]

def addReg(name, param):
    global regParams
    if name not in regParams:
        regParams[name] = param
    else:
        print('ERROR: Parameter already exists')

def addParam(name, param):
    global params
    if name not in params:
        params[name] = param

def defineRandomNameParam(shape, dtype=tf.float32, reg=False, initializer='xavier', trainable=True):
    name = 'defaultParamName%d' % getParamId()
    return defineParam(name, shape, dtype, reg, initializer, trainable)

def defineParam(name, shape, dtype=tf.float32, reg=False, initializer='xavier', trainable=True):
    global params
    global regParams
    assert name not in params, 'name %s already exists' % name
    if initializer == 'xavier':
        ret = tf.get_variable(name=name, dtype=dtype, shape=shape,
            initializer=xavier_initializer(dtype=tf.float32),
            trainable=trainable)
    elif initializer == 'trunc_normal':
        ret = tf.get_variable(name=name, initializer=tf.random.truncated_normal(shape=[int(shape[0]), shape[1]], mean=0.0, stddev=0.03, dtype=dtype))
    elif initializer == 'zeros':
        ret = tf.get_variable(name=name, dtype=dtype,
            initializer=tf.zeros(shape=shape, dtype=tf.float32),
            trainable=trainable)
    elif initializer == 'ones':
        ret = tf.get_variable(name=name, dtype=dtype, initializer=tf.ones(shape=shape, dtype=tf.float32), trainable=trainable)
    elif not isinstance(initializer, str):
        ret = tf.get_variable(name=name, dtype=dtype,
            initializer=initializer, trainable=trainable)
    else:
        print('ERROR: Unrecognized initializer')
        exit()
    params[name] = ret
    if reg:
        regParams[name] = ret
    return ret

def getOrDefineParam(name, shape, dtype=tf.float32, reg=False, initializer='xavier', trainable=True, reuse=False):
    global params
    global regParams
    if name in params:
        assert reuse, 'Reusing Param %s Not Specified' % name
        if reg and name not in regParams:
            regParams[name] = params[name]
        return params[name]
    return defineParam(name, shape, dtype, reg, initializer, trainable)

def BN(inp, name=None):
    global ita
    dim = inp.get_shape()[1]
    if name is None:
        name = 'defaultParamName%d' % getParamId()
    scale = tf.Variable(tf.ones([dim]))
    shift = tf.Variable(tf.zeros([dim]))
    fcMean, fcVar = tf.nn.moments(inp, axes=[0])
    ema = tf.train.ExponentialMovingAverage(decay=0.5)
    emaApplyOp = ema.apply([fcMean, fcVar])
    with tf.control_dependencies([emaApplyOp]):
        mean = tf.identity(fcMean)
        var = tf.identity(fcVar)
    ret = tf.nn.batch_normalization(inp, mean, var, shift, scale, 1e-8)
    return ret

def FC(inp, outDim, name=None, useBias=False, activation=None, reg=False, useBN=False, dropout=None, initializer='xavier', reuse=False):
    global params
    global regParams
    global leaky
    inDim = inp.get_shape()[1]
    temName = name if name != None else 'defaultParamName%d' % getParamId()
    W = getOrDefineParam(temName, [inDim, outDim], reg=reg, initializer=initializer, reuse=reuse)
    if dropout != None:
        ret = tf.nn.dropout(inp, rate=dropout) @ W
    else:
        ret = inp @ W
    if useBias:
        ret = Bias(ret, name=name, reuse=reuse)
    if useBN:
        ret = BN(ret)
    if activation != None:
        ret = Activate(ret, activation)
    return ret

def Bias(data, name=None, reg=False, reuse=False):
    inDim = data.get_shape()[-1]
    temName = name if name != None else 'defaultParamName%d' % getParamId()
    temBiasName = temName + 'Bias'
    bias = getOrDefineParam(temBiasName, inDim, reg=False, initializer='zeros', reuse=reuse)
    if reg:
        regParams[temBiasName] = bias
    return data + bias

def ActivateHelp(data, method):
    if method == 'relu':
        ret = tf.nn.relu(data)
    elif method == 'sigmoid':
        ret = tf.nn.sigmoid(data)
    elif method == 'tanh':
        ret = tf.nn.tanh(data)
    elif method == 'softmax':
        ret = tf.nn.softmax(data, axis=-1)
    elif method == 'leakyRelu':
        ret = tf.maximum(leaky * data, data)
    elif method == 'twoWayLeakyRelu6':
        temMask = tf.to_float(tf.greater(data, 6.0))
        ret = temMask * (6 + leaky * (data - 6)) + (1 - temMask) * tf.maximum(leaky * data, data)
    elif method == '-1relu':
        ret = tf.maximum(-1.0, data)
    elif method == 'relu6':
        ret = tf.maximum(0.0, tf.minimum(6.0, data))
    elif method == 'relu3':
        ret = tf.maximum(0.0, tf.minimum(3.0, data))
    else:
        raise Exception('Error Activation Function')
    return ret

def Activate(data, method, useBN=False):
    global leaky
    if useBN:
        ret = BN(data)
    else:
        ret = data
    ret = ActivateHelp(ret, method)
    return ret

def Regularize(names=None, method='L2'):
    ret = 0
    if method == 'L1':
        if names != None:
            for name in names:
                ret += tf.reduce_sum(tf.abs(getParam(name)))
        else:
            for name in regParams:
                ret += tf.reduce_sum(tf.abs(regParams[name]))
    elif method == 'L2':
        if names != None:
            for name in names:
                ret += tf.reduce_sum(tf.square(getParam(name)))
        else:
            for name in regParams:
                ret += tf.reduce_sum(tf.square(regParams[name]))
    return ret

def Dropout(data, rate):
    if rate is None:
        return data
    else:
        return tf.nn.dropout(data, rate=rate)
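# The two attention helpers below treat the input as `number` token
# representations of size inpDim, split into numHeads heads of inpDim//numHeads
# channels; attention weights are softmax(q*k / sqrt(inpDim/numHeads)) over the
# key axis. selfAttention learns separate Q/K/V projections, while
# lightSelfAttention shares one projection for q and k and uses the raw input
# as v, saving two parameter matrices. Both add a residual connection to each input.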
def selfAttention(localReps, number, inpDim, numHeads):
    Q = defineRandomNameParam([inpDim, inpDim], reg=True)
    K = defineRandomNameParam([inpDim, inpDim], reg=True)
    V = defineRandomNameParam([inpDim, inpDim], reg=True)
    rspReps = tf.reshape(tf.stack(localReps, axis=1), [-1, inpDim])
    q = tf.reshape(rspReps @ Q, [-1, number, 1, numHeads, inpDim // numHeads])
    k = tf.reshape(rspReps @ K, [-1, 1, number, numHeads, inpDim // numHeads])
    v = tf.reshape(rspReps @ V, [-1, 1, number, numHeads, inpDim // numHeads])
    att = tf.nn.softmax(tf.reduce_sum(q * k, axis=-1, keepdims=True) / tf.sqrt(inpDim / numHeads), axis=2)
    attval = tf.reshape(tf.reduce_sum(att * v, axis=2), [-1, number, inpDim])
    rets = [None] * number
    paramId = 'dfltP%d' % getParamId()
    for i in range(number):
        tem1 = tf.reshape(tf.slice(attval, [0, i, 0], [-1, 1, -1]), [-1, inpDim])
        # tem2 = FC(tem1, inpDim, useBias=True, name=paramId+'_1', reg=True, activation='relu', reuse=True) + localReps[i]
        rets[i] = tem1 + localReps[i]
    return rets

def lightSelfAttention(localReps, number, inpDim, numHeads):
    Q = defineRandomNameParam([inpDim, inpDim], reg=True)
    rspReps = tf.reshape(tf.stack(localReps, axis=1), [-1, inpDim])
    tem = rspReps @ Q
    q = tf.reshape(tem, [-1, number, 1, numHeads, inpDim // numHeads])
    k = tf.reshape(tem, [-1, 1, number, numHeads, inpDim // numHeads])
    v = tf.reshape(rspReps, [-1, 1, number, numHeads, inpDim // numHeads])
    # att = tf.nn.softmax(tf.reduce_sum(q * k, axis=-1, keepdims=True) * tf.sqrt(inpDim/numHeads), axis=2)
    att = tf.nn.softmax(tf.reduce_sum(q * k, axis=-1, keepdims=True) / tf.sqrt(inpDim / numHeads), axis=2)
    attval = tf.reshape(tf.reduce_sum(att * v, axis=2), [-1, number, inpDim])
    rets = [None] * number
    paramId = 'dfltP%d' % getParamId()
    for i in range(number):
        tem1 = tf.reshape(tf.slice(attval, [0, i, 0], [-1, 1, -1]), [-1, inpDim])
        # tem2 = FC(tem1, inpDim, useBias=True, name=paramId+'_1', reg=True, activation='relu', reuse=True) + localReps[i]
        rets[i] = tem1 + localReps[i]
    return rets  # , tf.squeeze(att)
--------------------------------------------------------------------------------
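A minimal shape-check sketch for the attention helpers above (TensorFlow 1.x; the batch size and dimensions are illustrative, not from the repository):

import tensorflow as tf
import Utils.NNLayers as NNs

localReps = [tf.zeros([2, 16]) for _ in range(3)]  # 3 tokens, batch 2, dim 16
rets = NNs.lightSelfAttention(localReps, number=3, inpDim=16, numHeads=4)
print([r.get_shape().as_list() for r in rets])     # [[2, 16], [2, 16], [2, 16]]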
/HG_ST_labcode.py:
--------------------------------------------------------------------------------
import os
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import numpy as np
from Params import args
import Utils.TimeLogger as logger
from Utils.TimeLogger import log
import Utils.NNLayers as NNs
from Utils.NNLayers import FC, Regularize, Activate, Bias, defineParam, defineRandomNameParam
from DataHandler import DataHandler
import tensorflow as tf
from tensorflow.core.protobuf import config_pb2
import pickle

class Model:
    def __init__(self, sess, handler):
        self.sess = sess
        self.handler = handler

        self.metrics = dict()
        mets = ['preLoss', 'microF1', 'macroF1']
        for i in range(args.offNum):
            mets.append('F1_%d' % i)
        for met in mets:
            self.metrics['Train' + met] = list()
            self.metrics['Test' + met] = list()

    def makePrint(self, name, ep, reses, save):
        ret = 'Epoch %d/%d, %s: ' % (ep, args.epoch, name)
        for metric in reses:
            val = reses[metric]
            ret += '%s = %.4f, ' % (metric, val)
            tem = name + metric
            if save and tem in self.metrics:
                self.metrics[tem].append(val)
        ret = ret[:-2] + ' '
        return ret

    def run(self):
        self.prepareModel()
        log('Model Prepared')
        if args.load_model != None:
            self.loadModel()
            stloc = len(self.metrics['TrainpreLoss']) * args.tstEpoch
        else:
            stloc = 0
        init = tf.global_variables_initializer()
        self.sess.run(init)
        log('Variables Inited')
        bestRes = None
        for ep in range(stloc, args.epoch):
            test = (ep % args.tstEpoch == 0)
            reses = self.trainEpoch()
            log(self.makePrint('Train', ep, reses, test))
            if test:
                reses = self.testEpoch(self.handler.tstT, np.concatenate([self.handler.trnT, self.handler.valT], axis=1))
                # keep the result with the lowest MAPE (regression) or the highest macro-F1 (classification)
                if bestRes is None or (args.task == 'r' and bestRes['MAPE'] > reses['MAPE']) or (args.task == 'c' and bestRes['macroF1'] < reses['macroF1']):
                    bestRes = reses
            if ep % args.tstEpoch == 0:
                self.saveHistory()
            print()
        reses = self.testEpoch(self.handler.tstT, np.concatenate([self.handler.trnT, self.handler.valT], axis=1))
        log(self.makePrint('Test', args.epoch, reses, True))
        if bestRes is None or (args.task == 'r' and bestRes['MAPE'] > reses['MAPE']) or (args.task == 'c' and bestRes['macroF1'] < reses['macroF1']):
            bestRes = reses
        log(self.makePrint('Best', args.epoch, bestRes, True))
        self.saveHistory()

    def spacialModeling(self, rows, cols, vals, embeds):
        # embeds: [area, time, offense, latdim]; rows/cols index the grid edges
        rowEmbeds = tf.nn.embedding_lookup(embeds, rows)
        colEmbeds = tf.nn.embedding_lookup(embeds, cols)
        Q = defineRandomNameParam([args.latdim, args.latdim], reg=False)
        K = defineRandomNameParam([args.latdim, args.latdim], reg=False)
        V = defineRandomNameParam([args.latdim, args.latdim], reg=False)
        q = tf.reshape(tf.einsum('etod,dl->etol', rowEmbeds, Q), [-1, args.temporalRange, args.offNum, 1, args.head, args.latdim // args.head])
        k = tf.reshape(tf.einsum('etod,dl->etol', colEmbeds, K), [-1, args.temporalRange, 1, args.offNum, args.head, args.latdim // args.head])
        v = tf.reshape(tf.einsum('etod,dl->etol', colEmbeds, V), [-1, args.temporalRange, 1, args.offNum, args.head, args.latdim // args.head])
        # multi-head attention over the source-offense axis (axis 3)
        att = tf.nn.softmax(tf.reduce_sum(q * k, axis=-1, keepdims=True) / tf.sqrt(float(args.latdim // args.head)), axis=3)
        attV = tf.reshape(tf.reduce_sum(att * v, axis=3), [-1, args.temporalRange, args.offNum, args.latdim])
        # sum the messages of all edges sharing a target node (rows is sorted)
        ret = tf.math.segment_sum(attV * tf.nn.dropout(vals, rate=self.dropRate), rows)
        return Activate(ret, 'leakyRelu')  # area, time, offense, latdim

    def temporalModeling(self, rows, cols, vals, embeds):
        # attend from each day t (target) to day t-1 (source) along the grid edges
        prevTEmbeds = tf.slice(embeds, [0, 0, 0, 0], [-1, args.temporalRange - 1, -1, -1])
        nextTEmbeds = tf.slice(embeds, [0, 1, 0, 0], [-1, args.temporalRange - 1, -1, -1])
        rowEmbeds = tf.nn.embedding_lookup(nextTEmbeds, rows)
        colEmbeds = tf.nn.embedding_lookup(prevTEmbeds, cols)
        Q = defineRandomNameParam([args.latdim, args.latdim], reg=False)
        K = defineRandomNameParam([args.latdim, args.latdim], reg=False)
        V = defineRandomNameParam([args.latdim, args.latdim], reg=False)
        q = tf.reshape(tf.einsum('etod,dl->etol', rowEmbeds, Q), [-1, args.temporalRange - 1, args.offNum, 1, args.head, args.latdim // args.head])
        k = tf.reshape(tf.einsum('etod,dl->etol', colEmbeds, K), [-1, args.temporalRange - 1, 1, args.offNum, args.head, args.latdim // args.head])
        v = tf.reshape(tf.einsum('etod,dl->etol', colEmbeds, V), [-1, args.temporalRange - 1, 1, args.offNum, args.head, args.latdim // args.head])
        att = tf.nn.softmax(tf.reduce_sum(q * k, axis=-1, keepdims=True) / tf.sqrt(float(args.latdim // args.head)), axis=3)
        attV = tf.reshape(tf.reduce_sum(att * v, axis=3), [-1, args.temporalRange - 1, args.offNum, args.latdim])
        ret = tf.math.segment_sum(attV * tf.nn.dropout(vals, rate=self.dropRate), rows)
        # day 0 has no predecessor, so keep its input embedding
        ret = tf.concat([tf.slice(embeds, [0, 0, 0, 0], [-1, 1, -1, -1]), ret], axis=1)
        return Activate(ret, 'leakyRelu')  # area, time, offense, latdim

    def hyperGNN(self, adj, embeds):
        # adj: [hyperNum, areaNum] learnable hypergraph incidence matrix
        tpadj = tf.transpose(adj)
        # area -> hyperedge aggregation, then hyperedge -> area propagation
        hyperEmbeds = Activate(tf.einsum('hn,ntod->htod', tf.nn.dropout(adj, rate=self.dropRate), embeds), 'leakyRelu')
        retEmbeds = Activate(tf.einsum('nh,htod->ntod', tf.nn.dropout(tpadj, rate=self.dropRate), hyperEmbeds), 'leakyRelu')
        return retEmbeds
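    # ours() below composes the layers above: spacialRange rounds of grid
    # attention (spacialModeling), each followed by a hypergraph pass (hyperGNN)
    # with a residual; an average over the spatial layers; temporalGnnRange
    # rounds of day-to-day propagation (temporalModeling); an average over the
    # temporal layers; and a mean over the time axis before the per-category
    # prediction layer.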
    def ours(self):
        # one embedding per offense category, scaled by the (z-scored) input counts
        offenseEmbeds = defineParam('offenseEmbeds', [1, 1, args.offNum, args.latdim], reg=False)
        initialEmbeds = offenseEmbeds * tf.expand_dims(self.feats, axis=-1)  # area, time, offense, latdim
        areaEmbeds = defineParam('areaEmbeds', [args.areaNum, 1, 1, args.latdim], reg=False)
        embeds = [initialEmbeds]# + areaEmbeds]
        for l in range(args.spacialRange):
            embed = embeds[-1]
            embed = self.spacialModeling(self.rows, self.cols, self.vals, embed)
            embed = self.hyperGNN(self.hyperAdj, embed) + embed  # hypergraph pass with residual
            embeds.append(embed)
        embed = tf.add_n(embeds) / args.spacialRange
        embeds = [embed]
        for l in range(args.temporalGnnRange):
            embeds.append(self.temporalModeling(self.rows, self.cols, self.vals, embeds[-1]))
        embed = tf.add_n(embeds) / args.temporalGnnRange
        embed = tf.reduce_mean(embed, axis=1)  # area, offense, latdim
        W = defineParam('predEmbeds', [1, args.offNum, args.latdim], reg=False)
        if args.task == 'c':
            allPreds = Activate(tf.reduce_sum(embed * W, axis=-1), 'sigmoid')  # area, offense
        elif args.task == 'r':
            allPreds = tf.reduce_sum(embed * W, axis=-1)
        return allPreds, embed

    def prepareModel(self):
        self.rows = tf.constant(self.handler.rows)
        self.cols = tf.constant(self.handler.cols)
        self.vals = tf.reshape(tf.constant(self.handler.vals, dtype=tf.float32), [-1, 1, 1, 1])
        self.hyperAdj = defineParam('hyperAdj', [args.hyperNum, args.areaNum], reg=True)
        self.feats = tf.placeholder(name='feats', dtype=tf.float32, shape=[args.areaNum, args.temporalRange, args.offNum])
        self.dropRate = tf.placeholder(name='dropRate', dtype=tf.float32, shape=[])

        self.labels = tf.placeholder(name='labels', dtype=tf.float32, shape=[args.areaNum, args.offNum])
        self.preds, embed = self.ours()

        if args.task == 'c':
            # labels: 1 for positive locations, -1 for sampled negatives, 0 for ignored ones
            posInd = tf.cast(tf.greater(self.labels, 0), tf.float32)
            negInd = tf.cast(tf.less(self.labels, 0), tf.float32)
            posPred = tf.cast(tf.greater_equal(self.preds, args.border), tf.float32)
            negPred = tf.cast(tf.less(self.preds, args.border), tf.float32)
            NNs.addReg('embed', embed * tf.expand_dims(posInd + negInd, axis=-1))
            # binary cross-entropy over the sampled locations only
            self.preLoss = tf.reduce_sum(-(posInd * tf.log(self.preds + 1e-8) + negInd * tf.log(1 - self.preds + 1e-8))) / (tf.reduce_sum(posInd) + tf.reduce_sum(negInd))
            self.truePos = tf.reduce_sum(posPred * posInd, axis=0)
            self.falseNeg = tf.reduce_sum(negPred * posInd, axis=0)
            self.trueNeg = tf.reduce_sum(negPred * negInd, axis=0)
            self.falsePos = tf.reduce_sum(posPred * negInd, axis=0)
        elif args.task == 'r':
            self.mask = tf.placeholder(name='mask', dtype=tf.float32, shape=[args.areaNum, args.offNum])
            self.preLoss = tf.reduce_sum(tf.square(self.preds - self.labels) * self.mask) / tf.reduce_sum(self.mask)
            self.sqLoss = tf.reduce_sum(tf.square(self.preds - self.labels) * self.mask, axis=0)
            self.absLoss = tf.reduce_sum(tf.abs(self.preds - self.labels) * self.mask, axis=0)
            self.tstNums = tf.reduce_sum(self.mask, axis=0)
            posMask = self.mask * tf.cast(tf.greater(self.labels, 0.5), tf.float32)
            self.apeLoss = tf.reduce_sum(tf.abs(self.preds - self.labels) / (self.labels + 1e-8) * posMask, axis=0)
            self.posNums = tf.reduce_sum(posMask, axis=0)
            NNs.addReg('embed', embed * tf.expand_dims(self.mask, axis=-1))

        self.regLoss = args.reg * Regularize() + args.spreg * tf.reduce_sum(tf.abs(self.hyperAdj))
        self.loss = self.preLoss + self.regLoss

        globalStep = tf.Variable(0, trainable=False)
        learningRate = tf.train.exponential_decay(args.lr, globalStep, args.decay_step, args.decay, staircase=True)
        self.optimizer = tf.train.AdamOptimizer(learningRate).minimize(self.loss, global_step=globalStep)
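    # The optimizer above uses staircase exponential decay:
    # lr(step) = lr * decay ** floor(step / decay_step), with decay_step =
    # trnDays // batch, i.e. one epoch of steps. With the defaults lr = 1e-3
    # and decay = 0.96, the learning rate after 10 epochs is about
    # 1e-3 * 0.96 ** 10 ~= 6.6e-4.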
    def sampleTrainBatch(self, batIds):
        idx = batIds[0]
        label = self.handler.trnT[:, idx, :]  # area, offNum
        if args.task == 'c':
            negRate = args.negRate#np.random.randint(1, args.negRate*2)
        elif args.task == 'r':
            negRate = 0
        posNums = np.sum(label != 0, axis=0) * negRate  # negatives to sample per category
        retLabels = (label != 0) * 1
        if args.task == 'r':
            mask = retLabels
            retLabels = label
        for i in range(args.offNum):
            temMap = label[:, i]
            negPos = np.reshape(np.argwhere(temMap == 0), [-1])
            sampedNegPos = np.random.permutation(negPos)[:posNums[i]]
            # sampedNegPos = negPos
            if args.task == 'c':
                retLabels[sampedNegPos, i] = -1
            elif args.task == 'r':
                mask[sampedNegPos, i] = 1
        feat = self.handler.trnT[:, idx - args.temporalRange: idx, :]  # preceding temporalRange days as input
        if args.task == 'c':
            return self.handler.zScore(feat), retLabels
        elif args.task == 'r':
            return self.handler.zScore(feat), retLabels, mask

    def trainEpoch(self):
        ids = np.random.permutation(list(range(args.temporalRange, args.trnDays)))
        epochLoss, epochPreLoss, epochAcc = [0] * 3
        num = len(ids)

        steps = int(np.ceil(num / args.batch))
        for i in range(steps):
            st = i * args.batch
            ed = min((i + 1) * args.batch, num)
            batIds = ids[st: ed]

            tem = self.sampleTrainBatch(batIds)
            if args.task == 'c':
                feats, labels = tem
            elif args.task == 'r':
                feats, labels, mask = tem

            targets = [self.optimizer, self.preLoss, self.loss]
            feeddict = {self.feats: feats, self.labels: labels, self.dropRate: args.dropRate}
            if args.task == 'r':
                feeddict[self.mask] = mask
            res = self.sess.run(targets, feed_dict=feeddict, options=config_pb2.RunOptions(report_tensor_allocations_upon_oom=True))

            preLoss, loss = res[1:]

            epochLoss += loss
            epochPreLoss += preLoss
            log('Step %d/%d: preLoss = %.4f         ' % (i, steps, preLoss), save=False, oneline=True)
        ret = dict()
        ret['Loss'] = epochLoss / steps
        ret['preLoss'] = epochPreLoss / steps
        return ret

    def sampTestBatch(self, batIds, tstTensor, inpTensor):
        idx = batIds[0]
        label = tstTensor[:, idx, :]  # area, offNum
        if args.task == 'c':
            retLabels = ((label > 0) * 1 + (label == 0) * (-1)) * self.handler.tstLocs
        elif args.task == 'r':
            retLabels = label
            mask = self.handler.tstLocs * (label > 0)
        if idx - args.temporalRange < 0:
            # the input window crosses into the preceding split; stitch it together
            temT = inpTensor[:, idx - args.temporalRange:, :]
            temT2 = tstTensor[:, :idx, :]
            feats = np.concatenate([temT, temT2], axis=1)
        else:
            feats = tstTensor[:, idx - args.temporalRange: idx, :]
        if args.task == 'c':
            return self.handler.zScore(feats), retLabels
        elif args.task == 'r':
            return self.handler.zScore(feats), retLabels, mask
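    # Metric conventions in testEpoch below: per-category F1_i = 2*TP_i /
    # (2*TP_i + FP_i + FN_i); macroF1 is the unweighted mean of the per-category
    # F1 scores, and microF1 pools TP/FP/FN across categories before computing F1.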
    def testEpoch(self, tstTensor, inpTensor):
        ids = np.random.permutation(list(range(tstTensor.shape[1])))
        epochLoss, epochPreLoss = [0] * 2
        if args.task == 'c':
            # one counter per offense category
            epochTp, epochFp, epochTn, epochFn = [np.zeros(args.offNum) for i in range(4)]
        elif args.task == 'r':
            epochSqLoss, epochAbsLoss, epochTstNum, epochApeLoss, epochPosNums = [np.zeros(args.offNum) for i in range(5)]
        num = len(ids)

        steps = int(np.ceil(num / args.batch))
        for i in range(steps):
            st = i * args.batch
            ed = min((i + 1) * args.batch, num)
            batIds = ids[st: ed]

            tem = self.sampTestBatch(batIds, tstTensor, inpTensor)
            if args.task == 'c':
                feats, labels = tem
            elif args.task == 'r':
                feats, labels, mask = tem

            if args.task == 'c':
                targets = [self.preLoss, self.regLoss, self.loss, self.truePos, self.falsePos, self.trueNeg, self.falseNeg]
                feeddict = {self.feats: feats, self.labels: labels, self.dropRate: 0.0}
            elif args.task == 'r':
                targets = [self.preds, self.preLoss, self.regLoss, self.loss, self.sqLoss, self.absLoss, self.tstNums, self.apeLoss, self.posNums]
                feeddict = {self.feats: feats, self.labels: labels, self.dropRate: 0.0, self.mask: mask}
            res = self.sess.run(targets, feed_dict=feeddict, options=config_pb2.RunOptions(report_tensor_allocations_upon_oom=True))
            if args.task == 'c':
                preLoss, regLoss, loss, tp, fp, tn, fn = res
                epochTp += tp
                epochFp += fp
                epochTn += tn
                epochFn += fn
            elif args.task == 'r':
                preds, preLoss, regLoss, loss, sqLoss, absLoss, tstNums, apeLoss, posNums = res
                epochSqLoss += sqLoss
                epochAbsLoss += absLoss
                epochTstNum += tstNums
                epochApeLoss += apeLoss
                epochPosNums += posNums
            epochLoss += loss
            epochPreLoss += preLoss
            log('Step %d/%d: loss = %.2f, regLoss = %.2f         ' % (i, steps, loss, regLoss), save=False, oneline=True)
        ret = dict()
        ret['preLoss'] = epochPreLoss / steps
        if args.task == 'c':
            temSum = 0
            for i in range(args.offNum):
                ret['F1_%d' % i] = epochTp[i] * 2 / (epochTp[i] * 2 + epochFp[i] + epochFn[i])
                temSum += ret['F1_%d' % i]
            ret['macroF1'] = temSum / args.offNum  # average of per-category F1
            ret['microF1'] = np.sum(epochTp) * 2 / (np.sum(epochTp) * 2 + np.sum(epochFp) + np.sum(epochFn))  # F1 over pooled counts
        elif args.task == 'r':
            for i in range(args.offNum):
                ret['RMSE_%d' % i] = np.sqrt(epochSqLoss[i] / epochTstNum[i])
                ret['MAE_%d' % i] = epochAbsLoss[i] / epochTstNum[i]
                ret['MAPE_%d' % i] = epochApeLoss[i] / epochPosNums[i]
            ret['RMSE'] = np.sqrt(np.sum(epochSqLoss) / np.sum(epochTstNum))
            ret['MAE'] = np.sum(epochAbsLoss) / np.sum(epochTstNum)
            ret['MAPE'] = np.sum(epochApeLoss) / np.sum(epochPosNums)
        return ret

    def calcRes(self, preds, temTst, tstLocs):
        # hit-ratio / NDCG helper; unused in this pipeline (args.shoot is not defined in Params.py)
        hit = 0
        ndcg = 0
        for j in range(preds.shape[0]):
            predvals = list(zip(preds[j], tstLocs[j]))
            predvals.sort(key=lambda x: x[0], reverse=True)
            shoot = list(map(lambda x: x[1], predvals[:args.shoot]))
            if temTst[j] in shoot:
                hit += 1
                ndcg += np.reciprocal(np.log2(shoot.index(temTst[j]) + 2))
        return hit, ndcg
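    # Persistence: saveHistory pickles the metric curves to
    # History/<save_path>.his and writes a TensorFlow checkpoint under Models/
    # (the Models/checkpoint index above points at the default name "tem");
    # loadModel restores both from the same paths.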
    def saveHistory(self):
        if args.epoch == 0:
            return
        with open('History/' + args.save_path + '.his', 'wb') as fs:
            pickle.dump(self.metrics, fs)

        saver = tf.train.Saver()
        saver.save(self.sess, 'Models/' + args.save_path)
        log('Model Saved: %s' % args.save_path)

    def loadModel(self):
        saver = tf.train.Saver()
        saver.restore(self.sess, 'Models/' + args.load_model)
        with open('History/' + args.load_model + '.his', 'rb') as fs:
            self.metrics = pickle.load(fs)
        log('Model Loaded')

if __name__ == '__main__':
    logger.saveDefault = True
    config = tf.ConfigProto()
    config.gpu_options.allow_growth = True

    log('Start')
    handler = DataHandler()
    log('Load Data')

    with tf.Session(config=config) as sess:
        model = Model(sess, handler)
        model.run()
--------------------------------------------------------------------------------