├── .gitignore ├── requirements.txt ├── edge2vec.py ├── readme.md └── src ├── csv2tf_neg.py ├── deep_flags.py ├── deep_negative.py └── deep_pre.py /.gitignore: -------------------------------------------------------------------------------- 1 | .idea/ 2 | results/ 3 | -------------------------------------------------------------------------------- /requirements.txt: -------------------------------------------------------------------------------- 1 | configargparse 2 | numpy 3 | tensorflow 4 | -------------------------------------------------------------------------------- /edge2vec.py: -------------------------------------------------------------------------------- 1 | import os 2 | import sys 3 | from configargparse import ArgumentParser 4 | from src.deep_pre import DeepPre 5 | from src.csv2tf_neg import convert 6 | from src.deep_negative import pretrain 7 | 8 | 9 | def parse_arguments(): 10 | parser = ArgumentParser(description='Arguments For edge2vec') 11 | 12 | group = parser.add_argument_group('Base Configs') 13 | group.add_argument('-i', '--input', help='path to the input graph file', type=str, required=True) 14 | # group.add_argument('-o', '--output', help='path to the output embedding file', type=str, required=True) 15 | group.add_argument('-m', '--model', help='the output directory of model files', type=str, required=True) 16 | group.add_argument('-n', '--num', help='the maximum num of the node', type=int, required=True) 17 | group.add_argument('-s', '--sample', help='the num of negative samples', type=int, required=True) 18 | 19 | args = parser.parse_args() 20 | return args 21 | 22 | 23 | def main(): 24 | args = parse_arguments() 25 | sys.argv = sys.argv[:1] 26 | if not os.path.exists(args.model): 27 | os.makedirs(args.model) 28 | 29 | pre = DeepPre(args.input, args.model, args.num, args.num, args.sample) 30 | pre.read_data() 31 | pre.calculate() 32 | pre.write_csv() 33 | 34 | convert(args.model, args.num * 2) 35 | pretrain(args.num * 2, args.model) 36 | 37 | 38 | if __name__ == '__main__': 39 | main() 40 | -------------------------------------------------------------------------------- /readme.md: -------------------------------------------------------------------------------- 1 | [Edge2vec: Edge-based Social Network Embedding](http://ise.thss.tsinghua.edu.cn/~wangchaokun/edge2vec/tkdd_embedding_accepted.pdf) 2 | ==================================== 3 | 4 | **Edge2vec** is the first edge-based graph embedding method to map the edges in social networks directly to low-dimensional vectors. It is designed to preserve both the local and the global structure information of edges and the learned representation vectors can be applied to various tasks such as link prediction, social tie direction prediction and social tie sign prediction. 5 | 6 | [See more about Edge2vec.](http://ise.thss.tsinghua.edu.cn/~wangchaokun/edge2vec/edge2vec.html) 7 | 8 | -------------------------------------------------- 9 | 10 | The code is written in python3 using the `tensorflow` framework. Other libs can be found in the file `requirements.txt`. 11 | 12 | #### Usage 13 | To run edge2vec, open the terminal and input 14 | ```bash 15 | python3 edge2vec.py -i INPUT -m MODEL -n NUM -s SAMPLE 16 | ``` 17 | , where the parameters are: 18 | ``` 19 | -i: path to the input graph file (in "edge list" format) 20 | -m: the output directory of model files 21 | -n: the maximum num of the node 22 | -s: the num of negative samples 23 | ``` 24 | 25 | For example, you can run edge2vec on `Epinions` using 26 | 27 | ```bash 28 | python edge2vec.py -i Epinions-55K.graph -m results -n 1000 -s 500 29 | ``` 30 | 31 | The program will divide the input graph into two parts, `MODEL/train.txt` and `MODEL/test.txt`, and their embedding results can be found in `MODEL/train.log` and `MODEL/test.log`, respectively. You can use these to conduct downstream experiments, such as link prediction, sign prediction or tie direction prediction. 32 | 33 | #### Reference 34 | Chanping Wang, Chaokun Wang, Zheng Wang, Xiaojun Ye, and PhilipS. Yu. Edge2vec: Edge-based Social Network Embedding. ACM Transactions on Knowledge Discovery in Data (TKDD), 14(4):1-24, 2020. (Submitted June 2017; accepted March 2020) 35 | -------------------------------------------------------------------------------- /src/csv2tf_neg.py: -------------------------------------------------------------------------------- 1 | import os 2 | import tensorflow as tf 3 | import numpy 4 | 5 | # directory = "Epinions-500" 6 | # size = 1000 7 | 8 | 9 | def convert_to(directory, name, len): 10 | file_input = os.path.join(directory, name + '.csv') 11 | file_output = os.path.join(directory, name + '.tfrecords') 12 | 13 | f_input = open(file_input) 14 | print('Writing', file_output) 15 | writer = tf.python_io.TFRecordWriter(file_output) 16 | 17 | k = 0 18 | for line in f_input: 19 | values = line.split(",") 20 | fs = numpy.array(list(map(float, values[0:len])), dtype=numpy.float32) 21 | ms = numpy.array(list(map(int, values[len:2 * len])), dtype=numpy.uint8) 22 | example = tf.train.Example(features=tf.train.Features(feature={ 23 | 'features': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fs.tostring()])), 24 | 'mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[ms.tostring()]))})) 25 | writer.write(example.SerializeToString()) 26 | k = k + 1 27 | if k % 1000 == 0: 28 | print(k) 29 | writer.close() 30 | f_input.close() 31 | 32 | 33 | def convert_to2(directory, name, _len): 34 | file_input = os.path.join(directory, name + '.csv') 35 | file_output = os.path.join(directory, name + '.tfrecords') 36 | 37 | f_input = open(file_input) 38 | print('Writing', file_output) 39 | writer = tf.python_io.TFRecordWriter(file_output) 40 | 41 | k = 0 42 | for line in f_input: 43 | values = line.split(",") 44 | fs = [] 45 | ms = [] 46 | for i in range(7): 47 | fs.append(numpy.array(list(map(float, values[2 * i * _len:(2 * i + 1) * _len])), dtype=numpy.float32)) 48 | ms.append(numpy.array(list(map(int, values[(2 * i + 1) * _len:(2 * i + 2) * _len])), dtype=numpy.uint8)) 49 | example = tf.train.Example(features=tf.train.Features(feature={ 50 | 'example': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fs[0].tostring()])), 51 | 'example_mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[ms[0].tostring()])), 52 | 53 | 'positve': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fs[1].tostring()])), 54 | 'positive_mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[ms[1].tostring()])), 55 | 56 | 'negtive1': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fs[2].tostring()])), 57 | 'negtive1_mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[ms[2].tostring()])), 58 | 59 | 'negtive2': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fs[3].tostring()])), 60 | 'negtive2_mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[ms[3].tostring()])), 61 | 62 | 'negtive3': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fs[4].tostring()])), 63 | 'negtive3_mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[ms[4].tostring()])), 64 | 65 | 'negtive4': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fs[5].tostring()])), 66 | 'negtive4_mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[ms[5].tostring()])), 67 | 68 | 'negtive5': tf.train.Feature(bytes_list=tf.train.BytesList(value=[fs[6].tostring()])), 69 | 'negtive5_mask': tf.train.Feature(bytes_list=tf.train.BytesList(value=[ms[6].tostring()])) 70 | })) 71 | writer.write(example.SerializeToString()) 72 | k = k + 1 73 | if k % 1000 == 0: 74 | print(k) 75 | writer.close() 76 | f_input.close() 77 | 78 | 79 | def convert(directory, size): 80 | convert_to2(directory, "negative", size) 81 | # Convert to Examples and write the result to TFRecords. 82 | convert_to(directory, 'data', size) 83 | convert_to(directory, 'train', size) 84 | convert_to(directory, 'test', size) 85 | 86 | 87 | if __name__ == '__main__': 88 | tf.app.run() 89 | -------------------------------------------------------------------------------- /src/deep_flags.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | import os 3 | from os.path import join as pjoin 4 | 5 | import sys 6 | 7 | import tensorflow as tf 8 | 9 | 10 | # IMAGE_PIXELS = 1000 11 | # NUM_CLASSES = 10 12 | # directory = "Epinions-500" 13 | 14 | def setup_flags(image_pixels, directory): 15 | with open(os.path.join(directory, 'data.csv')) as fin: 16 | num_data = 0 17 | for _ in fin: 18 | num_data += 1 19 | with open(os.path.join(directory, 'train.txt')) as fin: 20 | num_train = 0 21 | for _ in fin: 22 | num_train += 1 23 | with open(os.path.join(directory, 'test.txt')) as fin: 24 | num_test = 0 25 | for _ in fin: 26 | num_test += 1 27 | 28 | def home_out(path): 29 | return pjoin(os.environ['HOME'], 'tmp', 'mnist', path) 30 | 31 | flags = tf.app.flags 32 | 33 | # Autoencoder Architecture Specific Flags 34 | flags.DEFINE_integer("num_hidden_layers", 3, "Number of hidden layers") 35 | 36 | flags.DEFINE_integer('hidden1_units', 512, 37 | 'Number of units in hidden layer 1.') 38 | flags.DEFINE_integer('hidden2_units', 128, 39 | 'Number of units in hidden layer 2.') 40 | flags.DEFINE_integer('hidden3_units', 64, 41 | 'Number of units in hidden layer 3.') 42 | flags.DEFINE_integer('num_data', num_data, 'Number of classes') 43 | flags.DEFINE_integer('num_train', num_train, 'Number of classes') 44 | flags.DEFINE_integer('num_test', num_test, 'Number of classes') 45 | flags.DEFINE_integer('num_finetune', 5000, 'Number of classes') 46 | 47 | flags.DEFINE_integer('image_pixels', image_pixels, 'Total number of pixels') 48 | flags.DEFINE_integer('num_classes', 2, 'Number of classes') 49 | 50 | flags.DEFINE_float('pre_layer1_learning_rate', 0.001, 51 | 'Initial learning rate.') 52 | flags.DEFINE_float('pre_layer2_learning_rate', 0.001, 53 | 'Initial learning rate.') 54 | flags.DEFINE_float('pre_layer3_learning_rate', 0.001, 55 | 'Initial learning rate.') 56 | 57 | flags.DEFINE_float('noise_1', 0.50, 'Rate at which to set pixels to 0') 58 | flags.DEFINE_float('noise_2', 0.50, 'Rate at which to set pixels to 0') 59 | flags.DEFINE_float('noise_3', 0.50, 'Rate at which to set pixels to 0') 60 | 61 | # Constants 62 | flags.DEFINE_integer('seed', 1234, 'Random seed') 63 | flags.DEFINE_integer('image_size', 28, 'Image square size') 64 | 65 | flags.DEFINE_integer('batch_size', 1000, 66 | 'Batch size. Must divide evenly into the dataset sizes.') 67 | 68 | flags.DEFINE_float('finetune_learning_rate', 0.001, 69 | 'Supervised initial learning rate.') 70 | flags.DEFINE_float('alpha', 8, 'alpha') 71 | 72 | flags.DEFINE_integer('pretraining_epochs', 10, 73 | "Number of training epochs for pretraining layers") 74 | flags.DEFINE_integer('combine_epochs', 50, 75 | "Number of training epochs for pretraining layers") 76 | flags.DEFINE_integer('finetuning_epochs', 200, 77 | "Number of training epochs for " 78 | "fine tuning supervised step") 79 | 80 | flags.DEFINE_float('zero_bound', 1.0e-9, 81 | 'Value to use as buffer to avoid ' 82 | 'numerical issues at 0') 83 | flags.DEFINE_float('one_bound', 1.0 - 1.0e-9, 84 | 'Value to use as buffer to avoid numerical issues at 1') 85 | 86 | flags.DEFINE_float('flush_secs', 120, 'Number of seconds to flush summaries') 87 | 88 | # Directories 89 | flags.DEFINE_string('data_dir', home_out('data'), 90 | 'Directory to put the training data.') 91 | 92 | flags.DEFINE_string('summary_dir', home_out('summaries'), 93 | 'Directory to put the summary data') 94 | 95 | flags.DEFINE_string('chkpt_dir', home_out('chkpts'), 96 | 'Directory to put the model checkpoints') 97 | flags.DEFINE_string('directory', directory, 98 | 'Directory to put the model checkpoints') 99 | # TensorBoard 100 | flags.DEFINE_boolean('no_browser', True, 101 | 'Whether to start browser for TensorBoard') 102 | 103 | # Python 104 | flags.DEFINE_string('python', sys.executable, 105 | 'Path to python executable') 106 | 107 | return flags.FLAGS 108 | -------------------------------------------------------------------------------- /src/deep_negative.py: -------------------------------------------------------------------------------- 1 | from __future__ import division 2 | from __future__ import print_function 3 | import os 4 | import tensorflow as tf 5 | from src.deep_flags import setup_flags 6 | 7 | 8 | class AutoEncoder(object): 9 | _weights_str = "weights{0}" 10 | _biases_str = "biases{0}" 11 | 12 | def __init__(self, shape, sess): 13 | self.__shape = shape # [input_dim,hidden1_dim,...,hidden_n_dim,output_dim] 14 | self.__num_hidden_layers = len(self.__shape) - 2 15 | 16 | self.__variables = {} 17 | self.__sess = sess 18 | 19 | self._setup_variables() 20 | 21 | @property 22 | def shape(self): 23 | return self.__shape 24 | 25 | @property 26 | def num_hidden_layers(self): 27 | return self.__num_hidden_layers 28 | 29 | @property 30 | def session(self): 31 | return self.__sess 32 | 33 | def __getitem__(self, item): 34 | return self.__variables[item] 35 | 36 | def __setitem__(self, key, value): 37 | self.__variables[key] = value 38 | 39 | def _setup_variables(self): 40 | with tf.name_scope("autoencoder_variables"): 41 | for i in range(self.__num_hidden_layers): 42 | # Train weights 43 | name_w = self._weights_str.format(i + 1) 44 | w_shape = (self.__shape[i], self.__shape[i + 1]) 45 | a = tf.multiply(4.0, tf.sqrt(6.0 / (w_shape[0] + w_shape[1]))) 46 | w_init = tf.random_uniform(w_shape, -1 * a, a) 47 | self[name_w] = tf.Variable(w_init, 48 | name=name_w, 49 | trainable=True) 50 | # Train biases 51 | name_b = self._biases_str.format(i + 1) 52 | b_shape = (self.__shape[i + 1],) 53 | b_init = tf.zeros(b_shape) 54 | self[name_b] = tf.Variable(b_init, trainable=True, name=name_b) 55 | 56 | if i < self.__num_hidden_layers: 57 | # Hidden layer fixed weights (after pretraining before fine tuning) 58 | self[name_w + "_fixed"] = tf.Variable(tf.identity(self[name_w]), 59 | name=name_w + "_fixed", 60 | trainable=False) 61 | 62 | # Hidden layer fixed biases 63 | self[name_b + "_fixed"] = tf.Variable(tf.identity(self[name_b]), 64 | name=name_b + "_fixed", 65 | trainable=False) 66 | 67 | # Pretraining output training biases 68 | name_b_out = self._biases_str.format(i + 1) + "_out" 69 | b_shape = (self.__shape[i],) 70 | b_init = tf.zeros(b_shape) 71 | self[name_b_out] = tf.Variable(b_init, 72 | trainable=True, 73 | name=name_b_out) 74 | 75 | def _w(self, n, suffix=""): 76 | return self[self._weights_str.format(n) + suffix] 77 | 78 | def _b(self, n, suffix=""): 79 | return self[self._biases_str.format(n) + suffix] 80 | 81 | def get_variables_to_init(self, n): 82 | assert n > 0 83 | assert n <= self.__num_hidden_layers 84 | 85 | vars_to_init = [self._w(n), self._b(n)] 86 | 87 | if n <= self.__num_hidden_layers: 88 | vars_to_init.append(self._b(n, "_out")) 89 | 90 | if 1 < n <= self.__num_hidden_layers: 91 | vars_to_init.append(self._w(n - 1, "_fixed")) 92 | vars_to_init.append(self._b(n - 1, "_fixed")) 93 | 94 | return vars_to_init 95 | 96 | @staticmethod 97 | def _activate(x, w, b, transpose_w=False): 98 | y = tf.sigmoid(tf.nn.bias_add(tf.matmul(x, w, transpose_b=transpose_w), b)) 99 | return y 100 | 101 | def pretrain_net(self, input_pl, n, is_target=False): 102 | assert n > 0 103 | assert n <= self.__num_hidden_layers 104 | 105 | last_output = input_pl 106 | for i in range(n - 1): 107 | w = self._w(i + 1, "_fixed") 108 | b = self._b(i + 1, "_fixed") 109 | 110 | last_output = self._activate(last_output, w, b) 111 | 112 | if is_target: 113 | return last_output 114 | 115 | # last_output = tf.nn.dropout(last_output, 0.8) 116 | last_output = self._activate(last_output, self._w(n), self._b(n)) 117 | 118 | out = self._activate(last_output, self._w(n), self._b(n, "_out"), 119 | transpose_w=True) 120 | out = tf.maximum(out, 1.e-9) 121 | out = tf.minimum(out, 1 - 1.e-9) 122 | return out 123 | 124 | def reconstuction_net(self, p_net, n): 125 | r_net = p_net 126 | for i in range(n - 1): 127 | j = n - 1 - i 128 | r_net = self._activate(r_net, self._w(j), self._b(j, "_out"), 129 | transpose_w=True) 130 | return r_net 131 | 132 | def transform_net(self, input_pl): 133 | last_output = input_pl 134 | for i in range(self.__num_hidden_layers): 135 | w = self._w(i + 1) 136 | b = self._b(i + 1) 137 | 138 | last_output = self._activate(last_output, w, b) 139 | last_output = tf.maximum(last_output, 1.e-9) 140 | last_output = tf.minimum(last_output, 1 - 1.e-9) 141 | 142 | return last_output 143 | 144 | def finetune_reconstruction_net(self, t_net): 145 | last_output = t_net 146 | for i in range(self.__num_hidden_layers): 147 | j = self.__num_hidden_layers - i 148 | w = self._w(j) 149 | b = self._b(j, "_out") 150 | 151 | last_output = self._activate(last_output, w, b, transpose_w=True) 152 | last_output = tf.maximum(last_output, 1.e-9) 153 | last_output = tf.minimum(last_output, 1 - 1.e-9) 154 | 155 | return last_output 156 | 157 | 158 | def read_my_file_format(filename_queue, n_input): 159 | reader = tf.TFRecordReader() 160 | _, record_string = reader.read(filename_queue) 161 | features = tf.parse_single_example( 162 | record_string, 163 | features={ 164 | 'features': tf.FixedLenFeature([], tf.string), 165 | 'mask': tf.FixedLenFeature([], tf.string), 166 | }) 167 | record = tf.decode_raw(features['features'], tf.float32) 168 | mask = tf.decode_raw(features['mask'], tf.uint8) 169 | mask = tf.cast(mask, tf.float32) 170 | record.set_shape([n_input]) 171 | mask.set_shape([n_input]) 172 | return record, mask 173 | 174 | 175 | def read_my_file_format2(filename_queue, n_input): 176 | reader = tf.TFRecordReader() 177 | _, record_string = reader.read(filename_queue) 178 | features = tf.parse_single_example( 179 | record_string, 180 | features={ 181 | 'example': tf.FixedLenFeature([], tf.string), 182 | 'positve': tf.FixedLenFeature([], tf.string), 183 | 'negtive1': tf.FixedLenFeature([], tf.string), 184 | 'negtive2': tf.FixedLenFeature([], tf.string), 185 | 'negtive3': tf.FixedLenFeature([], tf.string), 186 | 'negtive4': tf.FixedLenFeature([], tf.string), 187 | 'negtive5': tf.FixedLenFeature([], tf.string), 188 | 'example_mask': tf.FixedLenFeature([], tf.string), 189 | 'positive_mask': tf.FixedLenFeature([], tf.string), 190 | 'negtive1_mask': tf.FixedLenFeature([], tf.string), 191 | 'negtive2_mask': tf.FixedLenFeature([], tf.string), 192 | 'negtive3_mask': tf.FixedLenFeature([], tf.string), 193 | 'negtive4_mask': tf.FixedLenFeature([], tf.string), 194 | 'negtive5_mask': tf.FixedLenFeature([], tf.string), 195 | }) 196 | example = tf.decode_raw(features['example'], tf.float32) 197 | positve = tf.decode_raw(features['positve'], tf.float32) 198 | negtive1 = tf.decode_raw(features['negtive1'], tf.float32) 199 | negtive2 = tf.decode_raw(features['negtive2'], tf.float32) 200 | negtive3 = tf.decode_raw(features['negtive3'], tf.float32) 201 | negtive4 = tf.decode_raw(features['negtive4'], tf.float32) 202 | negtive5 = tf.decode_raw(features['negtive5'], tf.float32) 203 | 204 | example_mask = tf.decode_raw(features['example_mask'], tf.uint8) 205 | positve_mask = tf.decode_raw(features['positive_mask'], tf.uint8) 206 | negtive1_mask = tf.decode_raw(features['negtive1_mask'], tf.uint8) 207 | negtive2_mask = tf.decode_raw(features['negtive2_mask'], tf.uint8) 208 | negtive3_mask = tf.decode_raw(features['negtive3_mask'], tf.uint8) 209 | negtive4_mask = tf.decode_raw(features['negtive4_mask'], tf.uint8) 210 | negtive5_mask = tf.decode_raw(features['negtive5_mask'], tf.uint8) 211 | 212 | example_mask = tf.cast(example_mask, tf.float32) 213 | positve_mask = tf.cast(positve_mask, tf.float32) 214 | negtive1_mask = tf.cast(negtive1_mask, tf.float32) 215 | negtive2_mask = tf.cast(negtive2_mask, tf.float32) 216 | negtive3_mask = tf.cast(negtive3_mask, tf.float32) 217 | negtive4_mask = tf.cast(negtive4_mask, tf.float32) 218 | negtive5_mask = tf.cast(negtive5_mask, tf.float32) 219 | 220 | example.set_shape([n_input]) 221 | positve.set_shape([n_input]) 222 | negtive1.set_shape([n_input]) 223 | negtive2.set_shape([n_input]) 224 | negtive3.set_shape([n_input]) 225 | negtive4.set_shape([n_input]) 226 | negtive5.set_shape([n_input]) 227 | example_mask.set_shape([n_input]) 228 | positve_mask.set_shape([n_input]) 229 | negtive1_mask.set_shape([n_input]) 230 | negtive2_mask.set_shape([n_input]) 231 | negtive3_mask.set_shape([n_input]) 232 | negtive4_mask.set_shape([n_input]) 233 | negtive5_mask.set_shape([n_input]) 234 | return example, positve, negtive1, negtive2, negtive3, negtive4, negtive5, example_mask, positve_mask, negtive1_mask, negtive2_mask, negtive3_mask, negtive4_mask, negtive5_mask 235 | 236 | 237 | def input_pipeline(filenames, batch_size, n_input, num_epochs=None, infer_mode=False): 238 | filename_queue = tf.train.string_input_producer(filenames, num_epochs=num_epochs, shuffle=False) 239 | example = read_my_file_format(filename_queue, n_input) 240 | capacity = 5 * batch_size 241 | if (infer_mode): 242 | num_threads = 1 243 | else: 244 | num_threads = 8 245 | example_batch, mask_batch = tf.train.batch( 246 | example, batch_size=batch_size, capacity=capacity, num_threads=num_threads, 247 | allow_smaller_final_batch=infer_mode) 248 | return example_batch, mask_batch 249 | 250 | 251 | def input_pipeline2(filenames, batch_size, n_input): 252 | filename_queue = tf.train.string_input_producer(filenames, num_epochs=None, shuffle=False) 253 | example = read_my_file_format2(filename_queue, n_input) 254 | capacity = 5 * batch_size 255 | num_threads = 8 256 | example_batch = tf.train.batch( 257 | example, batch_size=batch_size, capacity=capacity, num_threads=num_threads) 258 | return example_batch 259 | 260 | 261 | def training(loss, learning_rate): 262 | # optimizer = tf.train.GradientDescentOptimizer(learning_rate = learning_rate) 263 | optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate) 264 | train_op = optimizer.minimize(loss) 265 | return train_op 266 | 267 | 268 | # def loss_x_entropy(output, target): 269 | # return tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(output, target)) 270 | 271 | def loss_rsme(x, reconstruction, mask=None): 272 | if mask is not None: 273 | return 0.5 * tf.reduce_sum(tf.pow(tf.multiply(tf.subtract(reconstruction, x), mask), 2.0)) 274 | return 0.5 * tf.reduce_sum(tf.pow(tf.subtract(reconstruction, x), 2.0)) 275 | 276 | 277 | def loss_negtive(x, y, positve): 278 | if positve is True: 279 | # return tf.reduce_sum(-tf.log(tf.sigmoid(tf.reduce_sum(tf.multiply(x, y), 1)))) 280 | y = tf.sigmoid(tf.reduce_sum(tf.multiply(x, y), 1)) 281 | return tf.reduce_sum(-tf.log( 282 | tf.clip_by_value(y, 1e-8, tf.reduce_max(y)) 283 | )) 284 | else: 285 | # return tf.reduce_sum(-tf.log(tf.sigmoid(tf.reduce_sum(-tf.multiply(x, y), 1)))) 286 | y = tf.sigmoid(tf.reduce_sum(-tf.multiply(x, y), 1)) 287 | return tf.reduce_sum(-tf.log( 288 | tf.clip_by_value(y, 1e-8, tf.reduce_max(y)) 289 | )) 290 | 291 | 292 | def output_file2(f, embedding, n_hidden, label): 293 | str_list = [str(label) + ' '] 294 | for i in range(n_hidden): 295 | str_list.append(str(i + 1) + ':' + str(embedding[i]) + ' ') 296 | f.write(''.join(str_list)) 297 | f.write('\n') 298 | 299 | 300 | def output_file(f, embedding, n_hidden, label): 301 | str_list = [] 302 | for i in range(n_hidden): 303 | str_list.append(str(embedding[i])) 304 | f.write(' '.join(str_list)) 305 | f.write('\n') 306 | 307 | 308 | def pretrain(image_pixels, directory): 309 | FLAGS = setup_flags(image_pixels, directory) 310 | with tf.Graph().as_default() as g: 311 | gpu_options = tf.GPUOptions(per_process_gpu_memory_fraction=0.45) 312 | 313 | sess = tf.Session(config=tf.ConfigProto(gpu_options=gpu_options)) 314 | 315 | directory = FLAGS.directory 316 | num_hidden = FLAGS.num_hidden_layers 317 | batch_size = FLAGS.batch_size 318 | num_data = int(FLAGS.num_data / batch_size) 319 | num_train = FLAGS.num_train 320 | num_test = FLAGS.num_test 321 | ae_hidden_shapes = [getattr(FLAGS, "hidden{0}_units".format(j + 1)) 322 | for j in range(num_hidden)] 323 | ae_shape = [FLAGS.image_pixels] + ae_hidden_shapes + [FLAGS.num_classes] 324 | combine_epochs = FLAGS.combine_epochs 325 | pretraining_epochs = FLAGS.pretraining_epochs 326 | 327 | finetuning_epochs = FLAGS.finetuning_epochs 328 | learning_rate = FLAGS.finetune_learning_rate 329 | alpha = FLAGS.alpha 330 | num_finetune = int(FLAGS.num_finetune / batch_size) 331 | 332 | ae = AutoEncoder(ae_shape, sess) 333 | 334 | learning_rates = {j: getattr(FLAGS, 335 | "pre_layer{0}_learning_rate".format(j + 1)) 336 | for j in range(num_hidden)} 337 | 338 | input_, mask = input_pipeline([os.path.join(directory, "data.tfrecords")], batch_size=batch_size, 339 | n_input=FLAGS.image_pixels) 340 | record = input_pipeline2( 341 | [os.path.join(directory, "negative.tfrecords")], batch_size=batch_size, n_input=FLAGS.image_pixels) 342 | input_train, _ = input_pipeline([os.path.join(directory, "train.tfrecords")], batch_size=1, 343 | n_input=FLAGS.image_pixels) 344 | input_test, _ = input_pipeline([os.path.join(directory, "test.tfrecords")], batch_size=1, 345 | n_input=FLAGS.image_pixels) 346 | 347 | coord = tf.train.Coordinator() 348 | threads = tf.train.start_queue_runners(sess=sess, coord=coord) 349 | try: 350 | for i in range(len(ae_shape) - 2): 351 | n = i + 1 352 | with tf.variable_scope("pretrain_{0}".format(n)): 353 | target_ = input_ 354 | layer = ae.pretrain_net(input_, n) 355 | 356 | with tf.name_scope("target"): 357 | target_for_loss = ae.pretrain_net(target_, n, is_target=True) 358 | if n == 1: 359 | loss = loss_rsme(layer, target_for_loss, mask) 360 | else: 361 | loss = loss_rsme(layer, target_for_loss) 362 | 363 | reconstruction = ae.reconstuction_net(layer, n) 364 | global_loss = loss_rsme(reconstruction, input_, mask) 365 | 366 | variables_before = tf.all_variables() 367 | train_op = training(loss, learning_rates[i]) 368 | variables_after = tf.all_variables() 369 | 370 | vars_to_init = ae.get_variables_to_init(n) 371 | vars_to_init.extend([variable for variable in variables_after if variable not in variables_before]) 372 | sess.run(tf.initialize_variables(vars_to_init)) 373 | 374 | # sess.run(tf.initialize_all_variables()) 375 | print("\n\n") 376 | print("| Training Step | Local Loss | Global Loss | Layer | Epoch |") 377 | print("|---------------|---------------|---------------|---------|----------|") 378 | 379 | # for step in xrange(11): 380 | for step in range(pretraining_epochs * num_data): 381 | 382 | _, loss_value, global_loss_value = sess.run([train_op, loss, global_loss]) 383 | 384 | if step % 10 == 0: 385 | output = "| {0:>13} | {1:13.4f} | {2:13.4f} | Layer {3} | Epoch {4} |" \ 386 | .format(step, loss_value, global_loss_value, n, step // num_data + 1) 387 | 388 | print(output) 389 | 390 | target_ = input_ 391 | layer = ae.finetune_reconstruction_net(ae.transform_net(input_)) 392 | combine_loss = loss_rsme(layer, target_, mask) 393 | 394 | variables_before = tf.all_variables() 395 | combine_op = training(combine_loss, learning_rates[0]) 396 | variables_after = tf.all_variables() 397 | 398 | sess.run( 399 | tf.initialize_variables([variable for variable in variables_after if variable not in variables_before])) 400 | print("\n\n") 401 | print("| Combine Step | Local Loss | Epoch |") 402 | print("|-----------------|---------------|----------|") 403 | for step in range(combine_epochs * num_data): 404 | 405 | _, loss_value = sess.run([combine_op, combine_loss]) 406 | 407 | if step % 10 == 0: 408 | output = "| {0:>13} | {1:13.4f} | Epoch {2} |".format(step, loss_value, step // num_data + 1) 409 | 410 | print(output) 411 | 412 | hidden = [] 413 | reconstruction = [] 414 | for i in range(7): 415 | hidden.append(ae.transform_net(record[i])) 416 | reconstruction.append(ae.finetune_reconstruction_net(hidden[i])) 417 | 418 | negtive_loss = loss_negtive(hidden[0], hidden[1], True) 419 | for k in range(5): 420 | # return tf.reduce_sum(-tf.log(tf.sigmoid(tf.reduce_sum(-tf.multiply(x, y), 1)))) 421 | negtive_loss += loss_negtive(hidden[0], hidden[k + 2], False) 422 | 423 | # reconstruction_loss = loss_rsme(record[0], reconstruction[0]) 424 | reconstruction_loss = loss_rsme(record[0], reconstruction[0], record[7]) 425 | for k in range(6): 426 | reconstruction_loss += loss_rsme(record[k + 1], reconstruction[k + 1], record[k + 8]) 427 | 428 | total_loss = alpha * negtive_loss + reconstruction_loss 429 | # total_loss = reconstruction_loss 430 | variables_before = tf.all_variables() 431 | finetuning_op = training(total_loss, learning_rate) 432 | variables_after = tf.all_variables() 433 | sess.run(tf.initialize_variables([variable 434 | for variable in variables_after if variable not in variables_before])) 435 | print("\n\n") 436 | print("| Finetuning Step | negative Loss | reconst. Loss | total Loss | Epoch |") 437 | print("|-----------------|---------------|---------------|---------------|----------|") 438 | for step in range(finetuning_epochs * num_finetune): 439 | 440 | _, n_loss, r_loss, t_loss, = sess.run([finetuning_op, negtive_loss, 441 | reconstruction_loss, total_loss]) 442 | 443 | if step % 10 == 0: 444 | output = "| {0:>13} | {1:13.4f} | {2:13.4f} | {3:13.4f} | Epoch {4} |".format( 445 | step, n_loss, r_loss, t_loss, step // num_finetune + 1) 446 | 447 | print(output) 448 | 449 | with open(os.path.join(directory, "train.log"), 'w') as f: 450 | output_ = ae.transform_net(input_train) 451 | for i in range(num_train): 452 | embedding = sess.run(output_) 453 | output_file(f, embedding[0], getattr(FLAGS, "hidden{0}_units".format(num_hidden)), 1 - i % 2) 454 | with open(os.path.join(directory, "test.log"), 'w') as f: 455 | output_ = ae.transform_net(input_test) 456 | for i in range(num_test): 457 | embedding = sess.run(output_) 458 | output_file(f, embedding[0], getattr(FLAGS, "hidden{0}_units".format(num_hidden)), 1 - i % 2) 459 | 460 | except tf.errors.OutOfRangeError: 461 | print('Done training -- epoch limit reached') 462 | finally: 463 | # When done, ask the threads to stop. 464 | coord.request_stop() 465 | 466 | # # Wait for threads to finish. 467 | coord.join(threads) 468 | sess.close() 469 | return ae 470 | 471 | # if __name__ == '__main__': 472 | # ae = pretrain() 473 | -------------------------------------------------------------------------------- /src/deep_pre.py: -------------------------------------------------------------------------------- 1 | import time 2 | import math 3 | import random 4 | import os 5 | # from multiprocessing import Process 6 | from threading import Thread 7 | 8 | 9 | class DeepPre: 10 | in_path: str = '' 11 | out_path: str = '' 12 | limit: int = 0 13 | real_num: int = 0 14 | friends = [] # List> 15 | friends2 = [] # List> 16 | friends3 = [] # List> 17 | friends4 = [] # List> 18 | friends2matrix = [] # boolean[][] 19 | all_edge_start = [] # List 20 | all_edge_end = [] # List 21 | edge_start = [] # List 22 | edge_end = [] # List 23 | edge_vector1 = [] # List 24 | edge_vector2 = [] # List 25 | thread_num: int = 0 26 | running_threads: int = 0 27 | max_size: int = 0 28 | friend_weight: float = 1 29 | friend2_weight: float = 0.5 30 | friend3_weight: float = 0.25 31 | friend4_weight: float = 0.125 32 | negative_sample: int = 0 33 | selected = {} # HashMap 34 | 35 | def __init__(self, file_in, file_out, limit, max_size, negative_sample): 36 | self.in_path = file_in 37 | self.out_path = file_out 38 | self.limit = limit 39 | self.max_size = max_size 40 | self.thread_num = 1 # thread_num 41 | self.negative_sample = negative_sample 42 | 43 | def read_data(self): 44 | start_time = time.time() 45 | with open(self.in_path) as fin: 46 | lines_g = fin.readlines() 47 | max_node_index = 0 48 | for line in lines_g: 49 | components = line.split() 50 | start = int(components[0]) 51 | end = int(components[1]) 52 | 53 | if start > max_node_index: 54 | max_node_index = start 55 | if end > max_node_index: 56 | max_node_index = end 57 | self.real_num = self.limit if max_node_index > self.limit else max_node_index 58 | self.friends = [{} for _ in range(self.real_num + 1)] 59 | self.friends2matrix = [[False for _ in range(self.real_num + 1)] for _ in range(self.real_num + 1)] 60 | self.all_edge_start = [] 61 | self.all_edge_end = [] 62 | 63 | self.edge_start = [] 64 | self.edge_end = [] 65 | self.edge_vector1 = [] 66 | self.edge_vector2 = [] 67 | for line in lines_g: 68 | components = line.split() 69 | start = int(components[0]) 70 | end = int(components[1]) 71 | if start <= self.real_num and end <= self.real_num: 72 | self.friends[start][end] = 1 73 | self.all_edge_start.append(start) 74 | self.all_edge_end.append(end) 75 | 76 | self.friends[end][start] = 1 77 | self.all_edge_start.append(end) 78 | self.all_edge_end.append(start) 79 | 80 | self.edge_start.append(start) 81 | self.edge_end.append(end) 82 | self.edge_vector1.append('') 83 | self.edge_vector2.append('') 84 | 85 | end_time = time.time() 86 | print('reading:', (end_time - start_time) / 1000) 87 | print('real num:', self.real_num) 88 | 89 | def read_data_link(self): 90 | start_time = time.time() 91 | with open(self.in_path + 'limitLink') as fin: 92 | lines_f = fin.readlines() 93 | max_node_index = 0 94 | for line in lines_f: 95 | components = line.split('\t') 96 | start = int(components[0]) 97 | end = int(components[1]) 98 | 99 | if start > max_node_index: 100 | max_node_index = start 101 | if end > max_node_index: 102 | max_node_index = end 103 | 104 | self.real_num = self.limit if max_node_index > self.limit else max_node_index 105 | self.friends = [{} for _ in range(self.real_num + 1)] 106 | self.friends2matrix = [[False for _ in range(self.real_num + 1)] for _ in range(self.real_num + 1)] 107 | self.all_edge_start = [] 108 | self.all_edge_end = [] 109 | 110 | self.edge_start = [] 111 | self.edge_end = [] 112 | self.edge_vector1 = [] 113 | self.edge_vector2 = [] 114 | for line in lines_f: 115 | components = line.split('\t') 116 | start = int(components[0]) 117 | end = int(components[1]) 118 | if start <= self.real_num and end <= self.real_num: 119 | self.friends[start][end] = 1 120 | self.all_edge_start.append(start) 121 | self.all_edge_end.append(end) 122 | 123 | end_time = time.time() 124 | print('reading:', (end_time - start_time) / 1000) 125 | print('real num:', self.real_num) 126 | 127 | def read_data_sign(self): 128 | start_time = time.time() 129 | with open(self.in_path + 'edgelist') as fin: 130 | lines_f = fin.readlines() 131 | max_node_index = 0 132 | for line in lines_f: 133 | components = line.split('\t') 134 | start = int(components[0]) 135 | end = int(components[1]) 136 | 137 | if start > max_node_index: 138 | max_node_index = start 139 | if end > max_node_index: 140 | max_node_index = end 141 | 142 | self.real_num = self.limit if max_node_index > self.limit else max_node_index 143 | self.friends = [{} for _ in range(self.real_num + 1)] 144 | self.friends2matrix = [[False for _ in range(self.real_num + 1)] for _ in range(self.real_num + 1)] 145 | self.all_edge_start = [] 146 | self.all_edge_end = [] 147 | 148 | self.edge_start = [] 149 | self.edge_end = [] 150 | self.edge_vector1 = [] 151 | self.edge_vector2 = [] 152 | for line in lines_f: 153 | components = line.split('\t') 154 | start = int(components[0]) 155 | end = int(components[1]) 156 | if start <= self.real_num and end <= self.real_num: 157 | self.friends[start][end] = 1 158 | self.all_edge_start.append(start) 159 | self.all_edge_end.append(end) 160 | 161 | end_time = time.time() 162 | print('reading:', (end_time - start_time) / 1000) 163 | print('real num:', self.real_num) 164 | 165 | def calculate(self): 166 | start_time = time.time() 167 | threads = [] 168 | step = (self.real_num + 1) // self.thread_num 169 | for i in range(self.thread_num - 1): 170 | start = i * step 171 | end = (i + 1) * step 172 | threads.append(Friends2Thread(self, start, end, i)) 173 | threads.append(Friends2Thread(self, (self.thread_num - 1) * step, self.real_num + 1, self.thread_num - 1)) 174 | self.running_threads = self.thread_num 175 | for thread in threads: 176 | thread.start() 177 | for thread in threads: 178 | thread.join() 179 | 180 | self.friends2 = [] 181 | for i in range(self.real_num + 1): 182 | self.friends2.append({}) 183 | for i in range(self.real_num + 1): 184 | for j in range(self.real_num + 1): 185 | if self.friends2matrix[i][j]: 186 | self.friends2[i][j] = 1 187 | 188 | for i in range(self.real_num + 1): 189 | for j in range(self.real_num + 1): 190 | self.friends2matrix[i][j] = False 191 | 192 | threads = [] 193 | for i in range(self.thread_num - 1): 194 | start = i * step 195 | end = (i + 1) * step 196 | threads.append(Friends3Thread(self, start, end, i)) 197 | threads.append(Friends3Thread(self, (self.thread_num - 1) * step, self.real_num + 1, self.thread_num - 1)) 198 | self.running_threads = self.thread_num 199 | for thread in threads: 200 | thread.start() 201 | for thread in threads: 202 | thread.join() 203 | 204 | self.friends3 = [] 205 | for i in range(self.real_num + 1): 206 | self.friends3.append({}) 207 | for i in range(self.real_num + 1): 208 | for j in range(self.real_num + 1): 209 | if self.friends2matrix[i][j]: 210 | self.friends3[i][j] = 1 211 | 212 | for i in range(self.real_num + 1): 213 | for j in range(self.real_num + 1): 214 | self.friends2matrix[i][j] = False 215 | threads = [] 216 | for i in range(self.thread_num - 1): 217 | start = i * step 218 | end = (i + 1) * step 219 | threads.append(Friends4Thread(self, start, end, i)) 220 | threads.append(Friends4Thread(self, (self.thread_num - 1) * step, self.real_num + 1, self.thread_num - 1)) 221 | self.running_threads = self.thread_num 222 | for thread in threads: 223 | thread.start() 224 | for thread in threads: 225 | thread.join() 226 | 227 | self.friends4 = [] 228 | for i in range(self.real_num + 1): 229 | self.friends4.append({}) 230 | for i in range(self.real_num + 1): 231 | for j in range(self.real_num + 1): 232 | if self.friends2matrix[i][j]: 233 | self.friends4[i][j] = 1 234 | 235 | _sum = [] 236 | for i in range(self.real_num + 1): 237 | _sum.append(len(self.friends[i]) * 4 + len(self.friends2[i]) * 2 + len(self.friends3[i])) 238 | copy = _sum.copy() 239 | copy.sort() 240 | threshold = copy[self.real_num + 1 - self.max_size] 241 | self.selected = {} 242 | count = 0 243 | for i in range(self.real_num + 1): 244 | if _sum[i] >= threshold: 245 | self.selected[i] = count 246 | count += 1 247 | if count == self.max_size: 248 | break 249 | 250 | end_time = time.time() 251 | print('calculating:', end_time - start_time) 252 | 253 | def write_data(self): 254 | start_time = time.time() 255 | threads = [] 256 | step = len(self.edge_start) // self.thread_num 257 | for i in range(self.thread_num - 1): 258 | start = i * step 259 | end = (i + 1) * step 260 | threads.append(WriteThread(self, start, end, i)) 261 | threads.append(WriteThread(self, (self.thread_num - 1) * step, len(self.edge_start), self.thread_num - 1)) 262 | self.running_threads = self.thread_num 263 | for thread in threads: 264 | thread.start() 265 | for thread in threads: 266 | thread.join() 267 | end_time = time.time() 268 | print('writing:', end_time - start_time) 269 | 270 | def write_single(self): 271 | writer1 = open(self.out_path + 'trainS.txt', 'w') 272 | writer2 = open(self.out_path + 'testS.txt', 'w') 273 | for i in range(len(self.edge_start)): 274 | if random.random() < 0.95: 275 | continue 276 | start_node = self.edge_start[i] 277 | end_node = self.edge_end[i] 278 | sb = '1 ' 279 | attributes = {} 280 | 281 | for j in self.friends[start_node]: 282 | if j in self.selected: 283 | attributes[self.selected[j] + 1] = self.friend_weight 284 | for j in self.friends2[start_node]: 285 | if j in self.selected: 286 | attributes[self.selected[j] + 1] = self.friend2_weight 287 | for j in self.friends3[start_node]: 288 | if j in self.selected: 289 | attributes[self.selected[j] + 1] = self.friend3_weight 290 | for j in self.friends[end_node]: 291 | if j in self.selected: 292 | attributes[self.selected[j] + self.max_size + 1] = self.friend_weight 293 | for j in self.friends2[end_node]: 294 | if j in self.selected: 295 | attributes[self.selected[j] + self.max_size + 1] = self.friend2_weight 296 | for j in self.friends3[end_node]: 297 | if j in self.selected: 298 | attributes[self.selected[j] + self.max_size + 1] = self.friend3_weight 299 | 300 | attribute_list = list(attributes.items()) 301 | attribute_list.sort(key=lambda x: x[0]) 302 | for k, v in attribute_list: 303 | sb += str(k) + ':' + str(v) + ' ' 304 | sb += '\n' 305 | 306 | attributes2 = {} 307 | start_node = self.edge_end[i] 308 | end_node = self.edge_start[i] 309 | sb2 = '2 ' 310 | 311 | for j in self.friends[start_node]: 312 | if j in self.selected: 313 | attributes2[self.selected[j] + 1] = self.friend_weight 314 | for j in self.friends2[start_node]: 315 | if j in self.selected: 316 | attributes2[self.selected[j] + 1] = self.friend2_weight 317 | for j in self.friends3[start_node]: 318 | if j in self.selected: 319 | attributes2[self.selected[j] + 1] = self.friend3_weight 320 | for j in self.friends[end_node]: 321 | if j in self.selected: 322 | attributes2[self.selected[j] + self.max_size + 1] = self.friend_weight 323 | for j in self.friends2[end_node]: 324 | if j in self.selected: 325 | attributes2[self.selected[j] + self.max_size + 1] = self.friend2_weight 326 | for j in self.friends3[end_node]: 327 | if j in self.selected: 328 | attributes2[self.selected[j] + self.max_size + 1] = self.friend3_weight 329 | 330 | attribute_list2 = list(attributes.items()) 331 | attribute_list2.sort(key=lambda x: x[0]) 332 | for k, v in attribute_list2: 333 | sb2 += str(k) + ':' + str(v) + ' ' 334 | sb2 += '\n' 335 | 336 | if random.random() < 0.8: 337 | writer = writer1 338 | else: 339 | writer = writer2 340 | writer.write(sb) 341 | writer.write(sb2) 342 | 343 | writer1.close() 344 | writer2.close() 345 | 346 | def write_csv(self): 347 | writer = open(os.path.join(self.out_path, 'data.csv'), 'w') 348 | writer_train = open(os.path.join(self.out_path, 'train.csv'), 'w') 349 | writer_test = open(os.path.join(self.out_path, 'test.csv'), 'w') 350 | writer_negative = open(os.path.join(self.out_path, 'negative.csv'), 'w') 351 | writer_train_list = open(os.path.join(self.out_path, 'train.txt'), 'w') 352 | writer_test_list = open(os.path.join(self.out_path, 'test.txt'), 'w') 353 | writer_negative_list = open(os.path.join(self.out_path, 'negative.txt'), 'w') 354 | 355 | for i in range(len(self.all_edge_start)): 356 | start = self.all_edge_start[i] 357 | end = self.all_edge_end[i] 358 | writer.write(self.csv_string(start, end) + '\n') 359 | 360 | for i in range(len(self.edge_start)): 361 | start = self.edge_start[i] 362 | end = self.edge_end[i] 363 | 364 | string1 = self.csv_string(start, end) 365 | string2 = self.csv_string(end, start) 366 | 367 | if random.random() < 0.2: 368 | writer_train.write(string1 + '\n') 369 | writer_train.write(string2 + '\n') 370 | writer_train_list.write(str(start) + ' ' + str(end) + '\n') 371 | else: 372 | writer_test.write(string1 + '\n') 373 | writer_test.write(string2 + '\n') 374 | writer_test_list.write(str(start) + ' ' + str(end) + '\n') 375 | 376 | edge_num = len(self.all_edge_start) 377 | for i in range(self.negative_sample): 378 | sb = '' 379 | sb2 = '' 380 | example = random.randint(0, edge_num - 1) 381 | start = self.all_edge_start[example] 382 | end = self.all_edge_end[example] 383 | sb += self.csv_string(start, end) + ',' 384 | sb2 += str(start) + ',' + str(end) + ' ' 385 | start_friend_num = len(self.friends[start]) 386 | end_friend_num = len(self.friends[end]) 387 | positive = random.randint(0, start_friend_num + end_friend_num - 1) 388 | if positive < start_friend_num: 389 | positive_start = start 390 | positive_end = list(self.friends[start].keys())[positive] 391 | else: 392 | positive_end = end 393 | positive_start = list(self.friends[end].keys())[positive - start_friend_num] 394 | 395 | sb += self.csv_string(positive_start, positive_end) + ',' 396 | sb2 += str(positive_start) + ',' + str(positive_end) + ' ' 397 | negative = 5 398 | while negative > 0: 399 | neg = random.randint(0, edge_num - 1) 400 | neg_start = self.all_edge_start[neg] 401 | neg_end = self.all_edge_end[neg] 402 | 403 | if neg_start == start or neg_start == end or neg_end == start or neg_end == end: 404 | continue 405 | sb += self.csv_string(neg_start, neg_end) 406 | sb2 += str(neg_start) + ',' + str(neg_end) + ' ' 407 | if negative > 1: 408 | sb += ',' 409 | negative -= 1 410 | writer_negative.write(sb + '\n') 411 | writer_negative_list.write(sb2 + '\n') 412 | 413 | writer.close() 414 | writer_train.close() 415 | writer_test.close() 416 | writer_train_list.close() 417 | writer_test_list.close() 418 | writer_negative.close() 419 | writer_negative_list.close() 420 | 421 | def write_csv_train_test(self, new_out, rate): 422 | writer_train = open(new_out + 'train.csv') 423 | writer_test = open(new_out + 'test.csv') 424 | writer_train_list = open(new_out + 'train.txt') 425 | writer_test_list = open(new_out + 'test.txt') 426 | 427 | for i in range(len(self.edge_start)): 428 | start = self.edge_start[i] 429 | end = self.edge_end[i] 430 | string1 = self.csv_string(start, end) 431 | string2 = self.csv_string(end, start) 432 | 433 | if random.random() < rate: 434 | writer_train.write(string1 + '\n') 435 | writer_train.write(string2 + '\n') 436 | writer_train_list.write(str(start) + ' ' + str(end) + '\n') 437 | else: 438 | writer_test.write(string1 + '\n') 439 | writer_test.write(string2 + '\n') 440 | writer_test_list.write(str(start) + ' ' + str(end) + '\n') 441 | 442 | writer_train.close() 443 | writer_test.close() 444 | writer_train_list.close() 445 | writer_test_list.close() 446 | 447 | def write_csv_link_train_test(self, new_out, rate): 448 | assert new_out is not None and rate is not None 449 | writer = open(self.out_path + 'data.csv', 'w') 450 | writer_train = open(self.out_path + 'train-0.20.csv', 'w') 451 | writer_test = open(self.out_path + 'test-0.20.csv', 'w') 452 | writer_negative = open(self.out_path + 'negative.csv', 'w') 453 | writer_negative_list = open(self.out_path + 'negative.txt', 'w') 454 | 455 | with open(self.in_path + '/trainLink-0.2.txt') as fin: 456 | lines_train = fin.readlines() 457 | with open(self.in_path + '/testLink-0.2.txt') as fin: 458 | lines_test = fin.readlines() 459 | 460 | for i in range(len(self.all_edge_start)): 461 | start = self.all_edge_start[i] 462 | end = self.all_edge_end[i] 463 | writer.write(self.csv_string(start, end) + '\n') 464 | 465 | for line in lines_train: 466 | components = line.split() 467 | start = int(components[0]) 468 | end = int(components[1]) 469 | 470 | string1 = self.csv_string(start, end) 471 | writer_train.write(string1 + '\n') 472 | 473 | for line in lines_test: 474 | components = line.split() 475 | start = int(components[0]) 476 | end = int(components[1]) 477 | 478 | string1 = self.csv_string(start, end) 479 | writer_test.write(string1 + '\n') 480 | 481 | edge_num = len(self.all_edge_start) 482 | for i in range(self.negative_sample): 483 | sb = '' 484 | sb2 = '' 485 | example = random.randint(0, edge_num - 1) 486 | start = self.all_edge_start[example] 487 | end = self.all_edge_end[example] 488 | sb += self.csv_string(start, end) + ',' 489 | sb2 += str(start) + ',' + str(end) + ' ' 490 | start_friend_num = len(self.friends[start]) 491 | end_friend_num = len(self.friends[end]) 492 | positive = random.randint(0, start_friend_num + end_friend_num - 1) 493 | if positive < start_friend_num: 494 | positive_start = start 495 | positive_end = list(self.friends[start].keys())[positive] 496 | else: 497 | positive_end = end 498 | positive_start = list(self.friends[end].keys())[positive - start_friend_num] 499 | 500 | sb += self.csv_string(positive_start, positive_end) + ',' 501 | sb2 += str(positive_start) + ',' + str(positive_end) + ' ' 502 | negative = 5 503 | while negative > 0: 504 | neg = random.randint(0, edge_num - 1) 505 | neg_start = self.all_edge_start[neg] 506 | neg_end = self.all_edge_end[neg] 507 | 508 | if neg_start == start or neg_start == end or neg_end == start or neg_end == end: 509 | continue 510 | sb += self.csv_string(neg_start, neg_end) 511 | sb2 += str(neg_start) + ',' + str(neg_end) + ' ' 512 | if negative > 1: 513 | sb += ',' 514 | negative -= 1 515 | writer_negative.write(sb + '\n') 516 | writer_negative_list.write(sb2 + '\n') 517 | 518 | writer.close() 519 | writer_train.close() 520 | writer_test.close() 521 | writer_negative.close() 522 | writer_negative_list.close() 523 | 524 | def write_csv_link_train_test2(self, suffix): 525 | writer_train = open(self.out_path + 'train-' + suffix + '.csv', 'w') 526 | writer_test = open(self.out_path + 'test-' + suffix + '.csv', 'w') 527 | with open(self.in_path + '/trainLink-' + suffix + '.txt') as fin: 528 | lines_train = fin.readlines() 529 | with open(self.in_path + '/testLink-' + suffix + '.txt') as fin: 530 | lines_test = fin.readlines() 531 | 532 | for line in lines_train: 533 | components = line.split() 534 | start = int(components[0]) 535 | end = int(components[1]) 536 | 537 | string1 = self.csv_string(start, end) 538 | writer_train.write(string1 + '\n') 539 | 540 | for line in lines_test: 541 | components = line.split() 542 | start = int(components[0]) 543 | end = int(components[1]) 544 | 545 | string1 = self.csv_string(start, end) 546 | writer_test.write(string1 + '\n') 547 | 548 | writer_train.close() 549 | writer_test.close() 550 | 551 | def write_csv_sign(self): 552 | writer = open(self.out_path + 'data.csv', 'w') 553 | writer_train = open(self.out_path + 'train-0.20.csv', 'w') 554 | writer_test = open(self.out_path + 'test-0.20.csv', 'w') 555 | writer_negative = open(self.out_path + 'negative.csv', 'w') 556 | writer_negative_list = open(self.out_path + 'negative.txt', 'w') 557 | 558 | with open(self.in_path + '/trainLink-0.2') as fin: 559 | lines_train = fin.readlines() 560 | with open(self.in_path + '/testLink-0.2') as fin: 561 | lines_test = fin.readlines() 562 | 563 | for i in range(len(self.all_edge_start)): 564 | start = self.all_edge_start[i] 565 | end = self.all_edge_end[i] 566 | writer.write(self.csv_string(start, end) + '\n') 567 | 568 | for line in lines_train: 569 | components = line.split() 570 | start = int(components[0]) 571 | end = int(components[1]) 572 | if start <= self.limit and end <= self.limit: 573 | string1 = self.csv_string(start, end) 574 | writer_train.write(string1 + '\n') 575 | 576 | for line in lines_test: 577 | components = line.split() 578 | start = int(components[0]) 579 | end = int(components[1]) 580 | if start <= self.limit and end <= self.limit: 581 | string1 = self.csv_string(start, end) 582 | writer_test.write(string1 + '\n') 583 | 584 | edge_num = len(self.all_edge_start) 585 | for i in range(self.negative_sample): 586 | sb = '' 587 | sb2 = '' 588 | example = random.randint(0, edge_num - 1) 589 | start = self.all_edge_start[example] 590 | end = self.all_edge_end[example] 591 | sb += self.csv_string(start, end) + ',' 592 | sb2 += str(start) + ',' + str(end) + ' ' 593 | start_friend_num = len(self.friends[start]) 594 | end_friend_num = len(self.friends[end]) 595 | positive = random.randint(0, start_friend_num + end_friend_num - 1) 596 | if positive < start_friend_num: 597 | positive_start = start 598 | positive_end = list(self.friends[start].keys())[positive] 599 | else: 600 | positive_end = end 601 | positive_start = list(self.friends[end].keys())[positive - start_friend_num] 602 | 603 | sb += self.csv_string(positive_start, positive_end) + ',' 604 | sb2 += str(positive_start) + ',' + str(positive_end) + ' ' 605 | negative = 5 606 | while negative > 0: 607 | neg = random.randint(0, edge_num - 1) 608 | neg_start = self.all_edge_start[neg] 609 | neg_end = self.all_edge_end[neg] 610 | 611 | if neg_start == start or neg_start == end or neg_end == start or neg_end == end: 612 | continue 613 | sb += self.csv_string(neg_start, neg_end) 614 | sb2 += str(neg_start) + ',' + str(neg_end) + ' ' 615 | if negative > 1: 616 | sb += ',' 617 | negative -= 1 618 | writer_negative.write(sb + '\n') 619 | writer_negative_list.write(sb2 + '\n') 620 | 621 | writer.close() 622 | writer_train.close() 623 | writer_test.close() 624 | writer_negative.close() 625 | writer_negative_list.close() 626 | 627 | def write_csv_sign2(self, suffix): 628 | writer_train = open(self.out_path + 'train-' + suffix + '.csv', 'w') 629 | writer_test = open(self.out_path + 'test-' + suffix + '.csv', 'w') 630 | 631 | with open(self.in_path + '/train-' + suffix) as fin: 632 | lines_train = fin.readlines() 633 | with open(self.in_path + '/test-' + suffix) as fin: 634 | lines_test = fin.readlines() 635 | 636 | for line in lines_train: 637 | components = line.split('\t') 638 | start = int(components[0]) 639 | end = int(components[1]) 640 | if start <= self.limit and end <= self.limit: 641 | string1 = self.csv_string(start, end) 642 | writer_train.write(string1 + '\n') 643 | 644 | for line in lines_test: 645 | components = line.split() 646 | start = int(components[0]) 647 | end = int(components[1]) 648 | if start <= self.limit and end <= self.limit: 649 | string1 = self.csv_string(start, end) 650 | writer_test.write(string1 + '\n') 651 | 652 | writer_train.close() 653 | writer_test.close() 654 | 655 | def writer_csv_pro(self, pro_sample): 656 | writer_local_start_csv = open(self.out_path + 'localStart.csv', 'w') 657 | writer_local_end_csv = open(self.out_path + 'localEnd.csv', 'w') 658 | writer_global_start_csv = open(self.out_path + 'globalStart.csv', 'w') 659 | writer_global_end_csv = open(self.out_path + 'globalEnd.csv', 'w') 660 | 661 | writer_local = open(self.out_path + 'local.txt', 'w') 662 | writer_global = open(self.out_path + 'global.txt', 'w') 663 | 664 | edge_num = len(self.all_edge_start) 665 | for i in range(pro_sample): 666 | example = random.randint(0, edge_num - 1) 667 | start = self.all_edge_start[example] 668 | end = self.all_edge_end[example] 669 | example_random = random.randint(0, edge_num - 1) 670 | start_random = self.all_edge_start[example_random] 671 | end_random = self.all_edge_end[example_random] 672 | 673 | example_random2 = random.randint(0, edge_num - 1) 674 | start_random2 = len(self.all_edge_start[example_random2]) 675 | end_random2 = len(self.all_edge_end[example_random2]) 676 | 677 | cosine = self.cosine_sim(start, end, start_random, end_random) 678 | cosine2 = self.cosine_sim(start, end, start_random2, end_random2) 679 | writer_global.write(str(start) + '\t' + str(end) + '\t' + str(start_random) + '\t' + str(end_random) 680 | + '\t' + ','.join(map(str, cosine)) + '\n') 681 | writer_global.write(str(start) + '\t' + str(end) + '\t' + str(start_random2) + '\t' + str(end_random2) 682 | + '\t' + ','.join(map(str, cosine2)) + '\n') 683 | writer_global_start_csv.write(self.csv_string(start, end) + '\n') 684 | writer_global_start_csv.write(self.csv_string(start, end) + '\n') 685 | writer_global_end_csv.write(self.csv_string(start_random, end_random) + '\n') 686 | writer_global_end_csv.write(self.csv_string(start_random2, end_random2) + '\n') 687 | 688 | start_friend_num = len(self.friends[start]) 689 | end_friend_num = len(self.friends[end]) 690 | 691 | positive = random.randint(0, start_friend_num + end_friend_num - 1) 692 | if positive < start_friend_num: 693 | start_positive = start 694 | end_positive = list(self.friends[start].keys())[positive] 695 | else: 696 | end_positive = end 697 | start_positive = list(self.friends[end].keys())[positive - start_friend_num] 698 | writer_local.write(str(start) + '\t' + str(end) + '\t' + str(start_positive) 699 | + '\t' + str(end_positive) + '\t' + '1\n') 700 | writer_local_start_csv.write(self.csv_string(start, end) + '\n') 701 | writer_local_end_csv.write(self.csv_string(start_positive, end_positive) + '\n') 702 | 703 | while True: 704 | neg = random.randint(0, edge_num - 1) 705 | start_negative = self.all_edge_start[neg] 706 | end_negative = self.all_edge_end[neg] 707 | 708 | if start_negative == start or start_negative == end or end_negative == start or end_negative == end: 709 | continue 710 | writer_local.write(str(start) + '\t' + str(end) + '\t' + 711 | str(start_negative) + '\t' + str(end_negative) + '\t' + '0\n') 712 | writer_local_start_csv.write(self.csv_string(start, end) + '\n') 713 | writer_local_end_csv.write(self.csv_string(start_negative, end_negative) + '\n') 714 | break 715 | writer_local.close() 716 | writer_global.close() 717 | writer_local_start_csv.close() 718 | writer_local_end_csv.close() 719 | writer_global_start_csv.close() 720 | writer_global_end_csv.close() 721 | 722 | def cosine_sim(self, start1, end1, start2, end2): 723 | attributes1 = {} 724 | attributes2 = {} 725 | result = [0.0 for _ in range(6)] 726 | for j in self.friends[start1]: 727 | if j in self.selected: 728 | attributes1[self.selected[j] + 1] = self.friend_weight 729 | for j in self.friends[end1]: 730 | if j in self.selected: 731 | attributes1[self.selected[j] + self.max_size + 1] = self.friend_weight 732 | for j in self.friends[start2]: 733 | if j in self.selected: 734 | attributes2[self.selected[j] + 1] = self.friend_weight 735 | for j in self.friends[end2]: 736 | if j in self.selected: 737 | attributes2[self.selected[j] + self.max_size + 1] = self.friend_weight 738 | 739 | result[0] = self.cosine(attributes1, attributes2) 740 | result[3] = self.euclidean(attributes1, attributes2) 741 | 742 | for j in self.friends2[start1]: 743 | if j in self.selected: 744 | attributes1[self.selected[j] + 1] = self.friend2_weight 745 | for j in self.friends2[end1]: 746 | if j in self.selected: 747 | attributes1[self.selected[j] + self.max_size + 1] = self.friend2_weight 748 | for j in self.friends2[start2]: 749 | if j in self.selected: 750 | attributes2[self.selected[j] + 1] = self.friend2_weight 751 | for j in self.friends2[end2]: 752 | if j in self.selected: 753 | attributes2[self.selected[j] + self.max_size + 1] = self.friend2_weight 754 | 755 | result[1] = self.cosine(attributes1, attributes2) 756 | result[4] = self.euclidean(attributes1, attributes2) 757 | 758 | for j in self.friends3[start1]: 759 | if j in self.selected: 760 | attributes1[self.selected[j] + 1] = self.friend3_weight 761 | for j in self.friends3[end1]: 762 | if j in self.selected: 763 | attributes1[self.selected[j] + self.max_size + 1] = self.friend3_weight 764 | for j in self.friends3[start2]: 765 | if j in self.selected: 766 | attributes2[self.selected[j] + 1] = self.friend3_weight 767 | for j in self.friends3[end2]: 768 | if j in self.selected: 769 | attributes2[self.selected[j] + self.max_size + 1] = self.friend3_weight 770 | 771 | result[2] = self.cosine(attributes1, attributes2) 772 | result[5] = self.euclidean(attributes1, attributes2) 773 | return result 774 | 775 | @staticmethod 776 | def cosine(attributes1, attributes2): 777 | _sum = 0 778 | sum1 = 0 779 | sum2 = 0 780 | for index in attributes1: 781 | value1 = attributes1[index] 782 | value2 = attributes2.get(index) 783 | if value2: 784 | _sum += value2 * value1 785 | sum1 += value1 * value1 786 | for index in attributes2: 787 | value2 = attributes2[index] 788 | sum2 += value2 * value2 789 | return _sum / math.sqrt(sum1) * math.sqrt(sum2) 790 | 791 | @staticmethod 792 | def euclidean(attributes1, attributes2): 793 | _sum = 0 794 | for index in attributes1: 795 | value1 = attributes1[index] 796 | value2 = attributes2.get(index) 797 | if value2: 798 | _sum += (value1 - value2) * (value1 - value2) 799 | else: 800 | _sum += value1 * value1 801 | for index in attributes2: 802 | if index not in attributes1: 803 | value2 = attributes2[index] 804 | _sum += value2 * value2 805 | return math.sqrt(_sum) 806 | 807 | def csv_string(self, start, end): 808 | start_node = start 809 | end_node = end 810 | sb = '' 811 | attributes = {} 812 | for j in self.friends[start_node]: 813 | if j in self.selected: 814 | attributes[self.selected[j] + 1] = self.friend_weight 815 | for j in self.friends2[start_node]: 816 | if j in self.selected: 817 | attributes[self.selected[j] + 1] = self.friend2_weight 818 | for j in self.friends3[start_node]: 819 | if j in self.selected: 820 | attributes[self.selected[j] + 1] = self.friend3_weight 821 | 822 | for j in self.friends[end_node]: 823 | if j in self.selected: 824 | attributes[self.selected[j] + self.max_size + 1] = self.friend_weight 825 | for j in self.friends2[end_node]: 826 | if j in self.selected: 827 | attributes[self.selected[j] + self.max_size + 1] = self.friend2_weight 828 | for j in self.friends3[end_node]: 829 | if j in self.selected: 830 | attributes[self.selected[j] + self.max_size + 1] = self.friend3_weight 831 | 832 | attribute_list = list(attributes.items()) 833 | attribute_list.sort(key=lambda x: x) 834 | 835 | list_index = 0 836 | if len(attribute_list) < 1: 837 | index = 2 * self.max_size + 1 838 | else: 839 | index = attribute_list[list_index][0] 840 | for k in range(1, 2 * self.max_size + 1): 841 | if k < index: 842 | sb += '0,' 843 | elif k == index: 844 | sb += str(attribute_list[list_index][1]) + ',' 845 | list_index += 1 846 | if list_index < len(attribute_list): 847 | index = attribute_list[list_index][0] 848 | else: 849 | index = 2 * self.max_size + 1 850 | list_index = 0 851 | if len(attribute_list) < 1: 852 | index = 2 * self.max_size + 1 853 | else: 854 | index = attribute_list[list_index][0] 855 | for k in range(1, 2 * self.max_size + 1): 856 | if k < index: 857 | sb += '1' 858 | elif k == index: 859 | sb += '10' 860 | list_index += 1 861 | if list_index < len(attribute_list): 862 | index = attribute_list[list_index][0] 863 | else: 864 | index = 2 * self.max_size + 1 865 | if k < 2 * self.max_size: 866 | sb += ',' 867 | 868 | return sb 869 | 870 | 871 | class Friends2Thread(Thread): 872 | 873 | def __init__(self, pre: DeepPre, start, end, index): 874 | super(Friends2Thread, self).__init__() 875 | self.pre = pre 876 | self._start = start 877 | self.end = end 878 | self.index = index 879 | 880 | def run(self): 881 | for i in range(self._start, self.end): 882 | for j in self.pre.friends[i]: 883 | for k in self.pre.friends[j]: 884 | if k not in self.pre.friends[i] and i != k: 885 | self.pre.friends2matrix[i][k] = True 886 | 887 | 888 | class Friends3Thread(Thread): 889 | 890 | def __init__(self, pre: DeepPre, start, end, index): 891 | super(Friends3Thread, self).__init__() 892 | self.pre = pre 893 | self._start = start 894 | self.end = end 895 | self.index = index 896 | 897 | def run(self): 898 | for i in range(self._start, self.end): 899 | for j in self.pre.friends2[i]: 900 | for k in self.pre.friends[j]: 901 | if k not in self.pre.friends[i] and k not in self.pre.friends2[i] and i != k: 902 | self.pre.friends2matrix[i][k] = True 903 | 904 | 905 | class Friends4Thread(Thread): 906 | 907 | def __init__(self, pre: DeepPre, start, end, index): 908 | super(Friends4Thread, self).__init__() 909 | self.pre = pre 910 | self._start = start 911 | self.end = end 912 | self.index = index 913 | 914 | def run(self): 915 | for i in range(self._start, self.end): 916 | for j in self.pre.friends3[i]: 917 | for k in self.pre.friends[j]: 918 | if k not in self.pre.friends[i] and \ 919 | k not in self.pre.friends2[i] and \ 920 | k not in self.pre.friends3[i] and i != k: 921 | self.pre.friends2matrix[i][k] = True 922 | 923 | 924 | class WriteThread(Thread): 925 | 926 | def __init__(self, pre: DeepPre, start, end, index): 927 | super(WriteThread, self).__init__() 928 | self.pre = pre 929 | self._start = start 930 | self.end = end 931 | self.index = index 932 | 933 | def run(self): 934 | edge_start = self.pre.edge_start 935 | edge_end = self.pre.edge_end 936 | friends = self.pre.friends 937 | friends2 = self.pre.friends2 938 | friends3 = self.pre.friends3 939 | friend_weight = self.pre.friend_weight 940 | friend2_weight = self.pre.friend2_weight 941 | friend3_weight = self.pre.friend3_weight 942 | out_path = self.pre.out_path 943 | selected = self.pre.selected 944 | try: 945 | writer1 = open(out_path + 'train' + self.index + '.txt', 'w') 946 | writer2 = open(out_path + 'test' + self.index + '.txt', 'w') 947 | 948 | for i in range(self._start, self.end): 949 | start_node = edge_start[i] 950 | end_node = edge_end[i] 951 | sb = '1|' 952 | for j in friends[start_node]: 953 | if j in selected: 954 | sb += str(selected[j]) + ':' + str(friend_weight) + ' ' 955 | for j in friends2[start_node]: 956 | if j in selected: 957 | sb += str(selected[i] + ':' + str(friend2_weight) + ' ') 958 | for j in friends3[start_node]: 959 | if j in selected: 960 | sb += str(selected[i] + ':' + str(friend3_weight) + ' ') 961 | for j in friends[end_node]: 962 | if j in selected: 963 | sb += str(selected[j] + self.pre.max_size) + ':' + str(friend_weight) + ' ' 964 | for j in friends2[end_node]: 965 | if j in selected: 966 | sb += str(selected[j] + self.pre.max_size) + ':' + str(friend2_weight) + ' ' 967 | for j in friends3[end_node]: 968 | if j in selected: 969 | sb += str(selected[j] + self.pre.max_size) + ':' + str(friend3_weight) + ' ' 970 | 971 | start_node = edge_end[i] 972 | end_node = edge_start[i] 973 | sb2 = '0|' 974 | for j in friends[start_node]: 975 | if j in selected: 976 | sb2 += str(selected[j]) + ':' + str(friend_weight) + ' ' 977 | for j in friends2[start_node]: 978 | if j in selected: 979 | sb2 += str(selected[j]) + ':' + str(friend2_weight) + ' ' 980 | for j in friends3[start_node]: 981 | if j in selected: 982 | sb2 += str(selected[j]) + ':' + str(friend3_weight) + ' ' 983 | for j in friends[end_node]: 984 | if j in selected: 985 | sb2 += str(selected[j] + self.pre.max_size) + ':' + str(friend_weight) + ' ' 986 | for j in friends2[end_node]: 987 | if j in selected: 988 | sb2 += str(selected[j] + self.pre.max_size) + ':' + str(friend2_weight) + ' ' 989 | for j in friends3[end_node]: 990 | if j in selected: 991 | sb2 += str(selected[j] + self.pre.max_size) + ':' + str(friend3_weight) + ' ' 992 | 993 | if random.random() < 0.2: 994 | writer = writer1 995 | else: 996 | writer = writer2 997 | writer.write(sb + '\n') 998 | writer.write(sb2 + '\n') 999 | writer1.close() 1000 | writer2.close() 1001 | except Exception as e: 1002 | print(e) 1003 | --------------------------------------------------------------------------------